backup: move RejectFuncs to archiver package

This commit is contained in:
Michael Eischer 2024-08-27 12:30:25 +02:00
parent f9dbcd2531
commit 41c031a19e
5 changed files with 576 additions and 567 deletions

View file

@ -319,18 +319,18 @@ func collectRejectByNameFuncs(opts BackupOptions, repo *repository.Repository) (
// collectRejectFuncs returns a list of all functions which may reject data
// from being saved in a snapshot based on path and file info
func collectRejectFuncs(opts BackupOptions, targets []string, fs fs.FS) (funcs []RejectFunc, err error) {
func collectRejectFuncs(opts BackupOptions, targets []string, fs fs.FS) (funcs []archiver.RejectFunc, err error) {
// allowed devices
if opts.ExcludeOtherFS && !opts.Stdin {
f, err := rejectByDevice(targets, fs)
if opts.ExcludeOtherFS && !opts.Stdin && !opts.StdinCommand {
f, err := archiver.RejectByDevice(targets, fs)
if err != nil {
return nil, err
}
funcs = append(funcs, f)
}
if len(opts.ExcludeLargerThan) != 0 && !opts.Stdin {
f, err := rejectBySize(opts.ExcludeLargerThan)
if len(opts.ExcludeLargerThan) != 0 && !opts.Stdin && !opts.StdinCommand {
f, err := archiver.RejectBySize(opts.ExcludeLargerThan)
if err != nil {
return nil, err
}
@ -342,7 +342,7 @@ func collectRejectFuncs(opts BackupOptions, targets []string, fs fs.FS) (funcs [
}
for _, spec := range opts.ExcludeIfPresent {
f, err := rejectIfPresent(spec)
f, err := archiver.RejectIfPresent(spec, Warnf)
if err != nil {
return nil, err
}

View file

@ -4,10 +4,8 @@ import (
"bufio"
"bytes"
"fmt"
"io"
"os"
"strings"
"sync"
"github.com/restic/restic/internal/debug"
"github.com/restic/restic/internal/errors"
@ -15,64 +13,14 @@ import (
"github.com/restic/restic/internal/fs"
"github.com/restic/restic/internal/repository"
"github.com/restic/restic/internal/textfile"
"github.com/restic/restic/internal/ui"
"github.com/spf13/pflag"
)
type rejectionCache struct {
m map[string]bool
mtx sync.Mutex
}
// Lock locks the mutex in rc.
func (rc *rejectionCache) Lock() {
if rc != nil {
rc.mtx.Lock()
}
}
// Unlock unlocks the mutex in rc.
func (rc *rejectionCache) Unlock() {
if rc != nil {
rc.mtx.Unlock()
}
}
// Get returns the last stored value for dir and a second boolean that
// indicates whether that value was actually written to the cache. It is the
// callers responsibility to call rc.Lock and rc.Unlock before using this
// method, otherwise data races may occur.
func (rc *rejectionCache) Get(dir string) (bool, bool) {
if rc == nil || rc.m == nil {
return false, false
}
v, ok := rc.m[dir]
return v, ok
}
// Store stores a new value for dir. It is the callers responsibility to call
// rc.Lock and rc.Unlock before using this method, otherwise data races may
// occur.
func (rc *rejectionCache) Store(dir string, rejected bool) {
if rc == nil {
return
}
if rc.m == nil {
rc.m = make(map[string]bool)
}
rc.m[dir] = rejected
}
// RejectByNameFunc is a function that takes a filename of a
// file that would be included in the backup. The function returns true if it
// should be excluded (rejected) from the backup.
type RejectByNameFunc func(path string) bool
// RejectFunc is a function that takes a filename and os.FileInfo of a
// file that would be included in the backup. The function returns true if it
// should be excluded (rejected) from the backup.
type RejectFunc func(path string, fi os.FileInfo, fs fs.FS) bool
// rejectByPattern returns a RejectByNameFunc which rejects files that match
// one of the patterns.
func rejectByPattern(patterns []string) RejectByNameFunc {
@ -104,239 +52,6 @@ func rejectByInsensitivePattern(patterns []string) RejectByNameFunc {
}
}
// rejectIfPresent returns a RejectByNameFunc which itself returns whether a path
// should be excluded. The RejectByNameFunc considers a file to be excluded when
// it resides in a directory with an exclusion file, that is specified by
// excludeFileSpec in the form "filename[:content]". The returned error is
// non-nil if the filename component of excludeFileSpec is empty. If rc is
// non-nil, it is going to be used in the RejectByNameFunc to expedite the evaluation
// of a directory based on previous visits.
func rejectIfPresent(excludeFileSpec string) (RejectFunc, error) {
if excludeFileSpec == "" {
return nil, errors.New("name for exclusion tagfile is empty")
}
colon := strings.Index(excludeFileSpec, ":")
if colon == 0 {
return nil, fmt.Errorf("no name for exclusion tagfile provided")
}
tf, tc := "", ""
if colon > 0 {
tf = excludeFileSpec[:colon]
tc = excludeFileSpec[colon+1:]
} else {
tf = excludeFileSpec
}
debug.Log("using %q as exclusion tagfile", tf)
rc := &rejectionCache{}
return func(filename string, _ os.FileInfo, fs fs.FS) bool {
return isExcludedByFile(filename, tf, tc, rc, fs)
}, nil
}
// isExcludedByFile interprets filename as a path and returns true if that file
// is in an excluded directory. A directory is identified as excluded if it contains a
// tagfile which bears the name specified in tagFilename and starts with
// header. If rc is non-nil, it is used to expedite the evaluation of a
// directory based on previous visits.
func isExcludedByFile(filename, tagFilename, header string, rc *rejectionCache, fs fs.FS) bool {
if tagFilename == "" {
return false
}
if fs.Base(filename) == tagFilename {
return false // do not exclude the tagfile itself
}
rc.Lock()
defer rc.Unlock()
dir := fs.Dir(filename)
rejected, visited := rc.Get(dir)
if visited {
return rejected
}
rejected = isDirExcludedByFile(dir, tagFilename, header, fs)
rc.Store(dir, rejected)
return rejected
}
func isDirExcludedByFile(dir, tagFilename, header string, fs fs.FS) bool {
tf := fs.Join(dir, tagFilename)
_, err := fs.Lstat(tf)
if os.IsNotExist(err) {
return false
}
if err != nil {
Warnf("could not access exclusion tagfile: %v", err)
return false
}
// when no signature is given, the mere presence of tf is enough reason
// to exclude filename
if len(header) == 0 {
return true
}
// From this stage, errors mean tagFilename exists but it is malformed.
// Warnings will be generated so that the user is informed that the
// indented ignore-action is not performed.
f, err := fs.OpenFile(tf, os.O_RDONLY, 0)
if err != nil {
Warnf("could not open exclusion tagfile: %v", err)
return false
}
defer func() {
_ = f.Close()
}()
buf := make([]byte, len(header))
_, err = io.ReadFull(f, buf)
// EOF is handled with a dedicated message, otherwise the warning were too cryptic
if err == io.EOF {
Warnf("invalid (too short) signature in exclusion tagfile %q\n", tf)
return false
}
if err != nil {
Warnf("could not read signature from exclusion tagfile %q: %v\n", tf, err)
return false
}
if !bytes.Equal(buf, []byte(header)) {
Warnf("invalid signature in exclusion tagfile %q\n", tf)
return false
}
return true
}
// DeviceMap is used to track allowed source devices for backup. This is used to
// check for crossing mount points during backup (for --one-file-system). It
// maps the name of a source path to its device ID.
type DeviceMap map[string]uint64
// NewDeviceMap creates a new device map from the list of source paths.
func NewDeviceMap(allowedSourcePaths []string, fs fs.FS) (DeviceMap, error) {
deviceMap := make(map[string]uint64)
for _, item := range allowedSourcePaths {
item, err := fs.Abs(fs.Clean(item))
if err != nil {
return nil, err
}
fi, err := fs.Lstat(item)
if err != nil {
return nil, err
}
id, err := fs.DeviceID(fi)
if err != nil {
return nil, err
}
deviceMap[item] = id
}
if len(deviceMap) == 0 {
return nil, errors.New("zero allowed devices")
}
return deviceMap, nil
}
// IsAllowed returns true if the path is located on an allowed device.
func (m DeviceMap) IsAllowed(item string, deviceID uint64, fs fs.FS) (bool, error) {
for dir := item; ; dir = fs.Dir(dir) {
debug.Log("item %v, test dir %v", item, dir)
// find a parent directory that is on an allowed device (otherwise
// we would not traverse the directory at all)
allowedID, ok := m[dir]
if !ok {
if dir == fs.Dir(dir) {
// arrived at root, no allowed device found. this should not happen.
break
}
continue
}
// if the item has a different device ID than the parent directory,
// we crossed a file system boundary
if allowedID != deviceID {
debug.Log("item %v (dir %v) on disallowed device %d", item, dir, deviceID)
return false, nil
}
// item is on allowed device, accept it
debug.Log("item %v allowed", item)
return true, nil
}
return false, fmt.Errorf("item %v (device ID %v) not found, deviceMap: %v", item, deviceID, m)
}
// rejectByDevice returns a RejectFunc that rejects files which are on a
// different file systems than the files/dirs in samples.
func rejectByDevice(samples []string, filesystem fs.FS) (RejectFunc, error) {
deviceMap, err := NewDeviceMap(samples, filesystem)
if err != nil {
return nil, err
}
debug.Log("allowed devices: %v\n", deviceMap)
return func(item string, fi os.FileInfo, fs fs.FS) bool {
id, err := fs.DeviceID(fi)
if err != nil {
// This should never happen because gatherDevices() would have
// errored out earlier. If it still does that's a reason to panic.
panic(err)
}
allowed, err := deviceMap.IsAllowed(fs.Clean(item), id, fs)
if err != nil {
// this should not happen
panic(fmt.Sprintf("error checking device ID of %v: %v", item, err))
}
if allowed {
// accept item
return false
}
// reject everything except directories
if !fi.IsDir() {
return true
}
// special case: make sure we keep mountpoints (directories which
// contain a mounted file system). Test this by checking if the parent
// directory would be included.
parentDir := fs.Dir(fs.Clean(item))
parentFI, err := fs.Lstat(parentDir)
if err != nil {
debug.Log("item %v: error running lstat() on parent directory: %v", item, err)
// if in doubt, reject
return true
}
parentDeviceID, err := fs.DeviceID(parentFI)
if err != nil {
debug.Log("item %v: getting device ID of parent directory: %v", item, err)
// if in doubt, reject
return true
}
parentAllowed, err := deviceMap.IsAllowed(parentDir, parentDeviceID, fs)
if err != nil {
debug.Log("item %v: error checking parent directory: %v", item, err)
// if in doubt, reject
return true
}
if parentAllowed {
// we found a mount point, so accept the directory
return false
}
// reject everything else
return true
}, nil
}
// rejectResticCache returns a RejectByNameFunc that rejects the restic cache
// directory (if set).
func rejectResticCache(repo *repository.Repository) (RejectByNameFunc, error) {
@ -361,28 +76,6 @@ func rejectResticCache(repo *repository.Repository) (RejectByNameFunc, error) {
}, nil
}
func rejectBySize(maxSizeStr string) (RejectFunc, error) {
maxSize, err := ui.ParseBytes(maxSizeStr)
if err != nil {
return nil, err
}
return func(item string, fi os.FileInfo, _ fs.FS) bool {
// directory will be ignored
if fi.IsDir() {
return false
}
filesize := fi.Size()
if filesize > maxSize {
debug.Log("file %s is oversize: %d", item, filesize)
return true
}
return false
}, nil
}
// readPatternsFromFiles reads all files and returns the list of
// patterns. For each line, leading and trailing white space is removed
// and comment lines are ignored. For each remaining pattern, environment

View file

@ -1,12 +1,7 @@
package main
import (
"os"
"path/filepath"
"testing"
"github.com/restic/restic/internal/fs"
"github.com/restic/restic/internal/test"
)
func TestRejectByPattern(t *testing.T) {
@ -62,252 +57,3 @@ func TestRejectByInsensitivePattern(t *testing.T) {
})
}
}
func TestIsExcludedByFile(t *testing.T) {
const (
tagFilename = "CACHEDIR.TAG"
header = "Signature: 8a477f597d28d172789f06886806bc55"
)
tests := []struct {
name string
tagFile string
content string
want bool
}{
{"NoTagfile", "", "", false},
{"EmptyTagfile", tagFilename, "", true},
{"UnnamedTagFile", "", header, false},
{"WrongTagFile", "notatagfile", header, false},
{"IncorrectSig", tagFilename, header[1:], false},
{"ValidSig", tagFilename, header, true},
{"ValidPlusStuff", tagFilename, header + "foo", true},
{"ValidPlusNewlineAndStuff", tagFilename, header + "\nbar", true},
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
tempDir := test.TempDir(t)
foo := filepath.Join(tempDir, "foo")
err := os.WriteFile(foo, []byte("foo"), 0666)
if err != nil {
t.Fatalf("could not write file: %v", err)
}
if tc.tagFile != "" {
tagFile := filepath.Join(tempDir, tc.tagFile)
err = os.WriteFile(tagFile, []byte(tc.content), 0666)
if err != nil {
t.Fatalf("could not write tagfile: %v", err)
}
}
h := header
if tc.content == "" {
h = ""
}
if got := isExcludedByFile(foo, tagFilename, h, nil, &fs.Local{}); tc.want != got {
t.Fatalf("expected %v, got %v", tc.want, got)
}
})
}
}
// TestMultipleIsExcludedByFile is for testing that multiple instances of
// the --exclude-if-present parameter (or the shortcut --exclude-caches do not
// cancel each other out. It was initially written to demonstrate a bug in
// rejectIfPresent.
func TestMultipleIsExcludedByFile(t *testing.T) {
tempDir := test.TempDir(t)
// Create some files in a temporary directory.
// Files in UPPERCASE will be used as exclusion triggers later on.
// We will test the inclusion later, so we add the expected value as
// a bool.
files := []struct {
path string
incl bool
}{
{"42", true},
// everything in foodir except the NOFOO tagfile
// should not be included.
{"foodir/NOFOO", true},
{"foodir/foo", false},
{"foodir/foosub/underfoo", false},
// everything in bardir except the NOBAR tagfile
// should not be included.
{"bardir/NOBAR", true},
{"bardir/bar", false},
{"bardir/barsub/underbar", false},
// everything in bazdir should be included.
{"bazdir/baz", true},
{"bazdir/bazsub/underbaz", true},
}
var errs []error
for _, f := range files {
// create directories first, then the file
p := filepath.Join(tempDir, filepath.FromSlash(f.path))
errs = append(errs, os.MkdirAll(filepath.Dir(p), 0700))
errs = append(errs, os.WriteFile(p, []byte(f.path), 0600))
}
test.OKs(t, errs) // see if anything went wrong during the creation
// create two rejection functions, one that tests for the NOFOO file
// and one for the NOBAR file
fooExclude, _ := rejectIfPresent("NOFOO")
barExclude, _ := rejectIfPresent("NOBAR")
// To mock the archiver scanning walk, we create filepath.WalkFn
// that tests against the two rejection functions and stores
// the result in a map against we can test later.
m := make(map[string]bool)
walk := func(p string, fi os.FileInfo, err error) error {
if err != nil {
return err
}
excludedByFoo := fooExclude(p, nil, &fs.Local{})
excludedByBar := barExclude(p, nil, &fs.Local{})
excluded := excludedByFoo || excludedByBar
// the log message helps debugging in case the test fails
t.Logf("%q: %v || %v = %v", p, excludedByFoo, excludedByBar, excluded)
m[p] = !excluded
if excluded {
return filepath.SkipDir
}
return nil
}
// walk through the temporary file and check the error
test.OK(t, filepath.Walk(tempDir, walk))
// compare whether the walk gave the expected values for the test cases
for _, f := range files {
p := filepath.Join(tempDir, filepath.FromSlash(f.path))
if m[p] != f.incl {
t.Errorf("inclusion status of %s is wrong: want %v, got %v", f.path, f.incl, m[p])
}
}
}
// TestIsExcludedByFileSize is for testing the instance of
// --exclude-larger-than parameters
func TestIsExcludedByFileSize(t *testing.T) {
tempDir := test.TempDir(t)
// Max size of file is set to be 1k
maxSizeStr := "1k"
// Create some files in a temporary directory.
// Files in UPPERCASE will be used as exclusion triggers later on.
// We will test the inclusion later, so we add the expected value as
// a bool.
files := []struct {
path string
size int64
incl bool
}{
{"42", 100, true},
// everything in foodir except the FOOLARGE tagfile
// should not be included.
{"foodir/FOOLARGE", 2048, false},
{"foodir/foo", 1002, true},
{"foodir/foosub/underfoo", 100, true},
// everything in bardir except the BARLARGE tagfile
// should not be included.
{"bardir/BARLARGE", 1030, false},
{"bardir/bar", 1000, true},
{"bardir/barsub/underbar", 500, true},
// everything in bazdir should be included.
{"bazdir/baz", 100, true},
{"bazdir/bazsub/underbaz", 200, true},
}
var errs []error
for _, f := range files {
// create directories first, then the file
p := filepath.Join(tempDir, filepath.FromSlash(f.path))
errs = append(errs, os.MkdirAll(filepath.Dir(p), 0700))
file, err := os.OpenFile(p, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0600)
errs = append(errs, err)
if err == nil {
// create a file with given size
errs = append(errs, file.Truncate(f.size))
}
errs = append(errs, file.Close())
}
test.OKs(t, errs) // see if anything went wrong during the creation
// create rejection function
sizeExclude, _ := rejectBySize(maxSizeStr)
// To mock the archiver scanning walk, we create filepath.WalkFn
// that tests against the two rejection functions and stores
// the result in a map against we can test later.
m := make(map[string]bool)
walk := func(p string, fi os.FileInfo, err error) error {
if err != nil {
return err
}
excluded := sizeExclude(p, fi, nil)
// the log message helps debugging in case the test fails
t.Logf("%q: dir:%t; size:%d; excluded:%v", p, fi.IsDir(), fi.Size(), excluded)
m[p] = !excluded
return nil
}
// walk through the temporary file and check the error
test.OK(t, filepath.Walk(tempDir, walk))
// compare whether the walk gave the expected values for the test cases
for _, f := range files {
p := filepath.Join(tempDir, filepath.FromSlash(f.path))
if m[p] != f.incl {
t.Errorf("inclusion status of %s is wrong: want %v, got %v", f.path, f.incl, m[p])
}
}
}
func TestDeviceMap(t *testing.T) {
deviceMap := DeviceMap{
filepath.FromSlash("/"): 1,
filepath.FromSlash("/usr/local"): 5,
}
var tests = []struct {
item string
deviceID uint64
allowed bool
}{
{"/root", 1, true},
{"/usr", 1, true},
{"/proc", 2, false},
{"/proc/1234", 2, false},
{"/usr", 3, false},
{"/usr/share", 3, false},
{"/usr/local", 5, true},
{"/usr/local/foobar", 5, true},
{"/usr/local/foobar/submount", 23, false},
{"/usr/local/foobar/submount/file", 23, false},
{"/usr/local/foobar/outhersubmount", 1, false},
{"/usr/local/foobar/outhersubmount/otherfile", 1, false},
}
for _, test := range tests {
t.Run("", func(t *testing.T) {
res, err := deviceMap.IsAllowed(filepath.FromSlash(test.item), test.deviceID, &fs.Local{})
if err != nil {
t.Fatal(err)
}
if res != test.allowed {
t.Fatalf("wrong result returned by IsAllowed(%v): want %v, got %v", test.item, test.allowed, res)
}
})
}
}

View file

@ -0,0 +1,311 @@
package archiver
import (
"bytes"
"fmt"
"io"
"os"
"strings"
"sync"
"github.com/restic/restic/internal/debug"
"github.com/restic/restic/internal/errors"
"github.com/restic/restic/internal/fs"
"github.com/restic/restic/internal/ui"
)
type rejectionCache struct {
m map[string]bool
mtx sync.Mutex
}
func newRejectionCache() *rejectionCache {
return &rejectionCache{m: make(map[string]bool)}
}
// Lock locks the mutex in rc.
func (rc *rejectionCache) Lock() {
rc.mtx.Lock()
}
// Unlock unlocks the mutex in rc.
func (rc *rejectionCache) Unlock() {
rc.mtx.Unlock()
}
// Get returns the last stored value for dir and a second boolean that
// indicates whether that value was actually written to the cache. It is the
// callers responsibility to call rc.Lock and rc.Unlock before using this
// method, otherwise data races may occur.
func (rc *rejectionCache) Get(dir string) (bool, bool) {
v, ok := rc.m[dir]
return v, ok
}
// Store stores a new value for dir. It is the callers responsibility to call
// rc.Lock and rc.Unlock before using this method, otherwise data races may
// occur.
func (rc *rejectionCache) Store(dir string, rejected bool) {
rc.m[dir] = rejected
}
// RejectFunc is a function that takes a filename and os.FileInfo of a
// file that would be included in the backup. The function returns true if it
// should be excluded (rejected) from the backup.
type RejectFunc func(path string, fi os.FileInfo, fs fs.FS) bool
// RejectIfPresent returns a RejectByNameFunc which itself returns whether a path
// should be excluded. The RejectByNameFunc considers a file to be excluded when
// it resides in a directory with an exclusion file, that is specified by
// excludeFileSpec in the form "filename[:content]". The returned error is
// non-nil if the filename component of excludeFileSpec is empty. If rc is
// non-nil, it is going to be used in the RejectByNameFunc to expedite the evaluation
// of a directory based on previous visits.
func RejectIfPresent(excludeFileSpec string, warnf func(msg string, args ...interface{})) (RejectFunc, error) {
if excludeFileSpec == "" {
return nil, errors.New("name for exclusion tagfile is empty")
}
colon := strings.Index(excludeFileSpec, ":")
if colon == 0 {
return nil, fmt.Errorf("no name for exclusion tagfile provided")
}
tf, tc := "", ""
if colon > 0 {
tf = excludeFileSpec[:colon]
tc = excludeFileSpec[colon+1:]
} else {
tf = excludeFileSpec
}
debug.Log("using %q as exclusion tagfile", tf)
rc := newRejectionCache()
return func(filename string, _ os.FileInfo, fs fs.FS) bool {
return isExcludedByFile(filename, tf, tc, rc, fs, warnf)
}, nil
}
// isExcludedByFile interprets filename as a path and returns true if that file
// is in an excluded directory. A directory is identified as excluded if it contains a
// tagfile which bears the name specified in tagFilename and starts with
// header. If rc is non-nil, it is used to expedite the evaluation of a
// directory based on previous visits.
func isExcludedByFile(filename, tagFilename, header string, rc *rejectionCache, fs fs.FS, warnf func(msg string, args ...interface{})) bool {
if tagFilename == "" {
return false
}
if fs.Base(filename) == tagFilename {
return false // do not exclude the tagfile itself
}
rc.Lock()
defer rc.Unlock()
dir := fs.Dir(filename)
rejected, visited := rc.Get(dir)
if visited {
return rejected
}
rejected = isDirExcludedByFile(dir, tagFilename, header, fs, warnf)
rc.Store(dir, rejected)
return rejected
}
func isDirExcludedByFile(dir, tagFilename, header string, fs fs.FS, warnf func(msg string, args ...interface{})) bool {
tf := fs.Join(dir, tagFilename)
_, err := fs.Lstat(tf)
if os.IsNotExist(err) {
return false
}
if err != nil {
warnf("could not access exclusion tagfile: %v", err)
return false
}
// when no signature is given, the mere presence of tf is enough reason
// to exclude filename
if len(header) == 0 {
return true
}
// From this stage, errors mean tagFilename exists but it is malformed.
// Warnings will be generated so that the user is informed that the
// indented ignore-action is not performed.
f, err := fs.OpenFile(tf, os.O_RDONLY, 0)
if err != nil {
warnf("could not open exclusion tagfile: %v", err)
return false
}
defer func() {
_ = f.Close()
}()
buf := make([]byte, len(header))
_, err = io.ReadFull(f, buf)
// EOF is handled with a dedicated message, otherwise the warning were too cryptic
if err == io.EOF {
warnf("invalid (too short) signature in exclusion tagfile %q\n", tf)
return false
}
if err != nil {
warnf("could not read signature from exclusion tagfile %q: %v\n", tf, err)
return false
}
if !bytes.Equal(buf, []byte(header)) {
warnf("invalid signature in exclusion tagfile %q\n", tf)
return false
}
return true
}
// deviceMap is used to track allowed source devices for backup. This is used to
// check for crossing mount points during backup (for --one-file-system). It
// maps the name of a source path to its device ID.
type deviceMap map[string]uint64
// newDeviceMap creates a new device map from the list of source paths.
func newDeviceMap(allowedSourcePaths []string, fs fs.FS) (deviceMap, error) {
deviceMap := make(map[string]uint64)
for _, item := range allowedSourcePaths {
item, err := fs.Abs(fs.Clean(item))
if err != nil {
return nil, err
}
fi, err := fs.Lstat(item)
if err != nil {
return nil, err
}
id, err := fs.DeviceID(fi)
if err != nil {
return nil, err
}
deviceMap[item] = id
}
if len(deviceMap) == 0 {
return nil, errors.New("zero allowed devices")
}
return deviceMap, nil
}
// IsAllowed returns true if the path is located on an allowed device.
func (m deviceMap) IsAllowed(item string, deviceID uint64, fs fs.FS) (bool, error) {
for dir := item; ; dir = fs.Dir(dir) {
debug.Log("item %v, test dir %v", item, dir)
// find a parent directory that is on an allowed device (otherwise
// we would not traverse the directory at all)
allowedID, ok := m[dir]
if !ok {
if dir == fs.Dir(dir) {
// arrived at root, no allowed device found. this should not happen.
break
}
continue
}
// if the item has a different device ID than the parent directory,
// we crossed a file system boundary
if allowedID != deviceID {
debug.Log("item %v (dir %v) on disallowed device %d", item, dir, deviceID)
return false, nil
}
// item is on allowed device, accept it
debug.Log("item %v allowed", item)
return true, nil
}
return false, fmt.Errorf("item %v (device ID %v) not found, deviceMap: %v", item, deviceID, m)
}
// RejectByDevice returns a RejectFunc that rejects files which are on a
// different file systems than the files/dirs in samples.
func RejectByDevice(samples []string, filesystem fs.FS) (RejectFunc, error) {
deviceMap, err := newDeviceMap(samples, filesystem)
if err != nil {
return nil, err
}
debug.Log("allowed devices: %v\n", deviceMap)
return func(item string, fi os.FileInfo, fs fs.FS) bool {
id, err := fs.DeviceID(fi)
if err != nil {
// This should never happen because gatherDevices() would have
// errored out earlier. If it still does that's a reason to panic.
panic(err)
}
allowed, err := deviceMap.IsAllowed(fs.Clean(item), id, fs)
if err != nil {
// this should not happen
panic(fmt.Sprintf("error checking device ID of %v: %v", item, err))
}
if allowed {
// accept item
return false
}
// reject everything except directories
if !fi.IsDir() {
return true
}
// special case: make sure we keep mountpoints (directories which
// contain a mounted file system). Test this by checking if the parent
// directory would be included.
parentDir := fs.Dir(fs.Clean(item))
parentFI, err := fs.Lstat(parentDir)
if err != nil {
debug.Log("item %v: error running lstat() on parent directory: %v", item, err)
// if in doubt, reject
return true
}
parentDeviceID, err := fs.DeviceID(parentFI)
if err != nil {
debug.Log("item %v: getting device ID of parent directory: %v", item, err)
// if in doubt, reject
return true
}
parentAllowed, err := deviceMap.IsAllowed(parentDir, parentDeviceID, fs)
if err != nil {
debug.Log("item %v: error checking parent directory: %v", item, err)
// if in doubt, reject
return true
}
if parentAllowed {
// we found a mount point, so accept the directory
return false
}
// reject everything else
return true
}, nil
}
func RejectBySize(maxSizeStr string) (RejectFunc, error) {
maxSize, err := ui.ParseBytes(maxSizeStr)
if err != nil {
return nil, err
}
return func(item string, fi os.FileInfo, _ fs.FS) bool {
// directory will be ignored
if fi.IsDir() {
return false
}
filesize := fi.Size()
if filesize > maxSize {
debug.Log("file %s is oversize: %d", item, filesize)
return true
}
return false
}, nil
}

View file

@ -0,0 +1,259 @@
package archiver
import (
"os"
"path/filepath"
"testing"
"github.com/restic/restic/internal/fs"
"github.com/restic/restic/internal/test"
)
func TestIsExcludedByFile(t *testing.T) {
const (
tagFilename = "CACHEDIR.TAG"
header = "Signature: 8a477f597d28d172789f06886806bc55"
)
tests := []struct {
name string
tagFile string
content string
want bool
}{
{"NoTagfile", "", "", false},
{"EmptyTagfile", tagFilename, "", true},
{"UnnamedTagFile", "", header, false},
{"WrongTagFile", "notatagfile", header, false},
{"IncorrectSig", tagFilename, header[1:], false},
{"ValidSig", tagFilename, header, true},
{"ValidPlusStuff", tagFilename, header + "foo", true},
{"ValidPlusNewlineAndStuff", tagFilename, header + "\nbar", true},
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
tempDir := test.TempDir(t)
foo := filepath.Join(tempDir, "foo")
err := os.WriteFile(foo, []byte("foo"), 0666)
if err != nil {
t.Fatalf("could not write file: %v", err)
}
if tc.tagFile != "" {
tagFile := filepath.Join(tempDir, tc.tagFile)
err = os.WriteFile(tagFile, []byte(tc.content), 0666)
if err != nil {
t.Fatalf("could not write tagfile: %v", err)
}
}
h := header
if tc.content == "" {
h = ""
}
if got := isExcludedByFile(foo, tagFilename, h, newRejectionCache(), &fs.Local{}, func(msg string, args ...interface{}) { t.Logf(msg, args...) }); tc.want != got {
t.Fatalf("expected %v, got %v", tc.want, got)
}
})
}
}
// TestMultipleIsExcludedByFile is for testing that multiple instances of
// the --exclude-if-present parameter (or the shortcut --exclude-caches do not
// cancel each other out. It was initially written to demonstrate a bug in
// rejectIfPresent.
func TestMultipleIsExcludedByFile(t *testing.T) {
tempDir := test.TempDir(t)
// Create some files in a temporary directory.
// Files in UPPERCASE will be used as exclusion triggers later on.
// We will test the inclusion later, so we add the expected value as
// a bool.
files := []struct {
path string
incl bool
}{
{"42", true},
// everything in foodir except the NOFOO tagfile
// should not be included.
{"foodir/NOFOO", true},
{"foodir/foo", false},
{"foodir/foosub/underfoo", false},
// everything in bardir except the NOBAR tagfile
// should not be included.
{"bardir/NOBAR", true},
{"bardir/bar", false},
{"bardir/barsub/underbar", false},
// everything in bazdir should be included.
{"bazdir/baz", true},
{"bazdir/bazsub/underbaz", true},
}
var errs []error
for _, f := range files {
// create directories first, then the file
p := filepath.Join(tempDir, filepath.FromSlash(f.path))
errs = append(errs, os.MkdirAll(filepath.Dir(p), 0700))
errs = append(errs, os.WriteFile(p, []byte(f.path), 0600))
}
test.OKs(t, errs) // see if anything went wrong during the creation
// create two rejection functions, one that tests for the NOFOO file
// and one for the NOBAR file
fooExclude, _ := RejectIfPresent("NOFOO", nil)
barExclude, _ := RejectIfPresent("NOBAR", nil)
// To mock the archiver scanning walk, we create filepath.WalkFn
// that tests against the two rejection functions and stores
// the result in a map against we can test later.
m := make(map[string]bool)
walk := func(p string, fi os.FileInfo, err error) error {
if err != nil {
return err
}
excludedByFoo := fooExclude(p, nil, &fs.Local{})
excludedByBar := barExclude(p, nil, &fs.Local{})
excluded := excludedByFoo || excludedByBar
// the log message helps debugging in case the test fails
t.Logf("%q: %v || %v = %v", p, excludedByFoo, excludedByBar, excluded)
m[p] = !excluded
if excluded {
return filepath.SkipDir
}
return nil
}
// walk through the temporary file and check the error
test.OK(t, filepath.Walk(tempDir, walk))
// compare whether the walk gave the expected values for the test cases
for _, f := range files {
p := filepath.Join(tempDir, filepath.FromSlash(f.path))
if m[p] != f.incl {
t.Errorf("inclusion status of %s is wrong: want %v, got %v", f.path, f.incl, m[p])
}
}
}
// TestIsExcludedByFileSize is for testing the instance of
// --exclude-larger-than parameters
func TestIsExcludedByFileSize(t *testing.T) {
tempDir := test.TempDir(t)
// Max size of file is set to be 1k
maxSizeStr := "1k"
// Create some files in a temporary directory.
// Files in UPPERCASE will be used as exclusion triggers later on.
// We will test the inclusion later, so we add the expected value as
// a bool.
files := []struct {
path string
size int64
incl bool
}{
{"42", 100, true},
// everything in foodir except the FOOLARGE tagfile
// should not be included.
{"foodir/FOOLARGE", 2048, false},
{"foodir/foo", 1002, true},
{"foodir/foosub/underfoo", 100, true},
// everything in bardir except the BARLARGE tagfile
// should not be included.
{"bardir/BARLARGE", 1030, false},
{"bardir/bar", 1000, true},
{"bardir/barsub/underbar", 500, true},
// everything in bazdir should be included.
{"bazdir/baz", 100, true},
{"bazdir/bazsub/underbaz", 200, true},
}
var errs []error
for _, f := range files {
// create directories first, then the file
p := filepath.Join(tempDir, filepath.FromSlash(f.path))
errs = append(errs, os.MkdirAll(filepath.Dir(p), 0700))
file, err := os.OpenFile(p, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0600)
errs = append(errs, err)
if err == nil {
// create a file with given size
errs = append(errs, file.Truncate(f.size))
}
errs = append(errs, file.Close())
}
test.OKs(t, errs) // see if anything went wrong during the creation
// create rejection function
sizeExclude, _ := RejectBySize(maxSizeStr)
// To mock the archiver scanning walk, we create filepath.WalkFn
// that tests against the two rejection functions and stores
// the result in a map against we can test later.
m := make(map[string]bool)
walk := func(p string, fi os.FileInfo, err error) error {
if err != nil {
return err
}
excluded := sizeExclude(p, fi, nil)
// the log message helps debugging in case the test fails
t.Logf("%q: dir:%t; size:%d; excluded:%v", p, fi.IsDir(), fi.Size(), excluded)
m[p] = !excluded
return nil
}
// walk through the temporary file and check the error
test.OK(t, filepath.Walk(tempDir, walk))
// compare whether the walk gave the expected values for the test cases
for _, f := range files {
p := filepath.Join(tempDir, filepath.FromSlash(f.path))
if m[p] != f.incl {
t.Errorf("inclusion status of %s is wrong: want %v, got %v", f.path, f.incl, m[p])
}
}
}
func TestDeviceMap(t *testing.T) {
deviceMap := deviceMap{
filepath.FromSlash("/"): 1,
filepath.FromSlash("/usr/local"): 5,
}
var tests = []struct {
item string
deviceID uint64
allowed bool
}{
{"/root", 1, true},
{"/usr", 1, true},
{"/proc", 2, false},
{"/proc/1234", 2, false},
{"/usr", 3, false},
{"/usr/share", 3, false},
{"/usr/local", 5, true},
{"/usr/local/foobar", 5, true},
{"/usr/local/foobar/submount", 23, false},
{"/usr/local/foobar/submount/file", 23, false},
{"/usr/local/foobar/outhersubmount", 1, false},
{"/usr/local/foobar/outhersubmount/otherfile", 1, false},
}
for _, test := range tests {
t.Run("", func(t *testing.T) {
res, err := deviceMap.IsAllowed(filepath.FromSlash(test.item), test.deviceID, &fs.Local{})
if err != nil {
t.Fatal(err)
}
if res != test.allowed {
t.Fatalf("wrong result returned by IsAllowed(%v): want %v, got %v", test.item, test.allowed, res)
}
})
}
}