From f9dbcd25319a742e3036a447c92618b61de9a178 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Tue, 27 Aug 2024 12:07:26 +0200 Subject: [PATCH] backup: convert reject funcs to use FS interface Depending on parameters the paths in a snapshot do not directly correspond to real paths on the filesystem. Therefore, reject funcs must use the FS interface to work correctly. --- cmd/restic/cmd_backup.go | 80 +++++++++++++++--------------- cmd/restic/exclude.go | 50 +++++++++---------- cmd/restic/exclude_test.go | 11 ++-- internal/archiver/archiver.go | 10 ++-- internal/archiver/archiver_test.go | 14 +++--- internal/archiver/scanner.go | 8 +-- internal/archiver/scanner_test.go | 2 +- internal/fs/fs_local.go | 6 +++ internal/fs/fs_reader.go | 4 ++ internal/fs/interface.go | 1 + 10 files changed, 98 insertions(+), 88 deletions(-) diff --git a/cmd/restic/cmd_backup.go b/cmd/restic/cmd_backup.go index 562108a33..1fdec081b 100644 --- a/cmd/restic/cmd_backup.go +++ b/cmd/restic/cmd_backup.go @@ -314,6 +314,29 @@ func collectRejectByNameFuncs(opts BackupOptions, repo *repository.Repository) ( } fs = append(fs, fsPatterns...) + return fs, nil +} + +// collectRejectFuncs returns a list of all functions which may reject data +// from being saved in a snapshot based on path and file info +func collectRejectFuncs(opts BackupOptions, targets []string, fs fs.FS) (funcs []RejectFunc, err error) { + // allowed devices + if opts.ExcludeOtherFS && !opts.Stdin { + f, err := rejectByDevice(targets, fs) + if err != nil { + return nil, err + } + funcs = append(funcs, f) + } + + if len(opts.ExcludeLargerThan) != 0 && !opts.Stdin { + f, err := rejectBySize(opts.ExcludeLargerThan) + if err != nil { + return nil, err + } + funcs = append(funcs, f) + } + if opts.ExcludeCaches { opts.ExcludeIfPresent = append(opts.ExcludeIfPresent, "CACHEDIR.TAG:Signature: 8a477f597d28d172789f06886806bc55") } @@ -324,33 +347,10 @@ func collectRejectByNameFuncs(opts BackupOptions, repo *repository.Repository) ( return nil, err } - fs = append(fs, f) + funcs = append(funcs, f) } - return fs, nil -} - -// collectRejectFuncs returns a list of all functions which may reject data -// from being saved in a snapshot based on path and file info -func collectRejectFuncs(opts BackupOptions, targets []string) (fs []RejectFunc, err error) { - // allowed devices - if opts.ExcludeOtherFS && !opts.Stdin { - f, err := rejectByDevice(targets) - if err != nil { - return nil, err - } - fs = append(fs, f) - } - - if len(opts.ExcludeLargerThan) != 0 && !opts.Stdin { - f, err := rejectBySize(opts.ExcludeLargerThan) - if err != nil { - return nil, err - } - fs = append(fs, f) - } - - return fs, nil + return funcs, nil } // collectTargets returns a list of target files/dirs from several sources. @@ -505,12 +505,6 @@ func runBackup(ctx context.Context, opts BackupOptions, gopts GlobalOptions, ter return err } - // rejectFuncs collect functions that can reject items from the backup based on path and file info - rejectFuncs, err := collectRejectFuncs(opts, targets) - if err != nil { - return err - } - var parentSnapshot *restic.Snapshot if !opts.Stdin { parentSnapshot, err = findParentSnapshot(ctx, repo, opts, targets, timeStamp) @@ -547,15 +541,6 @@ func runBackup(ctx context.Context, opts BackupOptions, gopts GlobalOptions, ter return true } - selectFilter := func(item string, fi os.FileInfo) bool { - for _, reject := range rejectFuncs { - if reject(item, fi) { - return false - } - } - return true - } - var targetFS fs.FS = fs.Local{} if runtime.GOOS == "windows" && opts.UseFsSnapshot { if err = fs.HasSufficientPrivilegesForVSS(); err != nil { @@ -598,6 +583,21 @@ func runBackup(ctx context.Context, opts BackupOptions, gopts GlobalOptions, ter targets = []string{filename} } + // rejectFuncs collect functions that can reject items from the backup based on path and file info + rejectFuncs, err := collectRejectFuncs(opts, targets, targetFS) + if err != nil { + return err + } + + selectFilter := func(item string, fi os.FileInfo, fs fs.FS) bool { + for _, reject := range rejectFuncs { + if reject(item, fi, fs) { + return false + } + } + return true + } + wg, wgCtx := errgroup.WithContext(ctx) cancelCtx, cancel := context.WithCancel(wgCtx) defer cancel() diff --git a/cmd/restic/exclude.go b/cmd/restic/exclude.go index 4657e4915..f1e1011f2 100644 --- a/cmd/restic/exclude.go +++ b/cmd/restic/exclude.go @@ -6,7 +6,6 @@ import ( "fmt" "io" "os" - "path/filepath" "strings" "sync" @@ -72,7 +71,7 @@ type RejectByNameFunc func(path string) bool // RejectFunc is a function that takes a filename and os.FileInfo of a // file that would be included in the backup. The function returns true if it // should be excluded (rejected) from the backup. -type RejectFunc func(path string, fi os.FileInfo) bool +type RejectFunc func(path string, fi os.FileInfo, fs fs.FS) bool // rejectByPattern returns a RejectByNameFunc which rejects files that match // one of the patterns. @@ -112,7 +111,7 @@ func rejectByInsensitivePattern(patterns []string) RejectByNameFunc { // non-nil if the filename component of excludeFileSpec is empty. If rc is // non-nil, it is going to be used in the RejectByNameFunc to expedite the evaluation // of a directory based on previous visits. -func rejectIfPresent(excludeFileSpec string) (RejectByNameFunc, error) { +func rejectIfPresent(excludeFileSpec string) (RejectFunc, error) { if excludeFileSpec == "" { return nil, errors.New("name for exclusion tagfile is empty") } @@ -129,10 +128,9 @@ func rejectIfPresent(excludeFileSpec string) (RejectByNameFunc, error) { } debug.Log("using %q as exclusion tagfile", tf) rc := &rejectionCache{} - fn := func(filename string) bool { - return isExcludedByFile(filename, tf, tc, rc) - } - return fn, nil + return func(filename string, _ os.FileInfo, fs fs.FS) bool { + return isExcludedByFile(filename, tf, tc, rc, fs) + }, nil } // isExcludedByFile interprets filename as a path and returns true if that file @@ -140,28 +138,28 @@ func rejectIfPresent(excludeFileSpec string) (RejectByNameFunc, error) { // tagfile which bears the name specified in tagFilename and starts with // header. If rc is non-nil, it is used to expedite the evaluation of a // directory based on previous visits. -func isExcludedByFile(filename, tagFilename, header string, rc *rejectionCache) bool { +func isExcludedByFile(filename, tagFilename, header string, rc *rejectionCache, fs fs.FS) bool { if tagFilename == "" { return false } - dir, base := filepath.Split(filename) - if base == tagFilename { + if fs.Base(filename) == tagFilename { return false // do not exclude the tagfile itself } rc.Lock() defer rc.Unlock() + dir := fs.Dir(filename) rejected, visited := rc.Get(dir) if visited { return rejected } - rejected = isDirExcludedByFile(dir, tagFilename, header) + rejected = isDirExcludedByFile(dir, tagFilename, header, fs) rc.Store(dir, rejected) return rejected } -func isDirExcludedByFile(dir, tagFilename, header string) bool { - tf := filepath.Join(dir, tagFilename) +func isDirExcludedByFile(dir, tagFilename, header string, fs fs.FS) bool { + tf := fs.Join(dir, tagFilename) _, err := fs.Lstat(tf) if os.IsNotExist(err) { return false @@ -178,7 +176,7 @@ func isDirExcludedByFile(dir, tagFilename, header string) bool { // From this stage, errors mean tagFilename exists but it is malformed. // Warnings will be generated so that the user is informed that the // indented ignore-action is not performed. - f, err := os.Open(tf) + f, err := fs.OpenFile(tf, os.O_RDONLY, 0) if err != nil { Warnf("could not open exclusion tagfile: %v", err) return false @@ -210,11 +208,11 @@ func isDirExcludedByFile(dir, tagFilename, header string) bool { type DeviceMap map[string]uint64 // NewDeviceMap creates a new device map from the list of source paths. -func NewDeviceMap(allowedSourcePaths []string) (DeviceMap, error) { +func NewDeviceMap(allowedSourcePaths []string, fs fs.FS) (DeviceMap, error) { deviceMap := make(map[string]uint64) for _, item := range allowedSourcePaths { - item, err := filepath.Abs(filepath.Clean(item)) + item, err := fs.Abs(fs.Clean(item)) if err != nil { return nil, err } @@ -240,15 +238,15 @@ func NewDeviceMap(allowedSourcePaths []string) (DeviceMap, error) { } // IsAllowed returns true if the path is located on an allowed device. -func (m DeviceMap) IsAllowed(item string, deviceID uint64) (bool, error) { - for dir := item; ; dir = filepath.Dir(dir) { +func (m DeviceMap) IsAllowed(item string, deviceID uint64, fs fs.FS) (bool, error) { + for dir := item; ; dir = fs.Dir(dir) { debug.Log("item %v, test dir %v", item, dir) // find a parent directory that is on an allowed device (otherwise // we would not traverse the directory at all) allowedID, ok := m[dir] if !ok { - if dir == filepath.Dir(dir) { + if dir == fs.Dir(dir) { // arrived at root, no allowed device found. this should not happen. break } @@ -272,14 +270,14 @@ func (m DeviceMap) IsAllowed(item string, deviceID uint64) (bool, error) { // rejectByDevice returns a RejectFunc that rejects files which are on a // different file systems than the files/dirs in samples. -func rejectByDevice(samples []string) (RejectFunc, error) { - deviceMap, err := NewDeviceMap(samples) +func rejectByDevice(samples []string, filesystem fs.FS) (RejectFunc, error) { + deviceMap, err := NewDeviceMap(samples, filesystem) if err != nil { return nil, err } debug.Log("allowed devices: %v\n", deviceMap) - return func(item string, fi os.FileInfo) bool { + return func(item string, fi os.FileInfo, fs fs.FS) bool { id, err := fs.DeviceID(fi) if err != nil { // This should never happen because gatherDevices() would have @@ -287,7 +285,7 @@ func rejectByDevice(samples []string) (RejectFunc, error) { panic(err) } - allowed, err := deviceMap.IsAllowed(filepath.Clean(item), id) + allowed, err := deviceMap.IsAllowed(fs.Clean(item), id, fs) if err != nil { // this should not happen panic(fmt.Sprintf("error checking device ID of %v: %v", item, err)) @@ -306,7 +304,7 @@ func rejectByDevice(samples []string) (RejectFunc, error) { // special case: make sure we keep mountpoints (directories which // contain a mounted file system). Test this by checking if the parent // directory would be included. - parentDir := filepath.Dir(filepath.Clean(item)) + parentDir := fs.Dir(fs.Clean(item)) parentFI, err := fs.Lstat(parentDir) if err != nil { @@ -322,7 +320,7 @@ func rejectByDevice(samples []string) (RejectFunc, error) { return true } - parentAllowed, err := deviceMap.IsAllowed(parentDir, parentDeviceID) + parentAllowed, err := deviceMap.IsAllowed(parentDir, parentDeviceID, fs) if err != nil { debug.Log("item %v: error checking parent directory: %v", item, err) // if in doubt, reject @@ -369,7 +367,7 @@ func rejectBySize(maxSizeStr string) (RejectFunc, error) { return nil, err } - return func(item string, fi os.FileInfo) bool { + return func(item string, fi os.FileInfo, _ fs.FS) bool { // directory will be ignored if fi.IsDir() { return false diff --git a/cmd/restic/exclude_test.go b/cmd/restic/exclude_test.go index 9a24418ae..166ee1d84 100644 --- a/cmd/restic/exclude_test.go +++ b/cmd/restic/exclude_test.go @@ -5,6 +5,7 @@ import ( "path/filepath" "testing" + "github.com/restic/restic/internal/fs" "github.com/restic/restic/internal/test" ) @@ -102,7 +103,7 @@ func TestIsExcludedByFile(t *testing.T) { if tc.content == "" { h = "" } - if got := isExcludedByFile(foo, tagFilename, h, nil); tc.want != got { + if got := isExcludedByFile(foo, tagFilename, h, nil, &fs.Local{}); tc.want != got { t.Fatalf("expected %v, got %v", tc.want, got) } }) @@ -164,8 +165,8 @@ func TestMultipleIsExcludedByFile(t *testing.T) { if err != nil { return err } - excludedByFoo := fooExclude(p) - excludedByBar := barExclude(p) + excludedByFoo := fooExclude(p, nil, &fs.Local{}) + excludedByBar := barExclude(p, nil, &fs.Local{}) excluded := excludedByFoo || excludedByBar // the log message helps debugging in case the test fails t.Logf("%q: %v || %v = %v", p, excludedByFoo, excludedByBar, excluded) @@ -249,7 +250,7 @@ func TestIsExcludedByFileSize(t *testing.T) { return err } - excluded := sizeExclude(p, fi) + excluded := sizeExclude(p, fi, nil) // the log message helps debugging in case the test fails t.Logf("%q: dir:%t; size:%d; excluded:%v", p, fi.IsDir(), fi.Size(), excluded) m[p] = !excluded @@ -299,7 +300,7 @@ func TestDeviceMap(t *testing.T) { for _, test := range tests { t.Run("", func(t *testing.T) { - res, err := deviceMap.IsAllowed(filepath.FromSlash(test.item), test.deviceID) + res, err := deviceMap.IsAllowed(filepath.FromSlash(test.item), test.deviceID, &fs.Local{}) if err != nil { t.Fatal(err) } diff --git a/internal/archiver/archiver.go b/internal/archiver/archiver.go index 397347bcb..eab65bb5f 100644 --- a/internal/archiver/archiver.go +++ b/internal/archiver/archiver.go @@ -25,7 +25,7 @@ type SelectByNameFunc func(item string) bool // SelectFunc returns true for all items that should be included (files and // dirs). If false is returned, files are ignored and dirs are not even walked. -type SelectFunc func(item string, fi os.FileInfo) bool +type SelectFunc func(item string, fi os.FileInfo, fs fs.FS) bool // ErrorFunc is called when an error during archiving occurs. When nil is // returned, the archiver continues, otherwise it aborts and passes the error @@ -178,12 +178,12 @@ func (o Options) ApplyDefaults() Options { } // New initializes a new archiver. -func New(repo archiverRepo, fs fs.FS, opts Options) *Archiver { +func New(repo archiverRepo, filesystem fs.FS, opts Options) *Archiver { arch := &Archiver{ Repo: repo, SelectByName: func(_ string) bool { return true }, - Select: func(_ string, _ os.FileInfo) bool { return true }, - FS: fs, + Select: func(_ string, _ os.FileInfo, _ fs.FS) bool { return true }, + FS: filesystem, Options: opts.ApplyDefaults(), CompleteItem: func(string, *restic.Node, *restic.Node, ItemStats, time.Duration) {}, @@ -448,7 +448,7 @@ func (arch *Archiver) save(ctx context.Context, snPath, target string, previous } return futureNode{}, true, nil } - if !arch.Select(abstarget, fi) { + if !arch.Select(abstarget, fi, arch.FS) { debug.Log("%v is excluded", target) return futureNode{}, true, nil } diff --git a/internal/archiver/archiver_test.go b/internal/archiver/archiver_test.go index 74fecef80..b56452182 100644 --- a/internal/archiver/archiver_test.go +++ b/internal/archiver/archiver_test.go @@ -1529,7 +1529,7 @@ func TestArchiverSnapshotSelect(t *testing.T) { }, "other": TestFile{Content: "another file"}, }, - selFn: func(item string, fi os.FileInfo) bool { + selFn: func(item string, fi os.FileInfo, _ fs.FS) bool { return true }, }, @@ -1546,7 +1546,7 @@ func TestArchiverSnapshotSelect(t *testing.T) { }, "other": TestFile{Content: "another file"}, }, - selFn: func(item string, fi os.FileInfo) bool { + selFn: func(item string, fi os.FileInfo, _ fs.FS) bool { return false }, err: "snapshot is empty", @@ -1573,7 +1573,7 @@ func TestArchiverSnapshotSelect(t *testing.T) { }, "other": TestFile{Content: "another file"}, }, - selFn: func(item string, fi os.FileInfo) bool { + selFn: func(item string, fi os.FileInfo, _ fs.FS) bool { return filepath.Ext(item) != ".txt" }, }, @@ -1597,8 +1597,8 @@ func TestArchiverSnapshotSelect(t *testing.T) { }, "other": TestFile{Content: "another file"}, }, - selFn: func(item string, fi os.FileInfo) bool { - return filepath.Base(item) != "subdir" + selFn: func(item string, fi os.FileInfo, fs fs.FS) bool { + return fs.Base(item) != "subdir" }, }, { @@ -1606,8 +1606,8 @@ func TestArchiverSnapshotSelect(t *testing.T) { src: TestDir{ "foo": TestFile{Content: "foo"}, }, - selFn: func(item string, fi os.FileInfo) bool { - return filepath.IsAbs(item) + selFn: func(item string, fi os.FileInfo, fs fs.FS) bool { + return fs.IsAbs(item) }, }, } diff --git a/internal/archiver/scanner.go b/internal/archiver/scanner.go index 433d38821..cb74a31d6 100644 --- a/internal/archiver/scanner.go +++ b/internal/archiver/scanner.go @@ -22,11 +22,11 @@ type Scanner struct { } // NewScanner initializes a new Scanner. -func NewScanner(fs fs.FS) *Scanner { +func NewScanner(filesystem fs.FS) *Scanner { return &Scanner{ - FS: fs, + FS: filesystem, SelectByName: func(_ string) bool { return true }, - Select: func(_ string, _ os.FileInfo) bool { return true }, + Select: func(_ string, _ os.FileInfo, _ fs.FS) bool { return true }, Error: func(_ string, err error) error { return err }, Result: func(_ string, _ ScanStats) {}, } @@ -115,7 +115,7 @@ func (s *Scanner) scan(ctx context.Context, stats ScanStats, target string) (Sca } // run remaining select functions that require file information - if !s.Select(target, fi) { + if !s.Select(target, fi, s.FS) { return stats, nil } diff --git a/internal/archiver/scanner_test.go b/internal/archiver/scanner_test.go index b5b7057b8..e4e2c9f59 100644 --- a/internal/archiver/scanner_test.go +++ b/internal/archiver/scanner_test.go @@ -56,7 +56,7 @@ func TestScanner(t *testing.T) { }, }, }, - selFn: func(item string, fi os.FileInfo) bool { + selFn: func(item string, fi os.FileInfo, fs fs.FS) bool { if fi.IsDir() { return true } diff --git a/internal/fs/fs_local.go b/internal/fs/fs_local.go index 0bcbf7f3a..33d83bf63 100644 --- a/internal/fs/fs_local.go +++ b/internal/fs/fs_local.go @@ -46,6 +46,12 @@ func (fs Local) Lstat(name string) (os.FileInfo, error) { return os.Lstat(fixpath(name)) } +// DeviceID extracts the DeviceID from the given FileInfo. If the fs does +// not support a DeviceID, it returns an error instead +func (fs Local) DeviceID(fi os.FileInfo) (deviceID uint64, err error) { + return DeviceID(fi) +} + // Join joins any number of path elements into a single path, adding a // Separator if necessary. Join calls Clean on the result; in particular, all // empty strings are ignored. On Windows, the result is a UNC path if and only diff --git a/internal/fs/fs_reader.go b/internal/fs/fs_reader.go index 93a42f9eb..b3371a8c9 100644 --- a/internal/fs/fs_reader.go +++ b/internal/fs/fs_reader.go @@ -122,6 +122,10 @@ func (fs *Reader) Lstat(name string) (os.FileInfo, error) { return nil, pathError("lstat", name, os.ErrNotExist) } +func (fs *Reader) DeviceID(_ os.FileInfo) (deviceID uint64, err error) { + return 0, errors.New("Device IDs are not supported") +} + // Join joins any number of path elements into a single path, adding a // Separator if necessary. Join calls Clean on the result; in particular, all // empty strings are ignored. On Windows, the result is a UNC path if and only diff --git a/internal/fs/interface.go b/internal/fs/interface.go index e1f4ef2d9..1c27c1c13 100644 --- a/internal/fs/interface.go +++ b/internal/fs/interface.go @@ -10,6 +10,7 @@ type FS interface { OpenFile(name string, flag int, perm os.FileMode) (File, error) Stat(name string) (os.FileInfo, error) Lstat(name string) (os.FileInfo, error) + DeviceID(fi os.FileInfo) (deviceID uint64, err error) Join(elem ...string) string Separator() string