forked from TrueCloudLab/restic
Merge pull request #1912 from askielboe/select-funcs
Support for different kinds of select functions
This commit is contained in:
commit
6b9dde3ce8
6 changed files with 110 additions and 56 deletions
14
changelog/unreleased/issue-1909
Normal file
14
changelog/unreleased/issue-1909
Normal file
|
@ -0,0 +1,14 @@
|
||||||
|
Enhancement: Reject files/dirs by name first
|
||||||
|
|
||||||
|
The current scanner/archiver code had an architectural limitation: it always
|
||||||
|
ran the `lstat()` system call on all files and directories before a decision to
|
||||||
|
include/exclude the file/dir was made. This led to a lot of unnecessary system
|
||||||
|
calls for items that could have been rejected by their name or path only.
|
||||||
|
|
||||||
|
We've changed the archiver/scanner implementation so that it now first rejects
|
||||||
|
by name/path, and only runs the system call on the remaining items. This
|
||||||
|
reduces the number of `lstat()` system calls a lot (depending on the exclude
|
||||||
|
settings).
|
||||||
|
|
||||||
|
https://github.com/restic/restic/issues/1909
|
||||||
|
https://github.com/restic/restic/pull/1912
|
|
@ -186,18 +186,9 @@ func (opts BackupOptions) Check(gopts GlobalOptions, args []string) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// collectRejectFuncs returns a list of all functions which may reject data
|
// collectRejectByNameFuncs returns a list of all functions which may reject data
|
||||||
// from being saved in a snapshot
|
// from being saved in a snapshot based on path only
|
||||||
func collectRejectFuncs(opts BackupOptions, repo *repository.Repository, targets []string) (fs []RejectFunc, err error) {
|
func collectRejectByNameFuncs(opts BackupOptions, repo *repository.Repository, targets []string) (fs []RejectByNameFunc, err error) {
|
||||||
// allowed devices
|
|
||||||
if opts.ExcludeOtherFS && !opts.Stdin {
|
|
||||||
f, err := rejectByDevice(targets)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
fs = append(fs, f)
|
|
||||||
}
|
|
||||||
|
|
||||||
// exclude restic cache
|
// exclude restic cache
|
||||||
if repo.Cache != nil {
|
if repo.Cache != nil {
|
||||||
f, err := rejectResticCache(repo)
|
f, err := rejectResticCache(repo)
|
||||||
|
@ -237,6 +228,21 @@ func collectRejectFuncs(opts BackupOptions, repo *repository.Repository, targets
|
||||||
return fs, nil
|
return fs, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// collectRejectFuncs returns a list of all functions which may reject data
|
||||||
|
// from being saved in a snapshot based on path and file info
|
||||||
|
func collectRejectFuncs(opts BackupOptions, repo *repository.Repository, targets []string) (fs []RejectFunc, err error) {
|
||||||
|
// allowed devices
|
||||||
|
if opts.ExcludeOtherFS && !opts.Stdin {
|
||||||
|
f, err := rejectByDevice(targets)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
fs = append(fs, f)
|
||||||
|
}
|
||||||
|
|
||||||
|
return fs, nil
|
||||||
|
}
|
||||||
|
|
||||||
// readExcludePatternsFromFiles reads all exclude files and returns the list of
|
// readExcludePatternsFromFiles reads all exclude files and returns the list of
|
||||||
// exclude patterns. For each line, leading and trailing white space is removed
|
// exclude patterns. For each line, leading and trailing white space is removed
|
||||||
// and comment lines are ignored. For each remaining pattern, environment
|
// and comment lines are ignored. For each remaining pattern, environment
|
||||||
|
@ -393,7 +399,13 @@ func runBackup(opts BackupOptions, gopts GlobalOptions, term *termstatus.Termina
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
// rejectFuncs collect functions that can reject items from the backup
|
// rejectByNameFuncs collect functions that can reject items from the backup based on path only
|
||||||
|
rejectByNameFuncs, err := collectRejectByNameFuncs(opts, repo, targets)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// rejectFuncs collect functions that can reject items from the backup based on path and file info
|
||||||
rejectFuncs, err := collectRejectFuncs(opts, repo, targets)
|
rejectFuncs, err := collectRejectFuncs(opts, repo, targets)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
|
@ -414,6 +426,15 @@ func runBackup(opts BackupOptions, gopts GlobalOptions, term *termstatus.Termina
|
||||||
p.V("using parent snapshot %v\n", parentSnapshotID.Str())
|
p.V("using parent snapshot %v\n", parentSnapshotID.Str())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
selectByNameFilter := func(item string) bool {
|
||||||
|
for _, reject := range rejectByNameFuncs {
|
||||||
|
if reject(item) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
selectFilter := func(item string, fi os.FileInfo) bool {
|
selectFilter := func(item string, fi os.FileInfo) bool {
|
||||||
for _, reject := range rejectFuncs {
|
for _, reject := range rejectFuncs {
|
||||||
if reject(item, fi) {
|
if reject(item, fi) {
|
||||||
|
@ -436,6 +457,7 @@ func runBackup(opts BackupOptions, gopts GlobalOptions, term *termstatus.Termina
|
||||||
}
|
}
|
||||||
|
|
||||||
sc := archiver.NewScanner(targetFS)
|
sc := archiver.NewScanner(targetFS)
|
||||||
|
sc.SelectByName = selectByNameFilter
|
||||||
sc.Select = selectFilter
|
sc.Select = selectFilter
|
||||||
sc.Error = p.ScannerError
|
sc.Error = p.ScannerError
|
||||||
sc.Result = p.ReportTotal
|
sc.Result = p.ReportTotal
|
||||||
|
@ -444,6 +466,7 @@ func runBackup(opts BackupOptions, gopts GlobalOptions, term *termstatus.Termina
|
||||||
t.Go(func() error { return sc.Scan(t.Context(gopts.ctx), targets) })
|
t.Go(func() error { return sc.Scan(t.Context(gopts.ctx), targets) })
|
||||||
|
|
||||||
arch := archiver.New(repo, targetFS, archiver.Options{})
|
arch := archiver.New(repo, targetFS, archiver.Options{})
|
||||||
|
arch.SelectByName = selectByNameFilter
|
||||||
arch.Select = selectFilter
|
arch.Select = selectFilter
|
||||||
arch.WithAtime = opts.WithAtime
|
arch.WithAtime = opts.WithAtime
|
||||||
arch.Error = p.Error
|
arch.Error = p.Error
|
||||||
|
|
|
@ -60,15 +60,20 @@ func (rc *rejectionCache) Store(dir string, rejected bool) {
|
||||||
rc.m[dir] = rejected
|
rc.m[dir] = rejected
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// RejectByNameFunc is a function that takes a filename of a
|
||||||
|
// file that would be included in the backup. The function returns true if it
|
||||||
|
// should be excluded (rejected) from the backup.
|
||||||
|
type RejectByNameFunc func(path string) bool
|
||||||
|
|
||||||
// RejectFunc is a function that takes a filename and os.FileInfo of a
|
// RejectFunc is a function that takes a filename and os.FileInfo of a
|
||||||
// file that would be included in the backup. The function returns true if it
|
// file that would be included in the backup. The function returns true if it
|
||||||
// should be excluded (rejected) from the backup.
|
// should be excluded (rejected) from the backup.
|
||||||
type RejectFunc func(path string, fi os.FileInfo) bool
|
type RejectFunc func(path string, fi os.FileInfo) bool
|
||||||
|
|
||||||
// rejectByPattern returns a RejectFunc which rejects files that match
|
// rejectByPattern returns a RejectByNameFunc which rejects files that match
|
||||||
// one of the patterns.
|
// one of the patterns.
|
||||||
func rejectByPattern(patterns []string) RejectFunc {
|
func rejectByPattern(patterns []string) RejectByNameFunc {
|
||||||
return func(item string, fi os.FileInfo) bool {
|
return func(item string) bool {
|
||||||
matched, _, err := filter.List(patterns, item)
|
matched, _, err := filter.List(patterns, item)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
Warnf("error for exclude pattern: %v", err)
|
Warnf("error for exclude pattern: %v", err)
|
||||||
|
@ -83,14 +88,14 @@ func rejectByPattern(patterns []string) RejectFunc {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// rejectIfPresent returns a RejectFunc which itself returns whether a path
|
// rejectIfPresent returns a RejectByNameFunc which itself returns whether a path
|
||||||
// should be excluded. The RejectFunc considers a file to be excluded when
|
// should be excluded. The RejectByNameFunc considers a file to be excluded when
|
||||||
// it resides in a directory with an exclusion file, that is specified by
|
// it resides in a directory with an exclusion file, that is specified by
|
||||||
// excludeFileSpec in the form "filename[:content]". The returned error is
|
// excludeFileSpec in the form "filename[:content]". The returned error is
|
||||||
// non-nil if the filename component of excludeFileSpec is empty. If rc is
|
// non-nil if the filename component of excludeFileSpec is empty. If rc is
|
||||||
// non-nil, it is going to be used in the RejectFunc to expedite the evaluation
|
// non-nil, it is going to be used in the RejectByNameFunc to expedite the evaluation
|
||||||
// of a directory based on previous visits.
|
// of a directory based on previous visits.
|
||||||
func rejectIfPresent(excludeFileSpec string) (RejectFunc, error) {
|
func rejectIfPresent(excludeFileSpec string) (RejectByNameFunc, error) {
|
||||||
if excludeFileSpec == "" {
|
if excludeFileSpec == "" {
|
||||||
return nil, errors.New("name for exclusion tagfile is empty")
|
return nil, errors.New("name for exclusion tagfile is empty")
|
||||||
}
|
}
|
||||||
|
@ -107,7 +112,7 @@ func rejectIfPresent(excludeFileSpec string) (RejectFunc, error) {
|
||||||
}
|
}
|
||||||
debug.Log("using %q as exclusion tagfile", tf)
|
debug.Log("using %q as exclusion tagfile", tf)
|
||||||
rc := &rejectionCache{}
|
rc := &rejectionCache{}
|
||||||
fn := func(filename string, _ os.FileInfo) bool {
|
fn := func(filename string) bool {
|
||||||
return isExcludedByFile(filename, tf, tc, rc)
|
return isExcludedByFile(filename, tf, tc, rc)
|
||||||
}
|
}
|
||||||
return fn, nil
|
return fn, nil
|
||||||
|
@ -252,11 +257,11 @@ func rejectByDevice(samples []string) (RejectFunc, error) {
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// rejectResticCache returns a RejectFunc that rejects the restic cache
|
// rejectResticCache returns a RejectByNameFunc that rejects the restic cache
|
||||||
// directory (if set).
|
// directory (if set).
|
||||||
func rejectResticCache(repo *repository.Repository) (RejectFunc, error) {
|
func rejectResticCache(repo *repository.Repository) (RejectByNameFunc, error) {
|
||||||
if repo.Cache == nil {
|
if repo.Cache == nil {
|
||||||
return func(string, os.FileInfo) bool {
|
return func(string) bool {
|
||||||
return false
|
return false
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
@ -266,7 +271,7 @@ func rejectResticCache(repo *repository.Repository) (RejectFunc, error) {
|
||||||
return nil, errors.New("cacheBase is empty string")
|
return nil, errors.New("cacheBase is empty string")
|
||||||
}
|
}
|
||||||
|
|
||||||
return func(item string, _ os.FileInfo) bool {
|
return func(item string) bool {
|
||||||
if fs.HasPathPrefix(cacheBase, item) {
|
if fs.HasPathPrefix(cacheBase, item) {
|
||||||
debug.Log("rejecting restic cache directory %v", item)
|
debug.Log("rejecting restic cache directory %v", item)
|
||||||
return true
|
return true
|
||||||
|
|
|
@ -27,7 +27,7 @@ func TestRejectByPattern(t *testing.T) {
|
||||||
for _, tc := range tests {
|
for _, tc := range tests {
|
||||||
t.Run("", func(t *testing.T) {
|
t.Run("", func(t *testing.T) {
|
||||||
reject := rejectByPattern(patterns)
|
reject := rejectByPattern(patterns)
|
||||||
res := reject(tc.filename, nil)
|
res := reject(tc.filename)
|
||||||
if res != tc.reject {
|
if res != tc.reject {
|
||||||
t.Fatalf("wrong result for filename %v: want %v, got %v",
|
t.Fatalf("wrong result for filename %v: want %v, got %v",
|
||||||
tc.filename, tc.reject, res)
|
tc.filename, tc.reject, res)
|
||||||
|
@ -140,8 +140,8 @@ func TestMultipleIsExcludedByFile(t *testing.T) {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
excludedByFoo := fooExclude(p, fi)
|
excludedByFoo := fooExclude(p)
|
||||||
excludedByBar := barExclude(p, fi)
|
excludedByBar := barExclude(p)
|
||||||
excluded := excludedByFoo || excludedByBar
|
excluded := excludedByFoo || excludedByBar
|
||||||
// the log message helps debugging in case the test fails
|
// the log message helps debugging in case the test fails
|
||||||
t.Logf("%q: %v || %v = %v", p, excludedByFoo, excludedByBar, excluded)
|
t.Logf("%q: %v || %v = %v", p, excludedByFoo, excludedByBar, excluded)
|
||||||
|
|
|
@ -16,6 +16,10 @@ import (
|
||||||
tomb "gopkg.in/tomb.v2"
|
tomb "gopkg.in/tomb.v2"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// SelectByNameFunc returns true for all items that should be included (files and
|
||||||
|
// dirs). If false is returned, files are ignored and dirs are not even walked.
|
||||||
|
type SelectByNameFunc func(item string) bool
|
||||||
|
|
||||||
// SelectFunc returns true for all items that should be included (files and
|
// SelectFunc returns true for all items that should be included (files and
|
||||||
// dirs). If false is returned, files are ignored and dirs are not even walked.
|
// dirs). If false is returned, files are ignored and dirs are not even walked.
|
||||||
type SelectFunc func(item string, fi os.FileInfo) bool
|
type SelectFunc func(item string, fi os.FileInfo) bool
|
||||||
|
@ -43,10 +47,11 @@ func (s *ItemStats) Add(other ItemStats) {
|
||||||
|
|
||||||
// Archiver saves a directory structure to the repo.
|
// Archiver saves a directory structure to the repo.
|
||||||
type Archiver struct {
|
type Archiver struct {
|
||||||
Repo restic.Repository
|
Repo restic.Repository
|
||||||
Select SelectFunc
|
SelectByName SelectByNameFunc
|
||||||
FS fs.FS
|
Select SelectFunc
|
||||||
Options Options
|
FS fs.FS
|
||||||
|
Options Options
|
||||||
|
|
||||||
blobSaver *BlobSaver
|
blobSaver *BlobSaver
|
||||||
fileSaver *FileSaver
|
fileSaver *FileSaver
|
||||||
|
@ -119,10 +124,11 @@ func (o Options) ApplyDefaults() Options {
|
||||||
// New initializes a new archiver.
|
// New initializes a new archiver.
|
||||||
func New(repo restic.Repository, fs fs.FS, opts Options) *Archiver {
|
func New(repo restic.Repository, fs fs.FS, opts Options) *Archiver {
|
||||||
arch := &Archiver{
|
arch := &Archiver{
|
||||||
Repo: repo,
|
Repo: repo,
|
||||||
Select: func(string, os.FileInfo) bool { return true },
|
SelectByName: func(item string) bool { return true },
|
||||||
FS: fs,
|
Select: func(item string, fi os.FileInfo) bool { return true },
|
||||||
Options: opts.ApplyDefaults(),
|
FS: fs,
|
||||||
|
Options: opts.ApplyDefaults(),
|
||||||
|
|
||||||
CompleteItem: func(string, *restic.Node, *restic.Node, ItemStats, time.Duration) {},
|
CompleteItem: func(string, *restic.Node, *restic.Node, ItemStats, time.Duration) {},
|
||||||
StartFile: func(string) {},
|
StartFile: func(string) {},
|
||||||
|
@ -294,10 +300,10 @@ func (fn *FutureNode) wait(ctx context.Context) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Save saves a target (file or directory) to the repo. If the item is
|
// Save saves a target (file or directory) to the repo. If the item is
|
||||||
// excluded,this function returns a nil node and error, with excluded set to
|
// excluded, this function returns a nil node and error, with excluded set to
|
||||||
// true.
|
// true.
|
||||||
//
|
//
|
||||||
// Errors and completion is needs to be handled by the caller.
|
// Errors and completion need to be handled by the caller.
|
||||||
//
|
//
|
||||||
// snPath is the path within the current snapshot.
|
// snPath is the path within the current snapshot.
|
||||||
func (arch *Archiver) Save(ctx context.Context, snPath, target string, previous *restic.Node) (fn FutureNode, excluded bool, err error) {
|
func (arch *Archiver) Save(ctx context.Context, snPath, target string, previous *restic.Node) (fn FutureNode, excluded bool, err error) {
|
||||||
|
@ -316,6 +322,13 @@ func (arch *Archiver) Save(ctx context.Context, snPath, target string, previous
|
||||||
|
|
||||||
fn.absTarget = abstarget
|
fn.absTarget = abstarget
|
||||||
|
|
||||||
|
// exclude files by path before running Lstat to reduce number of lstat calls
|
||||||
|
if !arch.SelectByName(abstarget) {
|
||||||
|
debug.Log("%v is excluded by path", target)
|
||||||
|
return FutureNode{}, true, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// get file info and run remaining select functions that require file information
|
||||||
fi, err := arch.FS.Lstat(target)
|
fi, err := arch.FS.Lstat(target)
|
||||||
if !arch.Select(abstarget, fi) {
|
if !arch.Select(abstarget, fi) {
|
||||||
debug.Log("%v is excluded", target)
|
debug.Log("%v is excluded", target)
|
||||||
|
|
|
@ -12,23 +12,21 @@ import (
|
||||||
// stats concerning the files and folders found. Select is used to decide which
|
// stats concerning the files and folders found. Select is used to decide which
|
||||||
// items should be included. Error is called when an error occurs.
|
// items should be included. Error is called when an error occurs.
|
||||||
type Scanner struct {
|
type Scanner struct {
|
||||||
FS fs.FS
|
FS fs.FS
|
||||||
Select SelectFunc
|
SelectByName SelectByNameFunc
|
||||||
Error ErrorFunc
|
Select SelectFunc
|
||||||
Result func(item string, s ScanStats)
|
Error ErrorFunc
|
||||||
|
Result func(item string, s ScanStats)
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewScanner initializes a new Scanner.
|
// NewScanner initializes a new Scanner.
|
||||||
func NewScanner(fs fs.FS) *Scanner {
|
func NewScanner(fs fs.FS) *Scanner {
|
||||||
return &Scanner{
|
return &Scanner{
|
||||||
FS: fs,
|
FS: fs,
|
||||||
Select: func(item string, fi os.FileInfo) bool {
|
SelectByName: func(item string) bool { return true },
|
||||||
return true
|
Select: func(item string, fi os.FileInfo) bool { return true },
|
||||||
},
|
Error: func(item string, fi os.FileInfo, err error) error { return err },
|
||||||
Error: func(item string, fi os.FileInfo, err error) error {
|
Result: func(item string, s ScanStats) {},
|
||||||
return err
|
|
||||||
},
|
|
||||||
Result: func(item string, s ScanStats) {},
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -70,17 +68,18 @@ func (s *Scanner) scan(ctx context.Context, stats ScanStats, target string) (Sca
|
||||||
return stats, ctx.Err()
|
return stats, ctx.Err()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// exclude files by path before running lstat to reduce the number of lstat calls
|
||||||
|
if !s.SelectByName(target) {
|
||||||
|
return stats, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// get file information
|
||||||
fi, err := s.FS.Lstat(target)
|
fi, err := s.FS.Lstat(target)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// ignore error if the target is to be excluded anyway
|
|
||||||
if !s.Select(target, nil) {
|
|
||||||
return stats, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// else return filtered error
|
|
||||||
return stats, s.Error(target, fi, err)
|
return stats, s.Error(target, fi, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// run remaining select functions that require file information
|
||||||
if !s.Select(target, fi) {
|
if !s.Select(target, fi) {
|
||||||
return stats, nil
|
return stats, nil
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue