forked from TrueCloudLab/restic
Merge pull request #1912 from askielboe/select-funcs
Support for different kinds of select functions
This commit is contained in:
commit
6b9dde3ce8
6 changed files with 110 additions and 56 deletions
14
changelog/unreleased/issue-1909
Normal file
14
changelog/unreleased/issue-1909
Normal file
|
@ -0,0 +1,14 @@
|
|||
Enhancement: Reject files/dirs by name first
|
||||
|
||||
The current scanner/archiver code had an architectural limitation: it always
|
||||
ran the `lstat()` system call on all files and directories before a decision to
|
||||
include/exclude the file/dir was made. This lead to a lot of unnecessary system
|
||||
calls for items that could have been rejected by their name or path only.
|
||||
|
||||
We've changed the archiver/scanner implementation so that it now first rejects
|
||||
by name/path, and only runs the system call on the remaining items. This
|
||||
reduces the number of `lstat()` system calls a lot (depending on the exclude
|
||||
settings).
|
||||
|
||||
https://github.com/restic/restic/issues/1909
|
||||
https://github.com/restic/restic/pull/1912
|
|
@ -186,18 +186,9 @@ func (opts BackupOptions) Check(gopts GlobalOptions, args []string) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// collectRejectFuncs returns a list of all functions which may reject data
|
||||
// from being saved in a snapshot
|
||||
func collectRejectFuncs(opts BackupOptions, repo *repository.Repository, targets []string) (fs []RejectFunc, err error) {
|
||||
// allowed devices
|
||||
if opts.ExcludeOtherFS && !opts.Stdin {
|
||||
f, err := rejectByDevice(targets)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
fs = append(fs, f)
|
||||
}
|
||||
|
||||
// collectRejectByNameFuncs returns a list of all functions which may reject data
|
||||
// from being saved in a snapshot based on path only
|
||||
func collectRejectByNameFuncs(opts BackupOptions, repo *repository.Repository, targets []string) (fs []RejectByNameFunc, err error) {
|
||||
// exclude restic cache
|
||||
if repo.Cache != nil {
|
||||
f, err := rejectResticCache(repo)
|
||||
|
@ -237,6 +228,21 @@ func collectRejectFuncs(opts BackupOptions, repo *repository.Repository, targets
|
|||
return fs, nil
|
||||
}
|
||||
|
||||
// collectRejectFuncs returns a list of all functions which may reject data
|
||||
// from being saved in a snapshot based on path and file info
|
||||
func collectRejectFuncs(opts BackupOptions, repo *repository.Repository, targets []string) (fs []RejectFunc, err error) {
|
||||
// allowed devices
|
||||
if opts.ExcludeOtherFS && !opts.Stdin {
|
||||
f, err := rejectByDevice(targets)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
fs = append(fs, f)
|
||||
}
|
||||
|
||||
return fs, nil
|
||||
}
|
||||
|
||||
// readExcludePatternsFromFiles reads all exclude files and returns the list of
|
||||
// exclude patterns. For each line, leading and trailing white space is removed
|
||||
// and comment lines are ignored. For each remaining pattern, environment
|
||||
|
@ -393,7 +399,13 @@ func runBackup(opts BackupOptions, gopts GlobalOptions, term *termstatus.Termina
|
|||
return err
|
||||
}
|
||||
|
||||
// rejectFuncs collect functions that can reject items from the backup
|
||||
// rejectByNameFuncs collect functions that can reject items from the backup based on path only
|
||||
rejectByNameFuncs, err := collectRejectByNameFuncs(opts, repo, targets)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// rejectFuncs collect functions that can reject items from the backup based on path and file info
|
||||
rejectFuncs, err := collectRejectFuncs(opts, repo, targets)
|
||||
if err != nil {
|
||||
return err
|
||||
|
@ -414,6 +426,15 @@ func runBackup(opts BackupOptions, gopts GlobalOptions, term *termstatus.Termina
|
|||
p.V("using parent snapshot %v\n", parentSnapshotID.Str())
|
||||
}
|
||||
|
||||
selectByNameFilter := func(item string) bool {
|
||||
for _, reject := range rejectByNameFuncs {
|
||||
if reject(item) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
selectFilter := func(item string, fi os.FileInfo) bool {
|
||||
for _, reject := range rejectFuncs {
|
||||
if reject(item, fi) {
|
||||
|
@ -436,6 +457,7 @@ func runBackup(opts BackupOptions, gopts GlobalOptions, term *termstatus.Termina
|
|||
}
|
||||
|
||||
sc := archiver.NewScanner(targetFS)
|
||||
sc.SelectByName = selectByNameFilter
|
||||
sc.Select = selectFilter
|
||||
sc.Error = p.ScannerError
|
||||
sc.Result = p.ReportTotal
|
||||
|
@ -444,6 +466,7 @@ func runBackup(opts BackupOptions, gopts GlobalOptions, term *termstatus.Termina
|
|||
t.Go(func() error { return sc.Scan(t.Context(gopts.ctx), targets) })
|
||||
|
||||
arch := archiver.New(repo, targetFS, archiver.Options{})
|
||||
arch.SelectByName = selectByNameFilter
|
||||
arch.Select = selectFilter
|
||||
arch.WithAtime = opts.WithAtime
|
||||
arch.Error = p.Error
|
||||
|
|
|
@ -60,15 +60,20 @@ func (rc *rejectionCache) Store(dir string, rejected bool) {
|
|||
rc.m[dir] = rejected
|
||||
}
|
||||
|
||||
// RejectByNameFunc is a function that takes a filename of a
|
||||
// file that would be included in the backup. The function returns true if it
|
||||
// should be excluded (rejected) from the backup.
|
||||
type RejectByNameFunc func(path string) bool
|
||||
|
||||
// RejectFunc is a function that takes a filename and os.FileInfo of a
|
||||
// file that would be included in the backup. The function returns true if it
|
||||
// should be excluded (rejected) from the backup.
|
||||
type RejectFunc func(path string, fi os.FileInfo) bool
|
||||
|
||||
// rejectByPattern returns a RejectFunc which rejects files that match
|
||||
// rejectByPattern returns a RejectByNameFunc which rejects files that match
|
||||
// one of the patterns.
|
||||
func rejectByPattern(patterns []string) RejectFunc {
|
||||
return func(item string, fi os.FileInfo) bool {
|
||||
func rejectByPattern(patterns []string) RejectByNameFunc {
|
||||
return func(item string) bool {
|
||||
matched, _, err := filter.List(patterns, item)
|
||||
if err != nil {
|
||||
Warnf("error for exclude pattern: %v", err)
|
||||
|
@ -83,14 +88,14 @@ func rejectByPattern(patterns []string) RejectFunc {
|
|||
}
|
||||
}
|
||||
|
||||
// rejectIfPresent returns a RejectFunc which itself returns whether a path
|
||||
// should be excluded. The RejectFunc considers a file to be excluded when
|
||||
// rejectIfPresent returns a RejectByNameFunc which itself returns whether a path
|
||||
// should be excluded. The RejectByNameFunc considers a file to be excluded when
|
||||
// it resides in a directory with an exclusion file, that is specified by
|
||||
// excludeFileSpec in the form "filename[:content]". The returned error is
|
||||
// non-nil if the filename component of excludeFileSpec is empty. If rc is
|
||||
// non-nil, it is going to be used in the RejectFunc to expedite the evaluation
|
||||
// non-nil, it is going to be used in the RejectByNameFunc to expedite the evaluation
|
||||
// of a directory based on previous visits.
|
||||
func rejectIfPresent(excludeFileSpec string) (RejectFunc, error) {
|
||||
func rejectIfPresent(excludeFileSpec string) (RejectByNameFunc, error) {
|
||||
if excludeFileSpec == "" {
|
||||
return nil, errors.New("name for exclusion tagfile is empty")
|
||||
}
|
||||
|
@ -107,7 +112,7 @@ func rejectIfPresent(excludeFileSpec string) (RejectFunc, error) {
|
|||
}
|
||||
debug.Log("using %q as exclusion tagfile", tf)
|
||||
rc := &rejectionCache{}
|
||||
fn := func(filename string, _ os.FileInfo) bool {
|
||||
fn := func(filename string) bool {
|
||||
return isExcludedByFile(filename, tf, tc, rc)
|
||||
}
|
||||
return fn, nil
|
||||
|
@ -252,11 +257,11 @@ func rejectByDevice(samples []string) (RejectFunc, error) {
|
|||
}, nil
|
||||
}
|
||||
|
||||
// rejectResticCache returns a RejectFunc that rejects the restic cache
|
||||
// rejectResticCache returns a RejectByNameFunc that rejects the restic cache
|
||||
// directory (if set).
|
||||
func rejectResticCache(repo *repository.Repository) (RejectFunc, error) {
|
||||
func rejectResticCache(repo *repository.Repository) (RejectByNameFunc, error) {
|
||||
if repo.Cache == nil {
|
||||
return func(string, os.FileInfo) bool {
|
||||
return func(string) bool {
|
||||
return false
|
||||
}, nil
|
||||
}
|
||||
|
@ -266,7 +271,7 @@ func rejectResticCache(repo *repository.Repository) (RejectFunc, error) {
|
|||
return nil, errors.New("cacheBase is empty string")
|
||||
}
|
||||
|
||||
return func(item string, _ os.FileInfo) bool {
|
||||
return func(item string) bool {
|
||||
if fs.HasPathPrefix(cacheBase, item) {
|
||||
debug.Log("rejecting restic cache directory %v", item)
|
||||
return true
|
||||
|
|
|
@ -27,7 +27,7 @@ func TestRejectByPattern(t *testing.T) {
|
|||
for _, tc := range tests {
|
||||
t.Run("", func(t *testing.T) {
|
||||
reject := rejectByPattern(patterns)
|
||||
res := reject(tc.filename, nil)
|
||||
res := reject(tc.filename)
|
||||
if res != tc.reject {
|
||||
t.Fatalf("wrong result for filename %v: want %v, got %v",
|
||||
tc.filename, tc.reject, res)
|
||||
|
@ -140,8 +140,8 @@ func TestMultipleIsExcludedByFile(t *testing.T) {
|
|||
if err != nil {
|
||||
return err
|
||||
}
|
||||
excludedByFoo := fooExclude(p, fi)
|
||||
excludedByBar := barExclude(p, fi)
|
||||
excludedByFoo := fooExclude(p)
|
||||
excludedByBar := barExclude(p)
|
||||
excluded := excludedByFoo || excludedByBar
|
||||
// the log message helps debugging in case the test fails
|
||||
t.Logf("%q: %v || %v = %v", p, excludedByFoo, excludedByBar, excluded)
|
||||
|
|
|
@ -16,6 +16,10 @@ import (
|
|||
tomb "gopkg.in/tomb.v2"
|
||||
)
|
||||
|
||||
// SelectByNameFunc returns true for all items that should be included (files and
|
||||
// dirs). If false is returned, files are ignored and dirs are not even walked.
|
||||
type SelectByNameFunc func(item string) bool
|
||||
|
||||
// SelectFunc returns true for all items that should be included (files and
|
||||
// dirs). If false is returned, files are ignored and dirs are not even walked.
|
||||
type SelectFunc func(item string, fi os.FileInfo) bool
|
||||
|
@ -43,10 +47,11 @@ func (s *ItemStats) Add(other ItemStats) {
|
|||
|
||||
// Archiver saves a directory structure to the repo.
|
||||
type Archiver struct {
|
||||
Repo restic.Repository
|
||||
Select SelectFunc
|
||||
FS fs.FS
|
||||
Options Options
|
||||
Repo restic.Repository
|
||||
SelectByName SelectByNameFunc
|
||||
Select SelectFunc
|
||||
FS fs.FS
|
||||
Options Options
|
||||
|
||||
blobSaver *BlobSaver
|
||||
fileSaver *FileSaver
|
||||
|
@ -119,10 +124,11 @@ func (o Options) ApplyDefaults() Options {
|
|||
// New initializes a new archiver.
|
||||
func New(repo restic.Repository, fs fs.FS, opts Options) *Archiver {
|
||||
arch := &Archiver{
|
||||
Repo: repo,
|
||||
Select: func(string, os.FileInfo) bool { return true },
|
||||
FS: fs,
|
||||
Options: opts.ApplyDefaults(),
|
||||
Repo: repo,
|
||||
SelectByName: func(item string) bool { return true },
|
||||
Select: func(item string, fi os.FileInfo) bool { return true },
|
||||
FS: fs,
|
||||
Options: opts.ApplyDefaults(),
|
||||
|
||||
CompleteItem: func(string, *restic.Node, *restic.Node, ItemStats, time.Duration) {},
|
||||
StartFile: func(string) {},
|
||||
|
@ -294,10 +300,10 @@ func (fn *FutureNode) wait(ctx context.Context) {
|
|||
}
|
||||
|
||||
// Save saves a target (file or directory) to the repo. If the item is
|
||||
// excluded,this function returns a nil node and error, with excluded set to
|
||||
// excluded, this function returns a nil node and error, with excluded set to
|
||||
// true.
|
||||
//
|
||||
// Errors and completion is needs to be handled by the caller.
|
||||
// Errors and completion needs to be handled by the caller.
|
||||
//
|
||||
// snPath is the path within the current snapshot.
|
||||
func (arch *Archiver) Save(ctx context.Context, snPath, target string, previous *restic.Node) (fn FutureNode, excluded bool, err error) {
|
||||
|
@ -316,6 +322,13 @@ func (arch *Archiver) Save(ctx context.Context, snPath, target string, previous
|
|||
|
||||
fn.absTarget = abstarget
|
||||
|
||||
// exclude files by path before running Lstat to reduce number of lstat calls
|
||||
if !arch.SelectByName(abstarget) {
|
||||
debug.Log("%v is excluded by path", target)
|
||||
return FutureNode{}, true, nil
|
||||
}
|
||||
|
||||
// get file info and run remaining select functions that require file information
|
||||
fi, err := arch.FS.Lstat(target)
|
||||
if !arch.Select(abstarget, fi) {
|
||||
debug.Log("%v is excluded", target)
|
||||
|
|
|
@ -12,23 +12,21 @@ import (
|
|||
// stats concerning the files and folders found. Select is used to decide which
|
||||
// items should be included. Error is called when an error occurs.
|
||||
type Scanner struct {
|
||||
FS fs.FS
|
||||
Select SelectFunc
|
||||
Error ErrorFunc
|
||||
Result func(item string, s ScanStats)
|
||||
FS fs.FS
|
||||
SelectByName SelectByNameFunc
|
||||
Select SelectFunc
|
||||
Error ErrorFunc
|
||||
Result func(item string, s ScanStats)
|
||||
}
|
||||
|
||||
// NewScanner initializes a new Scanner.
|
||||
func NewScanner(fs fs.FS) *Scanner {
|
||||
return &Scanner{
|
||||
FS: fs,
|
||||
Select: func(item string, fi os.FileInfo) bool {
|
||||
return true
|
||||
},
|
||||
Error: func(item string, fi os.FileInfo, err error) error {
|
||||
return err
|
||||
},
|
||||
Result: func(item string, s ScanStats) {},
|
||||
FS: fs,
|
||||
SelectByName: func(item string) bool { return true },
|
||||
Select: func(item string, fi os.FileInfo) bool { return true },
|
||||
Error: func(item string, fi os.FileInfo, err error) error { return err },
|
||||
Result: func(item string, s ScanStats) {},
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -70,17 +68,18 @@ func (s *Scanner) scan(ctx context.Context, stats ScanStats, target string) (Sca
|
|||
return stats, ctx.Err()
|
||||
}
|
||||
|
||||
// exclude files by path before running stat to reduce number of lstat calls
|
||||
if !s.SelectByName(target) {
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
// get file information
|
||||
fi, err := s.FS.Lstat(target)
|
||||
if err != nil {
|
||||
// ignore error if the target is to be excluded anyway
|
||||
if !s.Select(target, nil) {
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
// else return filtered error
|
||||
return stats, s.Error(target, fi, err)
|
||||
}
|
||||
|
||||
// run remaining select functions that require file information
|
||||
if !s.Select(target, fi) {
|
||||
return stats, nil
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue