Merge pull request #1912 from askielboe/select-funcs

Support for different kinds of select functions
This commit is contained in:
Alexander Neumann 2018-08-12 19:26:36 +02:00
commit 6b9dde3ce8
6 changed files with 110 additions and 56 deletions

View file

@ -0,0 +1,14 @@
Enhancement: Reject files/dirs by name first
The current scanner/archiver code had an architectural limitation: it always
ran the `lstat()` system call on all files and directories before a decision to
include/exclude the file/dir was made. This lead to a lot of unnecessary system
calls for items that could have been rejected by their name or path only.
We've changed the archiver/scanner implementation so that it now first rejects
by name/path, and only runs the system call on the remaining items. This
reduces the number of `lstat()` system calls a lot (depending on the exclude
settings).
https://github.com/restic/restic/issues/1909
https://github.com/restic/restic/pull/1912

View file

@ -186,18 +186,9 @@ func (opts BackupOptions) Check(gopts GlobalOptions, args []string) error {
return nil
}
// collectRejectFuncs returns a list of all functions which may reject data
// from being saved in a snapshot
func collectRejectFuncs(opts BackupOptions, repo *repository.Repository, targets []string) (fs []RejectFunc, err error) {
// allowed devices
if opts.ExcludeOtherFS && !opts.Stdin {
f, err := rejectByDevice(targets)
if err != nil {
return nil, err
}
fs = append(fs, f)
}
// collectRejectByNameFuncs returns a list of all functions which may reject data
// from being saved in a snapshot based on path only
func collectRejectByNameFuncs(opts BackupOptions, repo *repository.Repository, targets []string) (fs []RejectByNameFunc, err error) {
// exclude restic cache
if repo.Cache != nil {
f, err := rejectResticCache(repo)
@ -237,6 +228,21 @@ func collectRejectFuncs(opts BackupOptions, repo *repository.Repository, targets
return fs, nil
}
// collectRejectFuncs returns a list of all functions which may reject data
// from being saved in a snapshot based on path and file info
func collectRejectFuncs(opts BackupOptions, repo *repository.Repository, targets []string) (fs []RejectFunc, err error) {
// allowed devices
if opts.ExcludeOtherFS && !opts.Stdin {
f, err := rejectByDevice(targets)
if err != nil {
return nil, err
}
fs = append(fs, f)
}
return fs, nil
}
// readExcludePatternsFromFiles reads all exclude files and returns the list of
// exclude patterns. For each line, leading and trailing white space is removed
// and comment lines are ignored. For each remaining pattern, environment
@ -393,7 +399,13 @@ func runBackup(opts BackupOptions, gopts GlobalOptions, term *termstatus.Termina
return err
}
// rejectFuncs collect functions that can reject items from the backup
// rejectByNameFuncs collect functions that can reject items from the backup based on path only
rejectByNameFuncs, err := collectRejectByNameFuncs(opts, repo, targets)
if err != nil {
return err
}
// rejectFuncs collect functions that can reject items from the backup based on path and file info
rejectFuncs, err := collectRejectFuncs(opts, repo, targets)
if err != nil {
return err
@ -414,6 +426,15 @@ func runBackup(opts BackupOptions, gopts GlobalOptions, term *termstatus.Termina
p.V("using parent snapshot %v\n", parentSnapshotID.Str())
}
selectByNameFilter := func(item string) bool {
for _, reject := range rejectByNameFuncs {
if reject(item) {
return false
}
}
return true
}
selectFilter := func(item string, fi os.FileInfo) bool {
for _, reject := range rejectFuncs {
if reject(item, fi) {
@ -436,6 +457,7 @@ func runBackup(opts BackupOptions, gopts GlobalOptions, term *termstatus.Termina
}
sc := archiver.NewScanner(targetFS)
sc.SelectByName = selectByNameFilter
sc.Select = selectFilter
sc.Error = p.ScannerError
sc.Result = p.ReportTotal
@ -444,6 +466,7 @@ func runBackup(opts BackupOptions, gopts GlobalOptions, term *termstatus.Termina
t.Go(func() error { return sc.Scan(t.Context(gopts.ctx), targets) })
arch := archiver.New(repo, targetFS, archiver.Options{})
arch.SelectByName = selectByNameFilter
arch.Select = selectFilter
arch.WithAtime = opts.WithAtime
arch.Error = p.Error

View file

@ -60,15 +60,20 @@ func (rc *rejectionCache) Store(dir string, rejected bool) {
rc.m[dir] = rejected
}
// RejectByNameFunc is a function that takes a filename of a
// file that would be included in the backup. The function returns true if it
// should be excluded (rejected) from the backup.
type RejectByNameFunc func(path string) bool
// RejectFunc is a function that takes a filename and os.FileInfo of a
// file that would be included in the backup. The function returns true if it
// should be excluded (rejected) from the backup.
type RejectFunc func(path string, fi os.FileInfo) bool
// rejectByPattern returns a RejectFunc which rejects files that match
// rejectByPattern returns a RejectByNameFunc which rejects files that match
// one of the patterns.
func rejectByPattern(patterns []string) RejectFunc {
return func(item string, fi os.FileInfo) bool {
func rejectByPattern(patterns []string) RejectByNameFunc {
return func(item string) bool {
matched, _, err := filter.List(patterns, item)
if err != nil {
Warnf("error for exclude pattern: %v", err)
@ -83,14 +88,14 @@ func rejectByPattern(patterns []string) RejectFunc {
}
}
// rejectIfPresent returns a RejectFunc which itself returns whether a path
// should be excluded. The RejectFunc considers a file to be excluded when
// rejectIfPresent returns a RejectByNameFunc which itself returns whether a path
// should be excluded. The RejectByNameFunc considers a file to be excluded when
// it resides in a directory with an exclusion file, that is specified by
// excludeFileSpec in the form "filename[:content]". The returned error is
// non-nil if the filename component of excludeFileSpec is empty. If rc is
// non-nil, it is going to be used in the RejectFunc to expedite the evaluation
// non-nil, it is going to be used in the RejectByNameFunc to expedite the evaluation
// of a directory based on previous visits.
func rejectIfPresent(excludeFileSpec string) (RejectFunc, error) {
func rejectIfPresent(excludeFileSpec string) (RejectByNameFunc, error) {
if excludeFileSpec == "" {
return nil, errors.New("name for exclusion tagfile is empty")
}
@ -107,7 +112,7 @@ func rejectIfPresent(excludeFileSpec string) (RejectFunc, error) {
}
debug.Log("using %q as exclusion tagfile", tf)
rc := &rejectionCache{}
fn := func(filename string, _ os.FileInfo) bool {
fn := func(filename string) bool {
return isExcludedByFile(filename, tf, tc, rc)
}
return fn, nil
@ -252,11 +257,11 @@ func rejectByDevice(samples []string) (RejectFunc, error) {
}, nil
}
// rejectResticCache returns a RejectFunc that rejects the restic cache
// rejectResticCache returns a RejectByNameFunc that rejects the restic cache
// directory (if set).
func rejectResticCache(repo *repository.Repository) (RejectFunc, error) {
func rejectResticCache(repo *repository.Repository) (RejectByNameFunc, error) {
if repo.Cache == nil {
return func(string, os.FileInfo) bool {
return func(string) bool {
return false
}, nil
}
@ -266,7 +271,7 @@ func rejectResticCache(repo *repository.Repository) (RejectFunc, error) {
return nil, errors.New("cacheBase is empty string")
}
return func(item string, _ os.FileInfo) bool {
return func(item string) bool {
if fs.HasPathPrefix(cacheBase, item) {
debug.Log("rejecting restic cache directory %v", item)
return true

View file

@ -27,7 +27,7 @@ func TestRejectByPattern(t *testing.T) {
for _, tc := range tests {
t.Run("", func(t *testing.T) {
reject := rejectByPattern(patterns)
res := reject(tc.filename, nil)
res := reject(tc.filename)
if res != tc.reject {
t.Fatalf("wrong result for filename %v: want %v, got %v",
tc.filename, tc.reject, res)
@ -140,8 +140,8 @@ func TestMultipleIsExcludedByFile(t *testing.T) {
if err != nil {
return err
}
excludedByFoo := fooExclude(p, fi)
excludedByBar := barExclude(p, fi)
excludedByFoo := fooExclude(p)
excludedByBar := barExclude(p)
excluded := excludedByFoo || excludedByBar
// the log message helps debugging in case the test fails
t.Logf("%q: %v || %v = %v", p, excludedByFoo, excludedByBar, excluded)

View file

@ -16,6 +16,10 @@ import (
tomb "gopkg.in/tomb.v2"
)
// SelectByNameFunc returns true for all items that should be included (files and
// dirs). If false is returned, files are ignored and dirs are not even walked.
type SelectByNameFunc func(item string) bool
// SelectFunc returns true for all items that should be included (files and
// dirs). If false is returned, files are ignored and dirs are not even walked.
type SelectFunc func(item string, fi os.FileInfo) bool
@ -43,10 +47,11 @@ func (s *ItemStats) Add(other ItemStats) {
// Archiver saves a directory structure to the repo.
type Archiver struct {
Repo restic.Repository
Select SelectFunc
FS fs.FS
Options Options
Repo restic.Repository
SelectByName SelectByNameFunc
Select SelectFunc
FS fs.FS
Options Options
blobSaver *BlobSaver
fileSaver *FileSaver
@ -119,10 +124,11 @@ func (o Options) ApplyDefaults() Options {
// New initializes a new archiver.
func New(repo restic.Repository, fs fs.FS, opts Options) *Archiver {
arch := &Archiver{
Repo: repo,
Select: func(string, os.FileInfo) bool { return true },
FS: fs,
Options: opts.ApplyDefaults(),
Repo: repo,
SelectByName: func(item string) bool { return true },
Select: func(item string, fi os.FileInfo) bool { return true },
FS: fs,
Options: opts.ApplyDefaults(),
CompleteItem: func(string, *restic.Node, *restic.Node, ItemStats, time.Duration) {},
StartFile: func(string) {},
@ -294,10 +300,10 @@ func (fn *FutureNode) wait(ctx context.Context) {
}
// Save saves a target (file or directory) to the repo. If the item is
// excluded,this function returns a nil node and error, with excluded set to
// excluded, this function returns a nil node and error, with excluded set to
// true.
//
// Errors and completion is needs to be handled by the caller.
// Errors and completion needs to be handled by the caller.
//
// snPath is the path within the current snapshot.
func (arch *Archiver) Save(ctx context.Context, snPath, target string, previous *restic.Node) (fn FutureNode, excluded bool, err error) {
@ -316,6 +322,13 @@ func (arch *Archiver) Save(ctx context.Context, snPath, target string, previous
fn.absTarget = abstarget
// exclude files by path before running Lstat to reduce number of lstat calls
if !arch.SelectByName(abstarget) {
debug.Log("%v is excluded by path", target)
return FutureNode{}, true, nil
}
// get file info and run remaining select functions that require file information
fi, err := arch.FS.Lstat(target)
if !arch.Select(abstarget, fi) {
debug.Log("%v is excluded", target)

View file

@ -12,23 +12,21 @@ import (
// stats concerning the files and folders found. Select is used to decide which
// items should be included. Error is called when an error occurs.
type Scanner struct {
FS fs.FS
Select SelectFunc
Error ErrorFunc
Result func(item string, s ScanStats)
FS fs.FS
SelectByName SelectByNameFunc
Select SelectFunc
Error ErrorFunc
Result func(item string, s ScanStats)
}
// NewScanner initializes a new Scanner.
func NewScanner(fs fs.FS) *Scanner {
return &Scanner{
FS: fs,
Select: func(item string, fi os.FileInfo) bool {
return true
},
Error: func(item string, fi os.FileInfo, err error) error {
return err
},
Result: func(item string, s ScanStats) {},
FS: fs,
SelectByName: func(item string) bool { return true },
Select: func(item string, fi os.FileInfo) bool { return true },
Error: func(item string, fi os.FileInfo, err error) error { return err },
Result: func(item string, s ScanStats) {},
}
}
@ -70,17 +68,18 @@ func (s *Scanner) scan(ctx context.Context, stats ScanStats, target string) (Sca
return stats, ctx.Err()
}
// exclude files by path before running stat to reduce number of lstat calls
if !s.SelectByName(target) {
return stats, nil
}
// get file information
fi, err := s.FS.Lstat(target)
if err != nil {
// ignore error if the target is to be excluded anyway
if !s.Select(target, nil) {
return stats, nil
}
// else return filtered error
return stats, s.Error(target, fi, err)
}
// run remaining select functions that require file information
if !s.Select(target, fi) {
return stats, nil
}