From c6fae0320ef7b38a789d490be1ec53a7f8ca430d Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Tue, 27 Aug 2024 11:26:52 +0200 Subject: [PATCH 1/9] archiver: hide implementation details --- internal/archiver/archiver.go | 106 ++++++++++++++------------- internal/archiver/archiver_test.go | 2 +- internal/archiver/blob_saver.go | 34 ++++----- internal/archiver/blob_saver_test.go | 16 ++-- internal/archiver/buffer.go | 26 +++---- internal/archiver/doc.go | 9 --- internal/archiver/file_saver.go | 36 ++++----- internal/archiver/file_saver_test.go | 10 +-- internal/archiver/scanner.go | 4 +- internal/archiver/tree.go | 54 +++++++------- internal/archiver/tree_saver.go | 32 ++++---- internal/archiver/tree_saver_test.go | 16 ++-- internal/archiver/tree_test.go | 102 +++++++++++++------------- 13 files changed, 223 insertions(+), 224 deletions(-) diff --git a/internal/archiver/archiver.go b/internal/archiver/archiver.go index 4f0990843..397347bcb 100644 --- a/internal/archiver/archiver.go +++ b/internal/archiver/archiver.go @@ -75,6 +75,14 @@ type archiverRepo interface { } // Archiver saves a directory structure to the repo. +// +// An Archiver has a number of worker goroutines handling saving the different +// data structures to the repository, the details are implemented by the +// fileSaver, blobSaver, and treeSaver types. +// +// The main goroutine (the one calling Snapshot()) traverses the directory tree +// and delegates all work to these worker pools. They return a futureNode which +// can be resolved later, by calling Wait() on it. type Archiver struct { Repo archiverRepo SelectByName SelectByNameFunc @@ -82,9 +90,9 @@ type Archiver struct { FS fs.FS Options Options - blobSaver *BlobSaver - fileSaver *FileSaver - treeSaver *TreeSaver + blobSaver *blobSaver + fileSaver *fileSaver + treeSaver *treeSaver mu sync.Mutex summary *Summary @@ -160,7 +168,7 @@ func (o Options) ApplyDefaults() Options { if o.SaveTreeConcurrency == 0 { // can either wait for a file, wait for a tree, serialize a tree or wait for saveblob // the last two are cpu-bound and thus mutually exclusive. - // Also allow waiting for FileReadConcurrency files, this is the maximum of FutureFiles + // Also allow waiting for FileReadConcurrency files, this is the maximum of files // which currently can be in progress. The main backup loop blocks when trying to queue // more files to read. o.SaveTreeConcurrency = uint(runtime.GOMAXPROCS(0)) + o.ReadConcurrency @@ -297,27 +305,27 @@ func (arch *Archiver) wrapLoadTreeError(id restic.ID, err error) error { // saveDir stores a directory in the repo and returns the node. snPath is the // path within the current snapshot. -func (arch *Archiver) saveDir(ctx context.Context, snPath string, dir string, fi os.FileInfo, previous *restic.Tree, complete CompleteFunc) (d FutureNode, err error) { +func (arch *Archiver) saveDir(ctx context.Context, snPath string, dir string, fi os.FileInfo, previous *restic.Tree, complete fileCompleteFunc) (d futureNode, err error) { debug.Log("%v %v", snPath, dir) treeNode, err := arch.nodeFromFileInfo(snPath, dir, fi, false) if err != nil { - return FutureNode{}, err + return futureNode{}, err } names, err := fs.Readdirnames(arch.FS, dir, fs.O_NOFOLLOW) if err != nil { - return FutureNode{}, err + return futureNode{}, err } sort.Strings(names) - nodes := make([]FutureNode, 0, len(names)) + nodes := make([]futureNode, 0, len(names)) for _, name := range names { // test if context has been cancelled if ctx.Err() != nil { debug.Log("context has been cancelled, aborting") - return FutureNode{}, ctx.Err() + return futureNode{}, ctx.Err() } pathname := arch.FS.Join(dir, name) @@ -333,7 +341,7 @@ func (arch *Archiver) saveDir(ctx context.Context, snPath string, dir string, fi continue } - return FutureNode{}, err + return futureNode{}, err } if excluded { @@ -348,11 +356,11 @@ func (arch *Archiver) saveDir(ctx context.Context, snPath string, dir string, fi return fn, nil } -// FutureNode holds a reference to a channel that returns a FutureNodeResult +// futureNode holds a reference to a channel that returns a FutureNodeResult // or a reference to an already existing result. If the result is available // immediately, then storing a reference directly requires less memory than // using the indirection via a channel. -type FutureNode struct { +type futureNode struct { ch <-chan futureNodeResult res *futureNodeResult } @@ -365,18 +373,18 @@ type futureNodeResult struct { err error } -func newFutureNode() (FutureNode, chan<- futureNodeResult) { +func newFutureNode() (futureNode, chan<- futureNodeResult) { ch := make(chan futureNodeResult, 1) - return FutureNode{ch: ch}, ch + return futureNode{ch: ch}, ch } -func newFutureNodeWithResult(res futureNodeResult) FutureNode { - return FutureNode{ +func newFutureNodeWithResult(res futureNodeResult) futureNode { + return futureNode{ res: &res, } } -func (fn *FutureNode) take(ctx context.Context) futureNodeResult { +func (fn *futureNode) take(ctx context.Context) futureNodeResult { if fn.res != nil { res := fn.res // free result @@ -415,19 +423,19 @@ func (arch *Archiver) allBlobsPresent(previous *restic.Node) bool { // Errors and completion needs to be handled by the caller. // // snPath is the path within the current snapshot. -func (arch *Archiver) save(ctx context.Context, snPath, target string, previous *restic.Node) (fn FutureNode, excluded bool, err error) { +func (arch *Archiver) save(ctx context.Context, snPath, target string, previous *restic.Node) (fn futureNode, excluded bool, err error) { start := time.Now() debug.Log("%v target %q, previous %v", snPath, target, previous) abstarget, err := arch.FS.Abs(target) if err != nil { - return FutureNode{}, false, err + return futureNode{}, false, err } // exclude files by path before running Lstat to reduce number of lstat calls if !arch.SelectByName(abstarget) { debug.Log("%v is excluded by path", target) - return FutureNode{}, true, nil + return futureNode{}, true, nil } // get file info and run remaining select functions that require file information @@ -436,13 +444,13 @@ func (arch *Archiver) save(ctx context.Context, snPath, target string, previous debug.Log("lstat() for %v returned error: %v", target, err) err = arch.error(abstarget, err) if err != nil { - return FutureNode{}, false, errors.WithStack(err) + return futureNode{}, false, errors.WithStack(err) } - return FutureNode{}, true, nil + return futureNode{}, true, nil } if !arch.Select(abstarget, fi) { debug.Log("%v is excluded", target) - return FutureNode{}, true, nil + return futureNode{}, true, nil } switch { @@ -458,7 +466,7 @@ func (arch *Archiver) save(ctx context.Context, snPath, target string, previous arch.CompleteBlob(previous.Size) node, err := arch.nodeFromFileInfo(snPath, target, fi, false) if err != nil { - return FutureNode{}, false, err + return futureNode{}, false, err } // copy list of blobs @@ -477,7 +485,7 @@ func (arch *Archiver) save(ctx context.Context, snPath, target string, previous err := errors.Errorf("parts of %v not found in the repository index; storing the file again", target) err = arch.error(abstarget, err) if err != nil { - return FutureNode{}, false, err + return futureNode{}, false, err } } @@ -488,9 +496,9 @@ func (arch *Archiver) save(ctx context.Context, snPath, target string, previous debug.Log("Openfile() for %v returned error: %v", target, err) err = arch.error(abstarget, err) if err != nil { - return FutureNode{}, false, errors.WithStack(err) + return futureNode{}, false, errors.WithStack(err) } - return FutureNode{}, true, nil + return futureNode{}, true, nil } fi, err = file.Stat() @@ -499,9 +507,9 @@ func (arch *Archiver) save(ctx context.Context, snPath, target string, previous _ = file.Close() err = arch.error(abstarget, err) if err != nil { - return FutureNode{}, false, errors.WithStack(err) + return futureNode{}, false, errors.WithStack(err) } - return FutureNode{}, true, nil + return futureNode{}, true, nil } // make sure it's still a file @@ -510,9 +518,9 @@ func (arch *Archiver) save(ctx context.Context, snPath, target string, previous _ = file.Close() err = arch.error(abstarget, err) if err != nil { - return FutureNode{}, false, err + return futureNode{}, false, err } - return FutureNode{}, true, nil + return futureNode{}, true, nil } // Save will close the file, we don't need to do that @@ -533,7 +541,7 @@ func (arch *Archiver) save(ctx context.Context, snPath, target string, previous err = arch.error(abstarget, err) } if err != nil { - return FutureNode{}, false, err + return futureNode{}, false, err } fn, err = arch.saveDir(ctx, snPath, target, fi, oldSubtree, @@ -542,19 +550,19 @@ func (arch *Archiver) save(ctx context.Context, snPath, target string, previous }) if err != nil { debug.Log("SaveDir for %v returned error: %v", snPath, err) - return FutureNode{}, false, err + return futureNode{}, false, err } case fi.Mode()&os.ModeSocket > 0: debug.Log(" %v is a socket, ignoring", target) - return FutureNode{}, true, nil + return futureNode{}, true, nil default: debug.Log(" %v other", target) node, err := arch.nodeFromFileInfo(snPath, target, fi, false) if err != nil { - return FutureNode{}, false, err + return futureNode{}, false, err } fn = newFutureNodeWithResult(futureNodeResult{ snPath: snPath, @@ -621,17 +629,17 @@ func (arch *Archiver) statDir(dir string) (os.FileInfo, error) { // saveTree stores a Tree in the repo, returned is the tree. snPath is the path // within the current snapshot. -func (arch *Archiver) saveTree(ctx context.Context, snPath string, atree *Tree, previous *restic.Tree, complete CompleteFunc) (FutureNode, int, error) { +func (arch *Archiver) saveTree(ctx context.Context, snPath string, atree *tree, previous *restic.Tree, complete fileCompleteFunc) (futureNode, int, error) { var node *restic.Node if snPath != "/" { if atree.FileInfoPath == "" { - return FutureNode{}, 0, errors.Errorf("FileInfoPath for %v is empty", snPath) + return futureNode{}, 0, errors.Errorf("FileInfoPath for %v is empty", snPath) } fi, err := arch.statDir(atree.FileInfoPath) if err != nil { - return FutureNode{}, 0, err + return futureNode{}, 0, err } debug.Log("%v, dir node data loaded from %v", snPath, atree.FileInfoPath) @@ -639,7 +647,7 @@ func (arch *Archiver) saveTree(ctx context.Context, snPath string, atree *Tree, // thus ignore errors for such folders. node, err = arch.nodeFromFileInfo(snPath, atree.FileInfoPath, fi, true) if err != nil { - return FutureNode{}, 0, err + return futureNode{}, 0, err } } else { // fake root node @@ -648,7 +656,7 @@ func (arch *Archiver) saveTree(ctx context.Context, snPath string, atree *Tree, debug.Log("%v (%v nodes), parent %v", snPath, len(atree.Nodes), previous) nodeNames := atree.NodeNames() - nodes := make([]FutureNode, 0, len(nodeNames)) + nodes := make([]futureNode, 0, len(nodeNames)) // iterate over the nodes of atree in lexicographic (=deterministic) order for _, name := range nodeNames { @@ -656,7 +664,7 @@ func (arch *Archiver) saveTree(ctx context.Context, snPath string, atree *Tree, // test if context has been cancelled if ctx.Err() != nil { - return FutureNode{}, 0, ctx.Err() + return futureNode{}, 0, ctx.Err() } // this is a leaf node @@ -669,11 +677,11 @@ func (arch *Archiver) saveTree(ctx context.Context, snPath string, atree *Tree, // ignore error continue } - return FutureNode{}, 0, err + return futureNode{}, 0, err } if err != nil { - return FutureNode{}, 0, err + return futureNode{}, 0, err } if !excluded { @@ -691,7 +699,7 @@ func (arch *Archiver) saveTree(ctx context.Context, snPath string, atree *Tree, err = arch.error(join(snPath, name), err) } if err != nil { - return FutureNode{}, 0, err + return futureNode{}, 0, err } // not a leaf node, archive subtree @@ -699,7 +707,7 @@ func (arch *Archiver) saveTree(ctx context.Context, snPath string, atree *Tree, arch.trackItem(snItem, oldNode, n, is, time.Since(start)) }) if err != nil { - return FutureNode{}, 0, err + return futureNode{}, 0, err } nodes = append(nodes, fn) } @@ -779,16 +787,16 @@ func (arch *Archiver) loadParentTree(ctx context.Context, sn *restic.Snapshot) * // runWorkers starts the worker pools, which are stopped when the context is cancelled. func (arch *Archiver) runWorkers(ctx context.Context, wg *errgroup.Group) { - arch.blobSaver = NewBlobSaver(ctx, wg, arch.Repo, arch.Options.SaveBlobConcurrency) + arch.blobSaver = newBlobSaver(ctx, wg, arch.Repo, arch.Options.SaveBlobConcurrency) - arch.fileSaver = NewFileSaver(ctx, wg, + arch.fileSaver = newFileSaver(ctx, wg, arch.blobSaver.Save, arch.Repo.Config().ChunkerPolynomial, arch.Options.ReadConcurrency, arch.Options.SaveBlobConcurrency) arch.fileSaver.CompleteBlob = arch.CompleteBlob arch.fileSaver.NodeFromFileInfo = arch.nodeFromFileInfo - arch.treeSaver = NewTreeSaver(ctx, wg, arch.Options.SaveTreeConcurrency, arch.blobSaver.Save, arch.Error) + arch.treeSaver = newTreeSaver(ctx, wg, arch.Options.SaveTreeConcurrency, arch.blobSaver.Save, arch.Error) } func (arch *Archiver) stopWorkers() { @@ -809,7 +817,7 @@ func (arch *Archiver) Snapshot(ctx context.Context, targets []string, opts Snaps return nil, restic.ID{}, nil, err } - atree, err := NewTree(arch.FS, cleanTargets) + atree, err := newTree(arch.FS, cleanTargets) if err != nil { return nil, restic.ID{}, nil, err } diff --git a/internal/archiver/archiver_test.go b/internal/archiver/archiver_test.go index d67b5b06a..74fecef80 100644 --- a/internal/archiver/archiver_test.go +++ b/internal/archiver/archiver_test.go @@ -1121,7 +1121,7 @@ func TestArchiverSaveTree(t *testing.T) { test.prepare(t) } - atree, err := NewTree(testFS, test.targets) + atree, err := newTree(testFS, test.targets) if err != nil { t.Fatal(err) } diff --git a/internal/archiver/blob_saver.go b/internal/archiver/blob_saver.go index d4347a169..356a32ce2 100644 --- a/internal/archiver/blob_saver.go +++ b/internal/archiver/blob_saver.go @@ -9,22 +9,22 @@ import ( "golang.org/x/sync/errgroup" ) -// Saver allows saving a blob. -type Saver interface { +// saver allows saving a blob. +type saver interface { SaveBlob(ctx context.Context, t restic.BlobType, data []byte, id restic.ID, storeDuplicate bool) (restic.ID, bool, int, error) } -// BlobSaver concurrently saves incoming blobs to the repo. -type BlobSaver struct { - repo Saver +// blobSaver concurrently saves incoming blobs to the repo. +type blobSaver struct { + repo saver ch chan<- saveBlobJob } -// NewBlobSaver returns a new blob. A worker pool is started, it is stopped +// newBlobSaver returns a new blob. A worker pool is started, it is stopped // when ctx is cancelled. -func NewBlobSaver(ctx context.Context, wg *errgroup.Group, repo Saver, workers uint) *BlobSaver { +func newBlobSaver(ctx context.Context, wg *errgroup.Group, repo saver, workers uint) *blobSaver { ch := make(chan saveBlobJob) - s := &BlobSaver{ + s := &blobSaver{ repo: repo, ch: ch, } @@ -38,13 +38,13 @@ func NewBlobSaver(ctx context.Context, wg *errgroup.Group, repo Saver, workers u return s } -func (s *BlobSaver) TriggerShutdown() { +func (s *blobSaver) TriggerShutdown() { close(s.ch) } // Save stores a blob in the repo. It checks the index and the known blobs // before saving anything. It takes ownership of the buffer passed in. -func (s *BlobSaver) Save(ctx context.Context, t restic.BlobType, buf *Buffer, filename string, cb func(res SaveBlobResponse)) { +func (s *blobSaver) Save(ctx context.Context, t restic.BlobType, buf *buffer, filename string, cb func(res saveBlobResponse)) { select { case s.ch <- saveBlobJob{BlobType: t, buf: buf, fn: filename, cb: cb}: case <-ctx.Done(): @@ -54,26 +54,26 @@ func (s *BlobSaver) Save(ctx context.Context, t restic.BlobType, buf *Buffer, fi type saveBlobJob struct { restic.BlobType - buf *Buffer + buf *buffer fn string - cb func(res SaveBlobResponse) + cb func(res saveBlobResponse) } -type SaveBlobResponse struct { +type saveBlobResponse struct { id restic.ID length int sizeInRepo int known bool } -func (s *BlobSaver) saveBlob(ctx context.Context, t restic.BlobType, buf []byte) (SaveBlobResponse, error) { +func (s *blobSaver) saveBlob(ctx context.Context, t restic.BlobType, buf []byte) (saveBlobResponse, error) { id, known, sizeInRepo, err := s.repo.SaveBlob(ctx, t, buf, restic.ID{}, false) if err != nil { - return SaveBlobResponse{}, err + return saveBlobResponse{}, err } - return SaveBlobResponse{ + return saveBlobResponse{ id: id, length: len(buf), sizeInRepo: sizeInRepo, @@ -81,7 +81,7 @@ func (s *BlobSaver) saveBlob(ctx context.Context, t restic.BlobType, buf []byte) }, nil } -func (s *BlobSaver) worker(ctx context.Context, jobs <-chan saveBlobJob) error { +func (s *blobSaver) worker(ctx context.Context, jobs <-chan saveBlobJob) error { for { var job saveBlobJob var ok bool diff --git a/internal/archiver/blob_saver_test.go b/internal/archiver/blob_saver_test.go index f7ef2f47d..e23ed12e5 100644 --- a/internal/archiver/blob_saver_test.go +++ b/internal/archiver/blob_saver_test.go @@ -38,20 +38,20 @@ func TestBlobSaver(t *testing.T) { wg, ctx := errgroup.WithContext(ctx) saver := &saveFail{} - b := NewBlobSaver(ctx, wg, saver, uint(runtime.NumCPU())) + b := newBlobSaver(ctx, wg, saver, uint(runtime.NumCPU())) var wait sync.WaitGroup - var results []SaveBlobResponse + var results []saveBlobResponse var lock sync.Mutex wait.Add(20) for i := 0; i < 20; i++ { - buf := &Buffer{Data: []byte(fmt.Sprintf("foo%d", i))} + buf := &buffer{Data: []byte(fmt.Sprintf("foo%d", i))} idx := i lock.Lock() - results = append(results, SaveBlobResponse{}) + results = append(results, saveBlobResponse{}) lock.Unlock() - b.Save(ctx, restic.DataBlob, buf, "file", func(res SaveBlobResponse) { + b.Save(ctx, restic.DataBlob, buf, "file", func(res saveBlobResponse) { lock.Lock() results[idx] = res lock.Unlock() @@ -95,11 +95,11 @@ func TestBlobSaverError(t *testing.T) { failAt: int32(test.failAt), } - b := NewBlobSaver(ctx, wg, saver, uint(runtime.NumCPU())) + b := newBlobSaver(ctx, wg, saver, uint(runtime.NumCPU())) for i := 0; i < test.blobs; i++ { - buf := &Buffer{Data: []byte(fmt.Sprintf("foo%d", i))} - b.Save(ctx, restic.DataBlob, buf, "errfile", func(res SaveBlobResponse) {}) + buf := &buffer{Data: []byte(fmt.Sprintf("foo%d", i))} + b.Save(ctx, restic.DataBlob, buf, "errfile", func(res saveBlobResponse) {}) } b.TriggerShutdown() diff --git a/internal/archiver/buffer.go b/internal/archiver/buffer.go index 39bda2668..d5bfb46b3 100644 --- a/internal/archiver/buffer.go +++ b/internal/archiver/buffer.go @@ -1,14 +1,14 @@ package archiver -// Buffer is a reusable buffer. After the buffer has been used, Release should +// buffer is a reusable buffer. After the buffer has been used, Release should // be called so the underlying slice is put back into the pool. -type Buffer struct { +type buffer struct { Data []byte - pool *BufferPool + pool *bufferPool } // Release puts the buffer back into the pool it came from. -func (b *Buffer) Release() { +func (b *buffer) Release() { pool := b.pool if pool == nil || cap(b.Data) > pool.defaultSize { return @@ -20,32 +20,32 @@ func (b *Buffer) Release() { } } -// BufferPool implements a limited set of reusable buffers. -type BufferPool struct { - ch chan *Buffer +// bufferPool implements a limited set of reusable buffers. +type bufferPool struct { + ch chan *buffer defaultSize int } -// NewBufferPool initializes a new buffer pool. The pool stores at most max +// newBufferPool initializes a new buffer pool. The pool stores at most max // items. New buffers are created with defaultSize. Buffers that have grown // larger are not put back. -func NewBufferPool(max int, defaultSize int) *BufferPool { - b := &BufferPool{ - ch: make(chan *Buffer, max), +func newBufferPool(max int, defaultSize int) *bufferPool { + b := &bufferPool{ + ch: make(chan *buffer, max), defaultSize: defaultSize, } return b } // Get returns a new buffer, either from the pool or newly allocated. -func (pool *BufferPool) Get() *Buffer { +func (pool *bufferPool) Get() *buffer { select { case buf := <-pool.ch: return buf default: } - b := &Buffer{ + b := &buffer{ Data: make([]byte, pool.defaultSize), pool: pool, } diff --git a/internal/archiver/doc.go b/internal/archiver/doc.go index 928145aa2..1b9603975 100644 --- a/internal/archiver/doc.go +++ b/internal/archiver/doc.go @@ -1,12 +1,3 @@ // Package archiver contains the code which reads files, splits them into // chunks and saves the data to the repository. -// -// An Archiver has a number of worker goroutines handling saving the different -// data structures to the repository, the details are implemented by the -// FileSaver, BlobSaver, and TreeSaver types. -// -// The main goroutine (the one calling Snapshot()) traverses the directory tree -// and delegates all work to these worker pools. They return a type -// (FutureFile, FutureBlob, and FutureTree) which can be resolved later, by -// calling Wait() on it. package archiver diff --git a/internal/archiver/file_saver.go b/internal/archiver/file_saver.go index d10334301..fa19cab86 100644 --- a/internal/archiver/file_saver.go +++ b/internal/archiver/file_saver.go @@ -15,13 +15,13 @@ import ( "golang.org/x/sync/errgroup" ) -// SaveBlobFn saves a blob to a repo. -type SaveBlobFn func(context.Context, restic.BlobType, *Buffer, string, func(res SaveBlobResponse)) +// saveBlobFn saves a blob to a repo. +type saveBlobFn func(context.Context, restic.BlobType, *buffer, string, func(res saveBlobResponse)) -// FileSaver concurrently saves incoming files to the repo. -type FileSaver struct { - saveFilePool *BufferPool - saveBlob SaveBlobFn +// fileSaver concurrently saves incoming files to the repo. +type fileSaver struct { + saveFilePool *bufferPool + saveBlob saveBlobFn pol chunker.Pol @@ -32,18 +32,18 @@ type FileSaver struct { NodeFromFileInfo func(snPath, filename string, fi os.FileInfo, ignoreXattrListError bool) (*restic.Node, error) } -// NewFileSaver returns a new file saver. A worker pool with fileWorkers is +// newFileSaver returns a new file saver. A worker pool with fileWorkers is // started, it is stopped when ctx is cancelled. -func NewFileSaver(ctx context.Context, wg *errgroup.Group, save SaveBlobFn, pol chunker.Pol, fileWorkers, blobWorkers uint) *FileSaver { +func newFileSaver(ctx context.Context, wg *errgroup.Group, save saveBlobFn, pol chunker.Pol, fileWorkers, blobWorkers uint) *fileSaver { ch := make(chan saveFileJob) debug.Log("new file saver with %v file workers and %v blob workers", fileWorkers, blobWorkers) poolSize := fileWorkers + blobWorkers - s := &FileSaver{ + s := &fileSaver{ saveBlob: save, - saveFilePool: NewBufferPool(int(poolSize), chunker.MaxSize), + saveFilePool: newBufferPool(int(poolSize), chunker.MaxSize), pol: pol, ch: ch, @@ -60,18 +60,18 @@ func NewFileSaver(ctx context.Context, wg *errgroup.Group, save SaveBlobFn, pol return s } -func (s *FileSaver) TriggerShutdown() { +func (s *fileSaver) TriggerShutdown() { close(s.ch) } -// CompleteFunc is called when the file has been saved. -type CompleteFunc func(*restic.Node, ItemStats) +// fileCompleteFunc is called when the file has been saved. +type fileCompleteFunc func(*restic.Node, ItemStats) // Save stores the file f and returns the data once it has been completed. The // file is closed by Save. completeReading is only called if the file was read // successfully. complete is always called. If completeReading is called, then // this will always happen before calling complete. -func (s *FileSaver) Save(ctx context.Context, snPath string, target string, file fs.File, fi os.FileInfo, start func(), completeReading func(), complete CompleteFunc) FutureNode { +func (s *fileSaver) Save(ctx context.Context, snPath string, target string, file fs.File, fi os.FileInfo, start func(), completeReading func(), complete fileCompleteFunc) futureNode { fn, ch := newFutureNode() job := saveFileJob{ snPath: snPath, @@ -105,11 +105,11 @@ type saveFileJob struct { start func() completeReading func() - complete CompleteFunc + complete fileCompleteFunc } // saveFile stores the file f in the repo, then closes it. -func (s *FileSaver) saveFile(ctx context.Context, chnker *chunker.Chunker, snPath string, target string, f fs.File, fi os.FileInfo, start func(), finishReading func(), finish func(res futureNodeResult)) { +func (s *fileSaver) saveFile(ctx context.Context, chnker *chunker.Chunker, snPath string, target string, f fs.File, fi os.FileInfo, start func(), finishReading func(), finish func(res futureNodeResult)) { start() fnr := futureNodeResult{ @@ -205,7 +205,7 @@ func (s *FileSaver) saveFile(ctx context.Context, chnker *chunker.Chunker, snPat node.Content = append(node.Content, restic.ID{}) lock.Unlock() - s.saveBlob(ctx, restic.DataBlob, buf, target, func(sbr SaveBlobResponse) { + s.saveBlob(ctx, restic.DataBlob, buf, target, func(sbr saveBlobResponse) { lock.Lock() if !sbr.known { fnr.stats.DataBlobs++ @@ -246,7 +246,7 @@ func (s *FileSaver) saveFile(ctx context.Context, chnker *chunker.Chunker, snPat completeBlob() } -func (s *FileSaver) worker(ctx context.Context, jobs <-chan saveFileJob) { +func (s *fileSaver) worker(ctx context.Context, jobs <-chan saveFileJob) { // a worker has one chunker which is reused for each file (because it contains a rather large buffer) chnker := chunker.New(nil, s.pol) diff --git a/internal/archiver/file_saver_test.go b/internal/archiver/file_saver_test.go index 948d7ce3c..ede616e28 100644 --- a/internal/archiver/file_saver_test.go +++ b/internal/archiver/file_saver_test.go @@ -30,11 +30,11 @@ func createTestFiles(t testing.TB, num int) (files []string) { return files } -func startFileSaver(ctx context.Context, t testing.TB) (*FileSaver, context.Context, *errgroup.Group) { +func startFileSaver(ctx context.Context, t testing.TB) (*fileSaver, context.Context, *errgroup.Group) { wg, ctx := errgroup.WithContext(ctx) - saveBlob := func(ctx context.Context, tpe restic.BlobType, buf *Buffer, _ string, cb func(SaveBlobResponse)) { - cb(SaveBlobResponse{ + saveBlob := func(ctx context.Context, tpe restic.BlobType, buf *buffer, _ string, cb func(saveBlobResponse)) { + cb(saveBlobResponse{ id: restic.Hash(buf.Data), length: len(buf.Data), sizeInRepo: len(buf.Data), @@ -48,7 +48,7 @@ func startFileSaver(ctx context.Context, t testing.TB) (*FileSaver, context.Cont t.Fatal(err) } - s := NewFileSaver(ctx, wg, saveBlob, pol, workers, workers) + s := newFileSaver(ctx, wg, saveBlob, pol, workers, workers) s.NodeFromFileInfo = func(snPath, filename string, fi os.FileInfo, ignoreXattrListError bool) (*restic.Node, error) { return fs.NodeFromFileInfo(filename, fi, ignoreXattrListError) } @@ -69,7 +69,7 @@ func TestFileSaver(t *testing.T) { testFs := fs.Local{} s, ctx, wg := startFileSaver(ctx, t) - var results []FutureNode + var results []futureNode for _, filename := range files { f, err := testFs.OpenFile(filename, os.O_RDONLY, 0) diff --git a/internal/archiver/scanner.go b/internal/archiver/scanner.go index d61e5ce47..433d38821 100644 --- a/internal/archiver/scanner.go +++ b/internal/archiver/scanner.go @@ -38,7 +38,7 @@ type ScanStats struct { Bytes uint64 } -func (s *Scanner) scanTree(ctx context.Context, stats ScanStats, tree Tree) (ScanStats, error) { +func (s *Scanner) scanTree(ctx context.Context, stats ScanStats, tree tree) (ScanStats, error) { // traverse the path in the file system for all leaf nodes if tree.Leaf() { abstarget, err := s.FS.Abs(tree.Path) @@ -83,7 +83,7 @@ func (s *Scanner) Scan(ctx context.Context, targets []string) error { debug.Log("clean targets %v", cleanTargets) // we're using the same tree representation as the archiver does - tree, err := NewTree(s.FS, cleanTargets) + tree, err := newTree(s.FS, cleanTargets) if err != nil { return err } diff --git a/internal/archiver/tree.go b/internal/archiver/tree.go index cd03ba521..f4eb1abde 100644 --- a/internal/archiver/tree.go +++ b/internal/archiver/tree.go @@ -9,7 +9,7 @@ import ( "github.com/restic/restic/internal/fs" ) -// Tree recursively defines how a snapshot should look like when +// tree recursively defines how a snapshot should look like when // archived. // // When `Path` is set, this is a leaf node and the contents of `Path` should be @@ -20,8 +20,8 @@ import ( // // `FileInfoPath` is used to extract metadata for intermediate (=non-leaf) // trees. -type Tree struct { - Nodes map[string]Tree +type tree struct { + Nodes map[string]tree Path string // where the files/dirs to be saved are found FileInfoPath string // where the dir can be found that is not included itself, but its subdirs Root string // parent directory of the tree @@ -95,13 +95,13 @@ func rootDirectory(fs fs.FS, target string) string { } // Add adds a new file or directory to the tree. -func (t *Tree) Add(fs fs.FS, path string) error { +func (t *tree) Add(fs fs.FS, path string) error { if path == "" { panic("invalid path (empty string)") } if t.Nodes == nil { - t.Nodes = make(map[string]Tree) + t.Nodes = make(map[string]tree) } pc, virtualPrefix := pathComponents(fs, path, false) @@ -111,7 +111,7 @@ func (t *Tree) Add(fs fs.FS, path string) error { name := pc[0] root := rootDirectory(fs, path) - tree := Tree{Root: root} + tree := tree{Root: root} origName := name i := 0 @@ -152,63 +152,63 @@ func (t *Tree) Add(fs fs.FS, path string) error { } // add adds a new target path into the tree. -func (t *Tree) add(fs fs.FS, target, root string, pc []string) error { +func (t *tree) add(fs fs.FS, target, root string, pc []string) error { if len(pc) == 0 { return errors.Errorf("invalid path %q", target) } if t.Nodes == nil { - t.Nodes = make(map[string]Tree) + t.Nodes = make(map[string]tree) } name := pc[0] if len(pc) == 1 { - tree, ok := t.Nodes[name] + node, ok := t.Nodes[name] if !ok { - t.Nodes[name] = Tree{Path: target} + t.Nodes[name] = tree{Path: target} return nil } - if tree.Path != "" { + if node.Path != "" { return errors.Errorf("path is already set for target %v", target) } - tree.Path = target - t.Nodes[name] = tree + node.Path = target + t.Nodes[name] = node return nil } - tree := Tree{} + node := tree{} if other, ok := t.Nodes[name]; ok { - tree = other + node = other } subroot := fs.Join(root, name) - tree.FileInfoPath = subroot + node.FileInfoPath = subroot - err := tree.add(fs, target, subroot, pc[1:]) + err := node.add(fs, target, subroot, pc[1:]) if err != nil { return err } - t.Nodes[name] = tree + t.Nodes[name] = node return nil } -func (t Tree) String() string { +func (t tree) String() string { return formatTree(t, "") } // Leaf returns true if this is a leaf node, which means Path is set to a // non-empty string and the contents of Path should be inserted at this point // in the tree. -func (t Tree) Leaf() bool { +func (t tree) Leaf() bool { return t.Path != "" } // NodeNames returns the sorted list of subtree names. -func (t Tree) NodeNames() []string { +func (t tree) NodeNames() []string { // iterate over the nodes of atree in lexicographic (=deterministic) order names := make([]string, 0, len(t.Nodes)) for name := range t.Nodes { @@ -219,7 +219,7 @@ func (t Tree) NodeNames() []string { } // formatTree returns a text representation of the tree t. -func formatTree(t Tree, indent string) (s string) { +func formatTree(t tree, indent string) (s string) { for name, node := range t.Nodes { s += fmt.Sprintf("%v/%v, root %q, path %q, meta %q\n", indent, name, node.Root, node.Path, node.FileInfoPath) s += formatTree(node, indent+" ") @@ -228,7 +228,7 @@ func formatTree(t Tree, indent string) (s string) { } // unrollTree unrolls the tree so that only leaf nodes have Path set. -func unrollTree(f fs.FS, t *Tree) error { +func unrollTree(f fs.FS, t *tree) error { // if the current tree is a leaf node (Path is set) and has additional // nodes, add the contents of Path to the nodes. if t.Path != "" && len(t.Nodes) > 0 { @@ -252,7 +252,7 @@ func unrollTree(f fs.FS, t *Tree) error { return errors.Errorf("tree unrollTree: collision on path, node %#v, path %q", node, f.Join(t.Path, entry)) } - t.Nodes[entry] = Tree{Path: f.Join(t.Path, entry)} + t.Nodes[entry] = tree{Path: f.Join(t.Path, entry)} } t.Path = "" } @@ -269,10 +269,10 @@ func unrollTree(f fs.FS, t *Tree) error { return nil } -// NewTree creates a Tree from the target files/directories. -func NewTree(fs fs.FS, targets []string) (*Tree, error) { +// newTree creates a Tree from the target files/directories. +func newTree(fs fs.FS, targets []string) (*tree, error) { debug.Log("targets: %v", targets) - tree := &Tree{} + tree := &tree{} seen := make(map[string]struct{}) for _, target := range targets { target = fs.Clean(target) diff --git a/internal/archiver/tree_saver.go b/internal/archiver/tree_saver.go index 9c11b48f0..aeedefef5 100644 --- a/internal/archiver/tree_saver.go +++ b/internal/archiver/tree_saver.go @@ -9,20 +9,20 @@ import ( "golang.org/x/sync/errgroup" ) -// TreeSaver concurrently saves incoming trees to the repo. -type TreeSaver struct { - saveBlob SaveBlobFn +// treeSaver concurrently saves incoming trees to the repo. +type treeSaver struct { + saveBlob saveBlobFn errFn ErrorFunc ch chan<- saveTreeJob } -// NewTreeSaver returns a new tree saver. A worker pool with treeWorkers is +// newTreeSaver returns a new tree saver. A worker pool with treeWorkers is // started, it is stopped when ctx is cancelled. -func NewTreeSaver(ctx context.Context, wg *errgroup.Group, treeWorkers uint, saveBlob SaveBlobFn, errFn ErrorFunc) *TreeSaver { +func newTreeSaver(ctx context.Context, wg *errgroup.Group, treeWorkers uint, saveBlob saveBlobFn, errFn ErrorFunc) *treeSaver { ch := make(chan saveTreeJob) - s := &TreeSaver{ + s := &treeSaver{ ch: ch, saveBlob: saveBlob, errFn: errFn, @@ -37,12 +37,12 @@ func NewTreeSaver(ctx context.Context, wg *errgroup.Group, treeWorkers uint, sav return s } -func (s *TreeSaver) TriggerShutdown() { +func (s *treeSaver) TriggerShutdown() { close(s.ch) } // Save stores the dir d and returns the data once it has been completed. -func (s *TreeSaver) Save(ctx context.Context, snPath string, target string, node *restic.Node, nodes []FutureNode, complete CompleteFunc) FutureNode { +func (s *treeSaver) Save(ctx context.Context, snPath string, target string, node *restic.Node, nodes []futureNode, complete fileCompleteFunc) futureNode { fn, ch := newFutureNode() job := saveTreeJob{ snPath: snPath, @@ -66,13 +66,13 @@ type saveTreeJob struct { snPath string target string node *restic.Node - nodes []FutureNode + nodes []futureNode ch chan<- futureNodeResult - complete CompleteFunc + complete fileCompleteFunc } // save stores the nodes as a tree in the repo. -func (s *TreeSaver) save(ctx context.Context, job *saveTreeJob) (*restic.Node, ItemStats, error) { +func (s *treeSaver) save(ctx context.Context, job *saveTreeJob) (*restic.Node, ItemStats, error) { var stats ItemStats node := job.node nodes := job.nodes @@ -84,7 +84,7 @@ func (s *TreeSaver) save(ctx context.Context, job *saveTreeJob) (*restic.Node, I for i, fn := range nodes { // fn is a copy, so clear the original value explicitly - nodes[i] = FutureNode{} + nodes[i] = futureNode{} fnr := fn.take(ctx) // return the error if it wasn't ignored @@ -128,9 +128,9 @@ func (s *TreeSaver) save(ctx context.Context, job *saveTreeJob) (*restic.Node, I return nil, stats, err } - b := &Buffer{Data: buf} - ch := make(chan SaveBlobResponse, 1) - s.saveBlob(ctx, restic.TreeBlob, b, job.target, func(res SaveBlobResponse) { + b := &buffer{Data: buf} + ch := make(chan saveBlobResponse, 1) + s.saveBlob(ctx, restic.TreeBlob, b, job.target, func(res saveBlobResponse) { ch <- res }) @@ -149,7 +149,7 @@ func (s *TreeSaver) save(ctx context.Context, job *saveTreeJob) (*restic.Node, I } } -func (s *TreeSaver) worker(ctx context.Context, jobs <-chan saveTreeJob) error { +func (s *treeSaver) worker(ctx context.Context, jobs <-chan saveTreeJob) error { for { var job saveTreeJob var ok bool diff --git a/internal/archiver/tree_saver_test.go b/internal/archiver/tree_saver_test.go index 47a3f3842..4aa4c51f1 100644 --- a/internal/archiver/tree_saver_test.go +++ b/internal/archiver/tree_saver_test.go @@ -12,8 +12,8 @@ import ( "golang.org/x/sync/errgroup" ) -func treeSaveHelper(_ context.Context, _ restic.BlobType, buf *Buffer, _ string, cb func(res SaveBlobResponse)) { - cb(SaveBlobResponse{ +func treeSaveHelper(_ context.Context, _ restic.BlobType, buf *buffer, _ string, cb func(res saveBlobResponse)) { + cb(saveBlobResponse{ id: restic.NewRandomID(), known: false, length: len(buf.Data), @@ -21,7 +21,7 @@ func treeSaveHelper(_ context.Context, _ restic.BlobType, buf *Buffer, _ string, }) } -func setupTreeSaver() (context.Context, context.CancelFunc, *TreeSaver, func() error) { +func setupTreeSaver() (context.Context, context.CancelFunc, *treeSaver, func() error) { ctx, cancel := context.WithCancel(context.Background()) wg, ctx := errgroup.WithContext(ctx) @@ -29,7 +29,7 @@ func setupTreeSaver() (context.Context, context.CancelFunc, *TreeSaver, func() e return err } - b := NewTreeSaver(ctx, wg, uint(runtime.NumCPU()), treeSaveHelper, errFn) + b := newTreeSaver(ctx, wg, uint(runtime.NumCPU()), treeSaveHelper, errFn) shutdown := func() error { b.TriggerShutdown() @@ -43,7 +43,7 @@ func TestTreeSaver(t *testing.T) { ctx, cancel, b, shutdown := setupTreeSaver() defer cancel() - var results []FutureNode + var results []futureNode for i := 0; i < 20; i++ { node := &restic.Node{ @@ -83,13 +83,13 @@ func TestTreeSaverError(t *testing.T) { ctx, cancel, b, shutdown := setupTreeSaver() defer cancel() - var results []FutureNode + var results []futureNode for i := 0; i < test.trees; i++ { node := &restic.Node{ Name: fmt.Sprintf("file-%d", i), } - nodes := []FutureNode{ + nodes := []futureNode{ newFutureNodeWithResult(futureNodeResult{node: &restic.Node{ Name: fmt.Sprintf("child-%d", i), }}), @@ -128,7 +128,7 @@ func TestTreeSaverDuplicates(t *testing.T) { node := &restic.Node{ Name: "file", } - nodes := []FutureNode{ + nodes := []futureNode{ newFutureNodeWithResult(futureNodeResult{node: &restic.Node{ Name: "child", }}), diff --git a/internal/archiver/tree_test.go b/internal/archiver/tree_test.go index a9d2d97ff..c9fe776b1 100644 --- a/internal/archiver/tree_test.go +++ b/internal/archiver/tree_test.go @@ -12,7 +12,7 @@ import ( ) // debug.Log requires Tree.String. -var _ fmt.Stringer = Tree{} +var _ fmt.Stringer = tree{} func TestPathComponents(t *testing.T) { var tests = []struct { @@ -142,20 +142,20 @@ func TestTree(t *testing.T) { var tests = []struct { targets []string src TestDir - want Tree + want tree unix bool win bool mustError bool }{ { targets: []string{"foo"}, - want: Tree{Nodes: map[string]Tree{ + want: tree{Nodes: map[string]tree{ "foo": {Path: "foo", Root: "."}, }}, }, { targets: []string{"foo", "bar", "baz"}, - want: Tree{Nodes: map[string]Tree{ + want: tree{Nodes: map[string]tree{ "foo": {Path: "foo", Root: "."}, "bar": {Path: "bar", Root: "."}, "baz": {Path: "baz", Root: "."}, @@ -163,8 +163,8 @@ func TestTree(t *testing.T) { }, { targets: []string{"foo/user1", "foo/user2", "foo/other"}, - want: Tree{Nodes: map[string]Tree{ - "foo": {Root: ".", FileInfoPath: "foo", Nodes: map[string]Tree{ + want: tree{Nodes: map[string]tree{ + "foo": {Root: ".", FileInfoPath: "foo", Nodes: map[string]tree{ "user1": {Path: filepath.FromSlash("foo/user1")}, "user2": {Path: filepath.FromSlash("foo/user2")}, "other": {Path: filepath.FromSlash("foo/other")}, @@ -173,9 +173,9 @@ func TestTree(t *testing.T) { }, { targets: []string{"foo/work/user1", "foo/work/user2"}, - want: Tree{Nodes: map[string]Tree{ - "foo": {Root: ".", FileInfoPath: "foo", Nodes: map[string]Tree{ - "work": {FileInfoPath: filepath.FromSlash("foo/work"), Nodes: map[string]Tree{ + want: tree{Nodes: map[string]tree{ + "foo": {Root: ".", FileInfoPath: "foo", Nodes: map[string]tree{ + "work": {FileInfoPath: filepath.FromSlash("foo/work"), Nodes: map[string]tree{ "user1": {Path: filepath.FromSlash("foo/work/user1")}, "user2": {Path: filepath.FromSlash("foo/work/user2")}, }}, @@ -184,50 +184,50 @@ func TestTree(t *testing.T) { }, { targets: []string{"foo/user1", "bar/user1", "foo/other"}, - want: Tree{Nodes: map[string]Tree{ - "foo": {Root: ".", FileInfoPath: "foo", Nodes: map[string]Tree{ + want: tree{Nodes: map[string]tree{ + "foo": {Root: ".", FileInfoPath: "foo", Nodes: map[string]tree{ "user1": {Path: filepath.FromSlash("foo/user1")}, "other": {Path: filepath.FromSlash("foo/other")}, }}, - "bar": {Root: ".", FileInfoPath: "bar", Nodes: map[string]Tree{ + "bar": {Root: ".", FileInfoPath: "bar", Nodes: map[string]tree{ "user1": {Path: filepath.FromSlash("bar/user1")}, }}, }}, }, { targets: []string{"../work"}, - want: Tree{Nodes: map[string]Tree{ + want: tree{Nodes: map[string]tree{ "work": {Root: "..", Path: filepath.FromSlash("../work")}, }}, }, { targets: []string{"../work/other"}, - want: Tree{Nodes: map[string]Tree{ - "work": {Root: "..", FileInfoPath: filepath.FromSlash("../work"), Nodes: map[string]Tree{ + want: tree{Nodes: map[string]tree{ + "work": {Root: "..", FileInfoPath: filepath.FromSlash("../work"), Nodes: map[string]tree{ "other": {Path: filepath.FromSlash("../work/other")}, }}, }}, }, { targets: []string{"foo/user1", "../work/other", "foo/user2"}, - want: Tree{Nodes: map[string]Tree{ - "foo": {Root: ".", FileInfoPath: "foo", Nodes: map[string]Tree{ + want: tree{Nodes: map[string]tree{ + "foo": {Root: ".", FileInfoPath: "foo", Nodes: map[string]tree{ "user1": {Path: filepath.FromSlash("foo/user1")}, "user2": {Path: filepath.FromSlash("foo/user2")}, }}, - "work": {Root: "..", FileInfoPath: filepath.FromSlash("../work"), Nodes: map[string]Tree{ + "work": {Root: "..", FileInfoPath: filepath.FromSlash("../work"), Nodes: map[string]tree{ "other": {Path: filepath.FromSlash("../work/other")}, }}, }}, }, { targets: []string{"foo/user1", "../foo/other", "foo/user2"}, - want: Tree{Nodes: map[string]Tree{ - "foo": {Root: ".", FileInfoPath: "foo", Nodes: map[string]Tree{ + want: tree{Nodes: map[string]tree{ + "foo": {Root: ".", FileInfoPath: "foo", Nodes: map[string]tree{ "user1": {Path: filepath.FromSlash("foo/user1")}, "user2": {Path: filepath.FromSlash("foo/user2")}, }}, - "foo-1": {Root: "..", FileInfoPath: filepath.FromSlash("../foo"), Nodes: map[string]Tree{ + "foo-1": {Root: "..", FileInfoPath: filepath.FromSlash("../foo"), Nodes: map[string]tree{ "other": {Path: filepath.FromSlash("../foo/other")}, }}, }}, @@ -240,11 +240,11 @@ func TestTree(t *testing.T) { }, }, targets: []string{"foo", "foo/work"}, - want: Tree{Nodes: map[string]Tree{ + want: tree{Nodes: map[string]tree{ "foo": { Root: ".", FileInfoPath: "foo", - Nodes: map[string]Tree{ + Nodes: map[string]tree{ "file": {Path: filepath.FromSlash("foo/file")}, "work": {Path: filepath.FromSlash("foo/work")}, }, @@ -261,11 +261,11 @@ func TestTree(t *testing.T) { }, }, targets: []string{"foo/work", "foo"}, - want: Tree{Nodes: map[string]Tree{ + want: tree{Nodes: map[string]tree{ "foo": { Root: ".", FileInfoPath: "foo", - Nodes: map[string]Tree{ + Nodes: map[string]tree{ "file": {Path: filepath.FromSlash("foo/file")}, "work": {Path: filepath.FromSlash("foo/work")}, }, @@ -282,11 +282,11 @@ func TestTree(t *testing.T) { }, }, targets: []string{"foo/work", "foo/work/user2"}, - want: Tree{Nodes: map[string]Tree{ - "foo": {Root: ".", FileInfoPath: "foo", Nodes: map[string]Tree{ + want: tree{Nodes: map[string]tree{ + "foo": {Root: ".", FileInfoPath: "foo", Nodes: map[string]tree{ "work": { FileInfoPath: filepath.FromSlash("foo/work"), - Nodes: map[string]Tree{ + Nodes: map[string]tree{ "user1": {Path: filepath.FromSlash("foo/work/user1")}, "user2": {Path: filepath.FromSlash("foo/work/user2")}, }, @@ -304,10 +304,10 @@ func TestTree(t *testing.T) { }, }, targets: []string{"foo/work/user2", "foo/work"}, - want: Tree{Nodes: map[string]Tree{ - "foo": {Root: ".", FileInfoPath: "foo", Nodes: map[string]Tree{ + want: tree{Nodes: map[string]tree{ + "foo": {Root: ".", FileInfoPath: "foo", Nodes: map[string]tree{ "work": {FileInfoPath: filepath.FromSlash("foo/work"), - Nodes: map[string]Tree{ + Nodes: map[string]tree{ "user1": {Path: filepath.FromSlash("foo/work/user1")}, "user2": {Path: filepath.FromSlash("foo/work/user2")}, }, @@ -332,12 +332,12 @@ func TestTree(t *testing.T) { }, }, targets: []string{"foo/work/user2/data/secret", "foo"}, - want: Tree{Nodes: map[string]Tree{ - "foo": {Root: ".", FileInfoPath: "foo", Nodes: map[string]Tree{ + want: tree{Nodes: map[string]tree{ + "foo": {Root: ".", FileInfoPath: "foo", Nodes: map[string]tree{ "other": {Path: filepath.FromSlash("foo/other")}, - "work": {FileInfoPath: filepath.FromSlash("foo/work"), Nodes: map[string]Tree{ - "user2": {FileInfoPath: filepath.FromSlash("foo/work/user2"), Nodes: map[string]Tree{ - "data": {FileInfoPath: filepath.FromSlash("foo/work/user2/data"), Nodes: map[string]Tree{ + "work": {FileInfoPath: filepath.FromSlash("foo/work"), Nodes: map[string]tree{ + "user2": {FileInfoPath: filepath.FromSlash("foo/work/user2"), Nodes: map[string]tree{ + "data": {FileInfoPath: filepath.FromSlash("foo/work/user2/data"), Nodes: map[string]tree{ "secret": { Path: filepath.FromSlash("foo/work/user2/data/secret"), }, @@ -368,10 +368,10 @@ func TestTree(t *testing.T) { }, unix: true, targets: []string{"mnt/driveA", "mnt/driveA/work/driveB"}, - want: Tree{Nodes: map[string]Tree{ - "mnt": {Root: ".", FileInfoPath: filepath.FromSlash("mnt"), Nodes: map[string]Tree{ - "driveA": {FileInfoPath: filepath.FromSlash("mnt/driveA"), Nodes: map[string]Tree{ - "work": {FileInfoPath: filepath.FromSlash("mnt/driveA/work"), Nodes: map[string]Tree{ + want: tree{Nodes: map[string]tree{ + "mnt": {Root: ".", FileInfoPath: filepath.FromSlash("mnt"), Nodes: map[string]tree{ + "driveA": {FileInfoPath: filepath.FromSlash("mnt/driveA"), Nodes: map[string]tree{ + "work": {FileInfoPath: filepath.FromSlash("mnt/driveA/work"), Nodes: map[string]tree{ "driveB": { Path: filepath.FromSlash("mnt/driveA/work/driveB"), }, @@ -384,9 +384,9 @@ func TestTree(t *testing.T) { }, { targets: []string{"foo/work/user", "foo/work/user"}, - want: Tree{Nodes: map[string]Tree{ - "foo": {Root: ".", FileInfoPath: "foo", Nodes: map[string]Tree{ - "work": {FileInfoPath: filepath.FromSlash("foo/work"), Nodes: map[string]Tree{ + want: tree{Nodes: map[string]tree{ + "foo": {Root: ".", FileInfoPath: "foo", Nodes: map[string]tree{ + "work": {FileInfoPath: filepath.FromSlash("foo/work"), Nodes: map[string]tree{ "user": {Path: filepath.FromSlash("foo/work/user")}, }}, }}, @@ -394,9 +394,9 @@ func TestTree(t *testing.T) { }, { targets: []string{"./foo/work/user", "foo/work/user"}, - want: Tree{Nodes: map[string]Tree{ - "foo": {Root: ".", FileInfoPath: "foo", Nodes: map[string]Tree{ - "work": {FileInfoPath: filepath.FromSlash("foo/work"), Nodes: map[string]Tree{ + want: tree{Nodes: map[string]tree{ + "foo": {Root: ".", FileInfoPath: "foo", Nodes: map[string]tree{ + "work": {FileInfoPath: filepath.FromSlash("foo/work"), Nodes: map[string]tree{ "user": {Path: filepath.FromSlash("foo/work/user")}, }}, }}, @@ -405,10 +405,10 @@ func TestTree(t *testing.T) { { win: true, targets: []string{`c:\users\foobar\temp`}, - want: Tree{Nodes: map[string]Tree{ - "c": {Root: `c:\`, FileInfoPath: `c:\`, Nodes: map[string]Tree{ - "users": {FileInfoPath: `c:\users`, Nodes: map[string]Tree{ - "foobar": {FileInfoPath: `c:\users\foobar`, Nodes: map[string]Tree{ + want: tree{Nodes: map[string]tree{ + "c": {Root: `c:\`, FileInfoPath: `c:\`, Nodes: map[string]tree{ + "users": {FileInfoPath: `c:\users`, Nodes: map[string]tree{ + "foobar": {FileInfoPath: `c:\users\foobar`, Nodes: map[string]tree{ "temp": {Path: `c:\users\foobar\temp`}, }}, }}, @@ -445,7 +445,7 @@ func TestTree(t *testing.T) { back := rtest.Chdir(t, tempdir) defer back() - tree, err := NewTree(fs.Local{}, test.targets) + tree, err := newTree(fs.Local{}, test.targets) if test.mustError { if err == nil { t.Fatal("expected error, got nil") From f9dbcd25319a742e3036a447c92618b61de9a178 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Tue, 27 Aug 2024 12:07:26 +0200 Subject: [PATCH 2/9] backup: convert reject funcs to use FS interface Depending on parameters the paths in a snapshot do not directly correspond to real paths on the filesystem. Therefore, reject funcs must use the FS interface to work correctly. --- cmd/restic/cmd_backup.go | 80 +++++++++++++++--------------- cmd/restic/exclude.go | 50 +++++++++---------- cmd/restic/exclude_test.go | 11 ++-- internal/archiver/archiver.go | 10 ++-- internal/archiver/archiver_test.go | 14 +++--- internal/archiver/scanner.go | 8 +-- internal/archiver/scanner_test.go | 2 +- internal/fs/fs_local.go | 6 +++ internal/fs/fs_reader.go | 4 ++ internal/fs/interface.go | 1 + 10 files changed, 98 insertions(+), 88 deletions(-) diff --git a/cmd/restic/cmd_backup.go b/cmd/restic/cmd_backup.go index 562108a33..1fdec081b 100644 --- a/cmd/restic/cmd_backup.go +++ b/cmd/restic/cmd_backup.go @@ -314,6 +314,29 @@ func collectRejectByNameFuncs(opts BackupOptions, repo *repository.Repository) ( } fs = append(fs, fsPatterns...) + return fs, nil +} + +// collectRejectFuncs returns a list of all functions which may reject data +// from being saved in a snapshot based on path and file info +func collectRejectFuncs(opts BackupOptions, targets []string, fs fs.FS) (funcs []RejectFunc, err error) { + // allowed devices + if opts.ExcludeOtherFS && !opts.Stdin { + f, err := rejectByDevice(targets, fs) + if err != nil { + return nil, err + } + funcs = append(funcs, f) + } + + if len(opts.ExcludeLargerThan) != 0 && !opts.Stdin { + f, err := rejectBySize(opts.ExcludeLargerThan) + if err != nil { + return nil, err + } + funcs = append(funcs, f) + } + if opts.ExcludeCaches { opts.ExcludeIfPresent = append(opts.ExcludeIfPresent, "CACHEDIR.TAG:Signature: 8a477f597d28d172789f06886806bc55") } @@ -324,33 +347,10 @@ func collectRejectByNameFuncs(opts BackupOptions, repo *repository.Repository) ( return nil, err } - fs = append(fs, f) + funcs = append(funcs, f) } - return fs, nil -} - -// collectRejectFuncs returns a list of all functions which may reject data -// from being saved in a snapshot based on path and file info -func collectRejectFuncs(opts BackupOptions, targets []string) (fs []RejectFunc, err error) { - // allowed devices - if opts.ExcludeOtherFS && !opts.Stdin { - f, err := rejectByDevice(targets) - if err != nil { - return nil, err - } - fs = append(fs, f) - } - - if len(opts.ExcludeLargerThan) != 0 && !opts.Stdin { - f, err := rejectBySize(opts.ExcludeLargerThan) - if err != nil { - return nil, err - } - fs = append(fs, f) - } - - return fs, nil + return funcs, nil } // collectTargets returns a list of target files/dirs from several sources. @@ -505,12 +505,6 @@ func runBackup(ctx context.Context, opts BackupOptions, gopts GlobalOptions, ter return err } - // rejectFuncs collect functions that can reject items from the backup based on path and file info - rejectFuncs, err := collectRejectFuncs(opts, targets) - if err != nil { - return err - } - var parentSnapshot *restic.Snapshot if !opts.Stdin { parentSnapshot, err = findParentSnapshot(ctx, repo, opts, targets, timeStamp) @@ -547,15 +541,6 @@ func runBackup(ctx context.Context, opts BackupOptions, gopts GlobalOptions, ter return true } - selectFilter := func(item string, fi os.FileInfo) bool { - for _, reject := range rejectFuncs { - if reject(item, fi) { - return false - } - } - return true - } - var targetFS fs.FS = fs.Local{} if runtime.GOOS == "windows" && opts.UseFsSnapshot { if err = fs.HasSufficientPrivilegesForVSS(); err != nil { @@ -598,6 +583,21 @@ func runBackup(ctx context.Context, opts BackupOptions, gopts GlobalOptions, ter targets = []string{filename} } + // rejectFuncs collect functions that can reject items from the backup based on path and file info + rejectFuncs, err := collectRejectFuncs(opts, targets, targetFS) + if err != nil { + return err + } + + selectFilter := func(item string, fi os.FileInfo, fs fs.FS) bool { + for _, reject := range rejectFuncs { + if reject(item, fi, fs) { + return false + } + } + return true + } + wg, wgCtx := errgroup.WithContext(ctx) cancelCtx, cancel := context.WithCancel(wgCtx) defer cancel() diff --git a/cmd/restic/exclude.go b/cmd/restic/exclude.go index 4657e4915..f1e1011f2 100644 --- a/cmd/restic/exclude.go +++ b/cmd/restic/exclude.go @@ -6,7 +6,6 @@ import ( "fmt" "io" "os" - "path/filepath" "strings" "sync" @@ -72,7 +71,7 @@ type RejectByNameFunc func(path string) bool // RejectFunc is a function that takes a filename and os.FileInfo of a // file that would be included in the backup. The function returns true if it // should be excluded (rejected) from the backup. -type RejectFunc func(path string, fi os.FileInfo) bool +type RejectFunc func(path string, fi os.FileInfo, fs fs.FS) bool // rejectByPattern returns a RejectByNameFunc which rejects files that match // one of the patterns. @@ -112,7 +111,7 @@ func rejectByInsensitivePattern(patterns []string) RejectByNameFunc { // non-nil if the filename component of excludeFileSpec is empty. If rc is // non-nil, it is going to be used in the RejectByNameFunc to expedite the evaluation // of a directory based on previous visits. -func rejectIfPresent(excludeFileSpec string) (RejectByNameFunc, error) { +func rejectIfPresent(excludeFileSpec string) (RejectFunc, error) { if excludeFileSpec == "" { return nil, errors.New("name for exclusion tagfile is empty") } @@ -129,10 +128,9 @@ func rejectIfPresent(excludeFileSpec string) (RejectByNameFunc, error) { } debug.Log("using %q as exclusion tagfile", tf) rc := &rejectionCache{} - fn := func(filename string) bool { - return isExcludedByFile(filename, tf, tc, rc) - } - return fn, nil + return func(filename string, _ os.FileInfo, fs fs.FS) bool { + return isExcludedByFile(filename, tf, tc, rc, fs) + }, nil } // isExcludedByFile interprets filename as a path and returns true if that file @@ -140,28 +138,28 @@ func rejectIfPresent(excludeFileSpec string) (RejectByNameFunc, error) { // tagfile which bears the name specified in tagFilename and starts with // header. If rc is non-nil, it is used to expedite the evaluation of a // directory based on previous visits. -func isExcludedByFile(filename, tagFilename, header string, rc *rejectionCache) bool { +func isExcludedByFile(filename, tagFilename, header string, rc *rejectionCache, fs fs.FS) bool { if tagFilename == "" { return false } - dir, base := filepath.Split(filename) - if base == tagFilename { + if fs.Base(filename) == tagFilename { return false // do not exclude the tagfile itself } rc.Lock() defer rc.Unlock() + dir := fs.Dir(filename) rejected, visited := rc.Get(dir) if visited { return rejected } - rejected = isDirExcludedByFile(dir, tagFilename, header) + rejected = isDirExcludedByFile(dir, tagFilename, header, fs) rc.Store(dir, rejected) return rejected } -func isDirExcludedByFile(dir, tagFilename, header string) bool { - tf := filepath.Join(dir, tagFilename) +func isDirExcludedByFile(dir, tagFilename, header string, fs fs.FS) bool { + tf := fs.Join(dir, tagFilename) _, err := fs.Lstat(tf) if os.IsNotExist(err) { return false @@ -178,7 +176,7 @@ func isDirExcludedByFile(dir, tagFilename, header string) bool { // From this stage, errors mean tagFilename exists but it is malformed. // Warnings will be generated so that the user is informed that the // indented ignore-action is not performed. - f, err := os.Open(tf) + f, err := fs.OpenFile(tf, os.O_RDONLY, 0) if err != nil { Warnf("could not open exclusion tagfile: %v", err) return false @@ -210,11 +208,11 @@ func isDirExcludedByFile(dir, tagFilename, header string) bool { type DeviceMap map[string]uint64 // NewDeviceMap creates a new device map from the list of source paths. -func NewDeviceMap(allowedSourcePaths []string) (DeviceMap, error) { +func NewDeviceMap(allowedSourcePaths []string, fs fs.FS) (DeviceMap, error) { deviceMap := make(map[string]uint64) for _, item := range allowedSourcePaths { - item, err := filepath.Abs(filepath.Clean(item)) + item, err := fs.Abs(fs.Clean(item)) if err != nil { return nil, err } @@ -240,15 +238,15 @@ func NewDeviceMap(allowedSourcePaths []string) (DeviceMap, error) { } // IsAllowed returns true if the path is located on an allowed device. -func (m DeviceMap) IsAllowed(item string, deviceID uint64) (bool, error) { - for dir := item; ; dir = filepath.Dir(dir) { +func (m DeviceMap) IsAllowed(item string, deviceID uint64, fs fs.FS) (bool, error) { + for dir := item; ; dir = fs.Dir(dir) { debug.Log("item %v, test dir %v", item, dir) // find a parent directory that is on an allowed device (otherwise // we would not traverse the directory at all) allowedID, ok := m[dir] if !ok { - if dir == filepath.Dir(dir) { + if dir == fs.Dir(dir) { // arrived at root, no allowed device found. this should not happen. break } @@ -272,14 +270,14 @@ func (m DeviceMap) IsAllowed(item string, deviceID uint64) (bool, error) { // rejectByDevice returns a RejectFunc that rejects files which are on a // different file systems than the files/dirs in samples. -func rejectByDevice(samples []string) (RejectFunc, error) { - deviceMap, err := NewDeviceMap(samples) +func rejectByDevice(samples []string, filesystem fs.FS) (RejectFunc, error) { + deviceMap, err := NewDeviceMap(samples, filesystem) if err != nil { return nil, err } debug.Log("allowed devices: %v\n", deviceMap) - return func(item string, fi os.FileInfo) bool { + return func(item string, fi os.FileInfo, fs fs.FS) bool { id, err := fs.DeviceID(fi) if err != nil { // This should never happen because gatherDevices() would have @@ -287,7 +285,7 @@ func rejectByDevice(samples []string) (RejectFunc, error) { panic(err) } - allowed, err := deviceMap.IsAllowed(filepath.Clean(item), id) + allowed, err := deviceMap.IsAllowed(fs.Clean(item), id, fs) if err != nil { // this should not happen panic(fmt.Sprintf("error checking device ID of %v: %v", item, err)) @@ -306,7 +304,7 @@ func rejectByDevice(samples []string) (RejectFunc, error) { // special case: make sure we keep mountpoints (directories which // contain a mounted file system). Test this by checking if the parent // directory would be included. - parentDir := filepath.Dir(filepath.Clean(item)) + parentDir := fs.Dir(fs.Clean(item)) parentFI, err := fs.Lstat(parentDir) if err != nil { @@ -322,7 +320,7 @@ func rejectByDevice(samples []string) (RejectFunc, error) { return true } - parentAllowed, err := deviceMap.IsAllowed(parentDir, parentDeviceID) + parentAllowed, err := deviceMap.IsAllowed(parentDir, parentDeviceID, fs) if err != nil { debug.Log("item %v: error checking parent directory: %v", item, err) // if in doubt, reject @@ -369,7 +367,7 @@ func rejectBySize(maxSizeStr string) (RejectFunc, error) { return nil, err } - return func(item string, fi os.FileInfo) bool { + return func(item string, fi os.FileInfo, _ fs.FS) bool { // directory will be ignored if fi.IsDir() { return false diff --git a/cmd/restic/exclude_test.go b/cmd/restic/exclude_test.go index 9a24418ae..166ee1d84 100644 --- a/cmd/restic/exclude_test.go +++ b/cmd/restic/exclude_test.go @@ -5,6 +5,7 @@ import ( "path/filepath" "testing" + "github.com/restic/restic/internal/fs" "github.com/restic/restic/internal/test" ) @@ -102,7 +103,7 @@ func TestIsExcludedByFile(t *testing.T) { if tc.content == "" { h = "" } - if got := isExcludedByFile(foo, tagFilename, h, nil); tc.want != got { + if got := isExcludedByFile(foo, tagFilename, h, nil, &fs.Local{}); tc.want != got { t.Fatalf("expected %v, got %v", tc.want, got) } }) @@ -164,8 +165,8 @@ func TestMultipleIsExcludedByFile(t *testing.T) { if err != nil { return err } - excludedByFoo := fooExclude(p) - excludedByBar := barExclude(p) + excludedByFoo := fooExclude(p, nil, &fs.Local{}) + excludedByBar := barExclude(p, nil, &fs.Local{}) excluded := excludedByFoo || excludedByBar // the log message helps debugging in case the test fails t.Logf("%q: %v || %v = %v", p, excludedByFoo, excludedByBar, excluded) @@ -249,7 +250,7 @@ func TestIsExcludedByFileSize(t *testing.T) { return err } - excluded := sizeExclude(p, fi) + excluded := sizeExclude(p, fi, nil) // the log message helps debugging in case the test fails t.Logf("%q: dir:%t; size:%d; excluded:%v", p, fi.IsDir(), fi.Size(), excluded) m[p] = !excluded @@ -299,7 +300,7 @@ func TestDeviceMap(t *testing.T) { for _, test := range tests { t.Run("", func(t *testing.T) { - res, err := deviceMap.IsAllowed(filepath.FromSlash(test.item), test.deviceID) + res, err := deviceMap.IsAllowed(filepath.FromSlash(test.item), test.deviceID, &fs.Local{}) if err != nil { t.Fatal(err) } diff --git a/internal/archiver/archiver.go b/internal/archiver/archiver.go index 397347bcb..eab65bb5f 100644 --- a/internal/archiver/archiver.go +++ b/internal/archiver/archiver.go @@ -25,7 +25,7 @@ type SelectByNameFunc func(item string) bool // SelectFunc returns true for all items that should be included (files and // dirs). If false is returned, files are ignored and dirs are not even walked. -type SelectFunc func(item string, fi os.FileInfo) bool +type SelectFunc func(item string, fi os.FileInfo, fs fs.FS) bool // ErrorFunc is called when an error during archiving occurs. When nil is // returned, the archiver continues, otherwise it aborts and passes the error @@ -178,12 +178,12 @@ func (o Options) ApplyDefaults() Options { } // New initializes a new archiver. -func New(repo archiverRepo, fs fs.FS, opts Options) *Archiver { +func New(repo archiverRepo, filesystem fs.FS, opts Options) *Archiver { arch := &Archiver{ Repo: repo, SelectByName: func(_ string) bool { return true }, - Select: func(_ string, _ os.FileInfo) bool { return true }, - FS: fs, + Select: func(_ string, _ os.FileInfo, _ fs.FS) bool { return true }, + FS: filesystem, Options: opts.ApplyDefaults(), CompleteItem: func(string, *restic.Node, *restic.Node, ItemStats, time.Duration) {}, @@ -448,7 +448,7 @@ func (arch *Archiver) save(ctx context.Context, snPath, target string, previous } return futureNode{}, true, nil } - if !arch.Select(abstarget, fi) { + if !arch.Select(abstarget, fi, arch.FS) { debug.Log("%v is excluded", target) return futureNode{}, true, nil } diff --git a/internal/archiver/archiver_test.go b/internal/archiver/archiver_test.go index 74fecef80..b56452182 100644 --- a/internal/archiver/archiver_test.go +++ b/internal/archiver/archiver_test.go @@ -1529,7 +1529,7 @@ func TestArchiverSnapshotSelect(t *testing.T) { }, "other": TestFile{Content: "another file"}, }, - selFn: func(item string, fi os.FileInfo) bool { + selFn: func(item string, fi os.FileInfo, _ fs.FS) bool { return true }, }, @@ -1546,7 +1546,7 @@ func TestArchiverSnapshotSelect(t *testing.T) { }, "other": TestFile{Content: "another file"}, }, - selFn: func(item string, fi os.FileInfo) bool { + selFn: func(item string, fi os.FileInfo, _ fs.FS) bool { return false }, err: "snapshot is empty", @@ -1573,7 +1573,7 @@ func TestArchiverSnapshotSelect(t *testing.T) { }, "other": TestFile{Content: "another file"}, }, - selFn: func(item string, fi os.FileInfo) bool { + selFn: func(item string, fi os.FileInfo, _ fs.FS) bool { return filepath.Ext(item) != ".txt" }, }, @@ -1597,8 +1597,8 @@ func TestArchiverSnapshotSelect(t *testing.T) { }, "other": TestFile{Content: "another file"}, }, - selFn: func(item string, fi os.FileInfo) bool { - return filepath.Base(item) != "subdir" + selFn: func(item string, fi os.FileInfo, fs fs.FS) bool { + return fs.Base(item) != "subdir" }, }, { @@ -1606,8 +1606,8 @@ func TestArchiverSnapshotSelect(t *testing.T) { src: TestDir{ "foo": TestFile{Content: "foo"}, }, - selFn: func(item string, fi os.FileInfo) bool { - return filepath.IsAbs(item) + selFn: func(item string, fi os.FileInfo, fs fs.FS) bool { + return fs.IsAbs(item) }, }, } diff --git a/internal/archiver/scanner.go b/internal/archiver/scanner.go index 433d38821..cb74a31d6 100644 --- a/internal/archiver/scanner.go +++ b/internal/archiver/scanner.go @@ -22,11 +22,11 @@ type Scanner struct { } // NewScanner initializes a new Scanner. -func NewScanner(fs fs.FS) *Scanner { +func NewScanner(filesystem fs.FS) *Scanner { return &Scanner{ - FS: fs, + FS: filesystem, SelectByName: func(_ string) bool { return true }, - Select: func(_ string, _ os.FileInfo) bool { return true }, + Select: func(_ string, _ os.FileInfo, _ fs.FS) bool { return true }, Error: func(_ string, err error) error { return err }, Result: func(_ string, _ ScanStats) {}, } @@ -115,7 +115,7 @@ func (s *Scanner) scan(ctx context.Context, stats ScanStats, target string) (Sca } // run remaining select functions that require file information - if !s.Select(target, fi) { + if !s.Select(target, fi, s.FS) { return stats, nil } diff --git a/internal/archiver/scanner_test.go b/internal/archiver/scanner_test.go index b5b7057b8..e4e2c9f59 100644 --- a/internal/archiver/scanner_test.go +++ b/internal/archiver/scanner_test.go @@ -56,7 +56,7 @@ func TestScanner(t *testing.T) { }, }, }, - selFn: func(item string, fi os.FileInfo) bool { + selFn: func(item string, fi os.FileInfo, fs fs.FS) bool { if fi.IsDir() { return true } diff --git a/internal/fs/fs_local.go b/internal/fs/fs_local.go index 0bcbf7f3a..33d83bf63 100644 --- a/internal/fs/fs_local.go +++ b/internal/fs/fs_local.go @@ -46,6 +46,12 @@ func (fs Local) Lstat(name string) (os.FileInfo, error) { return os.Lstat(fixpath(name)) } +// DeviceID extracts the DeviceID from the given FileInfo. If the fs does +// not support a DeviceID, it returns an error instead +func (fs Local) DeviceID(fi os.FileInfo) (deviceID uint64, err error) { + return DeviceID(fi) +} + // Join joins any number of path elements into a single path, adding a // Separator if necessary. Join calls Clean on the result; in particular, all // empty strings are ignored. On Windows, the result is a UNC path if and only diff --git a/internal/fs/fs_reader.go b/internal/fs/fs_reader.go index 93a42f9eb..b3371a8c9 100644 --- a/internal/fs/fs_reader.go +++ b/internal/fs/fs_reader.go @@ -122,6 +122,10 @@ func (fs *Reader) Lstat(name string) (os.FileInfo, error) { return nil, pathError("lstat", name, os.ErrNotExist) } +func (fs *Reader) DeviceID(_ os.FileInfo) (deviceID uint64, err error) { + return 0, errors.New("Device IDs are not supported") +} + // Join joins any number of path elements into a single path, adding a // Separator if necessary. Join calls Clean on the result; in particular, all // empty strings are ignored. On Windows, the result is a UNC path if and only diff --git a/internal/fs/interface.go b/internal/fs/interface.go index e1f4ef2d9..1c27c1c13 100644 --- a/internal/fs/interface.go +++ b/internal/fs/interface.go @@ -10,6 +10,7 @@ type FS interface { OpenFile(name string, flag int, perm os.FileMode) (File, error) Stat(name string) (os.FileInfo, error) Lstat(name string) (os.FileInfo, error) + DeviceID(fi os.FileInfo) (deviceID uint64, err error) Join(elem ...string) string Separator() string From 41c031a19e2d338a2e600a84209204a1507423ec Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Tue, 27 Aug 2024 12:30:25 +0200 Subject: [PATCH 3/9] backup: move RejectFuncs to archiver package --- cmd/restic/cmd_backup.go | 12 +- cmd/restic/exclude.go | 307 ----------------------------- cmd/restic/exclude_test.go | 254 ------------------------ internal/archiver/exclude.go | 311 ++++++++++++++++++++++++++++++ internal/archiver/exclude_test.go | 259 +++++++++++++++++++++++++ 5 files changed, 576 insertions(+), 567 deletions(-) create mode 100644 internal/archiver/exclude.go create mode 100644 internal/archiver/exclude_test.go diff --git a/cmd/restic/cmd_backup.go b/cmd/restic/cmd_backup.go index 1fdec081b..ceb7694b1 100644 --- a/cmd/restic/cmd_backup.go +++ b/cmd/restic/cmd_backup.go @@ -319,18 +319,18 @@ func collectRejectByNameFuncs(opts BackupOptions, repo *repository.Repository) ( // collectRejectFuncs returns a list of all functions which may reject data // from being saved in a snapshot based on path and file info -func collectRejectFuncs(opts BackupOptions, targets []string, fs fs.FS) (funcs []RejectFunc, err error) { +func collectRejectFuncs(opts BackupOptions, targets []string, fs fs.FS) (funcs []archiver.RejectFunc, err error) { // allowed devices - if opts.ExcludeOtherFS && !opts.Stdin { - f, err := rejectByDevice(targets, fs) + if opts.ExcludeOtherFS && !opts.Stdin && !opts.StdinCommand { + f, err := archiver.RejectByDevice(targets, fs) if err != nil { return nil, err } funcs = append(funcs, f) } - if len(opts.ExcludeLargerThan) != 0 && !opts.Stdin { - f, err := rejectBySize(opts.ExcludeLargerThan) + if len(opts.ExcludeLargerThan) != 0 && !opts.Stdin && !opts.StdinCommand { + f, err := archiver.RejectBySize(opts.ExcludeLargerThan) if err != nil { return nil, err } @@ -342,7 +342,7 @@ func collectRejectFuncs(opts BackupOptions, targets []string, fs fs.FS) (funcs [ } for _, spec := range opts.ExcludeIfPresent { - f, err := rejectIfPresent(spec) + f, err := archiver.RejectIfPresent(spec, Warnf) if err != nil { return nil, err } diff --git a/cmd/restic/exclude.go b/cmd/restic/exclude.go index f1e1011f2..40eb93933 100644 --- a/cmd/restic/exclude.go +++ b/cmd/restic/exclude.go @@ -4,10 +4,8 @@ import ( "bufio" "bytes" "fmt" - "io" "os" "strings" - "sync" "github.com/restic/restic/internal/debug" "github.com/restic/restic/internal/errors" @@ -15,64 +13,14 @@ import ( "github.com/restic/restic/internal/fs" "github.com/restic/restic/internal/repository" "github.com/restic/restic/internal/textfile" - "github.com/restic/restic/internal/ui" "github.com/spf13/pflag" ) -type rejectionCache struct { - m map[string]bool - mtx sync.Mutex -} - -// Lock locks the mutex in rc. -func (rc *rejectionCache) Lock() { - if rc != nil { - rc.mtx.Lock() - } -} - -// Unlock unlocks the mutex in rc. -func (rc *rejectionCache) Unlock() { - if rc != nil { - rc.mtx.Unlock() - } -} - -// Get returns the last stored value for dir and a second boolean that -// indicates whether that value was actually written to the cache. It is the -// callers responsibility to call rc.Lock and rc.Unlock before using this -// method, otherwise data races may occur. -func (rc *rejectionCache) Get(dir string) (bool, bool) { - if rc == nil || rc.m == nil { - return false, false - } - v, ok := rc.m[dir] - return v, ok -} - -// Store stores a new value for dir. It is the callers responsibility to call -// rc.Lock and rc.Unlock before using this method, otherwise data races may -// occur. -func (rc *rejectionCache) Store(dir string, rejected bool) { - if rc == nil { - return - } - if rc.m == nil { - rc.m = make(map[string]bool) - } - rc.m[dir] = rejected -} - // RejectByNameFunc is a function that takes a filename of a // file that would be included in the backup. The function returns true if it // should be excluded (rejected) from the backup. type RejectByNameFunc func(path string) bool -// RejectFunc is a function that takes a filename and os.FileInfo of a -// file that would be included in the backup. The function returns true if it -// should be excluded (rejected) from the backup. -type RejectFunc func(path string, fi os.FileInfo, fs fs.FS) bool - // rejectByPattern returns a RejectByNameFunc which rejects files that match // one of the patterns. func rejectByPattern(patterns []string) RejectByNameFunc { @@ -104,239 +52,6 @@ func rejectByInsensitivePattern(patterns []string) RejectByNameFunc { } } -// rejectIfPresent returns a RejectByNameFunc which itself returns whether a path -// should be excluded. The RejectByNameFunc considers a file to be excluded when -// it resides in a directory with an exclusion file, that is specified by -// excludeFileSpec in the form "filename[:content]". The returned error is -// non-nil if the filename component of excludeFileSpec is empty. If rc is -// non-nil, it is going to be used in the RejectByNameFunc to expedite the evaluation -// of a directory based on previous visits. -func rejectIfPresent(excludeFileSpec string) (RejectFunc, error) { - if excludeFileSpec == "" { - return nil, errors.New("name for exclusion tagfile is empty") - } - colon := strings.Index(excludeFileSpec, ":") - if colon == 0 { - return nil, fmt.Errorf("no name for exclusion tagfile provided") - } - tf, tc := "", "" - if colon > 0 { - tf = excludeFileSpec[:colon] - tc = excludeFileSpec[colon+1:] - } else { - tf = excludeFileSpec - } - debug.Log("using %q as exclusion tagfile", tf) - rc := &rejectionCache{} - return func(filename string, _ os.FileInfo, fs fs.FS) bool { - return isExcludedByFile(filename, tf, tc, rc, fs) - }, nil -} - -// isExcludedByFile interprets filename as a path and returns true if that file -// is in an excluded directory. A directory is identified as excluded if it contains a -// tagfile which bears the name specified in tagFilename and starts with -// header. If rc is non-nil, it is used to expedite the evaluation of a -// directory based on previous visits. -func isExcludedByFile(filename, tagFilename, header string, rc *rejectionCache, fs fs.FS) bool { - if tagFilename == "" { - return false - } - if fs.Base(filename) == tagFilename { - return false // do not exclude the tagfile itself - } - rc.Lock() - defer rc.Unlock() - - dir := fs.Dir(filename) - rejected, visited := rc.Get(dir) - if visited { - return rejected - } - rejected = isDirExcludedByFile(dir, tagFilename, header, fs) - rc.Store(dir, rejected) - return rejected -} - -func isDirExcludedByFile(dir, tagFilename, header string, fs fs.FS) bool { - tf := fs.Join(dir, tagFilename) - _, err := fs.Lstat(tf) - if os.IsNotExist(err) { - return false - } - if err != nil { - Warnf("could not access exclusion tagfile: %v", err) - return false - } - // when no signature is given, the mere presence of tf is enough reason - // to exclude filename - if len(header) == 0 { - return true - } - // From this stage, errors mean tagFilename exists but it is malformed. - // Warnings will be generated so that the user is informed that the - // indented ignore-action is not performed. - f, err := fs.OpenFile(tf, os.O_RDONLY, 0) - if err != nil { - Warnf("could not open exclusion tagfile: %v", err) - return false - } - defer func() { - _ = f.Close() - }() - buf := make([]byte, len(header)) - _, err = io.ReadFull(f, buf) - // EOF is handled with a dedicated message, otherwise the warning were too cryptic - if err == io.EOF { - Warnf("invalid (too short) signature in exclusion tagfile %q\n", tf) - return false - } - if err != nil { - Warnf("could not read signature from exclusion tagfile %q: %v\n", tf, err) - return false - } - if !bytes.Equal(buf, []byte(header)) { - Warnf("invalid signature in exclusion tagfile %q\n", tf) - return false - } - return true -} - -// DeviceMap is used to track allowed source devices for backup. This is used to -// check for crossing mount points during backup (for --one-file-system). It -// maps the name of a source path to its device ID. -type DeviceMap map[string]uint64 - -// NewDeviceMap creates a new device map from the list of source paths. -func NewDeviceMap(allowedSourcePaths []string, fs fs.FS) (DeviceMap, error) { - deviceMap := make(map[string]uint64) - - for _, item := range allowedSourcePaths { - item, err := fs.Abs(fs.Clean(item)) - if err != nil { - return nil, err - } - - fi, err := fs.Lstat(item) - if err != nil { - return nil, err - } - - id, err := fs.DeviceID(fi) - if err != nil { - return nil, err - } - - deviceMap[item] = id - } - - if len(deviceMap) == 0 { - return nil, errors.New("zero allowed devices") - } - - return deviceMap, nil -} - -// IsAllowed returns true if the path is located on an allowed device. -func (m DeviceMap) IsAllowed(item string, deviceID uint64, fs fs.FS) (bool, error) { - for dir := item; ; dir = fs.Dir(dir) { - debug.Log("item %v, test dir %v", item, dir) - - // find a parent directory that is on an allowed device (otherwise - // we would not traverse the directory at all) - allowedID, ok := m[dir] - if !ok { - if dir == fs.Dir(dir) { - // arrived at root, no allowed device found. this should not happen. - break - } - continue - } - - // if the item has a different device ID than the parent directory, - // we crossed a file system boundary - if allowedID != deviceID { - debug.Log("item %v (dir %v) on disallowed device %d", item, dir, deviceID) - return false, nil - } - - // item is on allowed device, accept it - debug.Log("item %v allowed", item) - return true, nil - } - - return false, fmt.Errorf("item %v (device ID %v) not found, deviceMap: %v", item, deviceID, m) -} - -// rejectByDevice returns a RejectFunc that rejects files which are on a -// different file systems than the files/dirs in samples. -func rejectByDevice(samples []string, filesystem fs.FS) (RejectFunc, error) { - deviceMap, err := NewDeviceMap(samples, filesystem) - if err != nil { - return nil, err - } - debug.Log("allowed devices: %v\n", deviceMap) - - return func(item string, fi os.FileInfo, fs fs.FS) bool { - id, err := fs.DeviceID(fi) - if err != nil { - // This should never happen because gatherDevices() would have - // errored out earlier. If it still does that's a reason to panic. - panic(err) - } - - allowed, err := deviceMap.IsAllowed(fs.Clean(item), id, fs) - if err != nil { - // this should not happen - panic(fmt.Sprintf("error checking device ID of %v: %v", item, err)) - } - - if allowed { - // accept item - return false - } - - // reject everything except directories - if !fi.IsDir() { - return true - } - - // special case: make sure we keep mountpoints (directories which - // contain a mounted file system). Test this by checking if the parent - // directory would be included. - parentDir := fs.Dir(fs.Clean(item)) - - parentFI, err := fs.Lstat(parentDir) - if err != nil { - debug.Log("item %v: error running lstat() on parent directory: %v", item, err) - // if in doubt, reject - return true - } - - parentDeviceID, err := fs.DeviceID(parentFI) - if err != nil { - debug.Log("item %v: getting device ID of parent directory: %v", item, err) - // if in doubt, reject - return true - } - - parentAllowed, err := deviceMap.IsAllowed(parentDir, parentDeviceID, fs) - if err != nil { - debug.Log("item %v: error checking parent directory: %v", item, err) - // if in doubt, reject - return true - } - - if parentAllowed { - // we found a mount point, so accept the directory - return false - } - - // reject everything else - return true - }, nil -} - // rejectResticCache returns a RejectByNameFunc that rejects the restic cache // directory (if set). func rejectResticCache(repo *repository.Repository) (RejectByNameFunc, error) { @@ -361,28 +76,6 @@ func rejectResticCache(repo *repository.Repository) (RejectByNameFunc, error) { }, nil } -func rejectBySize(maxSizeStr string) (RejectFunc, error) { - maxSize, err := ui.ParseBytes(maxSizeStr) - if err != nil { - return nil, err - } - - return func(item string, fi os.FileInfo, _ fs.FS) bool { - // directory will be ignored - if fi.IsDir() { - return false - } - - filesize := fi.Size() - if filesize > maxSize { - debug.Log("file %s is oversize: %d", item, filesize) - return true - } - - return false - }, nil -} - // readPatternsFromFiles reads all files and returns the list of // patterns. For each line, leading and trailing white space is removed // and comment lines are ignored. For each remaining pattern, environment diff --git a/cmd/restic/exclude_test.go b/cmd/restic/exclude_test.go index 166ee1d84..177a81df2 100644 --- a/cmd/restic/exclude_test.go +++ b/cmd/restic/exclude_test.go @@ -1,12 +1,7 @@ package main import ( - "os" - "path/filepath" "testing" - - "github.com/restic/restic/internal/fs" - "github.com/restic/restic/internal/test" ) func TestRejectByPattern(t *testing.T) { @@ -62,252 +57,3 @@ func TestRejectByInsensitivePattern(t *testing.T) { }) } } - -func TestIsExcludedByFile(t *testing.T) { - const ( - tagFilename = "CACHEDIR.TAG" - header = "Signature: 8a477f597d28d172789f06886806bc55" - ) - tests := []struct { - name string - tagFile string - content string - want bool - }{ - {"NoTagfile", "", "", false}, - {"EmptyTagfile", tagFilename, "", true}, - {"UnnamedTagFile", "", header, false}, - {"WrongTagFile", "notatagfile", header, false}, - {"IncorrectSig", tagFilename, header[1:], false}, - {"ValidSig", tagFilename, header, true}, - {"ValidPlusStuff", tagFilename, header + "foo", true}, - {"ValidPlusNewlineAndStuff", tagFilename, header + "\nbar", true}, - } - for _, tc := range tests { - t.Run(tc.name, func(t *testing.T) { - tempDir := test.TempDir(t) - - foo := filepath.Join(tempDir, "foo") - err := os.WriteFile(foo, []byte("foo"), 0666) - if err != nil { - t.Fatalf("could not write file: %v", err) - } - if tc.tagFile != "" { - tagFile := filepath.Join(tempDir, tc.tagFile) - err = os.WriteFile(tagFile, []byte(tc.content), 0666) - if err != nil { - t.Fatalf("could not write tagfile: %v", err) - } - } - h := header - if tc.content == "" { - h = "" - } - if got := isExcludedByFile(foo, tagFilename, h, nil, &fs.Local{}); tc.want != got { - t.Fatalf("expected %v, got %v", tc.want, got) - } - }) - } -} - -// TestMultipleIsExcludedByFile is for testing that multiple instances of -// the --exclude-if-present parameter (or the shortcut --exclude-caches do not -// cancel each other out. It was initially written to demonstrate a bug in -// rejectIfPresent. -func TestMultipleIsExcludedByFile(t *testing.T) { - tempDir := test.TempDir(t) - - // Create some files in a temporary directory. - // Files in UPPERCASE will be used as exclusion triggers later on. - // We will test the inclusion later, so we add the expected value as - // a bool. - files := []struct { - path string - incl bool - }{ - {"42", true}, - - // everything in foodir except the NOFOO tagfile - // should not be included. - {"foodir/NOFOO", true}, - {"foodir/foo", false}, - {"foodir/foosub/underfoo", false}, - - // everything in bardir except the NOBAR tagfile - // should not be included. - {"bardir/NOBAR", true}, - {"bardir/bar", false}, - {"bardir/barsub/underbar", false}, - - // everything in bazdir should be included. - {"bazdir/baz", true}, - {"bazdir/bazsub/underbaz", true}, - } - var errs []error - for _, f := range files { - // create directories first, then the file - p := filepath.Join(tempDir, filepath.FromSlash(f.path)) - errs = append(errs, os.MkdirAll(filepath.Dir(p), 0700)) - errs = append(errs, os.WriteFile(p, []byte(f.path), 0600)) - } - test.OKs(t, errs) // see if anything went wrong during the creation - - // create two rejection functions, one that tests for the NOFOO file - // and one for the NOBAR file - fooExclude, _ := rejectIfPresent("NOFOO") - barExclude, _ := rejectIfPresent("NOBAR") - - // To mock the archiver scanning walk, we create filepath.WalkFn - // that tests against the two rejection functions and stores - // the result in a map against we can test later. - m := make(map[string]bool) - walk := func(p string, fi os.FileInfo, err error) error { - if err != nil { - return err - } - excludedByFoo := fooExclude(p, nil, &fs.Local{}) - excludedByBar := barExclude(p, nil, &fs.Local{}) - excluded := excludedByFoo || excludedByBar - // the log message helps debugging in case the test fails - t.Logf("%q: %v || %v = %v", p, excludedByFoo, excludedByBar, excluded) - m[p] = !excluded - if excluded { - return filepath.SkipDir - } - return nil - } - // walk through the temporary file and check the error - test.OK(t, filepath.Walk(tempDir, walk)) - - // compare whether the walk gave the expected values for the test cases - for _, f := range files { - p := filepath.Join(tempDir, filepath.FromSlash(f.path)) - if m[p] != f.incl { - t.Errorf("inclusion status of %s is wrong: want %v, got %v", f.path, f.incl, m[p]) - } - } -} - -// TestIsExcludedByFileSize is for testing the instance of -// --exclude-larger-than parameters -func TestIsExcludedByFileSize(t *testing.T) { - tempDir := test.TempDir(t) - - // Max size of file is set to be 1k - maxSizeStr := "1k" - - // Create some files in a temporary directory. - // Files in UPPERCASE will be used as exclusion triggers later on. - // We will test the inclusion later, so we add the expected value as - // a bool. - files := []struct { - path string - size int64 - incl bool - }{ - {"42", 100, true}, - - // everything in foodir except the FOOLARGE tagfile - // should not be included. - {"foodir/FOOLARGE", 2048, false}, - {"foodir/foo", 1002, true}, - {"foodir/foosub/underfoo", 100, true}, - - // everything in bardir except the BARLARGE tagfile - // should not be included. - {"bardir/BARLARGE", 1030, false}, - {"bardir/bar", 1000, true}, - {"bardir/barsub/underbar", 500, true}, - - // everything in bazdir should be included. - {"bazdir/baz", 100, true}, - {"bazdir/bazsub/underbaz", 200, true}, - } - var errs []error - for _, f := range files { - // create directories first, then the file - p := filepath.Join(tempDir, filepath.FromSlash(f.path)) - errs = append(errs, os.MkdirAll(filepath.Dir(p), 0700)) - file, err := os.OpenFile(p, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0600) - errs = append(errs, err) - if err == nil { - // create a file with given size - errs = append(errs, file.Truncate(f.size)) - } - errs = append(errs, file.Close()) - } - test.OKs(t, errs) // see if anything went wrong during the creation - - // create rejection function - sizeExclude, _ := rejectBySize(maxSizeStr) - - // To mock the archiver scanning walk, we create filepath.WalkFn - // that tests against the two rejection functions and stores - // the result in a map against we can test later. - m := make(map[string]bool) - walk := func(p string, fi os.FileInfo, err error) error { - if err != nil { - return err - } - - excluded := sizeExclude(p, fi, nil) - // the log message helps debugging in case the test fails - t.Logf("%q: dir:%t; size:%d; excluded:%v", p, fi.IsDir(), fi.Size(), excluded) - m[p] = !excluded - return nil - } - // walk through the temporary file and check the error - test.OK(t, filepath.Walk(tempDir, walk)) - - // compare whether the walk gave the expected values for the test cases - for _, f := range files { - p := filepath.Join(tempDir, filepath.FromSlash(f.path)) - if m[p] != f.incl { - t.Errorf("inclusion status of %s is wrong: want %v, got %v", f.path, f.incl, m[p]) - } - } -} - -func TestDeviceMap(t *testing.T) { - deviceMap := DeviceMap{ - filepath.FromSlash("/"): 1, - filepath.FromSlash("/usr/local"): 5, - } - - var tests = []struct { - item string - deviceID uint64 - allowed bool - }{ - {"/root", 1, true}, - {"/usr", 1, true}, - - {"/proc", 2, false}, - {"/proc/1234", 2, false}, - - {"/usr", 3, false}, - {"/usr/share", 3, false}, - - {"/usr/local", 5, true}, - {"/usr/local/foobar", 5, true}, - - {"/usr/local/foobar/submount", 23, false}, - {"/usr/local/foobar/submount/file", 23, false}, - - {"/usr/local/foobar/outhersubmount", 1, false}, - {"/usr/local/foobar/outhersubmount/otherfile", 1, false}, - } - - for _, test := range tests { - t.Run("", func(t *testing.T) { - res, err := deviceMap.IsAllowed(filepath.FromSlash(test.item), test.deviceID, &fs.Local{}) - if err != nil { - t.Fatal(err) - } - - if res != test.allowed { - t.Fatalf("wrong result returned by IsAllowed(%v): want %v, got %v", test.item, test.allowed, res) - } - }) - } -} diff --git a/internal/archiver/exclude.go b/internal/archiver/exclude.go new file mode 100644 index 000000000..f4444812c --- /dev/null +++ b/internal/archiver/exclude.go @@ -0,0 +1,311 @@ +package archiver + +import ( + "bytes" + "fmt" + "io" + "os" + "strings" + "sync" + + "github.com/restic/restic/internal/debug" + "github.com/restic/restic/internal/errors" + "github.com/restic/restic/internal/fs" + "github.com/restic/restic/internal/ui" +) + +type rejectionCache struct { + m map[string]bool + mtx sync.Mutex +} + +func newRejectionCache() *rejectionCache { + return &rejectionCache{m: make(map[string]bool)} +} + +// Lock locks the mutex in rc. +func (rc *rejectionCache) Lock() { + rc.mtx.Lock() +} + +// Unlock unlocks the mutex in rc. +func (rc *rejectionCache) Unlock() { + rc.mtx.Unlock() +} + +// Get returns the last stored value for dir and a second boolean that +// indicates whether that value was actually written to the cache. It is the +// callers responsibility to call rc.Lock and rc.Unlock before using this +// method, otherwise data races may occur. +func (rc *rejectionCache) Get(dir string) (bool, bool) { + v, ok := rc.m[dir] + return v, ok +} + +// Store stores a new value for dir. It is the callers responsibility to call +// rc.Lock and rc.Unlock before using this method, otherwise data races may +// occur. +func (rc *rejectionCache) Store(dir string, rejected bool) { + rc.m[dir] = rejected +} + +// RejectFunc is a function that takes a filename and os.FileInfo of a +// file that would be included in the backup. The function returns true if it +// should be excluded (rejected) from the backup. +type RejectFunc func(path string, fi os.FileInfo, fs fs.FS) bool + +// RejectIfPresent returns a RejectByNameFunc which itself returns whether a path +// should be excluded. The RejectByNameFunc considers a file to be excluded when +// it resides in a directory with an exclusion file, that is specified by +// excludeFileSpec in the form "filename[:content]". The returned error is +// non-nil if the filename component of excludeFileSpec is empty. If rc is +// non-nil, it is going to be used in the RejectByNameFunc to expedite the evaluation +// of a directory based on previous visits. +func RejectIfPresent(excludeFileSpec string, warnf func(msg string, args ...interface{})) (RejectFunc, error) { + if excludeFileSpec == "" { + return nil, errors.New("name for exclusion tagfile is empty") + } + colon := strings.Index(excludeFileSpec, ":") + if colon == 0 { + return nil, fmt.Errorf("no name for exclusion tagfile provided") + } + tf, tc := "", "" + if colon > 0 { + tf = excludeFileSpec[:colon] + tc = excludeFileSpec[colon+1:] + } else { + tf = excludeFileSpec + } + debug.Log("using %q as exclusion tagfile", tf) + rc := newRejectionCache() + return func(filename string, _ os.FileInfo, fs fs.FS) bool { + return isExcludedByFile(filename, tf, tc, rc, fs, warnf) + }, nil +} + +// isExcludedByFile interprets filename as a path and returns true if that file +// is in an excluded directory. A directory is identified as excluded if it contains a +// tagfile which bears the name specified in tagFilename and starts with +// header. If rc is non-nil, it is used to expedite the evaluation of a +// directory based on previous visits. +func isExcludedByFile(filename, tagFilename, header string, rc *rejectionCache, fs fs.FS, warnf func(msg string, args ...interface{})) bool { + if tagFilename == "" { + return false + } + + if fs.Base(filename) == tagFilename { + return false // do not exclude the tagfile itself + } + rc.Lock() + defer rc.Unlock() + + dir := fs.Dir(filename) + rejected, visited := rc.Get(dir) + if visited { + return rejected + } + rejected = isDirExcludedByFile(dir, tagFilename, header, fs, warnf) + rc.Store(dir, rejected) + return rejected +} + +func isDirExcludedByFile(dir, tagFilename, header string, fs fs.FS, warnf func(msg string, args ...interface{})) bool { + tf := fs.Join(dir, tagFilename) + _, err := fs.Lstat(tf) + if os.IsNotExist(err) { + return false + } + if err != nil { + warnf("could not access exclusion tagfile: %v", err) + return false + } + // when no signature is given, the mere presence of tf is enough reason + // to exclude filename + if len(header) == 0 { + return true + } + // From this stage, errors mean tagFilename exists but it is malformed. + // Warnings will be generated so that the user is informed that the + // indented ignore-action is not performed. + f, err := fs.OpenFile(tf, os.O_RDONLY, 0) + if err != nil { + warnf("could not open exclusion tagfile: %v", err) + return false + } + defer func() { + _ = f.Close() + }() + buf := make([]byte, len(header)) + _, err = io.ReadFull(f, buf) + // EOF is handled with a dedicated message, otherwise the warning were too cryptic + if err == io.EOF { + warnf("invalid (too short) signature in exclusion tagfile %q\n", tf) + return false + } + if err != nil { + warnf("could not read signature from exclusion tagfile %q: %v\n", tf, err) + return false + } + if !bytes.Equal(buf, []byte(header)) { + warnf("invalid signature in exclusion tagfile %q\n", tf) + return false + } + return true +} + +// deviceMap is used to track allowed source devices for backup. This is used to +// check for crossing mount points during backup (for --one-file-system). It +// maps the name of a source path to its device ID. +type deviceMap map[string]uint64 + +// newDeviceMap creates a new device map from the list of source paths. +func newDeviceMap(allowedSourcePaths []string, fs fs.FS) (deviceMap, error) { + deviceMap := make(map[string]uint64) + + for _, item := range allowedSourcePaths { + item, err := fs.Abs(fs.Clean(item)) + if err != nil { + return nil, err + } + + fi, err := fs.Lstat(item) + if err != nil { + return nil, err + } + + id, err := fs.DeviceID(fi) + if err != nil { + return nil, err + } + + deviceMap[item] = id + } + + if len(deviceMap) == 0 { + return nil, errors.New("zero allowed devices") + } + + return deviceMap, nil +} + +// IsAllowed returns true if the path is located on an allowed device. +func (m deviceMap) IsAllowed(item string, deviceID uint64, fs fs.FS) (bool, error) { + for dir := item; ; dir = fs.Dir(dir) { + debug.Log("item %v, test dir %v", item, dir) + + // find a parent directory that is on an allowed device (otherwise + // we would not traverse the directory at all) + allowedID, ok := m[dir] + if !ok { + if dir == fs.Dir(dir) { + // arrived at root, no allowed device found. this should not happen. + break + } + continue + } + + // if the item has a different device ID than the parent directory, + // we crossed a file system boundary + if allowedID != deviceID { + debug.Log("item %v (dir %v) on disallowed device %d", item, dir, deviceID) + return false, nil + } + + // item is on allowed device, accept it + debug.Log("item %v allowed", item) + return true, nil + } + + return false, fmt.Errorf("item %v (device ID %v) not found, deviceMap: %v", item, deviceID, m) +} + +// RejectByDevice returns a RejectFunc that rejects files which are on a +// different file systems than the files/dirs in samples. +func RejectByDevice(samples []string, filesystem fs.FS) (RejectFunc, error) { + deviceMap, err := newDeviceMap(samples, filesystem) + if err != nil { + return nil, err + } + debug.Log("allowed devices: %v\n", deviceMap) + + return func(item string, fi os.FileInfo, fs fs.FS) bool { + id, err := fs.DeviceID(fi) + if err != nil { + // This should never happen because gatherDevices() would have + // errored out earlier. If it still does that's a reason to panic. + panic(err) + } + + allowed, err := deviceMap.IsAllowed(fs.Clean(item), id, fs) + if err != nil { + // this should not happen + panic(fmt.Sprintf("error checking device ID of %v: %v", item, err)) + } + + if allowed { + // accept item + return false + } + + // reject everything except directories + if !fi.IsDir() { + return true + } + + // special case: make sure we keep mountpoints (directories which + // contain a mounted file system). Test this by checking if the parent + // directory would be included. + parentDir := fs.Dir(fs.Clean(item)) + + parentFI, err := fs.Lstat(parentDir) + if err != nil { + debug.Log("item %v: error running lstat() on parent directory: %v", item, err) + // if in doubt, reject + return true + } + + parentDeviceID, err := fs.DeviceID(parentFI) + if err != nil { + debug.Log("item %v: getting device ID of parent directory: %v", item, err) + // if in doubt, reject + return true + } + + parentAllowed, err := deviceMap.IsAllowed(parentDir, parentDeviceID, fs) + if err != nil { + debug.Log("item %v: error checking parent directory: %v", item, err) + // if in doubt, reject + return true + } + + if parentAllowed { + // we found a mount point, so accept the directory + return false + } + + // reject everything else + return true + }, nil +} + +func RejectBySize(maxSizeStr string) (RejectFunc, error) { + maxSize, err := ui.ParseBytes(maxSizeStr) + if err != nil { + return nil, err + } + + return func(item string, fi os.FileInfo, _ fs.FS) bool { + // directory will be ignored + if fi.IsDir() { + return false + } + + filesize := fi.Size() + if filesize > maxSize { + debug.Log("file %s is oversize: %d", item, filesize) + return true + } + + return false + }, nil +} diff --git a/internal/archiver/exclude_test.go b/internal/archiver/exclude_test.go new file mode 100644 index 000000000..b9f1f8cdd --- /dev/null +++ b/internal/archiver/exclude_test.go @@ -0,0 +1,259 @@ +package archiver + +import ( + "os" + "path/filepath" + "testing" + + "github.com/restic/restic/internal/fs" + "github.com/restic/restic/internal/test" +) + +func TestIsExcludedByFile(t *testing.T) { + const ( + tagFilename = "CACHEDIR.TAG" + header = "Signature: 8a477f597d28d172789f06886806bc55" + ) + tests := []struct { + name string + tagFile string + content string + want bool + }{ + {"NoTagfile", "", "", false}, + {"EmptyTagfile", tagFilename, "", true}, + {"UnnamedTagFile", "", header, false}, + {"WrongTagFile", "notatagfile", header, false}, + {"IncorrectSig", tagFilename, header[1:], false}, + {"ValidSig", tagFilename, header, true}, + {"ValidPlusStuff", tagFilename, header + "foo", true}, + {"ValidPlusNewlineAndStuff", tagFilename, header + "\nbar", true}, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + tempDir := test.TempDir(t) + + foo := filepath.Join(tempDir, "foo") + err := os.WriteFile(foo, []byte("foo"), 0666) + if err != nil { + t.Fatalf("could not write file: %v", err) + } + if tc.tagFile != "" { + tagFile := filepath.Join(tempDir, tc.tagFile) + err = os.WriteFile(tagFile, []byte(tc.content), 0666) + if err != nil { + t.Fatalf("could not write tagfile: %v", err) + } + } + h := header + if tc.content == "" { + h = "" + } + if got := isExcludedByFile(foo, tagFilename, h, newRejectionCache(), &fs.Local{}, func(msg string, args ...interface{}) { t.Logf(msg, args...) }); tc.want != got { + t.Fatalf("expected %v, got %v", tc.want, got) + } + }) + } +} + +// TestMultipleIsExcludedByFile is for testing that multiple instances of +// the --exclude-if-present parameter (or the shortcut --exclude-caches do not +// cancel each other out. It was initially written to demonstrate a bug in +// rejectIfPresent. +func TestMultipleIsExcludedByFile(t *testing.T) { + tempDir := test.TempDir(t) + + // Create some files in a temporary directory. + // Files in UPPERCASE will be used as exclusion triggers later on. + // We will test the inclusion later, so we add the expected value as + // a bool. + files := []struct { + path string + incl bool + }{ + {"42", true}, + + // everything in foodir except the NOFOO tagfile + // should not be included. + {"foodir/NOFOO", true}, + {"foodir/foo", false}, + {"foodir/foosub/underfoo", false}, + + // everything in bardir except the NOBAR tagfile + // should not be included. + {"bardir/NOBAR", true}, + {"bardir/bar", false}, + {"bardir/barsub/underbar", false}, + + // everything in bazdir should be included. + {"bazdir/baz", true}, + {"bazdir/bazsub/underbaz", true}, + } + var errs []error + for _, f := range files { + // create directories first, then the file + p := filepath.Join(tempDir, filepath.FromSlash(f.path)) + errs = append(errs, os.MkdirAll(filepath.Dir(p), 0700)) + errs = append(errs, os.WriteFile(p, []byte(f.path), 0600)) + } + test.OKs(t, errs) // see if anything went wrong during the creation + + // create two rejection functions, one that tests for the NOFOO file + // and one for the NOBAR file + fooExclude, _ := RejectIfPresent("NOFOO", nil) + barExclude, _ := RejectIfPresent("NOBAR", nil) + + // To mock the archiver scanning walk, we create filepath.WalkFn + // that tests against the two rejection functions and stores + // the result in a map against we can test later. + m := make(map[string]bool) + walk := func(p string, fi os.FileInfo, err error) error { + if err != nil { + return err + } + excludedByFoo := fooExclude(p, nil, &fs.Local{}) + excludedByBar := barExclude(p, nil, &fs.Local{}) + excluded := excludedByFoo || excludedByBar + // the log message helps debugging in case the test fails + t.Logf("%q: %v || %v = %v", p, excludedByFoo, excludedByBar, excluded) + m[p] = !excluded + if excluded { + return filepath.SkipDir + } + return nil + } + // walk through the temporary file and check the error + test.OK(t, filepath.Walk(tempDir, walk)) + + // compare whether the walk gave the expected values for the test cases + for _, f := range files { + p := filepath.Join(tempDir, filepath.FromSlash(f.path)) + if m[p] != f.incl { + t.Errorf("inclusion status of %s is wrong: want %v, got %v", f.path, f.incl, m[p]) + } + } +} + +// TestIsExcludedByFileSize is for testing the instance of +// --exclude-larger-than parameters +func TestIsExcludedByFileSize(t *testing.T) { + tempDir := test.TempDir(t) + + // Max size of file is set to be 1k + maxSizeStr := "1k" + + // Create some files in a temporary directory. + // Files in UPPERCASE will be used as exclusion triggers later on. + // We will test the inclusion later, so we add the expected value as + // a bool. + files := []struct { + path string + size int64 + incl bool + }{ + {"42", 100, true}, + + // everything in foodir except the FOOLARGE tagfile + // should not be included. + {"foodir/FOOLARGE", 2048, false}, + {"foodir/foo", 1002, true}, + {"foodir/foosub/underfoo", 100, true}, + + // everything in bardir except the BARLARGE tagfile + // should not be included. + {"bardir/BARLARGE", 1030, false}, + {"bardir/bar", 1000, true}, + {"bardir/barsub/underbar", 500, true}, + + // everything in bazdir should be included. + {"bazdir/baz", 100, true}, + {"bazdir/bazsub/underbaz", 200, true}, + } + var errs []error + for _, f := range files { + // create directories first, then the file + p := filepath.Join(tempDir, filepath.FromSlash(f.path)) + errs = append(errs, os.MkdirAll(filepath.Dir(p), 0700)) + file, err := os.OpenFile(p, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0600) + errs = append(errs, err) + if err == nil { + // create a file with given size + errs = append(errs, file.Truncate(f.size)) + } + errs = append(errs, file.Close()) + } + test.OKs(t, errs) // see if anything went wrong during the creation + + // create rejection function + sizeExclude, _ := RejectBySize(maxSizeStr) + + // To mock the archiver scanning walk, we create filepath.WalkFn + // that tests against the two rejection functions and stores + // the result in a map against we can test later. + m := make(map[string]bool) + walk := func(p string, fi os.FileInfo, err error) error { + if err != nil { + return err + } + + excluded := sizeExclude(p, fi, nil) + // the log message helps debugging in case the test fails + t.Logf("%q: dir:%t; size:%d; excluded:%v", p, fi.IsDir(), fi.Size(), excluded) + m[p] = !excluded + return nil + } + // walk through the temporary file and check the error + test.OK(t, filepath.Walk(tempDir, walk)) + + // compare whether the walk gave the expected values for the test cases + for _, f := range files { + p := filepath.Join(tempDir, filepath.FromSlash(f.path)) + if m[p] != f.incl { + t.Errorf("inclusion status of %s is wrong: want %v, got %v", f.path, f.incl, m[p]) + } + } +} + +func TestDeviceMap(t *testing.T) { + deviceMap := deviceMap{ + filepath.FromSlash("/"): 1, + filepath.FromSlash("/usr/local"): 5, + } + + var tests = []struct { + item string + deviceID uint64 + allowed bool + }{ + {"/root", 1, true}, + {"/usr", 1, true}, + + {"/proc", 2, false}, + {"/proc/1234", 2, false}, + + {"/usr", 3, false}, + {"/usr/share", 3, false}, + + {"/usr/local", 5, true}, + {"/usr/local/foobar", 5, true}, + + {"/usr/local/foobar/submount", 23, false}, + {"/usr/local/foobar/submount/file", 23, false}, + + {"/usr/local/foobar/outhersubmount", 1, false}, + {"/usr/local/foobar/outhersubmount/otherfile", 1, false}, + } + + for _, test := range tests { + t.Run("", func(t *testing.T) { + res, err := deviceMap.IsAllowed(filepath.FromSlash(test.item), test.deviceID, &fs.Local{}) + if err != nil { + t.Fatal(err) + } + + if res != test.allowed { + t.Fatalf("wrong result returned by IsAllowed(%v): want %v, got %v", test.item, test.allowed, res) + } + }) + } +} From 5d58945718dfaf2ee7946542eb26b69c8cca8fb0 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Tue, 27 Aug 2024 13:48:31 +0200 Subject: [PATCH 4/9] cleanup include / exclude option setup --- cmd/restic/cmd_backup.go | 2 +- cmd/restic/cmd_restore.go | 4 ++-- cmd/restic/cmd_rewrite.go | 2 +- cmd/restic/exclude.go | 2 +- cmd/restic/include.go | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/cmd/restic/cmd_backup.go b/cmd/restic/cmd_backup.go index ceb7694b1..8d72a27b0 100644 --- a/cmd/restic/cmd_backup.go +++ b/cmd/restic/cmd_backup.go @@ -108,7 +108,7 @@ func init() { f.VarP(&backupOptions.GroupBy, "group-by", "g", "`group` snapshots by host, paths and/or tags, separated by comma (disable grouping with '')") f.BoolVarP(&backupOptions.Force, "force", "f", false, `force re-reading the source files/directories (overrides the "parent" flag)`) - initExcludePatternOptions(f, &backupOptions.excludePatternOptions) + backupOptions.excludePatternOptions.Add(f) f.BoolVarP(&backupOptions.ExcludeOtherFS, "one-file-system", "x", false, "exclude other file systems, don't cross filesystem boundaries and subvolumes") f.StringArrayVar(&backupOptions.ExcludeIfPresent, "exclude-if-present", nil, "takes `filename[:header]`, exclude contents of directories containing filename (except filename itself) if header of that file is as provided (can be specified multiple times)") diff --git a/cmd/restic/cmd_restore.go b/cmd/restic/cmd_restore.go index c58b0b80d..f20359dc0 100644 --- a/cmd/restic/cmd_restore.go +++ b/cmd/restic/cmd_restore.go @@ -68,8 +68,8 @@ func init() { flags := cmdRestore.Flags() flags.StringVarP(&restoreOptions.Target, "target", "t", "", "directory to extract data to") - initExcludePatternOptions(flags, &restoreOptions.excludePatternOptions) - initIncludePatternOptions(flags, &restoreOptions.includePatternOptions) + restoreOptions.excludePatternOptions.Add(flags) + restoreOptions.includePatternOptions.Add(flags) initSingleSnapshotFilter(flags, &restoreOptions.SnapshotFilter) flags.BoolVar(&restoreOptions.DryRun, "dry-run", false, "do not write any data, just show what would be done") diff --git a/cmd/restic/cmd_rewrite.go b/cmd/restic/cmd_rewrite.go index 7788016b7..fc9da5b60 100644 --- a/cmd/restic/cmd_rewrite.go +++ b/cmd/restic/cmd_rewrite.go @@ -103,7 +103,7 @@ func init() { f.StringVar(&rewriteOptions.Metadata.Time, "new-time", "", "replace time of the backup") initMultiSnapshotFilter(f, &rewriteOptions.SnapshotFilter, true) - initExcludePatternOptions(f, &rewriteOptions.excludePatternOptions) + rewriteOptions.excludePatternOptions.Add(f) } type rewriteFilterFunc func(ctx context.Context, sn *restic.Snapshot) (restic.ID, error) diff --git a/cmd/restic/exclude.go b/cmd/restic/exclude.go index 40eb93933..a37f9c68e 100644 --- a/cmd/restic/exclude.go +++ b/cmd/restic/exclude.go @@ -130,7 +130,7 @@ type excludePatternOptions struct { InsensitiveExcludeFiles []string } -func initExcludePatternOptions(f *pflag.FlagSet, opts *excludePatternOptions) { +func (opts *excludePatternOptions) Add(f *pflag.FlagSet) { f.StringArrayVarP(&opts.Excludes, "exclude", "e", nil, "exclude a `pattern` (can be specified multiple times)") f.StringArrayVar(&opts.InsensitiveExcludes, "iexclude", nil, "same as --exclude `pattern` but ignores the casing of filenames") f.StringArrayVar(&opts.ExcludeFiles, "exclude-file", nil, "read exclude patterns from a `file` (can be specified multiple times)") diff --git a/cmd/restic/include.go b/cmd/restic/include.go index dcc4c7f37..514a24016 100644 --- a/cmd/restic/include.go +++ b/cmd/restic/include.go @@ -19,7 +19,7 @@ type includePatternOptions struct { InsensitiveIncludeFiles []string } -func initIncludePatternOptions(f *pflag.FlagSet, opts *includePatternOptions) { +func (opts *includePatternOptions) Add(f *pflag.FlagSet) { f.StringArrayVarP(&opts.Includes, "include", "i", nil, "include a `pattern` (can be specified multiple times)") f.StringArrayVar(&opts.InsensitiveIncludes, "iinclude", nil, "same as --include `pattern` but ignores the casing of filenames") f.StringArrayVar(&opts.IncludeFiles, "include-file", nil, "read include patterns from a `file` (can be specified multiple times)") From f1585af0f263e517f1cfbb468fabb8a63dc9bc76 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Tue, 27 Aug 2024 14:03:36 +0200 Subject: [PATCH 5/9] move include/exclude options to filter package --- cmd/restic/cmd_backup.go | 13 +- cmd/restic/cmd_restore.go | 13 +- cmd/restic/cmd_rewrite.go | 9 +- cmd/restic/cmd_rewrite_integration_test.go | 3 +- cmd/restic/exclude.go | 160 +---------------- cmd/restic/integration_filter_pattern_test.go | 25 +-- internal/archiver/exclude.go | 15 +- internal/filter/exclude.go | 162 ++++++++++++++++++ .../filter}/exclude_test.go | 6 +- {cmd/restic => internal/filter}/include.go | 37 ++-- .../filter}/include_test.go | 6 +- 11 files changed, 233 insertions(+), 216 deletions(-) create mode 100644 internal/filter/exclude.go rename {cmd/restic => internal/filter}/exclude_test.go (92%) rename {cmd/restic => internal/filter}/include.go (65%) rename {cmd/restic => internal/filter}/include_test.go (92%) diff --git a/cmd/restic/cmd_backup.go b/cmd/restic/cmd_backup.go index 8d72a27b0..43ef29ba2 100644 --- a/cmd/restic/cmd_backup.go +++ b/cmd/restic/cmd_backup.go @@ -20,6 +20,7 @@ import ( "github.com/restic/restic/internal/archiver" "github.com/restic/restic/internal/debug" "github.com/restic/restic/internal/errors" + "github.com/restic/restic/internal/filter" "github.com/restic/restic/internal/fs" "github.com/restic/restic/internal/repository" "github.com/restic/restic/internal/restic" @@ -66,7 +67,7 @@ Exit status is 12 if the password is incorrect. // BackupOptions bundles all options for the backup command. type BackupOptions struct { - excludePatternOptions + filter.ExcludePatternOptions Parent string GroupBy restic.SnapshotGroupByOptions @@ -108,7 +109,7 @@ func init() { f.VarP(&backupOptions.GroupBy, "group-by", "g", "`group` snapshots by host, paths and/or tags, separated by comma (disable grouping with '')") f.BoolVarP(&backupOptions.Force, "force", "f", false, `force re-reading the source files/directories (overrides the "parent" flag)`) - backupOptions.excludePatternOptions.Add(f) + backupOptions.ExcludePatternOptions.Add(f) f.BoolVarP(&backupOptions.ExcludeOtherFS, "one-file-system", "x", false, "exclude other file systems, don't cross filesystem boundaries and subvolumes") f.StringArrayVar(&backupOptions.ExcludeIfPresent, "exclude-if-present", nil, "takes `filename[:header]`, exclude contents of directories containing filename (except filename itself) if header of that file is as provided (can be specified multiple times)") @@ -297,7 +298,7 @@ func (opts BackupOptions) Check(gopts GlobalOptions, args []string) error { // collectRejectByNameFuncs returns a list of all functions which may reject data // from being saved in a snapshot based on path only -func collectRejectByNameFuncs(opts BackupOptions, repo *repository.Repository) (fs []RejectByNameFunc, err error) { +func collectRejectByNameFuncs(opts BackupOptions, repo *repository.Repository) (fs []archiver.RejectByNameFunc, err error) { // exclude restic cache if repo.Cache != nil { f, err := rejectResticCache(repo) @@ -308,11 +309,13 @@ func collectRejectByNameFuncs(opts BackupOptions, repo *repository.Repository) ( fs = append(fs, f) } - fsPatterns, err := opts.excludePatternOptions.CollectPatterns() + fsPatterns, err := opts.ExcludePatternOptions.CollectPatterns(Warnf) if err != nil { return nil, err } - fs = append(fs, fsPatterns...) + for _, pat := range fsPatterns { + fs = append(fs, archiver.RejectByNameFunc(pat)) + } return fs, nil } diff --git a/cmd/restic/cmd_restore.go b/cmd/restic/cmd_restore.go index f20359dc0..82dd408a8 100644 --- a/cmd/restic/cmd_restore.go +++ b/cmd/restic/cmd_restore.go @@ -7,6 +7,7 @@ import ( "github.com/restic/restic/internal/debug" "github.com/restic/restic/internal/errors" + "github.com/restic/restic/internal/filter" "github.com/restic/restic/internal/restic" "github.com/restic/restic/internal/restorer" "github.com/restic/restic/internal/ui" @@ -49,8 +50,8 @@ Exit status is 12 if the password is incorrect. // RestoreOptions collects all options for the restore command. type RestoreOptions struct { - excludePatternOptions - includePatternOptions + filter.ExcludePatternOptions + filter.IncludePatternOptions Target string restic.SnapshotFilter DryRun bool @@ -68,8 +69,8 @@ func init() { flags := cmdRestore.Flags() flags.StringVarP(&restoreOptions.Target, "target", "t", "", "directory to extract data to") - restoreOptions.excludePatternOptions.Add(flags) - restoreOptions.includePatternOptions.Add(flags) + restoreOptions.ExcludePatternOptions.Add(flags) + restoreOptions.IncludePatternOptions.Add(flags) initSingleSnapshotFilter(flags, &restoreOptions.SnapshotFilter) flags.BoolVar(&restoreOptions.DryRun, "dry-run", false, "do not write any data, just show what would be done") @@ -82,12 +83,12 @@ func init() { func runRestore(ctx context.Context, opts RestoreOptions, gopts GlobalOptions, term *termstatus.Terminal, args []string) error { - excludePatternFns, err := opts.excludePatternOptions.CollectPatterns() + excludePatternFns, err := opts.ExcludePatternOptions.CollectPatterns(Warnf) if err != nil { return err } - includePatternFns, err := opts.includePatternOptions.CollectPatterns() + includePatternFns, err := opts.IncludePatternOptions.CollectPatterns(Warnf) if err != nil { return err } diff --git a/cmd/restic/cmd_rewrite.go b/cmd/restic/cmd_rewrite.go index fc9da5b60..a9f664110 100644 --- a/cmd/restic/cmd_rewrite.go +++ b/cmd/restic/cmd_rewrite.go @@ -10,6 +10,7 @@ import ( "github.com/restic/restic/internal/debug" "github.com/restic/restic/internal/errors" + "github.com/restic/restic/internal/filter" "github.com/restic/restic/internal/repository" "github.com/restic/restic/internal/restic" "github.com/restic/restic/internal/walker" @@ -88,7 +89,7 @@ type RewriteOptions struct { Metadata snapshotMetadataArgs restic.SnapshotFilter - excludePatternOptions + filter.ExcludePatternOptions } var rewriteOptions RewriteOptions @@ -103,7 +104,7 @@ func init() { f.StringVar(&rewriteOptions.Metadata.Time, "new-time", "", "replace time of the backup") initMultiSnapshotFilter(f, &rewriteOptions.SnapshotFilter, true) - rewriteOptions.excludePatternOptions.Add(f) + rewriteOptions.ExcludePatternOptions.Add(f) } type rewriteFilterFunc func(ctx context.Context, sn *restic.Snapshot) (restic.ID, error) @@ -113,7 +114,7 @@ func rewriteSnapshot(ctx context.Context, repo *repository.Repository, sn *resti return false, errors.Errorf("snapshot %v has nil tree", sn.ID().Str()) } - rejectByNameFuncs, err := opts.excludePatternOptions.CollectPatterns() + rejectByNameFuncs, err := opts.ExcludePatternOptions.CollectPatterns(Warnf) if err != nil { return false, err } @@ -263,7 +264,7 @@ func filterAndReplaceSnapshot(ctx context.Context, repo restic.Repository, sn *r } func runRewrite(ctx context.Context, opts RewriteOptions, gopts GlobalOptions, args []string) error { - if opts.excludePatternOptions.Empty() && opts.Metadata.empty() { + if opts.ExcludePatternOptions.Empty() && opts.Metadata.empty() { return errors.Fatal("Nothing to do: no excludes provided and no new metadata provided") } diff --git a/cmd/restic/cmd_rewrite_integration_test.go b/cmd/restic/cmd_rewrite_integration_test.go index 781266184..6471d49ba 100644 --- a/cmd/restic/cmd_rewrite_integration_test.go +++ b/cmd/restic/cmd_rewrite_integration_test.go @@ -5,6 +5,7 @@ import ( "path/filepath" "testing" + "github.com/restic/restic/internal/filter" "github.com/restic/restic/internal/restic" rtest "github.com/restic/restic/internal/test" "github.com/restic/restic/internal/ui" @@ -12,7 +13,7 @@ import ( func testRunRewriteExclude(t testing.TB, gopts GlobalOptions, excludes []string, forget bool, metadata snapshotMetadataArgs) { opts := RewriteOptions{ - excludePatternOptions: excludePatternOptions{ + ExcludePatternOptions: filter.ExcludePatternOptions{ Excludes: excludes, }, Forget: forget, diff --git a/cmd/restic/exclude.go b/cmd/restic/exclude.go index a37f9c68e..99d1128a9 100644 --- a/cmd/restic/exclude.go +++ b/cmd/restic/exclude.go @@ -1,60 +1,16 @@ package main import ( - "bufio" - "bytes" - "fmt" - "os" - "strings" - + "github.com/restic/restic/internal/archiver" "github.com/restic/restic/internal/debug" "github.com/restic/restic/internal/errors" - "github.com/restic/restic/internal/filter" "github.com/restic/restic/internal/fs" "github.com/restic/restic/internal/repository" - "github.com/restic/restic/internal/textfile" - "github.com/spf13/pflag" ) -// RejectByNameFunc is a function that takes a filename of a -// file that would be included in the backup. The function returns true if it -// should be excluded (rejected) from the backup. -type RejectByNameFunc func(path string) bool - -// rejectByPattern returns a RejectByNameFunc which rejects files that match -// one of the patterns. -func rejectByPattern(patterns []string) RejectByNameFunc { - parsedPatterns := filter.ParsePatterns(patterns) - return func(item string) bool { - matched, err := filter.List(parsedPatterns, item) - if err != nil { - Warnf("error for exclude pattern: %v", err) - } - - if matched { - debug.Log("path %q excluded by an exclude pattern", item) - return true - } - - return false - } -} - -// Same as `rejectByPattern` but case insensitive. -func rejectByInsensitivePattern(patterns []string) RejectByNameFunc { - for index, path := range patterns { - patterns[index] = strings.ToLower(path) - } - - rejFunc := rejectByPattern(patterns) - return func(item string) bool { - return rejFunc(strings.ToLower(item)) - } -} - // rejectResticCache returns a RejectByNameFunc that rejects the restic cache // directory (if set). -func rejectResticCache(repo *repository.Repository) (RejectByNameFunc, error) { +func rejectResticCache(repo *repository.Repository) (archiver.RejectByNameFunc, error) { if repo.Cache == nil { return func(string) bool { return false @@ -75,115 +31,3 @@ func rejectResticCache(repo *repository.Repository) (RejectByNameFunc, error) { return false }, nil } - -// readPatternsFromFiles reads all files and returns the list of -// patterns. For each line, leading and trailing white space is removed -// and comment lines are ignored. For each remaining pattern, environment -// variables are resolved. For adding a literal dollar sign ($), write $$ to -// the file. -func readPatternsFromFiles(files []string) ([]string, error) { - getenvOrDollar := func(s string) string { - if s == "$" { - return "$" - } - return os.Getenv(s) - } - - var patterns []string - for _, filename := range files { - err := func() (err error) { - data, err := textfile.Read(filename) - if err != nil { - return err - } - - scanner := bufio.NewScanner(bytes.NewReader(data)) - for scanner.Scan() { - line := strings.TrimSpace(scanner.Text()) - - // ignore empty lines - if line == "" { - continue - } - - // strip comments - if strings.HasPrefix(line, "#") { - continue - } - - line = os.Expand(line, getenvOrDollar) - patterns = append(patterns, line) - } - return scanner.Err() - }() - if err != nil { - return nil, fmt.Errorf("failed to read patterns from file %q: %w", filename, err) - } - } - return patterns, nil -} - -type excludePatternOptions struct { - Excludes []string - InsensitiveExcludes []string - ExcludeFiles []string - InsensitiveExcludeFiles []string -} - -func (opts *excludePatternOptions) Add(f *pflag.FlagSet) { - f.StringArrayVarP(&opts.Excludes, "exclude", "e", nil, "exclude a `pattern` (can be specified multiple times)") - f.StringArrayVar(&opts.InsensitiveExcludes, "iexclude", nil, "same as --exclude `pattern` but ignores the casing of filenames") - f.StringArrayVar(&opts.ExcludeFiles, "exclude-file", nil, "read exclude patterns from a `file` (can be specified multiple times)") - f.StringArrayVar(&opts.InsensitiveExcludeFiles, "iexclude-file", nil, "same as --exclude-file but ignores casing of `file`names in patterns") -} - -func (opts *excludePatternOptions) Empty() bool { - return len(opts.Excludes) == 0 && len(opts.InsensitiveExcludes) == 0 && len(opts.ExcludeFiles) == 0 && len(opts.InsensitiveExcludeFiles) == 0 -} - -func (opts excludePatternOptions) CollectPatterns() ([]RejectByNameFunc, error) { - var fs []RejectByNameFunc - // add patterns from file - if len(opts.ExcludeFiles) > 0 { - excludePatterns, err := readPatternsFromFiles(opts.ExcludeFiles) - if err != nil { - return nil, err - } - - if err := filter.ValidatePatterns(excludePatterns); err != nil { - return nil, errors.Fatalf("--exclude-file: %s", err) - } - - opts.Excludes = append(opts.Excludes, excludePatterns...) - } - - if len(opts.InsensitiveExcludeFiles) > 0 { - excludes, err := readPatternsFromFiles(opts.InsensitiveExcludeFiles) - if err != nil { - return nil, err - } - - if err := filter.ValidatePatterns(excludes); err != nil { - return nil, errors.Fatalf("--iexclude-file: %s", err) - } - - opts.InsensitiveExcludes = append(opts.InsensitiveExcludes, excludes...) - } - - if len(opts.InsensitiveExcludes) > 0 { - if err := filter.ValidatePatterns(opts.InsensitiveExcludes); err != nil { - return nil, errors.Fatalf("--iexclude: %s", err) - } - - fs = append(fs, rejectByInsensitivePattern(opts.InsensitiveExcludes)) - } - - if len(opts.Excludes) > 0 { - if err := filter.ValidatePatterns(opts.Excludes); err != nil { - return nil, errors.Fatalf("--exclude: %s", err) - } - - fs = append(fs, rejectByPattern(opts.Excludes)) - } - return fs, nil -} diff --git a/cmd/restic/integration_filter_pattern_test.go b/cmd/restic/integration_filter_pattern_test.go index dccbcc0a0..46badbe4f 100644 --- a/cmd/restic/integration_filter_pattern_test.go +++ b/cmd/restic/integration_filter_pattern_test.go @@ -5,6 +5,7 @@ import ( "path/filepath" "testing" + "github.com/restic/restic/internal/filter" rtest "github.com/restic/restic/internal/test" ) @@ -17,14 +18,14 @@ func TestBackupFailsWhenUsingInvalidPatterns(t *testing.T) { var err error // Test --exclude - err = testRunBackupAssumeFailure(t, filepath.Dir(env.testdata), []string{"testdata"}, BackupOptions{excludePatternOptions: excludePatternOptions{Excludes: []string{"*[._]log[.-][0-9]", "!*[._]log[.-][0-9]"}}}, env.gopts) + err = testRunBackupAssumeFailure(t, filepath.Dir(env.testdata), []string{"testdata"}, BackupOptions{ExcludePatternOptions: filter.ExcludePatternOptions{Excludes: []string{"*[._]log[.-][0-9]", "!*[._]log[.-][0-9]"}}}, env.gopts) rtest.Equals(t, `Fatal: --exclude: invalid pattern(s) provided: *[._]log[.-][0-9] !*[._]log[.-][0-9]`, err.Error()) // Test --iexclude - err = testRunBackupAssumeFailure(t, filepath.Dir(env.testdata), []string{"testdata"}, BackupOptions{excludePatternOptions: excludePatternOptions{InsensitiveExcludes: []string{"*[._]log[.-][0-9]", "!*[._]log[.-][0-9]"}}}, env.gopts) + err = testRunBackupAssumeFailure(t, filepath.Dir(env.testdata), []string{"testdata"}, BackupOptions{ExcludePatternOptions: filter.ExcludePatternOptions{InsensitiveExcludes: []string{"*[._]log[.-][0-9]", "!*[._]log[.-][0-9]"}}}, env.gopts) rtest.Equals(t, `Fatal: --iexclude: invalid pattern(s) provided: *[._]log[.-][0-9] @@ -47,14 +48,14 @@ func TestBackupFailsWhenUsingInvalidPatternsFromFile(t *testing.T) { var err error // Test --exclude-file: - err = testRunBackupAssumeFailure(t, filepath.Dir(env.testdata), []string{"testdata"}, BackupOptions{excludePatternOptions: excludePatternOptions{ExcludeFiles: []string{excludeFile}}}, env.gopts) + err = testRunBackupAssumeFailure(t, filepath.Dir(env.testdata), []string{"testdata"}, BackupOptions{ExcludePatternOptions: filter.ExcludePatternOptions{ExcludeFiles: []string{excludeFile}}}, env.gopts) rtest.Equals(t, `Fatal: --exclude-file: invalid pattern(s) provided: *[._]log[.-][0-9] !*[._]log[.-][0-9]`, err.Error()) // Test --iexclude-file - err = testRunBackupAssumeFailure(t, filepath.Dir(env.testdata), []string{"testdata"}, BackupOptions{excludePatternOptions: excludePatternOptions{InsensitiveExcludeFiles: []string{excludeFile}}}, env.gopts) + err = testRunBackupAssumeFailure(t, filepath.Dir(env.testdata), []string{"testdata"}, BackupOptions{ExcludePatternOptions: filter.ExcludePatternOptions{InsensitiveExcludeFiles: []string{excludeFile}}}, env.gopts) rtest.Equals(t, `Fatal: --iexclude-file: invalid pattern(s) provided: *[._]log[.-][0-9] @@ -70,28 +71,28 @@ func TestRestoreFailsWhenUsingInvalidPatterns(t *testing.T) { var err error // Test --exclude - err = testRunRestoreAssumeFailure("latest", RestoreOptions{excludePatternOptions: excludePatternOptions{Excludes: []string{"*[._]log[.-][0-9]", "!*[._]log[.-][0-9]"}}}, env.gopts) + err = testRunRestoreAssumeFailure("latest", RestoreOptions{ExcludePatternOptions: filter.ExcludePatternOptions{Excludes: []string{"*[._]log[.-][0-9]", "!*[._]log[.-][0-9]"}}}, env.gopts) rtest.Equals(t, `Fatal: --exclude: invalid pattern(s) provided: *[._]log[.-][0-9] !*[._]log[.-][0-9]`, err.Error()) // Test --iexclude - err = testRunRestoreAssumeFailure("latest", RestoreOptions{excludePatternOptions: excludePatternOptions{InsensitiveExcludes: []string{"*[._]log[.-][0-9]", "!*[._]log[.-][0-9]"}}}, env.gopts) + err = testRunRestoreAssumeFailure("latest", RestoreOptions{ExcludePatternOptions: filter.ExcludePatternOptions{InsensitiveExcludes: []string{"*[._]log[.-][0-9]", "!*[._]log[.-][0-9]"}}}, env.gopts) rtest.Equals(t, `Fatal: --iexclude: invalid pattern(s) provided: *[._]log[.-][0-9] !*[._]log[.-][0-9]`, err.Error()) // Test --include - err = testRunRestoreAssumeFailure("latest", RestoreOptions{includePatternOptions: includePatternOptions{Includes: []string{"*[._]log[.-][0-9]", "!*[._]log[.-][0-9]"}}}, env.gopts) + err = testRunRestoreAssumeFailure("latest", RestoreOptions{IncludePatternOptions: filter.IncludePatternOptions{Includes: []string{"*[._]log[.-][0-9]", "!*[._]log[.-][0-9]"}}}, env.gopts) rtest.Equals(t, `Fatal: --include: invalid pattern(s) provided: *[._]log[.-][0-9] !*[._]log[.-][0-9]`, err.Error()) // Test --iinclude - err = testRunRestoreAssumeFailure("latest", RestoreOptions{includePatternOptions: includePatternOptions{InsensitiveIncludes: []string{"*[._]log[.-][0-9]", "!*[._]log[.-][0-9]"}}}, env.gopts) + err = testRunRestoreAssumeFailure("latest", RestoreOptions{IncludePatternOptions: filter.IncludePatternOptions{InsensitiveIncludes: []string{"*[._]log[.-][0-9]", "!*[._]log[.-][0-9]"}}}, env.gopts) rtest.Equals(t, `Fatal: --iinclude: invalid pattern(s) provided: *[._]log[.-][0-9] @@ -111,22 +112,22 @@ func TestRestoreFailsWhenUsingInvalidPatternsFromFile(t *testing.T) { t.Fatalf("Could not write include file: %v", fileErr) } - err := testRunRestoreAssumeFailure("latest", RestoreOptions{includePatternOptions: includePatternOptions{IncludeFiles: []string{patternsFile}}}, env.gopts) + err := testRunRestoreAssumeFailure("latest", RestoreOptions{IncludePatternOptions: filter.IncludePatternOptions{IncludeFiles: []string{patternsFile}}}, env.gopts) rtest.Equals(t, `Fatal: --include-file: invalid pattern(s) provided: *[._]log[.-][0-9] !*[._]log[.-][0-9]`, err.Error()) - err = testRunRestoreAssumeFailure("latest", RestoreOptions{excludePatternOptions: excludePatternOptions{ExcludeFiles: []string{patternsFile}}}, env.gopts) + err = testRunRestoreAssumeFailure("latest", RestoreOptions{ExcludePatternOptions: filter.ExcludePatternOptions{ExcludeFiles: []string{patternsFile}}}, env.gopts) rtest.Equals(t, `Fatal: --exclude-file: invalid pattern(s) provided: *[._]log[.-][0-9] !*[._]log[.-][0-9]`, err.Error()) - err = testRunRestoreAssumeFailure("latest", RestoreOptions{includePatternOptions: includePatternOptions{InsensitiveIncludeFiles: []string{patternsFile}}}, env.gopts) + err = testRunRestoreAssumeFailure("latest", RestoreOptions{IncludePatternOptions: filter.IncludePatternOptions{InsensitiveIncludeFiles: []string{patternsFile}}}, env.gopts) rtest.Equals(t, `Fatal: --iinclude-file: invalid pattern(s) provided: *[._]log[.-][0-9] !*[._]log[.-][0-9]`, err.Error()) - err = testRunRestoreAssumeFailure("latest", RestoreOptions{excludePatternOptions: excludePatternOptions{InsensitiveExcludeFiles: []string{patternsFile}}}, env.gopts) + err = testRunRestoreAssumeFailure("latest", RestoreOptions{ExcludePatternOptions: filter.ExcludePatternOptions{InsensitiveExcludeFiles: []string{patternsFile}}}, env.gopts) rtest.Equals(t, `Fatal: --iexclude-file: invalid pattern(s) provided: *[._]log[.-][0-9] !*[._]log[.-][0-9]`, err.Error()) diff --git a/internal/archiver/exclude.go b/internal/archiver/exclude.go index f4444812c..62e4ea17e 100644 --- a/internal/archiver/exclude.go +++ b/internal/archiver/exclude.go @@ -14,6 +14,16 @@ import ( "github.com/restic/restic/internal/ui" ) +// RejectByNameFunc is a function that takes a filename of a +// file that would be included in the backup. The function returns true if it +// should be excluded (rejected) from the backup. +type RejectByNameFunc func(path string) bool + +// RejectFunc is a function that takes a filename and os.FileInfo of a +// file that would be included in the backup. The function returns true if it +// should be excluded (rejected) from the backup. +type RejectFunc func(path string, fi os.FileInfo, fs fs.FS) bool + type rejectionCache struct { m map[string]bool mtx sync.Mutex @@ -49,11 +59,6 @@ func (rc *rejectionCache) Store(dir string, rejected bool) { rc.m[dir] = rejected } -// RejectFunc is a function that takes a filename and os.FileInfo of a -// file that would be included in the backup. The function returns true if it -// should be excluded (rejected) from the backup. -type RejectFunc func(path string, fi os.FileInfo, fs fs.FS) bool - // RejectIfPresent returns a RejectByNameFunc which itself returns whether a path // should be excluded. The RejectByNameFunc considers a file to be excluded when // it resides in a directory with an exclusion file, that is specified by diff --git a/internal/filter/exclude.go b/internal/filter/exclude.go new file mode 100644 index 000000000..48ecdfddf --- /dev/null +++ b/internal/filter/exclude.go @@ -0,0 +1,162 @@ +package filter + +import ( + "bufio" + "bytes" + "fmt" + "os" + "strings" + + "github.com/restic/restic/internal/debug" + "github.com/restic/restic/internal/errors" + "github.com/restic/restic/internal/textfile" + "github.com/spf13/pflag" +) + +// RejectByNameFunc is a function that takes a filename of a +// file that would be included in the backup. The function returns true if it +// should be excluded (rejected) from the backup. +type RejectByNameFunc func(path string) bool + +// RejectByPattern returns a RejectByNameFunc which rejects files that match +// one of the patterns. +func RejectByPattern(patterns []string, warnf func(msg string, args ...interface{})) RejectByNameFunc { + parsedPatterns := ParsePatterns(patterns) + return func(item string) bool { + matched, err := List(parsedPatterns, item) + if err != nil { + warnf("error for exclude pattern: %v", err) + } + + if matched { + debug.Log("path %q excluded by an exclude pattern", item) + return true + } + + return false + } +} + +// RejectByInsensitivePattern is like RejectByPattern but case insensitive. +func RejectByInsensitivePattern(patterns []string, warnf func(msg string, args ...interface{})) RejectByNameFunc { + for index, path := range patterns { + patterns[index] = strings.ToLower(path) + } + + rejFunc := RejectByPattern(patterns, warnf) + return func(item string) bool { + return rejFunc(strings.ToLower(item)) + } +} + +// readPatternsFromFiles reads all files and returns the list of +// patterns. For each line, leading and trailing white space is removed +// and comment lines are ignored. For each remaining pattern, environment +// variables are resolved. For adding a literal dollar sign ($), write $$ to +// the file. +func readPatternsFromFiles(files []string) ([]string, error) { + getenvOrDollar := func(s string) string { + if s == "$" { + return "$" + } + return os.Getenv(s) + } + + var patterns []string + for _, filename := range files { + err := func() (err error) { + data, err := textfile.Read(filename) + if err != nil { + return err + } + + scanner := bufio.NewScanner(bytes.NewReader(data)) + for scanner.Scan() { + line := strings.TrimSpace(scanner.Text()) + + // ignore empty lines + if line == "" { + continue + } + + // strip comments + if strings.HasPrefix(line, "#") { + continue + } + + line = os.Expand(line, getenvOrDollar) + patterns = append(patterns, line) + } + return scanner.Err() + }() + if err != nil { + return nil, fmt.Errorf("failed to read patterns from file %q: %w", filename, err) + } + } + return patterns, nil +} + +type ExcludePatternOptions struct { + Excludes []string + InsensitiveExcludes []string + ExcludeFiles []string + InsensitiveExcludeFiles []string +} + +func (opts *ExcludePatternOptions) Add(f *pflag.FlagSet) { + f.StringArrayVarP(&opts.Excludes, "exclude", "e", nil, "exclude a `pattern` (can be specified multiple times)") + f.StringArrayVar(&opts.InsensitiveExcludes, "iexclude", nil, "same as --exclude `pattern` but ignores the casing of filenames") + f.StringArrayVar(&opts.ExcludeFiles, "exclude-file", nil, "read exclude patterns from a `file` (can be specified multiple times)") + f.StringArrayVar(&opts.InsensitiveExcludeFiles, "iexclude-file", nil, "same as --exclude-file but ignores casing of `file`names in patterns") +} + +func (opts *ExcludePatternOptions) Empty() bool { + return len(opts.Excludes) == 0 && len(opts.InsensitiveExcludes) == 0 && len(opts.ExcludeFiles) == 0 && len(opts.InsensitiveExcludeFiles) == 0 +} + +func (opts ExcludePatternOptions) CollectPatterns(warnf func(msg string, args ...interface{})) ([]RejectByNameFunc, error) { + var fs []RejectByNameFunc + // add patterns from file + if len(opts.ExcludeFiles) > 0 { + excludePatterns, err := readPatternsFromFiles(opts.ExcludeFiles) + if err != nil { + return nil, err + } + + if err := ValidatePatterns(excludePatterns); err != nil { + return nil, errors.Fatalf("--exclude-file: %s", err) + } + + opts.Excludes = append(opts.Excludes, excludePatterns...) + } + + if len(opts.InsensitiveExcludeFiles) > 0 { + excludes, err := readPatternsFromFiles(opts.InsensitiveExcludeFiles) + if err != nil { + return nil, err + } + + if err := ValidatePatterns(excludes); err != nil { + return nil, errors.Fatalf("--iexclude-file: %s", err) + } + + opts.InsensitiveExcludes = append(opts.InsensitiveExcludes, excludes...) + } + + if len(opts.InsensitiveExcludes) > 0 { + if err := ValidatePatterns(opts.InsensitiveExcludes); err != nil { + return nil, errors.Fatalf("--iexclude: %s", err) + } + + fs = append(fs, RejectByInsensitivePattern(opts.InsensitiveExcludes, warnf)) + } + + if len(opts.Excludes) > 0 { + if err := ValidatePatterns(opts.Excludes); err != nil { + return nil, errors.Fatalf("--exclude: %s", err) + } + + fs = append(fs, RejectByPattern(opts.Excludes, warnf)) + } + return fs, nil +} diff --git a/cmd/restic/exclude_test.go b/internal/filter/exclude_test.go similarity index 92% rename from cmd/restic/exclude_test.go rename to internal/filter/exclude_test.go index 177a81df2..738fb216d 100644 --- a/cmd/restic/exclude_test.go +++ b/internal/filter/exclude_test.go @@ -1,4 +1,4 @@ -package main +package filter import ( "testing" @@ -21,7 +21,7 @@ func TestRejectByPattern(t *testing.T) { for _, tc := range tests { t.Run("", func(t *testing.T) { - reject := rejectByPattern(patterns) + reject := RejectByPattern(patterns, nil) res := reject(tc.filename) if res != tc.reject { t.Fatalf("wrong result for filename %v: want %v, got %v", @@ -48,7 +48,7 @@ func TestRejectByInsensitivePattern(t *testing.T) { for _, tc := range tests { t.Run("", func(t *testing.T) { - reject := rejectByInsensitivePattern(patterns) + reject := RejectByInsensitivePattern(patterns, nil) res := reject(tc.filename) if res != tc.reject { t.Fatalf("wrong result for filename %v: want %v, got %v", diff --git a/cmd/restic/include.go b/internal/filter/include.go similarity index 65% rename from cmd/restic/include.go rename to internal/filter/include.go index 514a24016..87d5f1207 100644 --- a/cmd/restic/include.go +++ b/internal/filter/include.go @@ -1,10 +1,9 @@ -package main +package filter import ( "strings" "github.com/restic/restic/internal/errors" - "github.com/restic/restic/internal/filter" "github.com/spf13/pflag" ) @@ -12,21 +11,21 @@ import ( // in the restore process and returns whether it should be included. type IncludeByNameFunc func(item string) (matched bool, childMayMatch bool) -type includePatternOptions struct { +type IncludePatternOptions struct { Includes []string InsensitiveIncludes []string IncludeFiles []string InsensitiveIncludeFiles []string } -func (opts *includePatternOptions) Add(f *pflag.FlagSet) { +func (opts *IncludePatternOptions) Add(f *pflag.FlagSet) { f.StringArrayVarP(&opts.Includes, "include", "i", nil, "include a `pattern` (can be specified multiple times)") f.StringArrayVar(&opts.InsensitiveIncludes, "iinclude", nil, "same as --include `pattern` but ignores the casing of filenames") f.StringArrayVar(&opts.IncludeFiles, "include-file", nil, "read include patterns from a `file` (can be specified multiple times)") f.StringArrayVar(&opts.InsensitiveIncludeFiles, "iinclude-file", nil, "same as --include-file but ignores casing of `file`names in patterns") } -func (opts includePatternOptions) CollectPatterns() ([]IncludeByNameFunc, error) { +func (opts IncludePatternOptions) CollectPatterns(warnf func(msg string, args ...interface{})) ([]IncludeByNameFunc, error) { var fs []IncludeByNameFunc if len(opts.IncludeFiles) > 0 { includePatterns, err := readPatternsFromFiles(opts.IncludeFiles) @@ -34,7 +33,7 @@ func (opts includePatternOptions) CollectPatterns() ([]IncludeByNameFunc, error) return nil, err } - if err := filter.ValidatePatterns(includePatterns); err != nil { + if err := ValidatePatterns(includePatterns); err != nil { return nil, errors.Fatalf("--include-file: %s", err) } @@ -47,7 +46,7 @@ func (opts includePatternOptions) CollectPatterns() ([]IncludeByNameFunc, error) return nil, err } - if err := filter.ValidatePatterns(includePatterns); err != nil { + if err := ValidatePatterns(includePatterns); err != nil { return nil, errors.Fatalf("--iinclude-file: %s", err) } @@ -55,45 +54,45 @@ func (opts includePatternOptions) CollectPatterns() ([]IncludeByNameFunc, error) } if len(opts.InsensitiveIncludes) > 0 { - if err := filter.ValidatePatterns(opts.InsensitiveIncludes); err != nil { + if err := ValidatePatterns(opts.InsensitiveIncludes); err != nil { return nil, errors.Fatalf("--iinclude: %s", err) } - fs = append(fs, includeByInsensitivePattern(opts.InsensitiveIncludes)) + fs = append(fs, IncludeByInsensitivePattern(opts.InsensitiveIncludes, warnf)) } if len(opts.Includes) > 0 { - if err := filter.ValidatePatterns(opts.Includes); err != nil { + if err := ValidatePatterns(opts.Includes); err != nil { return nil, errors.Fatalf("--include: %s", err) } - fs = append(fs, includeByPattern(opts.Includes)) + fs = append(fs, IncludeByPattern(opts.Includes, warnf)) } return fs, nil } -// includeByPattern returns a IncludeByNameFunc which includes files that match +// IncludeByPattern returns a IncludeByNameFunc which includes files that match // one of the patterns. -func includeByPattern(patterns []string) IncludeByNameFunc { - parsedPatterns := filter.ParsePatterns(patterns) +func IncludeByPattern(patterns []string, warnf func(msg string, args ...interface{})) IncludeByNameFunc { + parsedPatterns := ParsePatterns(patterns) return func(item string) (matched bool, childMayMatch bool) { - matched, childMayMatch, err := filter.ListWithChild(parsedPatterns, item) + matched, childMayMatch, err := ListWithChild(parsedPatterns, item) if err != nil { - Warnf("error for include pattern: %v", err) + warnf("error for include pattern: %v", err) } return matched, childMayMatch } } -// includeByInsensitivePattern returns a IncludeByNameFunc which includes files that match +// IncludeByInsensitivePattern returns a IncludeByNameFunc which includes files that match // one of the patterns, ignoring the casing of the filenames. -func includeByInsensitivePattern(patterns []string) IncludeByNameFunc { +func IncludeByInsensitivePattern(patterns []string, warnf func(msg string, args ...interface{})) IncludeByNameFunc { for index, path := range patterns { patterns[index] = strings.ToLower(path) } - includeFunc := includeByPattern(patterns) + includeFunc := IncludeByPattern(patterns, warnf) return func(item string) (matched bool, childMayMatch bool) { return includeFunc(strings.ToLower(item)) } diff --git a/cmd/restic/include_test.go b/internal/filter/include_test.go similarity index 92% rename from cmd/restic/include_test.go rename to internal/filter/include_test.go index 751bfbb76..2f474622c 100644 --- a/cmd/restic/include_test.go +++ b/internal/filter/include_test.go @@ -1,4 +1,4 @@ -package main +package filter import ( "testing" @@ -21,7 +21,7 @@ func TestIncludeByPattern(t *testing.T) { for _, tc := range tests { t.Run(tc.filename, func(t *testing.T) { - includeFunc := includeByPattern(patterns) + includeFunc := IncludeByPattern(patterns, nil) matched, _ := includeFunc(tc.filename) if matched != tc.include { t.Fatalf("wrong result for filename %v: want %v, got %v", @@ -48,7 +48,7 @@ func TestIncludeByInsensitivePattern(t *testing.T) { for _, tc := range tests { t.Run(tc.filename, func(t *testing.T) { - includeFunc := includeByInsensitivePattern(patterns) + includeFunc := IncludeByInsensitivePattern(patterns, nil) matched, _ := includeFunc(tc.filename) if matched != tc.include { t.Fatalf("wrong result for filename %v: want %v, got %v", From 6fd5d5f2d5f142a0a36a21d8e6ca6421b54f632b Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Tue, 27 Aug 2024 14:10:57 +0200 Subject: [PATCH 6/9] archiver: move helper functions to combine rejects --- cmd/restic/cmd_backup.go | 20 ++------------------ internal/archiver/exclude.go | 22 ++++++++++++++++++++++ 2 files changed, 24 insertions(+), 18 deletions(-) diff --git a/cmd/restic/cmd_backup.go b/cmd/restic/cmd_backup.go index 43ef29ba2..eaca150d9 100644 --- a/cmd/restic/cmd_backup.go +++ b/cmd/restic/cmd_backup.go @@ -529,21 +529,11 @@ func runBackup(ctx context.Context, opts BackupOptions, gopts GlobalOptions, ter } bar := newIndexTerminalProgress(gopts.Quiet, gopts.JSON, term) - err = repo.LoadIndex(ctx, bar) if err != nil { return err } - selectByNameFilter := func(item string) bool { - for _, reject := range rejectByNameFuncs { - if reject(item) { - return false - } - } - return true - } - var targetFS fs.FS = fs.Local{} if runtime.GOOS == "windows" && opts.UseFsSnapshot { if err = fs.HasSufficientPrivilegesForVSS(); err != nil { @@ -592,14 +582,8 @@ func runBackup(ctx context.Context, opts BackupOptions, gopts GlobalOptions, ter return err } - selectFilter := func(item string, fi os.FileInfo, fs fs.FS) bool { - for _, reject := range rejectFuncs { - if reject(item, fi, fs) { - return false - } - } - return true - } + selectByNameFilter := archiver.CombineRejectByNames(rejectByNameFuncs) + selectFilter := archiver.CombineRejects(rejectFuncs) wg, wgCtx := errgroup.WithContext(ctx) cancelCtx, cancel := context.WithCancel(wgCtx) diff --git a/internal/archiver/exclude.go b/internal/archiver/exclude.go index 62e4ea17e..280322f3c 100644 --- a/internal/archiver/exclude.go +++ b/internal/archiver/exclude.go @@ -24,6 +24,28 @@ type RejectByNameFunc func(path string) bool // should be excluded (rejected) from the backup. type RejectFunc func(path string, fi os.FileInfo, fs fs.FS) bool +func CombineRejectByNames(funcs []RejectByNameFunc) SelectByNameFunc { + return func(item string) bool { + for _, reject := range funcs { + if reject(item) { + return false + } + } + return true + } +} + +func CombineRejects(funcs []RejectFunc) SelectFunc { + return func(item string, fi os.FileInfo, fs fs.FS) bool { + for _, reject := range funcs { + if reject(item, fi, fs) { + return false + } + } + return true + } +} + type rejectionCache struct { m map[string]bool mtx sync.Mutex From 70fbad662391334c285bf7b8623610face9df7ee Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Tue, 27 Aug 2024 14:25:35 +0200 Subject: [PATCH 7/9] archiver: minimize imports --- cmd/restic/cmd_backup.go | 8 +++++++- internal/archiver/exclude.go | 10 ++-------- internal/archiver/exclude_test.go | 5 +---- internal/archiver/scanner.go | 3 +-- 4 files changed, 11 insertions(+), 15 deletions(-) diff --git a/cmd/restic/cmd_backup.go b/cmd/restic/cmd_backup.go index eaca150d9..107e8bbe0 100644 --- a/cmd/restic/cmd_backup.go +++ b/cmd/restic/cmd_backup.go @@ -25,6 +25,7 @@ import ( "github.com/restic/restic/internal/repository" "github.com/restic/restic/internal/restic" "github.com/restic/restic/internal/textfile" + "github.com/restic/restic/internal/ui" "github.com/restic/restic/internal/ui/backup" "github.com/restic/restic/internal/ui/termstatus" ) @@ -333,7 +334,12 @@ func collectRejectFuncs(opts BackupOptions, targets []string, fs fs.FS) (funcs [ } if len(opts.ExcludeLargerThan) != 0 && !opts.Stdin && !opts.StdinCommand { - f, err := archiver.RejectBySize(opts.ExcludeLargerThan) + maxSize, err := ui.ParseBytes(opts.ExcludeLargerThan) + if err != nil { + return nil, err + } + + f, err := archiver.RejectBySize(maxSize) if err != nil { return nil, err } diff --git a/internal/archiver/exclude.go b/internal/archiver/exclude.go index 280322f3c..1e855fc3a 100644 --- a/internal/archiver/exclude.go +++ b/internal/archiver/exclude.go @@ -11,7 +11,6 @@ import ( "github.com/restic/restic/internal/debug" "github.com/restic/restic/internal/errors" "github.com/restic/restic/internal/fs" - "github.com/restic/restic/internal/ui" ) // RejectByNameFunc is a function that takes a filename of a @@ -139,7 +138,7 @@ func isExcludedByFile(filename, tagFilename, header string, rc *rejectionCache, func isDirExcludedByFile(dir, tagFilename, header string, fs fs.FS, warnf func(msg string, args ...interface{})) bool { tf := fs.Join(dir, tagFilename) _, err := fs.Lstat(tf) - if os.IsNotExist(err) { + if errors.Is(err, os.ErrNotExist) { return false } if err != nil { @@ -315,12 +314,7 @@ func RejectByDevice(samples []string, filesystem fs.FS) (RejectFunc, error) { }, nil } -func RejectBySize(maxSizeStr string) (RejectFunc, error) { - maxSize, err := ui.ParseBytes(maxSizeStr) - if err != nil { - return nil, err - } - +func RejectBySize(maxSize int64) (RejectFunc, error) { return func(item string, fi os.FileInfo, _ fs.FS) bool { // directory will be ignored if fi.IsDir() { diff --git a/internal/archiver/exclude_test.go b/internal/archiver/exclude_test.go index b9f1f8cdd..7eb24b08b 100644 --- a/internal/archiver/exclude_test.go +++ b/internal/archiver/exclude_test.go @@ -139,9 +139,6 @@ func TestMultipleIsExcludedByFile(t *testing.T) { func TestIsExcludedByFileSize(t *testing.T) { tempDir := test.TempDir(t) - // Max size of file is set to be 1k - maxSizeStr := "1k" - // Create some files in a temporary directory. // Files in UPPERCASE will be used as exclusion triggers later on. // We will test the inclusion later, so we add the expected value as @@ -185,7 +182,7 @@ func TestIsExcludedByFileSize(t *testing.T) { test.OKs(t, errs) // see if anything went wrong during the creation // create rejection function - sizeExclude, _ := RejectBySize(maxSizeStr) + sizeExclude, _ := RejectBySize(1024) // To mock the archiver scanning walk, we create filepath.WalkFn // that tests against the two rejection functions and stores diff --git a/internal/archiver/scanner.go b/internal/archiver/scanner.go index cb74a31d6..debd09aa3 100644 --- a/internal/archiver/scanner.go +++ b/internal/archiver/scanner.go @@ -3,7 +3,6 @@ package archiver import ( "context" "os" - "path/filepath" "sort" "github.com/restic/restic/internal/debug" @@ -131,7 +130,7 @@ func (s *Scanner) scan(ctx context.Context, stats ScanStats, target string) (Sca sort.Strings(names) for _, name := range names { - stats, err = s.scan(ctx, stats, filepath.Join(target, name)) + stats, err = s.scan(ctx, stats, s.FS.Join(target, name)) if err != nil { return stats, err } From e79dca644e03b0be946a0b2e2b78b64a303ef22e Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Tue, 27 Aug 2024 14:35:10 +0200 Subject: [PATCH 8/9] fs: unexport DeviceID --- internal/fs/deviceid_unix.go | 4 ++-- internal/fs/deviceid_windows.go | 4 ++-- internal/fs/fs_local.go | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/internal/fs/deviceid_unix.go b/internal/fs/deviceid_unix.go index c366221ab..4d5593335 100644 --- a/internal/fs/deviceid_unix.go +++ b/internal/fs/deviceid_unix.go @@ -10,9 +10,9 @@ import ( "github.com/restic/restic/internal/errors" ) -// DeviceID extracts the device ID from an os.FileInfo object by casting it +// deviceID extracts the device ID from an os.FileInfo object by casting it // to syscall.Stat_t -func DeviceID(fi os.FileInfo) (deviceID uint64, err error) { +func deviceID(fi os.FileInfo) (deviceID uint64, err error) { if fi == nil { return 0, errors.New("unable to determine device: fi is nil") } diff --git a/internal/fs/deviceid_windows.go b/internal/fs/deviceid_windows.go index 42355817d..bfb22dc9a 100644 --- a/internal/fs/deviceid_windows.go +++ b/internal/fs/deviceid_windows.go @@ -9,8 +9,8 @@ import ( "github.com/restic/restic/internal/errors" ) -// DeviceID extracts the device ID from an os.FileInfo object by casting it +// deviceID extracts the device ID from an os.FileInfo object by casting it // to syscall.Stat_t -func DeviceID(fi os.FileInfo) (deviceID uint64, err error) { +func deviceID(_ os.FileInfo) (deviceID uint64, err error) { return 0, errors.New("Device IDs are not supported on Windows") } diff --git a/internal/fs/fs_local.go b/internal/fs/fs_local.go index 33d83bf63..019069a55 100644 --- a/internal/fs/fs_local.go +++ b/internal/fs/fs_local.go @@ -48,8 +48,8 @@ func (fs Local) Lstat(name string) (os.FileInfo, error) { // DeviceID extracts the DeviceID from the given FileInfo. If the fs does // not support a DeviceID, it returns an error instead -func (fs Local) DeviceID(fi os.FileInfo) (deviceID uint64, err error) { - return DeviceID(fi) +func (fs Local) DeviceID(fi os.FileInfo) (id uint64, err error) { + return deviceID(fi) } // Join joins any number of path elements into a single path, adding a From 7bb92dc7bda46201fe84f4b5a67b670ee5544083 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Tue, 27 Aug 2024 14:35:40 +0200 Subject: [PATCH 9/9] archiver: use ExtendedStat from FS interface With this change, NodeFromFileInfo is the last function that bypasses the FS interface in the archiver. --- internal/archiver/archiver.go | 4 ++-- internal/archiver/archiver_test.go | 11 ++++++----- internal/fs/fs_local.go | 5 +++++ internal/fs/fs_reader.go | 6 ++++++ internal/fs/interface.go | 1 + 5 files changed, 20 insertions(+), 7 deletions(-) diff --git a/internal/archiver/archiver.go b/internal/archiver/archiver.go index eab65bb5f..a783f6c7f 100644 --- a/internal/archiver/archiver.go +++ b/internal/archiver/archiver.go @@ -459,7 +459,7 @@ func (arch *Archiver) save(ctx context.Context, snPath, target string, previous // check if the file has not changed before performing a fopen operation (more expensive, specially // in network filesystems) - if previous != nil && !fileChanged(fi, previous, arch.ChangeIgnoreFlags) { + if previous != nil && !fileChanged(arch.FS, fi, previous, arch.ChangeIgnoreFlags) { if arch.allBlobsPresent(previous) { debug.Log("%v hasn't changed, using old list of blobs", target) arch.trackItem(snPath, previous, previous, ItemStats{}, time.Since(start)) @@ -579,7 +579,7 @@ func (arch *Archiver) save(ctx context.Context, snPath, target string, previous // fileChanged tries to detect whether a file's content has changed compared // to the contents of node, which describes the same path in the parent backup. // It should only be run for regular files. -func fileChanged(fi os.FileInfo, node *restic.Node, ignoreFlags uint) bool { +func fileChanged(fs fs.FS, fi os.FileInfo, node *restic.Node, ignoreFlags uint) bool { switch { case node == nil: return true diff --git a/internal/archiver/archiver_test.go b/internal/archiver/archiver_test.go index b56452182..962fd5481 100644 --- a/internal/archiver/archiver_test.go +++ b/internal/archiver/archiver_test.go @@ -686,10 +686,11 @@ func TestFileChanged(t *testing.T) { } save(t, filename, content) + fs := &fs.Local{} fiBefore := lstat(t, filename) node := nodeFromFI(t, filename, fiBefore) - if fileChanged(fiBefore, node, 0) { + if fileChanged(fs, fiBefore, node, 0) { t.Fatalf("unchanged file detected as changed") } @@ -699,12 +700,12 @@ func TestFileChanged(t *testing.T) { if test.SameFile { // file should be detected as unchanged - if fileChanged(fiAfter, node, test.ChangeIgnore) { + if fileChanged(fs, fiAfter, node, test.ChangeIgnore) { t.Fatalf("unmodified file detected as changed") } } else { // file should be detected as changed - if !fileChanged(fiAfter, node, test.ChangeIgnore) && !test.SameFile { + if !fileChanged(fs, fiAfter, node, test.ChangeIgnore) && !test.SameFile { t.Fatalf("modified file detected as unchanged") } } @@ -721,7 +722,7 @@ func TestFilChangedSpecialCases(t *testing.T) { t.Run("nil-node", func(t *testing.T) { fi := lstat(t, filename) - if !fileChanged(fi, nil, 0) { + if !fileChanged(&fs.Local{}, fi, nil, 0) { t.Fatal("nil node detected as unchanged") } }) @@ -730,7 +731,7 @@ func TestFilChangedSpecialCases(t *testing.T) { fi := lstat(t, filename) node := nodeFromFI(t, filename, fi) node.Type = "symlink" - if !fileChanged(fi, node, 0) { + if !fileChanged(&fs.Local{}, fi, node, 0) { t.Fatal("node with changed type detected as unchanged") } }) diff --git a/internal/fs/fs_local.go b/internal/fs/fs_local.go index 019069a55..034d1aa24 100644 --- a/internal/fs/fs_local.go +++ b/internal/fs/fs_local.go @@ -52,6 +52,11 @@ func (fs Local) DeviceID(fi os.FileInfo) (id uint64, err error) { return deviceID(fi) } +// ExtendedStat converts the give FileInfo into ExtendedFileInfo. +func (fs Local) ExtendedStat(fi os.FileInfo) ExtendedFileInfo { + return ExtendedStat(fi) +} + // Join joins any number of path elements into a single path, adding a // Separator if necessary. Join calls Clean on the result; in particular, all // empty strings are ignored. On Windows, the result is a UNC path if and only diff --git a/internal/fs/fs_reader.go b/internal/fs/fs_reader.go index b3371a8c9..84a79168e 100644 --- a/internal/fs/fs_reader.go +++ b/internal/fs/fs_reader.go @@ -126,6 +126,12 @@ func (fs *Reader) DeviceID(_ os.FileInfo) (deviceID uint64, err error) { return 0, errors.New("Device IDs are not supported") } +func (fs *Reader) ExtendedStat(fi os.FileInfo) ExtendedFileInfo { + return ExtendedFileInfo{ + FileInfo: fi, + } +} + // Join joins any number of path elements into a single path, adding a // Separator if necessary. Join calls Clean on the result; in particular, all // empty strings are ignored. On Windows, the result is a UNC path if and only diff --git a/internal/fs/interface.go b/internal/fs/interface.go index 1c27c1c13..bc6aab44a 100644 --- a/internal/fs/interface.go +++ b/internal/fs/interface.go @@ -11,6 +11,7 @@ type FS interface { Stat(name string) (os.FileInfo, error) Lstat(name string) (os.FileInfo, error) DeviceID(fi os.FileInfo) (deviceID uint64, err error) + ExtendedStat(fi os.FileInfo) ExtendedFileInfo Join(elem ...string) string Separator() string