From 5c617859ab6f438a6aa6de80ee107ec05e7bd1c9 Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Thu, 28 Jan 2021 21:30:06 +0100 Subject: [PATCH] backup/scanner: Fix total size for overlapping targets Before, the scanner would count files twice if they were included in the list of backup targets twice, e.g. `restic backup foo foo/bar` would count the file `foo/bar` twice. This commit uses the tree structure from the archiver to run the scanner, so both parts see the same files. --- internal/archiver/scanner.go | 55 ++++++++++++++++++++++++++----- internal/archiver/scanner_test.go | 14 ++++---- 2 files changed, 52 insertions(+), 17 deletions(-) diff --git a/internal/archiver/scanner.go b/internal/archiver/scanner.go index 71634015b..5c8474259 100644 --- a/internal/archiver/scanner.go +++ b/internal/archiver/scanner.go @@ -6,6 +6,7 @@ import ( "path/filepath" "sort" + "github.com/restic/restic/internal/debug" "github.com/restic/restic/internal/fs" ) @@ -37,27 +38,63 @@ type ScanStats struct { Bytes uint64 } -// Scan traverses the targets. The function Result is called for each new item -// found, the complete result is also returned by Scan. 
-func (s *Scanner) Scan(ctx context.Context, targets []string) error { - var stats ScanStats - for _, target := range targets { - abstarget, err := s.FS.Abs(target) +func (s *Scanner) scanTree(ctx context.Context, stats ScanStats, tree Tree) (ScanStats, error) { + // traverse the path in the file system for all leaf nodes + if tree.Leaf() { + abstarget, err := s.FS.Abs(tree.Path) if err != nil { - return err + return ScanStats{}, err } stats, err = s.scan(ctx, stats, abstarget) if err != nil { - return err + return ScanStats{}, err + } + + return stats, nil + } + + // otherwise recurse into the nodes in a deterministic order + for _, name := range tree.NodeNames() { + var err error + stats, err = s.scanTree(ctx, stats, tree.Nodes[name]) + if err != nil { + return ScanStats{}, err } if ctx.Err() != nil { - return nil + return stats, nil } } + return stats, nil +} + +// Scan traverses the targets. The function Result is called for each new item +// found, the complete result is also returned by Scan. 
+func (s *Scanner) Scan(ctx context.Context, targets []string) error { + debug.Log("start scan for %v", targets) + + cleanTargets, err := resolveRelativeTargets(s.FS, targets) + if err != nil { + return err + } + + debug.Log("clean targets %v", cleanTargets) + + // we're using the same tree representation as the archiver does + tree, err := NewTree(s.FS, cleanTargets) + if err != nil { + return err + } + + stats, err := s.scanTree(ctx, ScanStats{}, *tree) + if err != nil { + return err + } + s.Result("", stats) + debug.Log("result: %+v", stats) return nil } diff --git a/internal/archiver/scanner_test.go b/internal/archiver/scanner_test.go index 4eeef309c..6c2d35d81 100644 --- a/internal/archiver/scanner_test.go +++ b/internal/archiver/scanner_test.go @@ -40,8 +40,7 @@ func TestScanner(t *testing.T) { filepath.FromSlash("work/subdir/other"): {Files: 5, Bytes: 60}, filepath.FromSlash("work/subdir"): {Files: 5, Dirs: 1, Bytes: 60}, filepath.FromSlash("work"): {Files: 5, Dirs: 2, Bytes: 60}, - filepath.FromSlash("."): {Files: 5, Dirs: 3, Bytes: 60}, - filepath.FromSlash(""): {Files: 5, Dirs: 3, Bytes: 60}, + filepath.FromSlash(""): {Files: 5, Dirs: 2, Bytes: 60}, }, }, { @@ -72,8 +71,7 @@ func TestScanner(t *testing.T) { filepath.FromSlash("work/subdir/bar.txt"): {Files: 2, Bytes: 30}, filepath.FromSlash("work/subdir"): {Files: 2, Dirs: 1, Bytes: 30}, filepath.FromSlash("work"): {Files: 2, Dirs: 2, Bytes: 30}, - filepath.FromSlash("."): {Files: 2, Dirs: 3, Bytes: 30}, - filepath.FromSlash(""): {Files: 2, Dirs: 3, Bytes: 30}, + filepath.FromSlash(""): {Files: 2, Dirs: 2, Bytes: 30}, }, }, } @@ -152,7 +150,7 @@ func TestScannerError(t *testing.T) { }, }, }, - result: ScanStats{Files: 5, Dirs: 3, Bytes: 60}, + result: ScanStats{Files: 5, Dirs: 2, Bytes: 60}, }, { name: "unreadable-dir", @@ -168,7 +166,7 @@ func TestScannerError(t *testing.T) { }, }, }, - result: ScanStats{Files: 3, Dirs: 2, Bytes: 28}, + result: ScanStats{Files: 3, Dirs: 1, Bytes: 28}, prepare: func(t 
testing.TB) { err := os.Chmod(filepath.Join("work", "subdir"), 0000) if err != nil { @@ -191,7 +189,7 @@ func TestScannerError(t *testing.T) { "foo": TestFile{Content: "foo"}, "other": TestFile{Content: "other"}, }, - result: ScanStats{Files: 3, Dirs: 1, Bytes: 11}, + result: ScanStats{Files: 3, Dirs: 0, Bytes: 11}, resFn: func(t testing.TB, item string, s ScanStats) { if item == "bar" { err := os.Remove("foo") @@ -289,7 +287,7 @@ func TestScannerCancel(t *testing.T) { "other": TestFile{Content: "other"}, } - result := ScanStats{Files: 2, Dirs: 1, Bytes: 6} + result := ScanStats{Files: 2, Dirs: 0, Bytes: 6} ctx, cancel := context.WithCancel(context.Background()) defer cancel()