From 81211750ba8e23c9d23159b86db11887713ceaa7 Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Fri, 29 Jan 2021 11:10:28 +0100 Subject: [PATCH 1/3] archiver/tree: Introduce functions Leaf() and NodeNames() --- internal/archiver/archiver.go | 10 ++-------- internal/archiver/tree.go | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/internal/archiver/archiver.go b/internal/archiver/archiver.go index 2fd170a3a..bc74bd75f 100644 --- a/internal/archiver/archiver.go +++ b/internal/archiver/archiver.go @@ -548,13 +548,7 @@ func (arch *Archiver) SaveTree(ctx context.Context, snPath string, atree *Tree, futureNodes := make(map[string]FutureNode) // iterate over the nodes of atree in lexicographic (=deterministic) order - names := make([]string, 0, len(atree.Nodes)) - for name := range atree.Nodes { - names = append(names, name) - } - sort.Strings(names) - - for _, name := range names { + for _, name := range atree.NodeNames() { subatree := atree.Nodes[name] // test if context has been cancelled @@ -563,7 +557,7 @@ func (arch *Archiver) SaveTree(ctx context.Context, snPath string, atree *Tree, } // this is a leaf node - if subatree.Path != "" { + if subatree.Leaf() { fn, excluded, err := arch.Save(ctx, join(snPath, name), subatree.Path, previous.Find(name)) if err != nil { diff --git a/internal/archiver/tree.go b/internal/archiver/tree.go index 04c0a8e33..16a78ee70 100644 --- a/internal/archiver/tree.go +++ b/internal/archiver/tree.go @@ -2,6 +2,7 @@ package archiver import ( "fmt" + "sort" "github.com/restic/restic/internal/debug" "github.com/restic/restic/internal/errors" @@ -199,6 +200,24 @@ func (t Tree) String() string { return formatTree(t, "") } +// Leaf returns true if this is a leaf node, which means Path is set to a +// non-empty string and the contents of Path should be inserted at this point +// in the tree. +func (t Tree) Leaf() bool { + return t.Path != "" +} + +// NodeNames returns the sorted list of subtree names. +func (t Tree) NodeNames() []string { + // iterate over the nodes of atree in lexicographic (=deterministic) order + names := make([]string, 0, len(t.Nodes)) + for name := range t.Nodes { + names = append(names, name) + } + sort.Strings(names) + return names +} + // formatTree returns a text representation of the tree t. func formatTree(t Tree, indent string) (s string) { for name, node := range t.Nodes { From 5c617859ab6f438a6aa6de80ee107ec05e7bd1c9 Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Thu, 28 Jan 2021 21:30:06 +0100 Subject: [PATCH 2/3] backup/scanner: Fix total size for overlapping targets Before, the scanner would could files twice if they were included in the list of backup targets twice, e.g. `restic backup foo foo/bar` would could the file `foo/bar` twice. This commit uses the tree structure from the archiver to run the scanner, so both parts see the same files. --- internal/archiver/scanner.go | 55 ++++++++++++++++++++++++++----- internal/archiver/scanner_test.go | 14 ++++---- 2 files changed, 52 insertions(+), 17 deletions(-) diff --git a/internal/archiver/scanner.go b/internal/archiver/scanner.go index 71634015b..5c8474259 100644 --- a/internal/archiver/scanner.go +++ b/internal/archiver/scanner.go @@ -6,6 +6,7 @@ import ( "path/filepath" "sort" + "github.com/restic/restic/internal/debug" "github.com/restic/restic/internal/fs" ) @@ -37,27 +38,63 @@ type ScanStats struct { Bytes uint64 } -// Scan traverses the targets. The function Result is called for each new item -// found, the complete result is also returned by Scan. -func (s *Scanner) Scan(ctx context.Context, targets []string) error { - var stats ScanStats - for _, target := range targets { - abstarget, err := s.FS.Abs(target) +func (s *Scanner) scanTree(ctx context.Context, stats ScanStats, tree Tree) (ScanStats, error) { + // traverse the path in the file system for all leaf nodes + if tree.Leaf() { + abstarget, err := s.FS.Abs(tree.Path) if err != nil { - return err + return ScanStats{}, err } stats, err = s.scan(ctx, stats, abstarget) if err != nil { - return err + return ScanStats{}, err + } + + return stats, nil + } + + // otherwise recurse into the nodes in a deterministic order + for _, name := range tree.NodeNames() { + var err error + stats, err = s.scanTree(ctx, stats, tree.Nodes[name]) + if err != nil { + return ScanStats{}, err } if ctx.Err() != nil { - return nil + return stats, nil } } + return stats, nil +} + +// Scan traverses the targets. The function Result is called for each new item +// found, the complete result is also returned by Scan. +func (s *Scanner) Scan(ctx context.Context, targets []string) error { + debug.Log("start scan for %v", targets) + + cleanTargets, err := resolveRelativeTargets(s.FS, targets) + if err != nil { + return err + } + + debug.Log("clean targets %v", cleanTargets) + + // we're using the same tree representation as the archiver does + tree, err := NewTree(s.FS, cleanTargets) + if err != nil { + return err + } + + stats, err := s.scanTree(ctx, ScanStats{}, *tree) + if err != nil { + return err + } + s.Result("", stats) + debug.Log("result: %+v", stats) return nil } diff --git a/internal/archiver/scanner_test.go b/internal/archiver/scanner_test.go index 4eeef309c..6c2d35d81 100644 --- a/internal/archiver/scanner_test.go +++ b/internal/archiver/scanner_test.go @@ -40,8 +40,7 @@ func TestScanner(t *testing.T) { filepath.FromSlash("work/subdir/other"): {Files: 5, Bytes: 60}, filepath.FromSlash("work/subdir"): {Files: 5, Dirs: 1, Bytes: 60}, filepath.FromSlash("work"): {Files: 5, Dirs: 2, Bytes: 60}, - filepath.FromSlash("."): {Files: 5, Dirs: 3, Bytes: 60}, - filepath.FromSlash(""): {Files: 5, Dirs: 3, Bytes: 60}, + filepath.FromSlash(""): {Files: 5, Dirs: 2, Bytes: 60}, }, }, { @@ -72,8 +71,7 @@ func TestScanner(t *testing.T) { filepath.FromSlash("work/subdir/bar.txt"): {Files: 2, Bytes: 30}, filepath.FromSlash("work/subdir"): {Files: 2, Dirs: 1, Bytes: 30}, filepath.FromSlash("work"): {Files: 2, Dirs: 2, Bytes: 30}, - filepath.FromSlash("."): {Files: 2, Dirs: 3, Bytes: 30}, - filepath.FromSlash(""): {Files: 2, Dirs: 3, Bytes: 30}, + filepath.FromSlash(""): {Files: 2, Dirs: 2, Bytes: 30}, }, }, } @@ -152,7 +150,7 @@ func TestScannerError(t *testing.T) { }, }, }, - result: ScanStats{Files: 5, Dirs: 3, Bytes: 60}, + result: ScanStats{Files: 5, Dirs: 2, Bytes: 60}, }, { name: "unreadable-dir", @@ -168,7 +166,7 @@ func TestScannerError(t *testing.T) { }, }, }, - result: ScanStats{Files: 3, Dirs: 2, Bytes: 28}, + result: ScanStats{Files: 3, Dirs: 1, Bytes: 28}, prepare: func(t testing.TB) { err := os.Chmod(filepath.Join("work", "subdir"), 0000) if err != nil { @@ -191,7 +189,7 @@ func TestScannerError(t *testing.T) { "foo": TestFile{Content: "foo"}, "other": TestFile{Content: "other"}, }, - result: ScanStats{Files: 3, Dirs: 1, Bytes: 11}, + result: ScanStats{Files: 3, Dirs: 0, Bytes: 11}, resFn: func(t testing.TB, item string, s ScanStats) { if item == "bar" { err := os.Remove("foo") @@ -289,7 +287,7 @@ func TestScannerCancel(t *testing.T) { "other": TestFile{Content: "other"}, } - result := ScanStats{Files: 2, Dirs: 1, Bytes: 6} + result := ScanStats{Files: 2, Dirs: 0, Bytes: 6} ctx, cancel := context.WithCancel(context.Background()) defer cancel() From 5c41120c7045be2f2fd492b77b46652c395796c1 Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Fri, 29 Jan 2021 11:29:05 +0100 Subject: [PATCH 3/3] Add entry to changelog --- changelog/unreleased/issue-3232 | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 changelog/unreleased/issue-3232 diff --git a/changelog/unreleased/issue-3232 b/changelog/unreleased/issue-3232 new file mode 100644 index 000000000..84aa71999 --- /dev/null +++ b/changelog/unreleased/issue-3232 @@ -0,0 +1,11 @@ +Bugfix: Show correct statistics for overlapping targets + +A user reported that restic's statistics and progress information during backup +is not correctly calculated when the backup targets (files/dirs to save) +overlap. For example, consider a directory `foo` which contains (among others) +a file `foo/bar`. When `restic backup foo foo/bar` is run, restic counted the +size of the file `foo/bar` twice, so the completeness percentage as well as the +number of files was wrong. This is now corrected. + +https://github.com/restic/restic/issues/3232 +https://github.com/restic/restic/pull/3243