diff: Optimize diff calculation for shared subtrees

When the diff calculation compares two trees with identical IDs, no
differences between them can ever show up. Optimize for that case by
traversing the shared tree only once to collect all referenced blobs,
which are needed for a proper calculation of added and removed blobs.

Just skipping the common subtrees is not possible, as this would skew the
results if an added or removed blob is also referenced from one of the
common subtrees.
This commit is contained in:
Michael Eischer 2020-02-08 11:04:15 +01:00
parent c0fc85d303
commit f5c448aa65

View file

@ -119,7 +119,7 @@ type DiffStats struct {
ChangedFiles int ChangedFiles int
Added DiffStat Added DiffStat
Removed DiffStat Removed DiffStat
BlobsBefore, BlobsAfter restic.BlobSet BlobsBefore, BlobsAfter, BlobsCommon restic.BlobSet
} }
// NewDiffStats creates new stats for a diff run. // NewDiffStats creates new stats for a diff run.
@ -127,6 +127,7 @@ func NewDiffStats() *DiffStats {
return &DiffStats{ return &DiffStats{
BlobsBefore: restic.NewBlobSet(), BlobsBefore: restic.NewBlobSet(),
BlobsAfter: restic.NewBlobSet(), BlobsAfter: restic.NewBlobSet(),
BlobsCommon: restic.NewBlobSet(),
} }
} }
@ -177,6 +178,27 @@ func (c *Comparer) printDir(ctx context.Context, mode string, stats *DiffStat, b
return nil return nil
} }
// collectDir loads the tree with the given id and recursively records the
// blobs referenced by it and by all of its subtrees in blobs. Each node is
// passed to addBlobs; nodes of type "dir" are additionally descended into.
//
// An error is returned only when the tree identified by id itself cannot be
// loaded. Failures while processing a subtree are reported via Warnf and the
// traversal continues with the remaining nodes, so the collected set may be
// incomplete in that case.
func (c *Comparer) collectDir(ctx context.Context, blobs restic.BlobSet, id restic.ID) error {
	debug.Log("collect tree %v", id)
	tree, err := c.repo.LoadTree(ctx, id)
	if err != nil {
		return err
	}

	for _, node := range tree.Nodes {
		addBlobs(blobs, node)

		if node.Type != "dir" {
			continue
		}

		// A dir node without a subtree ID cannot be descended into; report it
		// like any other per-subtree failure instead of dereferencing nil.
		if node.Subtree == nil {
			Warnf("error: directory node in tree %v has no subtree\n", id)
			continue
		}

		if err := c.collectDir(ctx, blobs, *node.Subtree); err != nil {
			Warnf("error: %v\n", err)
		}
	}

	return nil
}
func uniqueNodeNames(tree1, tree2 *restic.Tree) (tree1Nodes, tree2Nodes map[string]*restic.Node, uniqueNames []string) { func uniqueNodeNames(tree1, tree2 *restic.Tree) (tree1Nodes, tree2Nodes map[string]*restic.Node, uniqueNames []string) {
names := make(map[string]struct{}) names := make(map[string]struct{})
tree1Nodes = make(map[string]*restic.Node) tree1Nodes = make(map[string]*restic.Node)
@ -248,7 +270,12 @@ func (c *Comparer) diffTree(ctx context.Context, stats *DiffStats, prefix string
} }
if node1.Type == "dir" && node2.Type == "dir" { if node1.Type == "dir" && node2.Type == "dir" {
err := c.diffTree(ctx, stats, name, *node1.Subtree, *node2.Subtree) var err error
if (*node1.Subtree).Equal(*node2.Subtree) {
err = c.collectDir(ctx, stats.BlobsCommon, *node1.Subtree)
} else {
err = c.diffTree(ctx, stats, name, *node1.Subtree, *node2.Subtree)
}
if err != nil { if err != nil {
Warnf("error: %v\n", err) Warnf("error: %v\n", err)
} }
@ -345,8 +372,8 @@ func runDiff(opts DiffOptions, gopts GlobalOptions, args []string) error {
} }
both := stats.BlobsBefore.Intersect(stats.BlobsAfter) both := stats.BlobsBefore.Intersect(stats.BlobsAfter)
updateBlobs(repo, stats.BlobsBefore.Sub(both), &stats.Removed) updateBlobs(repo, stats.BlobsBefore.Sub(both).Sub(stats.BlobsCommon), &stats.Removed)
updateBlobs(repo, stats.BlobsAfter.Sub(both), &stats.Added) updateBlobs(repo, stats.BlobsAfter.Sub(both).Sub(stats.BlobsCommon), &stats.Added)
Printf("\n") Printf("\n")
Printf("Files: %5d new, %5d removed, %5d changed\n", stats.Added.Files, stats.Removed.Files, stats.ChangedFiles) Printf("Files: %5d new, %5d removed, %5d changed\n", stats.Added.Files, stats.Removed.Files, stats.ChangedFiles)