diff: Optimize diff calculation for shared subtrees

When the diff calculation compares two trees with identical id then no
differences between them can ever show up. Optimize for that case by
simply traversing the tree only once to collect all referenced blobs for
a proper calculation of added and removed blobs.

Just skipping the common subtrees is not possible as this would skew the
results if the added or removed blobs are shared with one of the
subtrees.
This commit is contained in:
Michael Eischer 2020-02-08 11:04:15 +01:00
parent c0fc85d303
commit f5c448aa65

View file

@ -116,10 +116,10 @@ func addBlobs(bs restic.BlobSet, node *restic.Node) {
// DiffStats collects the differences between two snapshots.
type DiffStats struct {
ChangedFiles int
Added DiffStat
Removed DiffStat
BlobsBefore, BlobsAfter restic.BlobSet
ChangedFiles int
Added DiffStat
Removed DiffStat
BlobsBefore, BlobsAfter, BlobsCommon restic.BlobSet
}
// NewDiffStats creates new stats for a diff run.
@ -127,6 +127,7 @@ func NewDiffStats() *DiffStats {
return &DiffStats{
BlobsBefore: restic.NewBlobSet(),
BlobsAfter: restic.NewBlobSet(),
BlobsCommon: restic.NewBlobSet(),
}
}
@ -177,6 +178,27 @@ func (c *Comparer) printDir(ctx context.Context, mode string, stats *DiffStat, b
return nil
}
func (c *Comparer) collectDir(ctx context.Context, blobs restic.BlobSet, id restic.ID) error {
debug.Log("print tree %v", id)
tree, err := c.repo.LoadTree(ctx, id)
if err != nil {
return err
}
for _, node := range tree.Nodes {
addBlobs(blobs, node)
if node.Type == "dir" {
err := c.collectDir(ctx, blobs, *node.Subtree)
if err != nil {
Warnf("error: %v\n", err)
}
}
}
return nil
}
func uniqueNodeNames(tree1, tree2 *restic.Tree) (tree1Nodes, tree2Nodes map[string]*restic.Node, uniqueNames []string) {
names := make(map[string]struct{})
tree1Nodes = make(map[string]*restic.Node)
@ -248,7 +270,12 @@ func (c *Comparer) diffTree(ctx context.Context, stats *DiffStats, prefix string
}
if node1.Type == "dir" && node2.Type == "dir" {
err := c.diffTree(ctx, stats, name, *node1.Subtree, *node2.Subtree)
var err error
if (*node1.Subtree).Equal(*node2.Subtree) {
err = c.collectDir(ctx, stats.BlobsCommon, *node1.Subtree)
} else {
err = c.diffTree(ctx, stats, name, *node1.Subtree, *node2.Subtree)
}
if err != nil {
Warnf("error: %v\n", err)
}
@ -345,8 +372,8 @@ func runDiff(opts DiffOptions, gopts GlobalOptions, args []string) error {
}
both := stats.BlobsBefore.Intersect(stats.BlobsAfter)
updateBlobs(repo, stats.BlobsBefore.Sub(both), &stats.Removed)
updateBlobs(repo, stats.BlobsAfter.Sub(both), &stats.Added)
updateBlobs(repo, stats.BlobsBefore.Sub(both).Sub(stats.BlobsCommon), &stats.Removed)
updateBlobs(repo, stats.BlobsAfter.Sub(both).Sub(stats.BlobsCommon), &stats.Added)
Printf("\n")
Printf("Files: %5d new, %5d removed, %5d changed\n", stats.Added.Files, stats.Removed.Files, stats.ChangedFiles)