Merge pull request #2598 from MichaelEischer/diff-speedup

Diff speedup
This commit is contained in:
Alexander Neumann 2020-09-13 16:42:05 +02:00 committed by GitHub
commit b10dce541e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 143 additions and 7 deletions

View file

@ -0,0 +1,6 @@
Enhancement: Improve speed of diff command
We've improved the performance of the diff command when comparing snapshots
with similar content. It should run up to twice as fast as before.
https://github.com/restic/restic/pull/2598

View file

@ -116,10 +116,10 @@ func addBlobs(bs restic.BlobSet, node *restic.Node) {
// DiffStats collects the differences between two snapshots.
type DiffStats struct {
ChangedFiles int
Added DiffStat
Removed DiffStat
BlobsBefore, BlobsAfter restic.BlobSet
ChangedFiles int
Added DiffStat
Removed DiffStat
// BlobsCommon is filled by collectDir with the blobs found below
// directories whose subtree ID is equal in both snapshots. runDiff
// subtracts it from BlobsBefore and BlobsAfter before the Removed and
// Added statistics are computed.
BlobsBefore, BlobsAfter, BlobsCommon restic.BlobSet
}
// NewDiffStats creates new stats for a diff run.
@ -127,6 +127,7 @@ func NewDiffStats() *DiffStats {
return &DiffStats{
BlobsBefore: restic.NewBlobSet(),
BlobsAfter: restic.NewBlobSet(),
BlobsCommon: restic.NewBlobSet(),
}
}
@ -177,6 +178,27 @@ func (c *Comparer) printDir(ctx context.Context, mode string, stats *DiffStat, b
return nil
}
// collectDir loads the tree with the given id and passes each of its nodes
// to addBlobs so that the blobs they reference end up in blobs. Nodes of
// type "dir" are descended into recursively.
//
// An error loading the tree identified by id itself, or a cancelled ctx, is
// returned to the caller. Problems inside subdirectories are reported with
// Warnf and skipped, so the remaining nodes are still collected.
func (c *Comparer) collectDir(ctx context.Context, blobs restic.BlobSet, id restic.ID) error {
	debug.Log("collect tree %v", id)

	tree, err := c.repo.LoadTree(ctx, id)
	if err != nil {
		return err
	}

	for _, node := range tree.Nodes {
		// Stop walking as soon as the caller has cancelled the operation
		// instead of trying to load every remaining subtree.
		if err := ctx.Err(); err != nil {
			return err
		}

		addBlobs(blobs, node)

		if node.Type != "dir" {
			continue
		}

		// A directory node without a subtree ID cannot be followed;
		// dereferencing it would panic, so report it and carry on.
		if node.Subtree == nil {
			Warnf("error: directory node without subtree in tree %v\n", id)
			continue
		}

		// Best effort: a broken subtree must not abort the whole walk.
		if err := c.collectDir(ctx, blobs, *node.Subtree); err != nil {
			Warnf("error: %v\n", err)
		}
	}

	return nil
}
func uniqueNodeNames(tree1, tree2 *restic.Tree) (tree1Nodes, tree2Nodes map[string]*restic.Node, uniqueNames []string) {
names := make(map[string]struct{})
tree1Nodes = make(map[string]*restic.Node)
@ -248,7 +270,12 @@ func (c *Comparer) diffTree(ctx context.Context, stats *DiffStats, prefix string
}
if node1.Type == "dir" && node2.Type == "dir" {
err := c.diffTree(ctx, stats, name, *node1.Subtree, *node2.Subtree)
var err error
if (*node1.Subtree).Equal(*node2.Subtree) {
err = c.collectDir(ctx, stats.BlobsCommon, *node1.Subtree)
} else {
err = c.diffTree(ctx, stats, name, *node1.Subtree, *node2.Subtree)
}
if err != nil {
Warnf("error: %v\n", err)
}
@ -345,8 +372,8 @@ func runDiff(opts DiffOptions, gopts GlobalOptions, args []string) error {
}
both := stats.BlobsBefore.Intersect(stats.BlobsAfter)
updateBlobs(repo, stats.BlobsBefore.Sub(both), &stats.Removed)
updateBlobs(repo, stats.BlobsAfter.Sub(both), &stats.Added)
updateBlobs(repo, stats.BlobsBefore.Sub(both).Sub(stats.BlobsCommon), &stats.Removed)
updateBlobs(repo, stats.BlobsAfter.Sub(both).Sub(stats.BlobsCommon), &stats.Added)
Printf("\n")
Printf("Files: %5d new, %5d removed, %5d changed\n", stats.Added.Files, stats.Removed.Files, stats.ChangedFiles)

View file

@ -154,6 +154,21 @@ func testRunCheckOutput(gopts GlobalOptions) (string, error) {
return buf.String(), err
}
// testRunDiffOutput calls runDiff for the two given snapshot IDs with
// ShowMetadata disabled. It returns whatever was written to
// globalOptions.stdout during the call, together with the error returned by
// runDiff.
func testRunDiffOutput(gopts GlobalOptions, firstSnapshotID string, secondSnapshotID string) (string, error) {
	buf := bytes.NewBuffer(nil)

	// Redirect the global stdout into buf for the duration of the call and
	// point it back at the real stdout afterwards.
	globalOptions.stdout = buf
	defer func() {
		globalOptions.stdout = os.Stdout
	}()

	opts := DiffOptions{
		ShowMetadata: false,
	}
	err := runDiff(opts, gopts, []string{firstSnapshotID, secondSnapshotID})
	return buf.String(), err
}
func testRunRebuildIndex(t testing.TB, gopts GlobalOptions) {
globalOptions.stdout = ioutil.Discard
defer func() {
@ -1472,3 +1487,91 @@ func TestQuietBackup(t *testing.T) {
testRunCheck(t, env.gopts)
}
// copyFile copies the contents of the file src into the file dst. dst is
// created if it does not exist and truncated if it does (os.Create).
//
// It returns the first error encountered while opening src, creating dst,
// copying the data, or closing dst.
func copyFile(dst string, src string) error {
	srcFile, err := os.Open(src)
	if err != nil {
		return err
	}
	// The source is only read, so an error from closing it says nothing
	// about the copy and is deliberately ignored.
	defer srcFile.Close()

	dstFile, err := os.Create(dst)
	if err != nil {
		return err
	}

	if _, err := io.Copy(dstFile, srcFile); err != nil {
		// Report the copy error; a close error would only obscure it.
		_ = dstFile.Close()
		return err
	}

	// Close explicitly instead of deferring: for a file that was written
	// to, a failing Close can mean the data never reached the disk, so the
	// error has to be handed to the caller.
	return dstFile.Close()
}
// diffOutputRegexPatterns lists the regular expressions that TestDiff
// requires to match somewhere in the output of the diff command. Each
// pattern is compiled and matched on its own, so the order of the entries
// does not matter.
var diffOutputRegexPatterns = []string{
// Per-path lines for the changes TestDiff makes between its two backups.
// The leading "-", "M" and "+" presumably mark removed, modified and added
// paths in the diff output -- confirm against the diff command.
"-.+modfile",
"M.+modfile1",
"\\+.+modfile2",
"\\+.+modfile3",
"\\+.+modfile4",
"-.+submoddir",
// The unescaped "." between the directory names matches any single
// character, so the pattern does not depend on the path separator.
"-.+submoddir.subsubmoddir",
"\\+.+submoddir2",
"\\+.+submoddir2.subsubmoddir",
// Summary statistics printed after the per-path lines.
"Files: +2 new, +1 removed, +1 changed",
"Dirs: +3 new, +2 removed",
"Data Blobs: +2 new, +1 removed",
// Sizes are matched by leading digit only: 7xx.xxx KiB and 2xx.xxx KiB.
"Added: +7[0-9]{2}\\.[0-9]{3} KiB",
"Removed: +2[0-9]{2}\\.[0-9]{3} KiB",
}
// TestDiff backs up a directory tree twice, modifying it in between, and
// checks the output of the diff command for the two resulting snapshots
// against diffOutputRegexPatterns. It also checks that diff returns an error
// when the first snapshot ID is empty.
func TestDiff(t *testing.T) {
env, cleanup := withTestEnvironment(t)
defer cleanup()
testRunInit(t, env.gopts)
// Layout that is left untouched between the two backups:
// testdata/testdir/{subtestdir/, testfile}. appendRandomData is called
// with 256*1024 (presumably the number of bytes to append -- see helper).
datadir := filepath.Join(env.base, "testdata")
testdir := filepath.Join(datadir, "testdir")
subtestdir := filepath.Join(testdir, "subtestdir")
testfile := filepath.Join(testdir, "testfile")
rtest.OK(t, os.Mkdir(testdir, 0755))
rtest.OK(t, os.Mkdir(subtestdir, 0755))
rtest.OK(t, appendRandomData(testfile, 256*1024))
// Layout that is modified after the first backup:
// testdata/moddir/{submoddir/subsubmoddir/, modfile, modfile1}.
// modfile is a copy of testfile, so both files have the same content.
moddir := filepath.Join(datadir, "moddir")
submoddir := filepath.Join(moddir, "submoddir")
subsubmoddir := filepath.Join(submoddir, "subsubmoddir")
modfile := filepath.Join(moddir, "modfile")
rtest.OK(t, os.Mkdir(moddir, 0755))
rtest.OK(t, os.Mkdir(submoddir, 0755))
rtest.OK(t, os.Mkdir(subsubmoddir, 0755))
rtest.OK(t, copyFile(modfile, testfile))
rtest.OK(t, appendRandomData(modfile+"1", 256*1024))
// First backup; lastSnapshot is used to obtain the ID of the snapshot
// that was just created.
snapshots := make(map[string]struct{})
opts := BackupOptions{}
testRunBackup(t, "", []string{datadir}, opts, env.gopts)
snapshots, firstSnapshotID := lastSnapshot(snapshots, loadSnapshotMap(t, env.gopts))
// Changes between the snapshots: rename modfile to modfile3 and submoddir
// to submoddir2, append more data to modfile1, create the new file
// modfile2 and the new directory modfile4.
rtest.OK(t, os.Rename(modfile, modfile+"3"))
rtest.OK(t, os.Rename(submoddir, submoddir+"2"))
rtest.OK(t, appendRandomData(modfile+"1", 256*1024))
rtest.OK(t, appendRandomData(modfile+"2", 256*1024))
rtest.OK(t, os.Mkdir(modfile+"4", 0755))
// Second backup of the modified tree.
testRunBackup(t, "", []string{datadir}, opts, env.gopts)
// NOTE(review): the value assigned to snapshots here is never read again.
snapshots, secondSnapshotID := lastSnapshot(snapshots, loadSnapshotMap(t, env.gopts))
// An empty first snapshot ID must make the diff fail.
_, err := testRunDiffOutput(env.gopts, "", secondSnapshotID)
rtest.Assert(t, err != nil, "expected error on invalid snapshot id")
// The diff of the two real snapshots must succeed ...
out, err := testRunDiffOutput(env.gopts, firstSnapshotID, secondSnapshotID)
if err != nil {
t.Fatalf("expected no error from diff for test repository, got %v", err)
}
// ... and its output must match every expected pattern.
for _, pattern := range diffOutputRegexPatterns {
r, err := regexp.Compile(pattern)
rtest.Assert(t, err == nil, "failed to compile regexp %v", pattern)
rtest.Assert(t, r.MatchString(out), "expected pattern %v in output, got\n%v", pattern, out)
}
}