Merge pull request #2594 from greatroar/concurrent-restore-verify
Concurrent restore --verify
This commit is contained in:
commit
71fcf48533
4 changed files with 169 additions and 58 deletions
6
changelog/unreleased/pull-2594
Normal file
6
changelog/unreleased/pull-2594
Normal file
|
@ -0,0 +1,6 @@
|
||||||
|
Enhancement: Speed up restic restore --verify
|
||||||
|
|
||||||
|
The --verify option lets restic restore verify the file content after it has
|
||||||
|
restored a snapshot. We've sped up the verification by up to a factor of two.
|
||||||
|
|
||||||
|
https://github.com/restic/restic/pull/2594
|
|
@ -2,6 +2,7 @@ package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"strings"
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/restic/restic/internal/debug"
|
"github.com/restic/restic/internal/debug"
|
||||||
"github.com/restic/restic/internal/errors"
|
"github.com/restic/restic/internal/errors"
|
||||||
|
@ -202,6 +203,7 @@ func runRestore(opts RestoreOptions, gopts GlobalOptions, args []string) error {
|
||||||
if opts.Verify {
|
if opts.Verify {
|
||||||
Verbosef("verifying files in %s\n", opts.Target)
|
Verbosef("verifying files in %s\n", opts.Target)
|
||||||
var count int
|
var count int
|
||||||
|
t0 := time.Now()
|
||||||
count, err = res.VerifyFiles(ctx, opts.Target)
|
count, err = res.VerifyFiles(ctx, opts.Target)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
|
@ -209,7 +211,8 @@ func runRestore(opts RestoreOptions, gopts GlobalOptions, args []string) error {
|
||||||
if totalErrors > 0 {
|
if totalErrors > 0 {
|
||||||
return errors.Fatalf("There were %d errors\n", totalErrors)
|
return errors.Fatalf("There were %d errors\n", totalErrors)
|
||||||
}
|
}
|
||||||
Verbosef("finished verifying %d files in %s\n", count, opts.Target)
|
Verbosef("finished verifying %d files in %s (took %s)\n", count, opts.Target,
|
||||||
|
time.Since(t0).Round(time.Millisecond))
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
|
|
|
@ -4,12 +4,14 @@ import (
|
||||||
"context"
|
"context"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"sync/atomic"
|
||||||
"github.com/restic/restic/internal/errors"
|
|
||||||
|
|
||||||
"github.com/restic/restic/internal/debug"
|
"github.com/restic/restic/internal/debug"
|
||||||
|
"github.com/restic/restic/internal/errors"
|
||||||
"github.com/restic/restic/internal/fs"
|
"github.com/restic/restic/internal/fs"
|
||||||
"github.com/restic/restic/internal/restic"
|
"github.com/restic/restic/internal/restic"
|
||||||
|
|
||||||
|
"golang.org/x/sync/errgroup"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Restorer is used to restore a snapshot to a directory.
|
// Restorer is used to restore a snapshot to a directory.
|
||||||
|
@ -97,10 +99,13 @@ func (res *Restorer) traverseTree(ctx context.Context, target, location string,
|
||||||
}
|
}
|
||||||
|
|
||||||
sanitizeError := func(err error) error {
|
sanitizeError := func(err error) error {
|
||||||
if err != nil {
|
switch err {
|
||||||
err = res.Error(nodeLocation, err)
|
case nil, context.Canceled, context.DeadlineExceeded:
|
||||||
}
|
// Context errors are permanent.
|
||||||
return err
|
return err
|
||||||
|
default:
|
||||||
|
return res.Error(nodeLocation, err)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if node.Type == "dir" {
|
if node.Type == "dir" {
|
||||||
|
@ -108,7 +113,7 @@ func (res *Restorer) traverseTree(ctx context.Context, target, location string,
|
||||||
return hasRestored, errors.Errorf("Dir without subtree in tree %v", treeID.Str())
|
return hasRestored, errors.Errorf("Dir without subtree in tree %v", treeID.Str())
|
||||||
}
|
}
|
||||||
|
|
||||||
if selectedForRestore {
|
if selectedForRestore && visitor.enterDir != nil {
|
||||||
err = sanitizeError(visitor.enterDir(node, nodeTarget, nodeLocation))
|
err = sanitizeError(visitor.enterDir(node, nodeTarget, nodeLocation))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return hasRestored, err
|
return hasRestored, err
|
||||||
|
@ -133,7 +138,7 @@ func (res *Restorer) traverseTree(ctx context.Context, target, location string,
|
||||||
|
|
||||||
// metadata need to be restore when leaving the directory in both cases
|
// metadata need to be restore when leaving the directory in both cases
|
||||||
// selected for restore or any child of any subtree have been restored
|
// selected for restore or any child of any subtree have been restored
|
||||||
if selectedForRestore || childHasRestored {
|
if (selectedForRestore || childHasRestored) && visitor.leaveDir != nil {
|
||||||
err = sanitizeError(visitor.leaveDir(node, nodeTarget, nodeLocation))
|
err = sanitizeError(visitor.leaveDir(node, nodeTarget, nodeLocation))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return hasRestored, err
|
return hasRestored, err
|
||||||
|
@ -214,7 +219,6 @@ func (res *Restorer) RestoreTo(ctx context.Context, dst string) error {
|
||||||
}
|
}
|
||||||
|
|
||||||
idx := restic.NewHardlinkIndex()
|
idx := restic.NewHardlinkIndex()
|
||||||
|
|
||||||
filerestorer := newFileRestorer(dst, res.repo.Backend().Load, res.repo.Key(), res.repo.Index().Lookup)
|
filerestorer := newFileRestorer(dst, res.repo.Backend().Load, res.repo.Key(), res.repo.Index().Lookup)
|
||||||
filerestorer.Error = res.Error
|
filerestorer.Error = res.Error
|
||||||
|
|
||||||
|
@ -257,9 +261,6 @@ func (res *Restorer) RestoreTo(ctx context.Context, dst string) error {
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
},
|
},
|
||||||
leaveDir: func(node *restic.Node, target, location string) error {
|
|
||||||
return nil
|
|
||||||
},
|
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
|
@ -274,9 +275,6 @@ func (res *Restorer) RestoreTo(ctx context.Context, dst string) error {
|
||||||
|
|
||||||
// second tree pass: restore special files and filesystem metadata
|
// second tree pass: restore special files and filesystem metadata
|
||||||
_, err = res.traverseTree(ctx, dst, string(filepath.Separator), *res.sn.Tree, treeVisitor{
|
_, err = res.traverseTree(ctx, dst, string(filepath.Separator), *res.sn.Tree, treeVisitor{
|
||||||
enterDir: func(node *restic.Node, target, location string) error {
|
|
||||||
return nil
|
|
||||||
},
|
|
||||||
visitNode: func(node *restic.Node, target, location string) error {
|
visitNode: func(node *restic.Node, target, location string) error {
|
||||||
debug.Log("second pass, visitNode: restore node %q", location)
|
debug.Log("second pass, visitNode: restore node %q", location)
|
||||||
if node.Type != "file" {
|
if node.Type != "file" {
|
||||||
|
@ -297,10 +295,7 @@ func (res *Restorer) RestoreTo(ctx context.Context, dst string) error {
|
||||||
|
|
||||||
return res.restoreNodeMetadataTo(node, target, location)
|
return res.restoreNodeMetadataTo(node, target, location)
|
||||||
},
|
},
|
||||||
leaveDir: func(node *restic.Node, target, location string) error {
|
leaveDir: res.restoreNodeMetadataTo,
|
||||||
debug.Log("second pass, leaveDir restore metadata %q", location)
|
|
||||||
return res.restoreNodeMetadataTo(node, target, location)
|
|
||||||
},
|
|
||||||
})
|
})
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
@ -310,52 +305,112 @@ func (res *Restorer) Snapshot() *restic.Snapshot {
|
||||||
return res.sn
|
return res.sn
|
||||||
}
|
}
|
||||||
|
|
||||||
// VerifyFiles reads all snapshot files and verifies their contents
|
// Number of workers in VerifyFiles.
|
||||||
func (res *Restorer) VerifyFiles(ctx context.Context, dst string) (int, error) {
|
const nVerifyWorkers = 8
|
||||||
// TODO multithreaded?
|
|
||||||
|
// VerifyFiles checks whether all regular files in the snapshot res.sn
|
||||||
|
// have been successfully written to dst. It stops when it encounters an
|
||||||
|
// error. It returns that error and the number of files it has successfully
|
||||||
|
// verified.
|
||||||
|
func (res *Restorer) VerifyFiles(ctx context.Context, dst string) (int, error) {
|
||||||
|
type mustCheck struct {
|
||||||
|
node *restic.Node
|
||||||
|
path string
|
||||||
|
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
nchecked uint64
|
||||||
|
work = make(chan mustCheck, 2*nVerifyWorkers)
|
||||||
|
)
|
||||||
|
|
||||||
|
g, ctx := errgroup.WithContext(ctx)
|
||||||
|
|
||||||
|
// Traverse tree and send jobs to work.
|
||||||
|
g.Go(func() error {
|
||||||
|
defer close(work)
|
||||||
|
|
||||||
count := 0
|
|
||||||
_, err := res.traverseTree(ctx, dst, string(filepath.Separator), *res.sn.Tree, treeVisitor{
|
_, err := res.traverseTree(ctx, dst, string(filepath.Separator), *res.sn.Tree, treeVisitor{
|
||||||
enterDir: func(node *restic.Node, target, location string) error { return nil },
|
|
||||||
visitNode: func(node *restic.Node, target, location string) error {
|
visitNode: func(node *restic.Node, target, location string) error {
|
||||||
if node.Type != "file" {
|
if node.Type != "file" {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
select {
|
||||||
count++
|
case <-ctx.Done():
|
||||||
stat, err := os.Stat(target)
|
return ctx.Err()
|
||||||
if err != nil {
|
case work <- mustCheck{node, target}:
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
},
|
||||||
|
})
|
||||||
return err
|
return err
|
||||||
}
|
})
|
||||||
if int64(node.Size) != stat.Size() {
|
|
||||||
return errors.Errorf("Invalid file size: expected %d got %d", node.Size, stat.Size())
|
|
||||||
}
|
|
||||||
|
|
||||||
file, err := os.Open(target)
|
for i := 0; i < nVerifyWorkers; i++ {
|
||||||
|
g.Go(func() (err error) {
|
||||||
|
var buf []byte
|
||||||
|
for job := range work {
|
||||||
|
buf, err = res.verifyFile(job.path, job.node, buf)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
err = res.Error(job.path, err)
|
||||||
|
}
|
||||||
|
if err != nil || ctx.Err() != nil {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
atomic.AddUint64(&nchecked, 1)
|
||||||
|
}
|
||||||
return err
|
return err
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
offset := int64(0)
|
return int(nchecked), g.Wait()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify that the file target has the contents of node.
|
||||||
|
//
|
||||||
|
// buf and the first return value are scratch space, passed around for reuse.
|
||||||
|
// Reusing buffers prevents the verifier goroutines allocating all of RAM and
|
||||||
|
// flushing the filesystem cache (at least on Linux).
|
||||||
|
func (res *Restorer) verifyFile(target string, node *restic.Node, buf []byte) ([]byte, error) {
|
||||||
|
f, err := os.Open(target)
|
||||||
|
if err != nil {
|
||||||
|
return buf, err
|
||||||
|
}
|
||||||
|
defer func() {
|
||||||
|
_ = f.Close()
|
||||||
|
}()
|
||||||
|
|
||||||
|
fi, err := f.Stat()
|
||||||
|
switch {
|
||||||
|
case err != nil:
|
||||||
|
return buf, err
|
||||||
|
case int64(node.Size) != fi.Size():
|
||||||
|
return buf, errors.Errorf("Invalid file size for %s: expected %d, got %d",
|
||||||
|
target, node.Size, fi.Size())
|
||||||
|
}
|
||||||
|
|
||||||
|
var offset int64
|
||||||
for _, blobID := range node.Content {
|
for _, blobID := range node.Content {
|
||||||
length, _ := res.repo.LookupBlobSize(blobID, restic.DataBlob)
|
length, found := res.repo.LookupBlobSize(blobID, restic.DataBlob)
|
||||||
buf := make([]byte, length) // TODO do I want to reuse the buffer somehow?
|
if !found {
|
||||||
_, err = file.ReadAt(buf, offset)
|
return buf, errors.Errorf("Unable to fetch blob %s", blobID)
|
||||||
|
}
|
||||||
|
|
||||||
|
if length > uint(cap(buf)) {
|
||||||
|
buf = make([]byte, 2*length)
|
||||||
|
}
|
||||||
|
buf = buf[:length]
|
||||||
|
|
||||||
|
_, err = f.ReadAt(buf, offset)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
_ = file.Close()
|
return buf, err
|
||||||
return err
|
|
||||||
}
|
}
|
||||||
if !blobID.Equal(restic.Hash(buf)) {
|
if !blobID.Equal(restic.Hash(buf)) {
|
||||||
_ = file.Close()
|
return buf, errors.Errorf(
|
||||||
return errors.Errorf("Unexpected contents starting at offset %d", offset)
|
"Unexpected content in %s, starting at offset %d",
|
||||||
|
target, offset)
|
||||||
}
|
}
|
||||||
offset += int64(length)
|
offset += int64(length)
|
||||||
}
|
}
|
||||||
|
|
||||||
return file.Close()
|
return buf, nil
|
||||||
},
|
|
||||||
leaveDir: func(node *restic.Node, target, location string) error { return nil },
|
|
||||||
})
|
|
||||||
|
|
||||||
return count, err
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -367,6 +367,11 @@ func TestRestorer(t *testing.T) {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if len(test.ErrorsMust)+len(test.ErrorsMay) == 0 {
|
||||||
|
_, err = res.VerifyFiles(ctx, tempdir)
|
||||||
|
rtest.OK(t, err)
|
||||||
|
}
|
||||||
|
|
||||||
for location, expectedErrors := range test.ErrorsMust {
|
for location, expectedErrors := range test.ErrorsMust {
|
||||||
actualErrors, ok := errors[location]
|
actualErrors, ok := errors[location]
|
||||||
if !ok {
|
if !ok {
|
||||||
|
@ -465,6 +470,9 @@ func TestRestorerRelative(t *testing.T) {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
nverified, err := res.VerifyFiles(ctx, "restore")
|
||||||
|
rtest.OK(t, err)
|
||||||
|
rtest.Equals(t, len(test.Files), nverified)
|
||||||
|
|
||||||
for filename, err := range errors {
|
for filename, err := range errors {
|
||||||
t.Errorf("unexpected error for %v found: %v", filename, err)
|
t.Errorf("unexpected error for %v found: %v", filename, err)
|
||||||
|
@ -800,3 +808,42 @@ func TestRestorerConsistentTimestampsAndPermissions(t *testing.T) {
|
||||||
checkConsistentInfo(t, test.path, f, test.modtime, test.mode)
|
checkConsistentInfo(t, test.path, f, test.modtime, test.mode)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// VerifyFiles must not report cancelation of its context through res.Error.
|
||||||
|
func TestVerifyCancel(t *testing.T) {
|
||||||
|
snapshot := Snapshot{
|
||||||
|
Nodes: map[string]Node{
|
||||||
|
"foo": File{Data: "content: foo\n"},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
repo, cleanup := repository.TestRepository(t)
|
||||||
|
defer cleanup()
|
||||||
|
|
||||||
|
_, id := saveSnapshot(t, repo, snapshot)
|
||||||
|
|
||||||
|
res, err := NewRestorer(context.TODO(), repo, id)
|
||||||
|
rtest.OK(t, err)
|
||||||
|
|
||||||
|
tempdir, cleanup := rtest.TempDir(t)
|
||||||
|
defer cleanup()
|
||||||
|
|
||||||
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
rtest.OK(t, res.RestoreTo(ctx, tempdir))
|
||||||
|
err = ioutil.WriteFile(filepath.Join(tempdir, "foo"), []byte("bar"), 0644)
|
||||||
|
rtest.OK(t, err)
|
||||||
|
|
||||||
|
var errs []error
|
||||||
|
res.Error = func(filename string, err error) error {
|
||||||
|
errs = append(errs, err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
nverified, err := res.VerifyFiles(ctx, tempdir)
|
||||||
|
rtest.Equals(t, 0, nverified)
|
||||||
|
rtest.Assert(t, err != nil, "nil error from VerifyFiles")
|
||||||
|
rtest.Equals(t, 1, len(errs))
|
||||||
|
rtest.Assert(t, strings.Contains(errs[0].Error(), "Invalid file size for"), "wrong error %q", errs[0].Error())
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in a new issue