walker: extend TreeRewriter to support snapshot repairing

This adds support for caching already rewritten trees, handling of load
errors and disabling the check that the serialization doesn't lead to
data loss.
This commit is contained in:
Michael Eischer 2022-12-28 11:34:55 +01:00
parent 38dac78180
commit 1bd1f3008d
3 changed files with 161 additions and 12 deletions

View file

@ -95,6 +95,7 @@ func rewriteSnapshot(ctx context.Context, repo *repository.Repository, sn *resti
Verbosef(fmt.Sprintf("excluding %s\n", path))
return nil
},
DisableNodeCache: true,
})
return filterAndReplaceSnapshot(ctx, repo, sn,

View file

@ -10,26 +10,45 @@ import (
)
type NodeRewriteFunc func(node *restic.Node, path string) *restic.Node
type FailedTreeRewriteFunc func(nodeID restic.ID, path string, err error) (restic.ID, error)
type RewriteOpts struct {
// return nil to remove the node
RewriteNode NodeRewriteFunc
// decide what to do with a tree that could not be loaded. Return nil to remove the node. By default the load error is returned which causes the operation to fail.
RewriteFailedTree FailedTreeRewriteFunc
AllowUnstableSerialization bool
DisableNodeCache bool
}
type idMap map[restic.ID]restic.ID
type TreeRewriter struct {
opts RewriteOpts
replaces idMap
}
func NewTreeRewriter(opts RewriteOpts) *TreeRewriter {
rw := &TreeRewriter{
opts: opts,
}
if !opts.DisableNodeCache {
rw.replaces = make(idMap)
}
// setup default implementations
if rw.opts.RewriteNode == nil {
rw.opts.RewriteNode = func(node *restic.Node, path string) *restic.Node {
return node
}
}
if rw.opts.RewriteFailedTree == nil {
// fail with error by default
rw.opts.RewriteFailedTree = func(nodeID restic.ID, path string, err error) (restic.ID, error) {
return restic.ID{}, err
}
}
return rw
}
@ -39,20 +58,29 @@ type BlobLoadSaver interface {
}
func (t *TreeRewriter) RewriteTree(ctx context.Context, repo BlobLoadSaver, nodepath string, nodeID restic.ID) (newNodeID restic.ID, err error) {
curTree, err := restic.LoadTree(ctx, repo, nodeID)
if err != nil {
return restic.ID{}, err
// check if tree was already changed
newID, ok := t.replaces[nodeID]
if ok {
return newID, nil
}
// check that we can properly encode this tree without losing information
// The alternative of using json/Decoder.DisallowUnknownFields() doesn't work as we use
// a custom UnmarshalJSON to decode trees, see also https://github.com/golang/go/issues/41144
testID, err := restic.SaveTree(ctx, repo, curTree)
// a nil nodeID will lead to a load error
curTree, err := restic.LoadTree(ctx, repo, nodeID)
if err != nil {
return restic.ID{}, err
return t.opts.RewriteFailedTree(nodeID, nodepath, err)
}
if nodeID != testID {
return restic.ID{}, fmt.Errorf("cannot encode tree at %q without losing information", nodepath)
if !t.opts.AllowUnstableSerialization {
// check that we can properly encode this tree without losing information
// The alternative of using json/Decoder.DisallowUnknownFields() doesn't work as we use
// a custom UnmarshalJSON to decode trees, see also https://github.com/golang/go/issues/41144
testID, err := restic.SaveTree(ctx, repo, curTree)
if err != nil {
return restic.ID{}, err
}
if nodeID != testID {
return restic.ID{}, fmt.Errorf("cannot encode tree at %q without losing information", nodepath)
}
}
debug.Log("filterTree: %s, nodeId: %s\n", nodepath, nodeID.Str())
@ -72,7 +100,12 @@ func (t *TreeRewriter) RewriteTree(ctx context.Context, repo BlobLoadSaver, node
}
continue
}
newID, err := t.RewriteTree(ctx, repo, path, *node.Subtree)
// treat nil as null id
var subtree restic.ID
if node.Subtree != nil {
subtree = *node.Subtree
}
newID, err := t.RewriteTree(ctx, repo, path, subtree)
if err != nil {
return restic.ID{}, err
}
@ -90,6 +123,9 @@ func (t *TreeRewriter) RewriteTree(ctx context.Context, repo BlobLoadSaver, node
// Save new tree
newTreeID, _, _, err := repo.SaveBlob(ctx, restic.TreeBlob, tree, restic.ID{}, false)
if t.replaces != nil {
t.replaces[nodeID] = newTreeID
}
if !newTreeID.Equal(nodeID) {
debug.Log("filterTree: save new tree for %s as %v\n", nodepath, newTreeID)
}

View file

@ -7,6 +7,7 @@ import (
"github.com/pkg/errors"
"github.com/restic/restic/internal/restic"
"github.com/restic/restic/internal/test"
)
// WritableTreeMap also support saving
@ -69,7 +70,7 @@ func checkRewriteItemOrder(want []string) checkRewriteFunc {
}
// checkRewriteSkips excludes nodes if path is in skipFor, it checks that rewriting proceedes in the correct order.
func checkRewriteSkips(skipFor map[string]struct{}, want []string) checkRewriteFunc {
func checkRewriteSkips(skipFor map[string]struct{}, want []string, disableCache bool) checkRewriteFunc {
var pos int
return func(t testing.TB) (rewriter *TreeRewriter, final func(testing.TB)) {
@ -91,6 +92,7 @@ func checkRewriteSkips(skipFor map[string]struct{}, want []string) checkRewriteF
}
return node
},
DisableNodeCache: disableCache,
})
final = func(t testing.TB) {
@ -160,6 +162,7 @@ func TestRewriter(t *testing.T) {
"/subdir",
"/subdir/subfile",
},
false,
),
},
{ // exclude dir
@ -180,6 +183,7 @@ func TestRewriter(t *testing.T) {
"/foo",
"/subdir",
},
false,
),
},
{ // modify node
@ -197,6 +201,75 @@ func TestRewriter(t *testing.T) {
},
check: checkIncreaseNodeSize(21),
},
{ // test cache
tree: TestTree{
// both subdirs are identical
"subdir1": TestTree{
"subfile": TestFile{},
"subfile2": TestFile{},
},
"subdir2": TestTree{
"subfile": TestFile{},
"subfile2": TestFile{},
},
},
newTree: TestTree{
"subdir1": TestTree{
"subfile2": TestFile{},
},
"subdir2": TestTree{
"subfile2": TestFile{},
},
},
check: checkRewriteSkips(
map[string]struct{}{
"/subdir1/subfile": {},
},
[]string{
"/subdir1",
"/subdir1/subfile",
"/subdir1/subfile2",
"/subdir2",
},
false,
),
},
{ // test disabled cache
tree: TestTree{
// both subdirs are identical
"subdir1": TestTree{
"subfile": TestFile{},
"subfile2": TestFile{},
},
"subdir2": TestTree{
"subfile": TestFile{},
"subfile2": TestFile{},
},
},
newTree: TestTree{
"subdir1": TestTree{
"subfile2": TestFile{},
},
"subdir2": TestTree{
"subfile": TestFile{},
"subfile2": TestFile{},
},
},
check: checkRewriteSkips(
map[string]struct{}{
"/subdir1/subfile": {},
},
[]string{
"/subdir1",
"/subdir1/subfile",
"/subdir1/subfile2",
"/subdir2",
"/subdir2/subfile",
"/subdir2/subfile2",
},
true,
),
},
}
for _, test := range tests {
@ -251,4 +324,43 @@ func TestRewriterFailOnUnknownFields(t *testing.T) {
if err == nil {
t.Error("missing error on unknown field")
}
// check that the serialization check can be disabled
rewriter = NewTreeRewriter(RewriteOpts{
AllowUnstableSerialization: true,
})
root, err := rewriter.RewriteTree(ctx, tm, "/", id)
test.OK(t, err)
_, expRoot := BuildTreeMap(TestTree{
"subfile": TestFile{},
})
test.Assert(t, root == expRoot, "mismatched trees")
}
func TestRewriterTreeLoadError(t *testing.T) {
tm := WritableTreeMap{TreeMap{}}
id := restic.NewRandomID()
ctx, cancel := context.WithCancel(context.TODO())
defer cancel()
// also check that load error by default cause the operation to fail
rewriter := NewTreeRewriter(RewriteOpts{})
_, err := rewriter.RewriteTree(ctx, tm, "/", id)
if err == nil {
t.Fatal("missing error on unloadable tree")
}
replacementID := restic.NewRandomID()
rewriter = NewTreeRewriter(RewriteOpts{
RewriteFailedTree: func(nodeID restic.ID, path string, err error) (restic.ID, error) {
if nodeID != id || path != "/" {
t.Fail()
}
return replacementID, nil
},
})
newRoot, err := rewriter.RewriteTree(ctx, tm, "/", id)
test.OK(t, err)
test.Equals(t, replacementID, newRoot)
}