From bda8d7722edf054392f2dfce5877754757f80982 Mon Sep 17 00:00:00 2001 From: Igor Fedorenko Date: Thu, 27 Sep 2018 08:59:33 -0400 Subject: [PATCH] restorer: Optimize empty file restore don't create fileInfo structs for empty files. this saves memory. this also avoids extra serial scan of all fileInfo, which should make restore faster and more consistent. Signed-off-by: Igor Fedorenko --- internal/restorer/filerestorer.go | 51 +++++-------- internal/restorer/filerestorer_test.go | 12 ---- internal/restorer/restorer.go | 33 +++++++-- internal/restorer/restorer_test.go | 95 ++++++++++++++----------- internal/restorer/restorer_unix_test.go | 61 ++++++++++++++++ 5 files changed, 163 insertions(+), 89 deletions(-) create mode 100644 internal/restorer/restorer_unix_test.go diff --git a/internal/restorer/filerestorer.go b/internal/restorer/filerestorer.go index c8ce43d60..4baf9b567 100644 --- a/internal/restorer/filerestorer.go +++ b/internal/restorer/filerestorer.go @@ -3,7 +3,6 @@ package restorer import ( "context" "io" - "os" "path/filepath" "github.com/restic/restic/internal/crypto" @@ -95,37 +94,25 @@ type processingInfo struct { } func (r *fileRestorer) restoreFiles(ctx context.Context, onError func(path string, err error)) error { - for _, file := range r.files { - dbgmsg := file.location + ": " - r.idx.forEachFilePack(file, func(packIdx int, packID restic.ID, packBlobs []restic.Blob) bool { - if packIdx > 0 { - dbgmsg += ", " - } - dbgmsg += "pack{id=" + packID.Str() + ", blobs: " - for blobIdx, blob := range packBlobs { - if blobIdx > 0 { - dbgmsg += ", " - } - dbgmsg += blob.ID.Str() - } - dbgmsg += "}" - return true // keep going - }) - debug.Log(dbgmsg) - } - - // synchronously create empty files (empty files need no packs and are ignored by packQueue) - for _, file := range r.files { - fullpath := r.targetPath(file.location) - if len(file.blobs) == 0 { - wr, err := os.OpenFile(fullpath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0600) - if err == nil { - wr.Close() - } else { - onError(file.location, err) - } - } - } + // TODO conditionally enable when debug log is on + // for _, file := range r.files { + // dbgmsg := file.location + ": " + // r.idx.forEachFilePack(file, func(packIdx int, packID restic.ID, packBlobs []restic.Blob) bool { + // if packIdx > 0 { + // dbgmsg += ", " + // } + // dbgmsg += "pack{id=" + packID.Str() + ", blobs: " + // for blobIdx, blob := range packBlobs { + // if blobIdx > 0 { + // dbgmsg += ", " + // } + // dbgmsg += blob.ID.Str() + // } + // dbgmsg += "}" + // return true // keep going + // }) + // debug.Log(dbgmsg) + // } inprogress := make(map[*fileInfo]struct{}) queue, err := newPackQueue(r.idx, r.files, func(files map[*fileInfo]struct{}) bool { diff --git a/internal/restorer/filerestorer_test.go b/internal/restorer/filerestorer_test.go index 9584fabf7..dd022e9d4 100644 --- a/internal/restorer/filerestorer_test.go +++ b/internal/restorer/filerestorer_test.go @@ -210,15 +210,3 @@ func TestFileRestorerBasic(t *testing.T) { }, }) } - -func TestFileRestorerEmptyFile(t *testing.T) { - tempdir, cleanup := rtest.TempDir(t) - defer cleanup() - - restoreAndVerify(t, tempdir, []TestFile{ - TestFile{ - name: "empty", - blobs: []TestBlob{}, - }, - }) -} diff --git a/internal/restorer/restorer.go b/internal/restorer/restorer.go index 291ede287..b6fb2b60e 100644 --- a/internal/restorer/restorer.go +++ b/internal/restorer/restorer.go @@ -175,6 +175,19 @@ func (res *Restorer) restoreHardlinkAt(node *restic.Node, target, path, location return res.restoreNodeMetadataTo(node, path, location) } +func (res *Restorer) restoreEmptyFileAt(node *restic.Node, target, location string) error { + wr, err := os.OpenFile(target, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0600) + if err != nil { + return err + } + err = wr.Close() + if err != nil { + return err + } + + return res.restoreNodeMetadataTo(node, target, location) +} + // RestoreTo creates the directories and files in the snapshot below dst. // Before an item is created, res.Filter is called. func (res *Restorer) RestoreTo(ctx context.Context, dst string) error { @@ -215,6 +228,10 @@ func (res *Restorer) RestoreTo(ctx context.Context, dst string) error { return nil } + if node.Size == 0 { + return nil // deal with empty files later + } + if node.Links > 1 { if idx.Has(node.Inode, node.DeviceID) { return nil @@ -241,14 +258,22 @@ func (res *Restorer) RestoreTo(ctx context.Context, dst string) error { return res.traverseTree(ctx, dst, string(filepath.Separator), *res.sn.Tree, treeVisitor{ enterDir: noop, visitNode: func(node *restic.Node, target, location string) error { - if idx.Has(node.Inode, node.DeviceID) && idx.GetFilename(node.Inode, node.DeviceID) != location { - return res.restoreHardlinkAt(node, filerestorer.targetPath(idx.GetFilename(node.Inode, node.DeviceID)), target, location) - } - if node.Type != "file" { return res.restoreNodeTo(ctx, node, target, location) } + // create empty files, but not hardlinks to empty files + if node.Size == 0 && (node.Links < 2 || !idx.Has(node.Inode, node.DeviceID)) { + if node.Links > 1 { + idx.Add(node.Inode, node.DeviceID, location) + } + return res.restoreEmptyFileAt(node, target, location) + } + + if idx.Has(node.Inode, node.DeviceID) && idx.GetFilename(node.Inode, node.DeviceID) != location { + return res.restoreHardlinkAt(node, filerestorer.targetPath(idx.GetFilename(node.Inode, node.DeviceID)), target, location) + } + return res.restoreNodeMetadataTo(node, target, location) }, leaveDir: restoreNodeMetadata, diff --git a/internal/restorer/restorer_test.go b/internal/restorer/restorer_test.go index 1e73674cd..70136bfe3 100644 --- a/internal/restorer/restorer_test.go +++ b/internal/restorer/restorer_test.go @@ -24,7 +24,9 @@ type Snapshot struct { } type File struct { - Data string + Data string + Links uint64 + Inode uint64 } type Dir struct { @@ -51,22 +53,33 @@ func saveDir(t testing.TB, repo restic.Repository, nodes map[string]Node, inode tree := &restic.Tree{} for name, n := range nodes { inode++ - var id restic.ID switch node := n.(type) { case File: - id = saveFile(t, repo, node) + fi := n.(File).Inode + if fi == 0 { + fi = inode + } + lc := n.(File).Links + if lc == 0 { + lc = 1 + } + fc := []restic.ID{} + if len(n.(File).Data) > 0 { + fc = append(fc, saveFile(t, repo, node)) + } tree.Insert(&restic.Node{ Type: "file", Mode: 0644, Name: name, UID: uint32(os.Getuid()), GID: uint32(os.Getgid()), - Content: []restic.ID{id}, + Content: fc, Size: uint64(len(n.(File).Data)), - Inode: inode, + Inode: fi, + Links: lc, }) case Dir: - id = saveDir(t, repo, node.Nodes, inode) + id := saveDir(t, repo, node.Nodes, inode) mode := node.Mode if mode == 0 { @@ -98,7 +111,7 @@ func saveSnapshot(t testing.TB, repo restic.Repository, snapshot Snapshot) (*res ctx, cancel := context.WithCancel(context.Background()) defer cancel() - treeID := saveDir(t, repo, snapshot.Nodes, 0) + treeID := saveDir(t, repo, snapshot.Nodes, 1000) err := repo.Flush(ctx) if err != nil { @@ -142,10 +155,10 @@ func TestRestorer(t *testing.T) { { Snapshot: Snapshot{ Nodes: map[string]Node{ - "foo": File{"content: foo\n"}, + "foo": File{Data: "content: foo\n"}, "dirtest": Dir{ Nodes: map[string]Node{ - "file": File{"content: file\n"}, + "file": File{Data: "content: file\n"}, }, }, }, @@ -158,13 +171,13 @@ func TestRestorer(t *testing.T) { { Snapshot: Snapshot{ Nodes: map[string]Node{ - "top": File{"toplevel file"}, + "top": File{Data: "toplevel file"}, "dir": Dir{ Nodes: map[string]Node{ - "file": File{"file in dir"}, + "file": File{Data: "file in dir"}, "subdir": Dir{ Nodes: map[string]Node{ - "file": File{"file in subdir"}, + "file": File{Data: "file in subdir"}, }, }, }, @@ -183,7 +196,7 @@ func TestRestorer(t *testing.T) { "dir": Dir{ Mode: 0444, }, - "file": File{"top-level file"}, + "file": File{Data: "top-level file"}, }, }, Files: map[string]string{ @@ -196,7 +209,7 @@ func TestRestorer(t *testing.T) { "dir": Dir{ Mode: 0555, Nodes: map[string]Node{ - "file": File{"file in dir"}, + "file": File{Data: "file in dir"}, }, }, }, @@ -208,7 +221,7 @@ func TestRestorer(t *testing.T) { { Snapshot: Snapshot{ Nodes: map[string]Node{ - "topfile": File{"top-level file"}, + "topfile": File{Data: "top-level file"}, }, }, Files: map[string]string{ @@ -220,7 +233,7 @@ func TestRestorer(t *testing.T) { Nodes: map[string]Node{ "dir": Dir{ Nodes: map[string]Node{ - "file": File{"content: file\n"}, + "file": File{Data: "content: file\n"}, }, }, }, @@ -245,8 +258,8 @@ func TestRestorer(t *testing.T) { { Snapshot: Snapshot{ Nodes: map[string]Node{ - `..\test`: File{"foo\n"}, - `..\..\foo\..\bar\..\xx\test2`: File{"test2\n"}, + `..\test`: File{Data: "foo\n"}, + `..\..\foo\..\bar\..\xx\test2`: File{Data: "test2\n"}, }, }, ErrorsMay: map[string]map[string]struct{}{ @@ -259,8 +272,8 @@ func TestRestorer(t *testing.T) { { Snapshot: Snapshot{ Nodes: map[string]Node{ - `../test`: File{"foo\n"}, - `../../foo/../bar/../xx/test2`: File{"test2\n"}, + `../test`: File{Data: "foo\n"}, + `../../foo/../bar/../xx/test2`: File{Data: "test2\n"}, }, }, ErrorsMay: map[string]map[string]struct{}{ @@ -273,16 +286,16 @@ func TestRestorer(t *testing.T) { { Snapshot: Snapshot{ Nodes: map[string]Node{ - "top": File{"toplevel file"}, + "top": File{Data: "toplevel file"}, "x": Dir{ Nodes: map[string]Node{ - "file1": File{"file1"}, + "file1": File{Data: "file1"}, "..": Dir{ Nodes: map[string]Node{ - "file2": File{"file2"}, + "file2": File{Data: "file2"}, "..": Dir{ Nodes: map[string]Node{ - "file2": File{"file2"}, + "file2": File{Data: "file2"}, }, }, }, @@ -404,10 +417,10 @@ func TestRestorerRelative(t *testing.T) { { Snapshot: Snapshot{ Nodes: map[string]Node{ - "foo": File{"content: foo\n"}, + "foo": File{Data: "content: foo\n"}, "dirtest": Dir{ Nodes: map[string]Node{ - "file": File{"content: file\n"}, + "file": File{Data: "content: file\n"}, }, }, }, @@ -526,12 +539,12 @@ func TestRestorerTraverseTree(t *testing.T) { Snapshot: Snapshot{ Nodes: map[string]Node{ "dir": Dir{Nodes: map[string]Node{ - "otherfile": File{"x"}, + "otherfile": File{Data: "x"}, "subdir": Dir{Nodes: map[string]Node{ - "file": File{"content: file\n"}, + "file": File{Data: "content: file\n"}, }}, }}, - "foo": File{"content: foo\n"}, + "foo": File{Data: "content: foo\n"}, }, }, Select: func(item string, dstpath string, node *restic.Node) (selectForRestore bool, childMayBeSelected bool) { @@ -553,12 +566,12 @@ func TestRestorerTraverseTree(t *testing.T) { Snapshot: Snapshot{ Nodes: map[string]Node{ "dir": Dir{Nodes: map[string]Node{ - "otherfile": File{"x"}, + "otherfile": File{Data: "x"}, "subdir": Dir{Nodes: map[string]Node{ - "file": File{"content: file\n"}, + "file": File{Data: "content: file\n"}, }}, }}, - "foo": File{"content: foo\n"}, + "foo": File{Data: "content: foo\n"}, }, }, Select: func(item string, dstpath string, node *restic.Node) (selectForRestore bool, childMayBeSelected bool) { @@ -574,11 +587,11 @@ func TestRestorerTraverseTree(t *testing.T) { { Snapshot: Snapshot{ Nodes: map[string]Node{ - "aaa": File{"content: foo\n"}, + "aaa": File{Data: "content: foo\n"}, "dir": Dir{Nodes: map[string]Node{ - "otherfile": File{"x"}, + "otherfile": File{Data: "x"}, "subdir": Dir{Nodes: map[string]Node{ - "file": File{"content: file\n"}, + "file": File{Data: "content: file\n"}, }}, }}, }, @@ -599,12 +612,12 @@ func TestRestorerTraverseTree(t *testing.T) { Snapshot: Snapshot{ Nodes: map[string]Node{ "dir": Dir{Nodes: map[string]Node{ - "otherfile": File{"x"}, + "otherfile": File{Data: "x"}, "subdir": Dir{Nodes: map[string]Node{ - "file": File{"content: file\n"}, + "file": File{Data: "content: file\n"}, }}, }}, - "foo": File{"content: foo\n"}, + "foo": File{Data: "content: foo\n"}, }, }, Select: func(item string, dstpath string, node *restic.Node) (selectForRestore bool, childMayBeSelected bool) { @@ -628,12 +641,12 @@ func TestRestorerTraverseTree(t *testing.T) { Snapshot: Snapshot{ Nodes: map[string]Node{ "dir": Dir{Nodes: map[string]Node{ - "otherfile": File{"x"}, + "otherfile": File{Data: "x"}, "subdir": Dir{Nodes: map[string]Node{ - "file": File{"content: file\n"}, + "file": File{Data: "content: file\n"}, }}, }}, - "foo": File{"content: foo\n"}, + "foo": File{Data: "content: foo\n"}, }, }, Select: func(item string, dstpath string, node *restic.Node) (selectForRestore bool, childMayBeSelected bool) { diff --git a/internal/restorer/restorer_unix_test.go b/internal/restorer/restorer_unix_test.go new file mode 100644 index 000000000..fc80015c1 --- /dev/null +++ b/internal/restorer/restorer_unix_test.go @@ -0,0 +1,61 @@ +//+build !windows + +package restorer + +import ( + "context" + "os" + "path/filepath" + "syscall" + "testing" + + "github.com/restic/restic/internal/repository" + "github.com/restic/restic/internal/restic" + rtest "github.com/restic/restic/internal/test" +) + +func TestRestorerRestoreEmptyHardlinkedFileds(t *testing.T) { + repo, cleanup := repository.TestRepository(t) + defer cleanup() + + _, id := saveSnapshot(t, repo, Snapshot{ + Nodes: map[string]Node{ + "dirtest": Dir{ + Nodes: map[string]Node{ + "file1": File{Links: 2, Inode: 1}, + "file2": File{Links: 2, Inode: 1}, + }, + }, + }, + }) + + res, err := NewRestorer(repo, id) + rtest.OK(t, err) + + res.SelectFilter = func(item string, dstpath string, node *restic.Node) (selectedForRestore bool, childMayBeSelected bool) { + return true, true + } + + tempdir, cleanup := rtest.TempDir(t) + defer cleanup() + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + err = res.RestoreTo(ctx, tempdir) + rtest.OK(t, err) + + f1, err := os.Stat(filepath.Join(tempdir, "dirtest/file1")) + rtest.OK(t, err) + rtest.Equals(t, int64(0), f1.Size()) + s1, ok1 := f1.Sys().(*syscall.Stat_t) + + f2, err := os.Stat(filepath.Join(tempdir, "dirtest/file2")) + rtest.OK(t, err) + rtest.Equals(t, int64(0), f2.Size()) + s2, ok2 := f2.Sys().(*syscall.Stat_t) + + if ok1 && ok2 { + rtest.Equals(t, s1.Ino, s2.Ino) + } +}