restorer: Optimize empty file restore

don't create fileInfo structs for empty files. this saves memory.
this also avoids extra serial scan of all fileInfo, which should
make restore faster and more consistent.

Signed-off-by: Igor Fedorenko <igor@ifedorenko.com>
This commit is contained in:
Igor Fedorenko 2018-09-27 08:59:33 -04:00 committed by Alexander Neumann
parent c2bcb764cd
commit bda8d7722e
5 changed files with 163 additions and 89 deletions

View file

@ -3,7 +3,6 @@ package restorer
import (
"context"
"io"
"os"
"path/filepath"
"github.com/restic/restic/internal/crypto"
@ -95,37 +94,25 @@ type processingInfo struct {
}
func (r *fileRestorer) restoreFiles(ctx context.Context, onError func(path string, err error)) error {
for _, file := range r.files {
dbgmsg := file.location + ": "
r.idx.forEachFilePack(file, func(packIdx int, packID restic.ID, packBlobs []restic.Blob) bool {
if packIdx > 0 {
dbgmsg += ", "
}
dbgmsg += "pack{id=" + packID.Str() + ", blobs: "
for blobIdx, blob := range packBlobs {
if blobIdx > 0 {
dbgmsg += ", "
}
dbgmsg += blob.ID.Str()
}
dbgmsg += "}"
return true // keep going
})
debug.Log(dbgmsg)
}
// synchronously create empty files (empty files need no packs and are ignored by packQueue)
for _, file := range r.files {
fullpath := r.targetPath(file.location)
if len(file.blobs) == 0 {
wr, err := os.OpenFile(fullpath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0600)
if err == nil {
wr.Close()
} else {
onError(file.location, err)
}
}
}
// TODO conditionally enable when debug log is on
// for _, file := range r.files {
// dbgmsg := file.location + ": "
// r.idx.forEachFilePack(file, func(packIdx int, packID restic.ID, packBlobs []restic.Blob) bool {
// if packIdx > 0 {
// dbgmsg += ", "
// }
// dbgmsg += "pack{id=" + packID.Str() + ", blobs: "
// for blobIdx, blob := range packBlobs {
// if blobIdx > 0 {
// dbgmsg += ", "
// }
// dbgmsg += blob.ID.Str()
// }
// dbgmsg += "}"
// return true // keep going
// })
// debug.Log(dbgmsg)
// }
inprogress := make(map[*fileInfo]struct{})
queue, err := newPackQueue(r.idx, r.files, func(files map[*fileInfo]struct{}) bool {

View file

@ -210,15 +210,3 @@ func TestFileRestorerBasic(t *testing.T) {
},
})
}
func TestFileRestorerEmptyFile(t *testing.T) {
tempdir, cleanup := rtest.TempDir(t)
defer cleanup()
restoreAndVerify(t, tempdir, []TestFile{
TestFile{
name: "empty",
blobs: []TestBlob{},
},
})
}

View file

@ -175,6 +175,19 @@ func (res *Restorer) restoreHardlinkAt(node *restic.Node, target, path, location
return res.restoreNodeMetadataTo(node, path, location)
}
func (res *Restorer) restoreEmptyFileAt(node *restic.Node, target, location string) error {
wr, err := os.OpenFile(target, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0600)
if err != nil {
return err
}
err = wr.Close()
if err != nil {
return err
}
return res.restoreNodeMetadataTo(node, target, location)
}
// RestoreTo creates the directories and files in the snapshot below dst.
// Before an item is created, res.Filter is called.
func (res *Restorer) RestoreTo(ctx context.Context, dst string) error {
@ -215,6 +228,10 @@ func (res *Restorer) RestoreTo(ctx context.Context, dst string) error {
return nil
}
if node.Size == 0 {
return nil // deal with empty files later
}
if node.Links > 1 {
if idx.Has(node.Inode, node.DeviceID) {
return nil
@ -241,14 +258,22 @@ func (res *Restorer) RestoreTo(ctx context.Context, dst string) error {
return res.traverseTree(ctx, dst, string(filepath.Separator), *res.sn.Tree, treeVisitor{
enterDir: noop,
visitNode: func(node *restic.Node, target, location string) error {
if idx.Has(node.Inode, node.DeviceID) && idx.GetFilename(node.Inode, node.DeviceID) != location {
return res.restoreHardlinkAt(node, filerestorer.targetPath(idx.GetFilename(node.Inode, node.DeviceID)), target, location)
}
if node.Type != "file" {
return res.restoreNodeTo(ctx, node, target, location)
}
// create empty files, but not hardlinks to empty files
if node.Size == 0 && (node.Links < 2 || !idx.Has(node.Inode, node.DeviceID)) {
if node.Links > 1 {
idx.Add(node.Inode, node.DeviceID, location)
}
return res.restoreEmptyFileAt(node, target, location)
}
if idx.Has(node.Inode, node.DeviceID) && idx.GetFilename(node.Inode, node.DeviceID) != location {
return res.restoreHardlinkAt(node, filerestorer.targetPath(idx.GetFilename(node.Inode, node.DeviceID)), target, location)
}
return res.restoreNodeMetadataTo(node, target, location)
},
leaveDir: restoreNodeMetadata,

View file

@ -24,7 +24,9 @@ type Snapshot struct {
}
type File struct {
Data string
Data string
Links uint64
Inode uint64
}
type Dir struct {
@ -51,22 +53,33 @@ func saveDir(t testing.TB, repo restic.Repository, nodes map[string]Node, inode
tree := &restic.Tree{}
for name, n := range nodes {
inode++
var id restic.ID
switch node := n.(type) {
case File:
id = saveFile(t, repo, node)
fi := n.(File).Inode
if fi == 0 {
fi = inode
}
lc := n.(File).Links
if lc == 0 {
lc = 1
}
fc := []restic.ID{}
if len(n.(File).Data) > 0 {
fc = append(fc, saveFile(t, repo, node))
}
tree.Insert(&restic.Node{
Type: "file",
Mode: 0644,
Name: name,
UID: uint32(os.Getuid()),
GID: uint32(os.Getgid()),
Content: []restic.ID{id},
Content: fc,
Size: uint64(len(n.(File).Data)),
Inode: inode,
Inode: fi,
Links: lc,
})
case Dir:
id = saveDir(t, repo, node.Nodes, inode)
id := saveDir(t, repo, node.Nodes, inode)
mode := node.Mode
if mode == 0 {
@ -98,7 +111,7 @@ func saveSnapshot(t testing.TB, repo restic.Repository, snapshot Snapshot) (*res
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
treeID := saveDir(t, repo, snapshot.Nodes, 0)
treeID := saveDir(t, repo, snapshot.Nodes, 1000)
err := repo.Flush(ctx)
if err != nil {
@ -142,10 +155,10 @@ func TestRestorer(t *testing.T) {
{
Snapshot: Snapshot{
Nodes: map[string]Node{
"foo": File{"content: foo\n"},
"foo": File{Data: "content: foo\n"},
"dirtest": Dir{
Nodes: map[string]Node{
"file": File{"content: file\n"},
"file": File{Data: "content: file\n"},
},
},
},
@ -158,13 +171,13 @@ func TestRestorer(t *testing.T) {
{
Snapshot: Snapshot{
Nodes: map[string]Node{
"top": File{"toplevel file"},
"top": File{Data: "toplevel file"},
"dir": Dir{
Nodes: map[string]Node{
"file": File{"file in dir"},
"file": File{Data: "file in dir"},
"subdir": Dir{
Nodes: map[string]Node{
"file": File{"file in subdir"},
"file": File{Data: "file in subdir"},
},
},
},
@ -183,7 +196,7 @@ func TestRestorer(t *testing.T) {
"dir": Dir{
Mode: 0444,
},
"file": File{"top-level file"},
"file": File{Data: "top-level file"},
},
},
Files: map[string]string{
@ -196,7 +209,7 @@ func TestRestorer(t *testing.T) {
"dir": Dir{
Mode: 0555,
Nodes: map[string]Node{
"file": File{"file in dir"},
"file": File{Data: "file in dir"},
},
},
},
@ -208,7 +221,7 @@ func TestRestorer(t *testing.T) {
{
Snapshot: Snapshot{
Nodes: map[string]Node{
"topfile": File{"top-level file"},
"topfile": File{Data: "top-level file"},
},
},
Files: map[string]string{
@ -220,7 +233,7 @@ func TestRestorer(t *testing.T) {
Nodes: map[string]Node{
"dir": Dir{
Nodes: map[string]Node{
"file": File{"content: file\n"},
"file": File{Data: "content: file\n"},
},
},
},
@ -245,8 +258,8 @@ func TestRestorer(t *testing.T) {
{
Snapshot: Snapshot{
Nodes: map[string]Node{
`..\test`: File{"foo\n"},
`..\..\foo\..\bar\..\xx\test2`: File{"test2\n"},
`..\test`: File{Data: "foo\n"},
`..\..\foo\..\bar\..\xx\test2`: File{Data: "test2\n"},
},
},
ErrorsMay: map[string]map[string]struct{}{
@ -259,8 +272,8 @@ func TestRestorer(t *testing.T) {
{
Snapshot: Snapshot{
Nodes: map[string]Node{
`../test`: File{"foo\n"},
`../../foo/../bar/../xx/test2`: File{"test2\n"},
`../test`: File{Data: "foo\n"},
`../../foo/../bar/../xx/test2`: File{Data: "test2\n"},
},
},
ErrorsMay: map[string]map[string]struct{}{
@ -273,16 +286,16 @@ func TestRestorer(t *testing.T) {
{
Snapshot: Snapshot{
Nodes: map[string]Node{
"top": File{"toplevel file"},
"top": File{Data: "toplevel file"},
"x": Dir{
Nodes: map[string]Node{
"file1": File{"file1"},
"file1": File{Data: "file1"},
"..": Dir{
Nodes: map[string]Node{
"file2": File{"file2"},
"file2": File{Data: "file2"},
"..": Dir{
Nodes: map[string]Node{
"file2": File{"file2"},
"file2": File{Data: "file2"},
},
},
},
@ -404,10 +417,10 @@ func TestRestorerRelative(t *testing.T) {
{
Snapshot: Snapshot{
Nodes: map[string]Node{
"foo": File{"content: foo\n"},
"foo": File{Data: "content: foo\n"},
"dirtest": Dir{
Nodes: map[string]Node{
"file": File{"content: file\n"},
"file": File{Data: "content: file\n"},
},
},
},
@ -526,12 +539,12 @@ func TestRestorerTraverseTree(t *testing.T) {
Snapshot: Snapshot{
Nodes: map[string]Node{
"dir": Dir{Nodes: map[string]Node{
"otherfile": File{"x"},
"otherfile": File{Data: "x"},
"subdir": Dir{Nodes: map[string]Node{
"file": File{"content: file\n"},
"file": File{Data: "content: file\n"},
}},
}},
"foo": File{"content: foo\n"},
"foo": File{Data: "content: foo\n"},
},
},
Select: func(item string, dstpath string, node *restic.Node) (selectForRestore bool, childMayBeSelected bool) {
@ -553,12 +566,12 @@ func TestRestorerTraverseTree(t *testing.T) {
Snapshot: Snapshot{
Nodes: map[string]Node{
"dir": Dir{Nodes: map[string]Node{
"otherfile": File{"x"},
"otherfile": File{Data: "x"},
"subdir": Dir{Nodes: map[string]Node{
"file": File{"content: file\n"},
"file": File{Data: "content: file\n"},
}},
}},
"foo": File{"content: foo\n"},
"foo": File{Data: "content: foo\n"},
},
},
Select: func(item string, dstpath string, node *restic.Node) (selectForRestore bool, childMayBeSelected bool) {
@ -574,11 +587,11 @@ func TestRestorerTraverseTree(t *testing.T) {
{
Snapshot: Snapshot{
Nodes: map[string]Node{
"aaa": File{"content: foo\n"},
"aaa": File{Data: "content: foo\n"},
"dir": Dir{Nodes: map[string]Node{
"otherfile": File{"x"},
"otherfile": File{Data: "x"},
"subdir": Dir{Nodes: map[string]Node{
"file": File{"content: file\n"},
"file": File{Data: "content: file\n"},
}},
}},
},
@ -599,12 +612,12 @@ func TestRestorerTraverseTree(t *testing.T) {
Snapshot: Snapshot{
Nodes: map[string]Node{
"dir": Dir{Nodes: map[string]Node{
"otherfile": File{"x"},
"otherfile": File{Data: "x"},
"subdir": Dir{Nodes: map[string]Node{
"file": File{"content: file\n"},
"file": File{Data: "content: file\n"},
}},
}},
"foo": File{"content: foo\n"},
"foo": File{Data: "content: foo\n"},
},
},
Select: func(item string, dstpath string, node *restic.Node) (selectForRestore bool, childMayBeSelected bool) {
@ -628,12 +641,12 @@ func TestRestorerTraverseTree(t *testing.T) {
Snapshot: Snapshot{
Nodes: map[string]Node{
"dir": Dir{Nodes: map[string]Node{
"otherfile": File{"x"},
"otherfile": File{Data: "x"},
"subdir": Dir{Nodes: map[string]Node{
"file": File{"content: file\n"},
"file": File{Data: "content: file\n"},
}},
}},
"foo": File{"content: foo\n"},
"foo": File{Data: "content: foo\n"},
},
},
Select: func(item string, dstpath string, node *restic.Node) (selectForRestore bool, childMayBeSelected bool) {

View file

@ -0,0 +1,61 @@
//+build !windows
package restorer
import (
"context"
"os"
"path/filepath"
"syscall"
"testing"
"github.com/restic/restic/internal/repository"
"github.com/restic/restic/internal/restic"
rtest "github.com/restic/restic/internal/test"
)
func TestRestorerRestoreEmptyHardlinkedFileds(t *testing.T) {
repo, cleanup := repository.TestRepository(t)
defer cleanup()
_, id := saveSnapshot(t, repo, Snapshot{
Nodes: map[string]Node{
"dirtest": Dir{
Nodes: map[string]Node{
"file1": File{Links: 2, Inode: 1},
"file2": File{Links: 2, Inode: 1},
},
},
},
})
res, err := NewRestorer(repo, id)
rtest.OK(t, err)
res.SelectFilter = func(item string, dstpath string, node *restic.Node) (selectedForRestore bool, childMayBeSelected bool) {
return true, true
}
tempdir, cleanup := rtest.TempDir(t)
defer cleanup()
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
err = res.RestoreTo(ctx, tempdir)
rtest.OK(t, err)
f1, err := os.Stat(filepath.Join(tempdir, "dirtest/file1"))
rtest.OK(t, err)
rtest.Equals(t, int64(0), f1.Size())
s1, ok1 := f1.Sys().(*syscall.Stat_t)
f2, err := os.Stat(filepath.Join(tempdir, "dirtest/file2"))
rtest.OK(t, err)
rtest.Equals(t, int64(0), f2.Size())
s2, ok2 := f2.Sys().(*syscall.Stat_t)
if ok1 && ok2 {
rtest.Equals(t, s1.Ino, s2.Ino)
}
}