restorer: Optimize empty file restore

don't create fileInfo structs for empty files. this saves memory.
this also avoids extra serial scan of all fileInfo, which should
make restore faster and more consistent.

Signed-off-by: Igor Fedorenko <igor@ifedorenko.com>
This commit is contained in:
Igor Fedorenko 2018-09-27 08:59:33 -04:00 committed by Alexander Neumann
parent c2bcb764cd
commit bda8d7722e
5 changed files with 163 additions and 89 deletions

View file

@ -3,7 +3,6 @@ package restorer
import ( import (
"context" "context"
"io" "io"
"os"
"path/filepath" "path/filepath"
"github.com/restic/restic/internal/crypto" "github.com/restic/restic/internal/crypto"
@ -95,37 +94,25 @@ type processingInfo struct {
} }
func (r *fileRestorer) restoreFiles(ctx context.Context, onError func(path string, err error)) error { func (r *fileRestorer) restoreFiles(ctx context.Context, onError func(path string, err error)) error {
for _, file := range r.files { // TODO conditionally enable when debug log is on
dbgmsg := file.location + ": " // for _, file := range r.files {
r.idx.forEachFilePack(file, func(packIdx int, packID restic.ID, packBlobs []restic.Blob) bool { // dbgmsg := file.location + ": "
if packIdx > 0 { // r.idx.forEachFilePack(file, func(packIdx int, packID restic.ID, packBlobs []restic.Blob) bool {
dbgmsg += ", " // if packIdx > 0 {
} // dbgmsg += ", "
dbgmsg += "pack{id=" + packID.Str() + ", blobs: " // }
for blobIdx, blob := range packBlobs { // dbgmsg += "pack{id=" + packID.Str() + ", blobs: "
if blobIdx > 0 { // for blobIdx, blob := range packBlobs {
dbgmsg += ", " // if blobIdx > 0 {
} // dbgmsg += ", "
dbgmsg += blob.ID.Str() // }
} // dbgmsg += blob.ID.Str()
dbgmsg += "}" // }
return true // keep going // dbgmsg += "}"
}) // return true // keep going
debug.Log(dbgmsg) // })
} // debug.Log(dbgmsg)
// }
// synchronously create empty files (empty files need no packs and are ignored by packQueue)
for _, file := range r.files {
fullpath := r.targetPath(file.location)
if len(file.blobs) == 0 {
wr, err := os.OpenFile(fullpath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0600)
if err == nil {
wr.Close()
} else {
onError(file.location, err)
}
}
}
inprogress := make(map[*fileInfo]struct{}) inprogress := make(map[*fileInfo]struct{})
queue, err := newPackQueue(r.idx, r.files, func(files map[*fileInfo]struct{}) bool { queue, err := newPackQueue(r.idx, r.files, func(files map[*fileInfo]struct{}) bool {

View file

@ -210,15 +210,3 @@ func TestFileRestorerBasic(t *testing.T) {
}, },
}) })
} }
func TestFileRestorerEmptyFile(t *testing.T) {
tempdir, cleanup := rtest.TempDir(t)
defer cleanup()
restoreAndVerify(t, tempdir, []TestFile{
TestFile{
name: "empty",
blobs: []TestBlob{},
},
})
}

View file

@ -175,6 +175,19 @@ func (res *Restorer) restoreHardlinkAt(node *restic.Node, target, path, location
return res.restoreNodeMetadataTo(node, path, location) return res.restoreNodeMetadataTo(node, path, location)
} }
func (res *Restorer) restoreEmptyFileAt(node *restic.Node, target, location string) error {
wr, err := os.OpenFile(target, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0600)
if err != nil {
return err
}
err = wr.Close()
if err != nil {
return err
}
return res.restoreNodeMetadataTo(node, target, location)
}
// RestoreTo creates the directories and files in the snapshot below dst. // RestoreTo creates the directories and files in the snapshot below dst.
// Before an item is created, res.Filter is called. // Before an item is created, res.Filter is called.
func (res *Restorer) RestoreTo(ctx context.Context, dst string) error { func (res *Restorer) RestoreTo(ctx context.Context, dst string) error {
@ -215,6 +228,10 @@ func (res *Restorer) RestoreTo(ctx context.Context, dst string) error {
return nil return nil
} }
if node.Size == 0 {
return nil // deal with empty files later
}
if node.Links > 1 { if node.Links > 1 {
if idx.Has(node.Inode, node.DeviceID) { if idx.Has(node.Inode, node.DeviceID) {
return nil return nil
@ -241,14 +258,22 @@ func (res *Restorer) RestoreTo(ctx context.Context, dst string) error {
return res.traverseTree(ctx, dst, string(filepath.Separator), *res.sn.Tree, treeVisitor{ return res.traverseTree(ctx, dst, string(filepath.Separator), *res.sn.Tree, treeVisitor{
enterDir: noop, enterDir: noop,
visitNode: func(node *restic.Node, target, location string) error { visitNode: func(node *restic.Node, target, location string) error {
if idx.Has(node.Inode, node.DeviceID) && idx.GetFilename(node.Inode, node.DeviceID) != location {
return res.restoreHardlinkAt(node, filerestorer.targetPath(idx.GetFilename(node.Inode, node.DeviceID)), target, location)
}
if node.Type != "file" { if node.Type != "file" {
return res.restoreNodeTo(ctx, node, target, location) return res.restoreNodeTo(ctx, node, target, location)
} }
// create empty files, but not hardlinks to empty files
if node.Size == 0 && (node.Links < 2 || !idx.Has(node.Inode, node.DeviceID)) {
if node.Links > 1 {
idx.Add(node.Inode, node.DeviceID, location)
}
return res.restoreEmptyFileAt(node, target, location)
}
if idx.Has(node.Inode, node.DeviceID) && idx.GetFilename(node.Inode, node.DeviceID) != location {
return res.restoreHardlinkAt(node, filerestorer.targetPath(idx.GetFilename(node.Inode, node.DeviceID)), target, location)
}
return res.restoreNodeMetadataTo(node, target, location) return res.restoreNodeMetadataTo(node, target, location)
}, },
leaveDir: restoreNodeMetadata, leaveDir: restoreNodeMetadata,

View file

@ -24,7 +24,9 @@ type Snapshot struct {
} }
type File struct { type File struct {
Data string Data string
Links uint64
Inode uint64
} }
type Dir struct { type Dir struct {
@ -51,22 +53,33 @@ func saveDir(t testing.TB, repo restic.Repository, nodes map[string]Node, inode
tree := &restic.Tree{} tree := &restic.Tree{}
for name, n := range nodes { for name, n := range nodes {
inode++ inode++
var id restic.ID
switch node := n.(type) { switch node := n.(type) {
case File: case File:
id = saveFile(t, repo, node) fi := n.(File).Inode
if fi == 0 {
fi = inode
}
lc := n.(File).Links
if lc == 0 {
lc = 1
}
fc := []restic.ID{}
if len(n.(File).Data) > 0 {
fc = append(fc, saveFile(t, repo, node))
}
tree.Insert(&restic.Node{ tree.Insert(&restic.Node{
Type: "file", Type: "file",
Mode: 0644, Mode: 0644,
Name: name, Name: name,
UID: uint32(os.Getuid()), UID: uint32(os.Getuid()),
GID: uint32(os.Getgid()), GID: uint32(os.Getgid()),
Content: []restic.ID{id}, Content: fc,
Size: uint64(len(n.(File).Data)), Size: uint64(len(n.(File).Data)),
Inode: inode, Inode: fi,
Links: lc,
}) })
case Dir: case Dir:
id = saveDir(t, repo, node.Nodes, inode) id := saveDir(t, repo, node.Nodes, inode)
mode := node.Mode mode := node.Mode
if mode == 0 { if mode == 0 {
@ -98,7 +111,7 @@ func saveSnapshot(t testing.TB, repo restic.Repository, snapshot Snapshot) (*res
ctx, cancel := context.WithCancel(context.Background()) ctx, cancel := context.WithCancel(context.Background())
defer cancel() defer cancel()
treeID := saveDir(t, repo, snapshot.Nodes, 0) treeID := saveDir(t, repo, snapshot.Nodes, 1000)
err := repo.Flush(ctx) err := repo.Flush(ctx)
if err != nil { if err != nil {
@ -142,10 +155,10 @@ func TestRestorer(t *testing.T) {
{ {
Snapshot: Snapshot{ Snapshot: Snapshot{
Nodes: map[string]Node{ Nodes: map[string]Node{
"foo": File{"content: foo\n"}, "foo": File{Data: "content: foo\n"},
"dirtest": Dir{ "dirtest": Dir{
Nodes: map[string]Node{ Nodes: map[string]Node{
"file": File{"content: file\n"}, "file": File{Data: "content: file\n"},
}, },
}, },
}, },
@ -158,13 +171,13 @@ func TestRestorer(t *testing.T) {
{ {
Snapshot: Snapshot{ Snapshot: Snapshot{
Nodes: map[string]Node{ Nodes: map[string]Node{
"top": File{"toplevel file"}, "top": File{Data: "toplevel file"},
"dir": Dir{ "dir": Dir{
Nodes: map[string]Node{ Nodes: map[string]Node{
"file": File{"file in dir"}, "file": File{Data: "file in dir"},
"subdir": Dir{ "subdir": Dir{
Nodes: map[string]Node{ Nodes: map[string]Node{
"file": File{"file in subdir"}, "file": File{Data: "file in subdir"},
}, },
}, },
}, },
@ -183,7 +196,7 @@ func TestRestorer(t *testing.T) {
"dir": Dir{ "dir": Dir{
Mode: 0444, Mode: 0444,
}, },
"file": File{"top-level file"}, "file": File{Data: "top-level file"},
}, },
}, },
Files: map[string]string{ Files: map[string]string{
@ -196,7 +209,7 @@ func TestRestorer(t *testing.T) {
"dir": Dir{ "dir": Dir{
Mode: 0555, Mode: 0555,
Nodes: map[string]Node{ Nodes: map[string]Node{
"file": File{"file in dir"}, "file": File{Data: "file in dir"},
}, },
}, },
}, },
@ -208,7 +221,7 @@ func TestRestorer(t *testing.T) {
{ {
Snapshot: Snapshot{ Snapshot: Snapshot{
Nodes: map[string]Node{ Nodes: map[string]Node{
"topfile": File{"top-level file"}, "topfile": File{Data: "top-level file"},
}, },
}, },
Files: map[string]string{ Files: map[string]string{
@ -220,7 +233,7 @@ func TestRestorer(t *testing.T) {
Nodes: map[string]Node{ Nodes: map[string]Node{
"dir": Dir{ "dir": Dir{
Nodes: map[string]Node{ Nodes: map[string]Node{
"file": File{"content: file\n"}, "file": File{Data: "content: file\n"},
}, },
}, },
}, },
@ -245,8 +258,8 @@ func TestRestorer(t *testing.T) {
{ {
Snapshot: Snapshot{ Snapshot: Snapshot{
Nodes: map[string]Node{ Nodes: map[string]Node{
`..\test`: File{"foo\n"}, `..\test`: File{Data: "foo\n"},
`..\..\foo\..\bar\..\xx\test2`: File{"test2\n"}, `..\..\foo\..\bar\..\xx\test2`: File{Data: "test2\n"},
}, },
}, },
ErrorsMay: map[string]map[string]struct{}{ ErrorsMay: map[string]map[string]struct{}{
@ -259,8 +272,8 @@ func TestRestorer(t *testing.T) {
{ {
Snapshot: Snapshot{ Snapshot: Snapshot{
Nodes: map[string]Node{ Nodes: map[string]Node{
`../test`: File{"foo\n"}, `../test`: File{Data: "foo\n"},
`../../foo/../bar/../xx/test2`: File{"test2\n"}, `../../foo/../bar/../xx/test2`: File{Data: "test2\n"},
}, },
}, },
ErrorsMay: map[string]map[string]struct{}{ ErrorsMay: map[string]map[string]struct{}{
@ -273,16 +286,16 @@ func TestRestorer(t *testing.T) {
{ {
Snapshot: Snapshot{ Snapshot: Snapshot{
Nodes: map[string]Node{ Nodes: map[string]Node{
"top": File{"toplevel file"}, "top": File{Data: "toplevel file"},
"x": Dir{ "x": Dir{
Nodes: map[string]Node{ Nodes: map[string]Node{
"file1": File{"file1"}, "file1": File{Data: "file1"},
"..": Dir{ "..": Dir{
Nodes: map[string]Node{ Nodes: map[string]Node{
"file2": File{"file2"}, "file2": File{Data: "file2"},
"..": Dir{ "..": Dir{
Nodes: map[string]Node{ Nodes: map[string]Node{
"file2": File{"file2"}, "file2": File{Data: "file2"},
}, },
}, },
}, },
@ -404,10 +417,10 @@ func TestRestorerRelative(t *testing.T) {
{ {
Snapshot: Snapshot{ Snapshot: Snapshot{
Nodes: map[string]Node{ Nodes: map[string]Node{
"foo": File{"content: foo\n"}, "foo": File{Data: "content: foo\n"},
"dirtest": Dir{ "dirtest": Dir{
Nodes: map[string]Node{ Nodes: map[string]Node{
"file": File{"content: file\n"}, "file": File{Data: "content: file\n"},
}, },
}, },
}, },
@ -526,12 +539,12 @@ func TestRestorerTraverseTree(t *testing.T) {
Snapshot: Snapshot{ Snapshot: Snapshot{
Nodes: map[string]Node{ Nodes: map[string]Node{
"dir": Dir{Nodes: map[string]Node{ "dir": Dir{Nodes: map[string]Node{
"otherfile": File{"x"}, "otherfile": File{Data: "x"},
"subdir": Dir{Nodes: map[string]Node{ "subdir": Dir{Nodes: map[string]Node{
"file": File{"content: file\n"}, "file": File{Data: "content: file\n"},
}}, }},
}}, }},
"foo": File{"content: foo\n"}, "foo": File{Data: "content: foo\n"},
}, },
}, },
Select: func(item string, dstpath string, node *restic.Node) (selectForRestore bool, childMayBeSelected bool) { Select: func(item string, dstpath string, node *restic.Node) (selectForRestore bool, childMayBeSelected bool) {
@ -553,12 +566,12 @@ func TestRestorerTraverseTree(t *testing.T) {
Snapshot: Snapshot{ Snapshot: Snapshot{
Nodes: map[string]Node{ Nodes: map[string]Node{
"dir": Dir{Nodes: map[string]Node{ "dir": Dir{Nodes: map[string]Node{
"otherfile": File{"x"}, "otherfile": File{Data: "x"},
"subdir": Dir{Nodes: map[string]Node{ "subdir": Dir{Nodes: map[string]Node{
"file": File{"content: file\n"}, "file": File{Data: "content: file\n"},
}}, }},
}}, }},
"foo": File{"content: foo\n"}, "foo": File{Data: "content: foo\n"},
}, },
}, },
Select: func(item string, dstpath string, node *restic.Node) (selectForRestore bool, childMayBeSelected bool) { Select: func(item string, dstpath string, node *restic.Node) (selectForRestore bool, childMayBeSelected bool) {
@ -574,11 +587,11 @@ func TestRestorerTraverseTree(t *testing.T) {
{ {
Snapshot: Snapshot{ Snapshot: Snapshot{
Nodes: map[string]Node{ Nodes: map[string]Node{
"aaa": File{"content: foo\n"}, "aaa": File{Data: "content: foo\n"},
"dir": Dir{Nodes: map[string]Node{ "dir": Dir{Nodes: map[string]Node{
"otherfile": File{"x"}, "otherfile": File{Data: "x"},
"subdir": Dir{Nodes: map[string]Node{ "subdir": Dir{Nodes: map[string]Node{
"file": File{"content: file\n"}, "file": File{Data: "content: file\n"},
}}, }},
}}, }},
}, },
@ -599,12 +612,12 @@ func TestRestorerTraverseTree(t *testing.T) {
Snapshot: Snapshot{ Snapshot: Snapshot{
Nodes: map[string]Node{ Nodes: map[string]Node{
"dir": Dir{Nodes: map[string]Node{ "dir": Dir{Nodes: map[string]Node{
"otherfile": File{"x"}, "otherfile": File{Data: "x"},
"subdir": Dir{Nodes: map[string]Node{ "subdir": Dir{Nodes: map[string]Node{
"file": File{"content: file\n"}, "file": File{Data: "content: file\n"},
}}, }},
}}, }},
"foo": File{"content: foo\n"}, "foo": File{Data: "content: foo\n"},
}, },
}, },
Select: func(item string, dstpath string, node *restic.Node) (selectForRestore bool, childMayBeSelected bool) { Select: func(item string, dstpath string, node *restic.Node) (selectForRestore bool, childMayBeSelected bool) {
@ -628,12 +641,12 @@ func TestRestorerTraverseTree(t *testing.T) {
Snapshot: Snapshot{ Snapshot: Snapshot{
Nodes: map[string]Node{ Nodes: map[string]Node{
"dir": Dir{Nodes: map[string]Node{ "dir": Dir{Nodes: map[string]Node{
"otherfile": File{"x"}, "otherfile": File{Data: "x"},
"subdir": Dir{Nodes: map[string]Node{ "subdir": Dir{Nodes: map[string]Node{
"file": File{"content: file\n"}, "file": File{Data: "content: file\n"},
}}, }},
}}, }},
"foo": File{"content: foo\n"}, "foo": File{Data: "content: foo\n"},
}, },
}, },
Select: func(item string, dstpath string, node *restic.Node) (selectForRestore bool, childMayBeSelected bool) { Select: func(item string, dstpath string, node *restic.Node) (selectForRestore bool, childMayBeSelected bool) {

View file

@ -0,0 +1,61 @@
//+build !windows
package restorer
import (
"context"
"os"
"path/filepath"
"syscall"
"testing"
"github.com/restic/restic/internal/repository"
"github.com/restic/restic/internal/restic"
rtest "github.com/restic/restic/internal/test"
)
func TestRestorerRestoreEmptyHardlinkedFileds(t *testing.T) {
repo, cleanup := repository.TestRepository(t)
defer cleanup()
_, id := saveSnapshot(t, repo, Snapshot{
Nodes: map[string]Node{
"dirtest": Dir{
Nodes: map[string]Node{
"file1": File{Links: 2, Inode: 1},
"file2": File{Links: 2, Inode: 1},
},
},
},
})
res, err := NewRestorer(repo, id)
rtest.OK(t, err)
res.SelectFilter = func(item string, dstpath string, node *restic.Node) (selectedForRestore bool, childMayBeSelected bool) {
return true, true
}
tempdir, cleanup := rtest.TempDir(t)
defer cleanup()
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
err = res.RestoreTo(ctx, tempdir)
rtest.OK(t, err)
f1, err := os.Stat(filepath.Join(tempdir, "dirtest/file1"))
rtest.OK(t, err)
rtest.Equals(t, int64(0), f1.Size())
s1, ok1 := f1.Sys().(*syscall.Stat_t)
f2, err := os.Stat(filepath.Join(tempdir, "dirtest/file2"))
rtest.OK(t, err)
rtest.Equals(t, int64(0), f2.Size())
s2, ok2 := f2.Sys().(*syscall.Stat_t)
if ok1 && ok2 {
rtest.Equals(t, s1.Ino, s2.Ino)
}
}