restore: Chang fileInfo to use snapshot location instead of target path

* uses less memory as common prefix is only stored once
* stepping stone for simpler error callback api, which
  will allow further memory footprint reduction

Signed-off-by: Igor Fedorenko <igor@ifedorenko.com>
This commit is contained in:
Igor Fedorenko 2018-09-14 20:18:37 -04:00 committed by Alexander Neumann
parent 32d5ceba87
commit 9f3ca97ee8
6 changed files with 63 additions and 53 deletions

View file

@ -4,6 +4,7 @@ import (
"context" "context"
"io" "io"
"os" "os"
"path/filepath"
"github.com/restic/restic/internal/crypto" "github.com/restic/restic/internal/crypto"
"github.com/restic/restic/internal/debug" "github.com/restic/restic/internal/debug"
@ -19,7 +20,6 @@ import (
// TODO consider replacing pack file cache with blob cache // TODO consider replacing pack file cache with blob cache
// TODO avoid decrypting the same blob multiple times // TODO avoid decrypting the same blob multiple times
// TODO evaluate disabled debug logging overhead for large repositories // TODO evaluate disabled debug logging overhead for large repositories
// TODO consider logging snapshot-relative path to reduce log clutter
const ( const (
workerCount = 8 workerCount = 8
@ -37,8 +37,8 @@ const (
// information about regular file being restored // information about regular file being restored
type fileInfo struct { type fileInfo struct {
path string // full path to the file on local filesystem location string // file on local filesystem relative to restorer basedir
blobs []restic.ID // remaining blobs of the file blobs []restic.ID // remaining blobs of the file
} }
// information about a data pack required to restore one or more files // information about a data pack required to restore one or more files
@ -65,21 +65,27 @@ type fileRestorer struct {
packCache *packCache // pack cache packCache *packCache // pack cache
filesWriter *filesWriter // file write filesWriter *filesWriter // file write
dst string
files []*fileInfo files []*fileInfo
} }
func newFileRestorer(packLoader func(ctx context.Context, h restic.Handle, length int, offset int64, fn func(rd io.Reader) error) error, key *crypto.Key, idx filePackTraverser) *fileRestorer { func newFileRestorer(dst string, packLoader func(ctx context.Context, h restic.Handle, length int, offset int64, fn func(rd io.Reader) error) error, key *crypto.Key, idx filePackTraverser) *fileRestorer {
return &fileRestorer{ return &fileRestorer{
packLoader: packLoader, packLoader: packLoader,
key: key, key: key,
idx: idx, idx: idx,
filesWriter: newFilesWriter(filesWriterCount), filesWriter: newFilesWriter(filesWriterCount),
packCache: newPackCache(packCacheCapacity), packCache: newPackCache(packCacheCapacity),
dst: dst,
} }
} }
func (r *fileRestorer) addFile(path string, content restic.IDs) { func (r *fileRestorer) addFile(location string, content restic.IDs) {
r.files = append(r.files, &fileInfo{path: path, blobs: content}) r.files = append(r.files, &fileInfo{location: location, blobs: content})
}
func (r *fileRestorer) targetPath(location string) string {
return filepath.Join(r.dst, location)
} }
// used to pass information among workers (wish golang channels allowed multivalues) // used to pass information among workers (wish golang channels allowed multivalues)
@ -90,7 +96,7 @@ type processingInfo struct {
func (r *fileRestorer) restoreFiles(ctx context.Context, onError func(path string, err error)) error { func (r *fileRestorer) restoreFiles(ctx context.Context, onError func(path string, err error)) error {
for _, file := range r.files { for _, file := range r.files {
dbgmsg := file.path + ": " dbgmsg := file.location + ": "
r.idx.forEachFilePack(file, func(packIdx int, packID restic.ID, packBlobs []restic.Blob) bool { r.idx.forEachFilePack(file, func(packIdx int, packID restic.ID, packBlobs []restic.Blob) bool {
if packIdx > 0 { if packIdx > 0 {
dbgmsg += ", " dbgmsg += ", "
@ -110,12 +116,13 @@ func (r *fileRestorer) restoreFiles(ctx context.Context, onError func(path strin
// synchronously create empty files (empty files need no packs and are ignored by packQueue) // synchronously create empty files (empty files need no packs and are ignored by packQueue)
for _, file := range r.files { for _, file := range r.files {
fullpath := r.targetPath(file.location)
if len(file.blobs) == 0 { if len(file.blobs) == 0 {
wr, err := os.OpenFile(file.path, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0600) wr, err := os.OpenFile(fullpath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0600)
if err == nil { if err == nil {
wr.Close() wr.Close()
} else { } else {
onError(file.path, err) onError(file.location, err)
} }
} }
} }
@ -172,9 +179,10 @@ func (r *fileRestorer) restoreFiles(ctx context.Context, onError func(path strin
var success []*fileInfo var success []*fileInfo
var failure []*fileInfo var failure []*fileInfo
for file, ferr := range ferrors { for file, ferr := range ferrors {
target := r.targetPath(file.location)
if ferr != nil { if ferr != nil {
onError(file.path, ferr) onError(file.location, ferr)
r.filesWriter.close(file) r.filesWriter.close(target)
delete(inprogress, file) delete(inprogress, file)
failure = append(failure, file) failure = append(failure, file)
} else { } else {
@ -183,7 +191,7 @@ func (r *fileRestorer) restoreFiles(ctx context.Context, onError func(path strin
return false // only interesed in the first pack return false // only interesed in the first pack
}) })
if len(file.blobs) == 0 { if len(file.blobs) == 0 {
r.filesWriter.close(file) r.filesWriter.close(target)
delete(inprogress, file) delete(inprogress, file)
} }
success = append(success, file) success = append(success, file)
@ -281,12 +289,13 @@ func (r *fileRestorer) processPack(ctx context.Context, request processingInfo,
defer rd.Close() defer rd.Close()
for file := range request.files { for file := range request.files {
target := r.targetPath(file.location)
r.idx.forEachFilePack(file, func(packIdx int, packID restic.ID, packBlobs []restic.Blob) bool { r.idx.forEachFilePack(file, func(packIdx int, packID restic.ID, packBlobs []restic.Blob) bool {
for _, blob := range packBlobs { for _, blob := range packBlobs {
debug.Log("Writing blob %s (%d bytes) from pack %s to %s", blob.ID.Str(), blob.Length, packID.Str(), file.path) debug.Log("Writing blob %s (%d bytes) from pack %s to %s", blob.ID.Str(), blob.Length, packID.Str(), file.location)
buf, err := r.loadBlob(rd, blob) buf, err := r.loadBlob(rd, blob)
if err == nil { if err == nil {
err = r.filesWriter.writeToFile(file, buf) err = r.filesWriter.writeToFile(target, buf)
} }
if err != nil { if err != nil {
request.files[file] = err request.files[file] = err

View file

@ -62,7 +62,7 @@ func (i *TestRepo) pack(queue *packQueue, name string) *packInfo {
} }
func (i *TestRepo) fileContent(file *fileInfo) string { func (i *TestRepo) fileContent(file *fileInfo) string {
return i.filesPathToContent[file.path] return i.filesPathToContent[file.location]
} }
func newTestRepo(content []TestFile) *TestRepo { func newTestRepo(content []TestFile) *TestRepo {
@ -135,7 +135,7 @@ func newTestRepo(content []TestFile) *TestRepo {
for _, blob := range file.blobs { for _, blob := range file.blobs {
content = append(content, restic.Hash([]byte(blob.data))) content = append(content, restic.Hash([]byte(blob.data)))
} }
files = append(files, &fileInfo{path: file.name, blobs: content}) files = append(files, &fileInfo{location: file.name, blobs: content})
} }
repo := &TestRepo{ repo := &TestRepo{
@ -160,10 +160,10 @@ func newTestRepo(content []TestFile) *TestRepo {
return repo return repo
} }
func restoreAndVerify(t *testing.T, content []TestFile) { func restoreAndVerify(t *testing.T, tempdir string, content []TestFile) {
repo := newTestRepo(content) repo := newTestRepo(content)
r := newFileRestorer(repo.loader, repo.key, repo.idx) r := newFileRestorer(tempdir, repo.loader, repo.key, repo.idx)
r.files = repo.files r.files = repo.files
r.restoreFiles(context.TODO(), func(path string, err error) { r.restoreFiles(context.TODO(), func(path string, err error) {
@ -171,17 +171,18 @@ func restoreAndVerify(t *testing.T, content []TestFile) {
}) })
for _, file := range repo.files { for _, file := range repo.files {
data, err := ioutil.ReadFile(file.path) target := r.targetPath(file.location)
data, err := ioutil.ReadFile(target)
if err != nil { if err != nil {
t.Errorf("unable to read file %v: %v", file.path, err) t.Errorf("unable to read file %v: %v", file.location, err)
continue continue
} }
rtest.Equals(t, false, r.filesWriter.writers.Contains(file.path)) rtest.Equals(t, false, r.filesWriter.writers.Contains(target))
content := repo.fileContent(file) content := repo.fileContent(file)
if !bytes.Equal(data, []byte(content)) { if !bytes.Equal(data, []byte(content)) {
t.Errorf("file %v has wrong content: want %q, got %q", file.path, content, data) t.Errorf("file %v has wrong content: want %q, got %q", file.location, content, data)
} }
} }
@ -192,16 +193,16 @@ func TestFileRestorerBasic(t *testing.T) {
tempdir, cleanup := rtest.TempDir(t) tempdir, cleanup := rtest.TempDir(t)
defer cleanup() defer cleanup()
restoreAndVerify(t, []TestFile{ restoreAndVerify(t, tempdir, []TestFile{
TestFile{ TestFile{
name: tempdir + "/file1", name: "file1",
blobs: []TestBlob{ blobs: []TestBlob{
TestBlob{"data1-1", "pack1-1"}, TestBlob{"data1-1", "pack1-1"},
TestBlob{"data1-2", "pack1-2"}, TestBlob{"data1-2", "pack1-2"},
}, },
}, },
TestFile{ TestFile{
name: tempdir + "/file2", name: "file2",
blobs: []TestBlob{ blobs: []TestBlob{
TestBlob{"data2-1", "pack2-1"}, TestBlob{"data2-1", "pack2-1"},
TestBlob{"data2-2", "pack2-2"}, TestBlob{"data2-2", "pack2-2"},
@ -214,9 +215,9 @@ func TestFileRestorerEmptyFile(t *testing.T) {
tempdir, cleanup := rtest.TempDir(t) tempdir, cleanup := rtest.TempDir(t)
defer cleanup() defer cleanup()
restoreAndVerify(t, []TestFile{ restoreAndVerify(t, tempdir, []TestFile{
TestFile{ TestFile{
name: tempdir + "/empty", name: "empty",
blobs: []TestBlob{}, blobs: []TestBlob{},
}, },
}) })

View file

@ -11,9 +11,9 @@ import (
) )
type filesWriter struct { type filesWriter struct {
lock sync.Mutex // guards concurrent access lock sync.Mutex // guards concurrent access
inprogress map[*fileInfo]struct{} // (logically) opened file writers inprogress map[string]struct{} // (logically) opened file writers
writers simplelru.LRUCache // key: *fileInfo, value: *os.File writers simplelru.LRUCache // key: string, value: *os.File
} }
func newFilesWriter(count int) *filesWriter { func newFilesWriter(count int) *filesWriter {
@ -21,30 +21,30 @@ func newFilesWriter(count int) *filesWriter {
value.(*os.File).Close() value.(*os.File).Close()
debug.Log("Closed and purged cached writer for %v", key) debug.Log("Closed and purged cached writer for %v", key)
}) })
return &filesWriter{inprogress: make(map[*fileInfo]struct{}), writers: writers} return &filesWriter{inprogress: make(map[string]struct{}), writers: writers}
} }
func (w *filesWriter) writeToFile(file *fileInfo, buf []byte) error { func (w *filesWriter) writeToFile(path string, buf []byte) error {
acquireWriter := func() (io.Writer, error) { acquireWriter := func() (io.Writer, error) {
w.lock.Lock() w.lock.Lock()
defer w.lock.Unlock() defer w.lock.Unlock()
if wr, ok := w.writers.Get(file); ok { if wr, ok := w.writers.Get(path); ok {
debug.Log("Used cached writer for %s", file.path) debug.Log("Used cached writer for %s", path)
return wr.(*os.File), nil return wr.(*os.File), nil
} }
var flags int var flags int
if _, append := w.inprogress[file]; append { if _, append := w.inprogress[path]; append {
flags = os.O_APPEND | os.O_WRONLY flags = os.O_APPEND | os.O_WRONLY
} else { } else {
w.inprogress[file] = struct{}{} w.inprogress[path] = struct{}{}
flags = os.O_CREATE | os.O_TRUNC | os.O_WRONLY flags = os.O_CREATE | os.O_TRUNC | os.O_WRONLY
} }
wr, err := os.OpenFile(file.path, flags, 0600) wr, err := os.OpenFile(path, flags, 0600)
if err != nil { if err != nil {
return nil, err return nil, err
} }
w.writers.Add(file, wr) w.writers.Add(path, wr)
debug.Log("Opened and cached writer for %s", file.path) debug.Log("Opened and cached writer for %s", path)
return wr, nil return wr, nil
} }
@ -57,14 +57,14 @@ func (w *filesWriter) writeToFile(file *fileInfo, buf []byte) error {
return err return err
} }
if n != len(buf) { if n != len(buf) {
return errors.Errorf("error writing file %v: wrong length written, want %d, got %d", file.path, len(buf), n) return errors.Errorf("error writing file %v: wrong length written, want %d, got %d", path, len(buf), n)
} }
return nil return nil
} }
func (w *filesWriter) close(file *fileInfo) { func (w *filesWriter) close(path string) {
w.lock.Lock() w.lock.Lock()
defer w.lock.Unlock() defer w.lock.Unlock()
w.writers.Remove(file) w.writers.Remove(path)
delete(w.inprogress, file) delete(w.inprogress, path)
} }

View file

@ -13,8 +13,8 @@ func TestFilesWriterBasic(t *testing.T) {
w := newFilesWriter(1) w := newFilesWriter(1)
f1 := &fileInfo{path: dir + "/f1"} f1 := dir + "/f1"
f2 := &fileInfo{path: dir + "/f2"} f2 := dir + "/f2"
rtest.OK(t, w.writeToFile(f1, []byte{1})) rtest.OK(t, w.writeToFile(f1, []byte{1}))
rtest.Equals(t, 1, w.writers.Len()) rtest.Equals(t, 1, w.writers.Len())
@ -34,11 +34,11 @@ func TestFilesWriterBasic(t *testing.T) {
rtest.Equals(t, 0, w.writers.Len()) rtest.Equals(t, 0, w.writers.Len())
rtest.Equals(t, 0, len(w.inprogress)) rtest.Equals(t, 0, len(w.inprogress))
buf, err := ioutil.ReadFile(f1.path) buf, err := ioutil.ReadFile(f1)
rtest.OK(t, err) rtest.OK(t, err)
rtest.Equals(t, []byte{1, 1}, buf) rtest.Equals(t, []byte{1, 1}, buf)
buf, err = ioutil.ReadFile(f2.path) buf, err = ioutil.ReadFile(f2)
rtest.OK(t, err) rtest.OK(t, err)
rtest.Equals(t, []byte{2, 2}, buf) rtest.Equals(t, []byte{2, 2}, buf)
} }

View file

@ -112,7 +112,7 @@ func (h *packQueue) nextPack() (*packInfo, []*fileInfo) {
var files []*fileInfo var files []*fileInfo
for file := range pack.files { for file := range pack.files {
h.idx.forEachFilePack(file, func(packIdx int, packID restic.ID, packBlobs []restic.Blob) bool { h.idx.forEachFilePack(file, func(packIdx int, packID restic.ID, packBlobs []restic.Blob) bool {
debug.Log("Pack #%d %s (%d blobs) used by %s", packIdx, packID.Str(), len(packBlobs), file.path) debug.Log("Pack #%d %s (%d blobs) used by %s", packIdx, packID.Str(), len(packBlobs), file.location)
if pack.id == packID { if pack.id == packID {
files = append(files, file) files = append(files, file)
} }

View file

@ -192,7 +192,7 @@ func (res *Restorer) RestoreTo(ctx context.Context, dst string) error {
idx := restic.NewHardlinkIndex() idx := restic.NewHardlinkIndex()
filerestorer := newFileRestorer(res.repo.Backend().Load, res.repo.Key(), filePackTraverser{lookup: res.repo.Index().Lookup}) filerestorer := newFileRestorer(dst, res.repo.Backend().Load, res.repo.Key(), filePackTraverser{lookup: res.repo.Index().Lookup})
// path->node map, only needed to call res.Error, which uses the node during tests // path->node map, only needed to call res.Error, which uses the node during tests
nodes := make(map[string]*restic.Node) nodes := make(map[string]*restic.Node)
@ -221,11 +221,11 @@ func (res *Restorer) RestoreTo(ctx context.Context, dst string) error {
if idx.Has(node.Inode, node.DeviceID) { if idx.Has(node.Inode, node.DeviceID) {
return nil return nil
} }
idx.Add(node.Inode, node.DeviceID, target) idx.Add(node.Inode, node.DeviceID, location)
} }
nodes[target] = node nodes[target] = node
filerestorer.addFile(target, node.Content) filerestorer.addFile(location, node.Content)
return nil return nil
}, },
@ -244,8 +244,8 @@ func (res *Restorer) RestoreTo(ctx context.Context, dst string) error {
return res.traverseTree(ctx, dst, string(filepath.Separator), *res.sn.Tree, treeVisitor{ return res.traverseTree(ctx, dst, string(filepath.Separator), *res.sn.Tree, treeVisitor{
enterDir: noop, enterDir: noop,
visitNode: func(node *restic.Node, target, location string) error { visitNode: func(node *restic.Node, target, location string) error {
if idx.Has(node.Inode, node.DeviceID) && idx.GetFilename(node.Inode, node.DeviceID) != target { if idx.Has(node.Inode, node.DeviceID) && idx.GetFilename(node.Inode, node.DeviceID) != location {
return res.restoreHardlinkAt(node, idx.GetFilename(node.Inode, node.DeviceID), target, location) return res.restoreHardlinkAt(node, filerestorer.targetPath(idx.GetFilename(node.Inode, node.DeviceID)), target, location)
} }
if node.Type != "file" { if node.Type != "file" {