From 1213d8fef4df73e8af3c69ab194b3f3c36daed8a Mon Sep 17 00:00:00 2001 From: Igor Fedorenko Date: Sat, 7 Apr 2018 20:32:19 -0400 Subject: [PATCH 01/13] Cleanup: more realistic restorer test data setup Signed-off-by: Igor Fedorenko --- internal/restorer/restorer_test.go | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/internal/restorer/restorer_test.go b/internal/restorer/restorer_test.go index c5fdd6cb8..a0194fa30 100644 --- a/internal/restorer/restorer_test.go +++ b/internal/restorer/restorer_test.go @@ -44,12 +44,13 @@ func saveFile(t testing.TB, repo restic.Repository, node File) restic.ID { return id } -func saveDir(t testing.TB, repo restic.Repository, nodes map[string]Node) restic.ID { +func saveDir(t testing.TB, repo restic.Repository, nodes map[string]Node, inode uint64) restic.ID { ctx, cancel := context.WithCancel(context.Background()) defer cancel() tree := &restic.Tree{} for name, n := range nodes { + inode++ var id restic.ID switch node := n.(type) { case File: @@ -61,9 +62,11 @@ func saveDir(t testing.TB, repo restic.Repository, nodes map[string]Node) restic UID: uint32(os.Getuid()), GID: uint32(os.Getgid()), Content: []restic.ID{id}, + Size: uint64(len(n.(File).Data)), + Inode: inode, }) case Dir: - id = saveDir(t, repo, node.Nodes) + id = saveDir(t, repo, node.Nodes, inode) mode := node.Mode if mode == 0 { @@ -95,7 +98,7 @@ func saveSnapshot(t testing.TB, repo restic.Repository, snapshot Snapshot) (*res ctx, cancel := context.WithCancel(context.Background()) defer cancel() - treeID := saveDir(t, repo, snapshot.Nodes) + treeID := saveDir(t, repo, snapshot.Nodes, 0) err := repo.Flush(ctx) if err != nil { From 1869930d95d8c9c1f49e042b5379c0aab72aa353 Mon Sep 17 00:00:00 2001 From: Igor Fedorenko Date: Sun, 8 Apr 2018 08:02:30 -0400 Subject: [PATCH 02/13] restore: New optimized multithreaded implementation Signed-off-by: Igor Fedorenko --- RESTORE_concurrent-pack-order.md | 30 +++ changelog/unreleased/issue-1605 | 15 ++ cmd/restic/cmd_restore.go | 18 +- internal/restorer/filepacktraverser.go | 52 ++++ internal/restorer/filerestorer.go | 330 +++++++++++++++++++++++++ internal/restorer/filerestorer_test.go | 226 +++++++++++++++++ internal/restorer/fileswriter.go | 69 ++++++ internal/restorer/packcache.go | 238 ++++++++++++++++++ internal/restorer/packcache_test.go | 290 ++++++++++++++++++++++ internal/restorer/packheap.go | 51 ++++ internal/restorer/packqueue.go | 224 +++++++++++++++++ internal/restorer/packqueue_test.go | 236 ++++++++++++++++++ internal/restorer/restorer.go | 95 +++++-- internal/restorer/restorer_test.go | 4 +- 14 files changed, 1854 insertions(+), 24 deletions(-) create mode 100644 RESTORE_concurrent-pack-order.md create mode 100644 changelog/unreleased/issue-1605 create mode 100644 internal/restorer/filepacktraverser.go create mode 100644 internal/restorer/filerestorer.go create mode 100644 internal/restorer/filerestorer_test.go create mode 100644 internal/restorer/fileswriter.go create mode 100644 internal/restorer/packcache.go create mode 100644 internal/restorer/packcache_test.go create mode 100644 internal/restorer/packheap.go create mode 100644 internal/restorer/packqueue.go create mode 100644 internal/restorer/packqueue_test.go diff --git a/RESTORE_concurrent-pack-order.md b/RESTORE_concurrent-pack-order.md new file mode 100644 index 000000000..52837b162 --- /dev/null +++ b/RESTORE_concurrent-pack-order.md @@ -0,0 +1,30 @@ +The main goal of this POC is two fold + +1. Reduce number of backend request, specifically + * download all required blobs of a pack with single backend request + * avoid repeated pack downloads when possible +2. Download multiple pack files concurrently + +Here is high-level pseudo-code of the how the POC attempts to achieve these goals + +``` +while there are packs to process + choose a pack to process [1] + get the pack from the backend or cache [2] + write pack blobs to the files that need them [3] + if not all pack blobs were used + cache the pack for future use [4] +``` + +Pack download and processing (steps [2] - [4]) runs on multiple concurrent goroutings. The POC runs all steps [2]-[4] sequentially on the same gorouting, but it is possible to split the work differently. For example, one pool of workers can handle download (step [2]) while the other pool can handle pack processing (steps [3] and [4]). + +Before a pack is downloaded (step [2]), the required space is "reserved" in the pack cache, which may purge some cached packed to make room for the reservation (well, that's the plan but purging isn't implemented in the POC). Actual download uses single backend request to get all required pack blobs. This may download blobs that are not needed, but I assume it'll still be faster than getting individual blobs. We should be able to optimize this further by changing `Backend.Load(...)` to support multiple byte ranges, for example. + +Target files are written (step [3]) in the "right" order, first file blob first, then second, then third and so on. Blob write order implies that some pack blobs may not be immediately used, i.e. they are "out of order" for their respective target files. Packs with unused blobs are cached (step [4]). The cache has capacity limit and may purge packs before they are fully used, in which case the purged packs will need to be redownloaded. + +Choosing which pack to process next (step [1]) is little convoluted in the POC. The code avoids processing of any given pack and any given target file by multiple workers concurrently. It also tries to reduce likelihook a pack will be purged from the cache by counting how many other packs will need to be processed before the pack is fully used up. Packs that need fewer other packs are processed first, everything else being equal. + +---- +Other ideas to consider + +* Allow out-of-order target file writes. Inprogress restore will be somewhat confusing to observe, but this will eliminate the need to cache packs and should simplify implimentation. On the other hand, crashed/killed restore will be harder to recover. diff --git a/changelog/unreleased/issue-1605 b/changelog/unreleased/issue-1605 new file mode 100644 index 000000000..ba93051ac --- /dev/null +++ b/changelog/unreleased/issue-1605 @@ -0,0 +1,15 @@ +Enhancement: Concurrent restore + +This change significantly improves restore performance, especially +when using high-latency remote repositories like B2. + +Implementation uses threads to download and process miltiple remote +files concurrently. To further reduce restore time, each remote +file is downloaded using single repository request. + +Old restore implementation can be enabled with `--signethreaded` flag. +Use `--verify` restore flag to read restored files and verify their +content checksum. + +https://github.com/restic/restic/issues/1605 +https://github.com/restic/restic/pull/1719 diff --git a/cmd/restic/cmd_restore.go b/cmd/restic/cmd_restore.go index 4bf59c06f..c538f33ae 100644 --- a/cmd/restic/cmd_restore.go +++ b/cmd/restic/cmd_restore.go @@ -28,13 +28,14 @@ repository. // RestoreOptions collects all options for the restore command. type RestoreOptions struct { - Exclude []string - Include []string - Target string - Host string - Paths []string - Tags restic.TagLists - Verify bool + Exclude []string + Include []string + Target string + Host string + Paths []string + Tags restic.TagLists + Verify bool + SingleThreaded bool } var restoreOptions RestoreOptions @@ -51,6 +52,7 @@ func init() { flags.Var(&restoreOptions.Tags, "tag", "only consider snapshots which include this `taglist` for snapshot ID \"latest\"") flags.StringArrayVar(&restoreOptions.Paths, "path", nil, "only consider snapshots which include this (absolute) `path` for snapshot ID \"latest\"") flags.BoolVar(&restoreOptions.Verify, "verify", false, "verify restored files content") + flags.BoolVar(&restoreOptions.SingleThreaded, "singlethreaded", false, "use single-threaded (legacy) restore implementation") } func runRestore(opts RestoreOptions, gopts GlobalOptions, args []string) error { @@ -155,7 +157,7 @@ func runRestore(opts RestoreOptions, gopts GlobalOptions, args []string) error { Verbosef("restoring %s to %s\n", res.Snapshot(), opts.Target) - err = res.RestoreTo(ctx, opts.Target) + err = res.RestoreTo(ctx, opts.Target, opts.SingleThreaded) if err == nil && opts.Verify { Verbosef("verifying files in %s\n", opts.Target) var count int diff --git a/internal/restorer/filepacktraverser.go b/internal/restorer/filepacktraverser.go new file mode 100644 index 000000000..bba61e0f9 --- /dev/null +++ b/internal/restorer/filepacktraverser.go @@ -0,0 +1,52 @@ +package restorer + +import ( + "github.com/restic/restic/internal/errors" + "github.com/restic/restic/internal/restic" +) + +type filePackTraverser struct { + lookup func(restic.ID, restic.BlobType) ([]restic.PackedBlob, bool) +} + +// iterates over all remaining packs of the file +func (t *filePackTraverser) forEachFilePack(file *fileInfo, fn func(packIdx int, packID restic.ID, packBlobs []restic.Blob) bool) error { + if len(file.blobs) == 0 { + return nil + } + + getBlobPack := func(blobID restic.ID) (restic.PackedBlob, error) { + packs, found := t.lookup(blobID, restic.DataBlob) + if !found { + return restic.PackedBlob{}, errors.Errorf("Unknown blob %s", blobID.String()) + } + // TODO which pack to use if multiple packs have the blob? + // MUST return the same pack for the same blob during the same execution + return packs[0], nil + } + + var prevPackID restic.ID + var prevPackBlobs []restic.Blob + packIdx := 0 + for _, blobID := range file.blobs { + packedBlob, err := getBlobPack(blobID) + if err != nil { + return err + } + if !prevPackID.IsNull() && prevPackID != packedBlob.PackID { + if !fn(packIdx, prevPackID, prevPackBlobs) { + return nil + } + packIdx++ + } + if prevPackID != packedBlob.PackID { + prevPackID = packedBlob.PackID + prevPackBlobs = make([]restic.Blob, 0) + } + prevPackBlobs = append(prevPackBlobs, packedBlob.Blob) + } + if len(prevPackBlobs) > 0 { + fn(packIdx, prevPackID, prevPackBlobs) + } + return nil +} diff --git a/internal/restorer/filerestorer.go b/internal/restorer/filerestorer.go new file mode 100644 index 000000000..702f5a12e --- /dev/null +++ b/internal/restorer/filerestorer.go @@ -0,0 +1,330 @@ +package restorer + +import ( + "context" + "io" + "os" + + "github.com/restic/restic/internal/crypto" + "github.com/restic/restic/internal/debug" + "github.com/restic/restic/internal/errors" + "github.com/restic/restic/internal/restic" +) + +// TODO if a blob is corrupt, there may be good blob copies in other packs +// TODO evaluate if it makes sense to split download and processing workers +// pro: can (slowly) read network and decrypt/write files concurrently +// con: each worker needs to keep one pack in memory +// TODO evaluate memory footprint for larger repositories, say 10M packs/10M files +// TODO consider replacing pack file cache with blob cache +// TODO avoid decrypting the same blob multiple times +// TODO remove `restore --singlethreaded` and review/simplify hardlink support +// (node.CreateAt shouldn't take HardlinkIndex) +// TODO evaluate disabled debug logging overhead for large repositories +// TODO consider logging snapshot-relative path to reduce log clutter + +const ( + workerCount = 8 + + // max number of open output file handles + filesWriterCount = 32 + + // estimated average pack size used to calculate pack cache capacity + averagePackSize = 5 * 1024 * 1024 + + // pack cache capacity should support at least one cached pack per worker + // allow space for extra 5 packs for actual caching + packCacheCapacity = (workerCount + 5) * averagePackSize +) + +// information about regular file being restored +type fileInfo struct { + path string // full path to the file on local filesystem + blobs []restic.ID // remaining blobs of the file +} + +// information about a data pack required to restore one or more files +type packInfo struct { + // the pack id + id restic.ID + + // set of files that use blobs from this pack + files map[*fileInfo]struct{} + + // number of other packs that must be downloaded before all blobs in this pack can be used + cost int + + // used by packHeap + index int +} + +// fileRestorer restores set of files +type fileRestorer struct { + key *crypto.Key + idx filePackTraverser + packLoader func(ctx context.Context, h restic.Handle, length int, offset int64, fn func(rd io.Reader) error) error + + packCache *packCache // pack cache + filesWriter *filesWriter // file write + + files []*fileInfo +} + +func newFileRestorer(packLoader func(ctx context.Context, h restic.Handle, length int, offset int64, fn func(rd io.Reader) error) error, key *crypto.Key, idx filePackTraverser) *fileRestorer { + return &fileRestorer{ + packLoader: packLoader, + key: key, + idx: idx, + filesWriter: newFilesWriter(filesWriterCount), + packCache: newPackCache(packCacheCapacity), + } +} + +func (r *fileRestorer) addFile(path string, content restic.IDs) { + r.files = append(r.files, &fileInfo{path: path, blobs: content}) +} + +// used to pass information among workers (wish golang channels allowed multivalues) +type processingInfo struct { + pack *packInfo + files map[*fileInfo]error +} + +func (r *fileRestorer) restoreFiles(ctx context.Context, onError func(path string, err error)) error { + for _, file := range r.files { + dbgmsg := file.path + ": " + r.idx.forEachFilePack(file, func(packIdx int, packID restic.ID, packBlobs []restic.Blob) bool { + if packIdx > 0 { + dbgmsg += ", " + } + dbgmsg += "pack{id=" + packID.Str() + ", blobs: " + for blobIdx, blob := range packBlobs { + if blobIdx > 0 { + dbgmsg += ", " + } + dbgmsg += blob.ID.Str() + } + dbgmsg += "}" + return true // keep going + }) + debug.Log(dbgmsg) + } + + // synchronously create empty files (empty files need no packs and are ignored by packQueue) + for _, file := range r.files { + if len(file.blobs) == 0 { + wr, err := os.OpenFile(file.path, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0600) + if err == nil { + wr.Close() + } else { + onError(file.path, err) + } + } + } + + inprogress := make(map[*fileInfo]struct{}) + queue, err := newPackQueue(r.idx, r.files, func(files map[*fileInfo]struct{}) bool { + for file := range files { + if _, found := inprogress[file]; found { + return true + } + } + return false + }) + if err != nil { + return err + } + + // workers + downloadCh := make(chan processingInfo) + feedbackCh := make(chan processingInfo) + + defer close(downloadCh) + defer close(feedbackCh) + + worker := func() { + for { + select { + case <-ctx.Done(): + return + case request, ok := <-downloadCh: + if !ok { + return // channel closed + } + rd, err := r.downloadPack(ctx, request.pack) + if err == nil { + r.processPack(ctx, request, rd) + } else { + // mark all files as failed + for file := range request.files { + request.files[file] = err + } + } + feedbackCh <- request + } + } + } + for i := 0; i < workerCount; i++ { + go worker() + } + + processFeedback := func(pack *packInfo, ferrors map[*fileInfo]error) { + // update files blobIdx + // must do it here to avoid race among worker and processing feedback threads + var success []*fileInfo + var failure []*fileInfo + for file, ferr := range ferrors { + if ferr != nil { + onError(file.path, ferr) + r.filesWriter.close(file) + delete(inprogress, file) + failure = append(failure, file) + } else { + r.idx.forEachFilePack(file, func(packIdx int, packID restic.ID, packBlobs []restic.Blob) bool { + file.blobs = file.blobs[len(packBlobs):] + return false // only interesed in the first pack + }) + if len(file.blobs) == 0 { + r.filesWriter.close(file) + delete(inprogress, file) + } + success = append(success, file) + } + } + // update the queue and requeueu the pack as necessary + if !queue.requeuePack(pack, success, failure) { + r.packCache.remove(pack.id) + debug.Log("Purged used up pack %s from pack cache", pack.id.Str()) + } + } + + // the main restore loop + for !queue.isEmpty() { + debug.Log("-----------------------------------") + pack, files := queue.nextPack() + if pack != nil { + ferrors := make(map[*fileInfo]error) + for _, file := range files { + ferrors[file] = nil + inprogress[file] = struct{}{} + } + select { + case <-ctx.Done(): + return ctx.Err() + case downloadCh <- processingInfo{pack: pack, files: ferrors}: + debug.Log("Scheduled download pack %s (%d files)", pack.id.Str(), len(files)) + case feedback := <-feedbackCh: + queue.requeuePack(pack, []*fileInfo{}, []*fileInfo{}) // didn't use the pack during this iteration + processFeedback(feedback.pack, feedback.files) + } + } else { + select { + case <-ctx.Done(): + return ctx.Err() + case feedback := <-feedbackCh: + processFeedback(feedback.pack, feedback.files) + } + } + } + + return nil +} + +func (r *fileRestorer) downloadPack(ctx context.Context, pack *packInfo) (readerAtCloser, error) { + const MaxInt64 = 1<<63 - 1 // odd Go does not have this predefined somewhere + + // calculate pack byte range + start, end := int64(MaxInt64), int64(0) + for file := range pack.files { + r.idx.forEachFilePack(file, func(packIdx int, packID restic.ID, packBlobs []restic.Blob) bool { + if packID.Equal(pack.id) { + for _, blob := range packBlobs { + if start > int64(blob.Offset) { + start = int64(blob.Offset) + } + if end < int64(blob.Offset+blob.Length) { + end = int64(blob.Offset + blob.Length) + } + } + } + + return true // keep going + }) + } + + packReader, err := r.packCache.get(pack.id, start, int(end-start), func(offset int64, length int, wr io.WriteSeeker) error { + h := restic.Handle{Type: restic.DataFile, Name: pack.id.String()} + return r.packLoader(ctx, h, length, offset, func(rd io.Reader) error { + // reset the file in case of a download retry + _, err := wr.Seek(0, io.SeekStart) + if err != nil { + return err + } + + len, err := io.Copy(wr, rd) + if err != nil { + return err + } + if len != int64(length) { + return errors.Errorf("unexpected pack size: expected %d but got %d", length, len) + } + + return nil + }) + }) + if err != nil { + return nil, err + } + + return packReader, nil +} + +func (r *fileRestorer) processPack(ctx context.Context, request processingInfo, rd readerAtCloser) { + defer rd.Close() + + for file := range request.files { + r.idx.forEachFilePack(file, func(packIdx int, packID restic.ID, packBlobs []restic.Blob) bool { + for _, blob := range packBlobs { + debug.Log("Writing blob %s (%d bytes) from pack %s to %s", blob.ID.Str(), blob.Length, packID.Str(), file.path) + buf, err := r.loadBlob(rd, blob) + if err == nil { + err = r.filesWriter.writeToFile(file, buf) + } + if err != nil { + request.files[file] = err + break // could not restore the file + } + } + return false + }) + } +} + +func (r *fileRestorer) loadBlob(rd io.ReaderAt, blob restic.Blob) ([]byte, error) { + // TODO reconcile with Repository#loadBlob implementation + + buf := make([]byte, blob.Length) + + n, err := rd.ReadAt(buf, int64(blob.Offset)) + if err != nil { + return nil, err + } + + if n != int(blob.Length) { + return nil, errors.Errorf("error loading blob %v: wrong length returned, want %d, got %d", blob.ID.Str(), blob.Length, n) + } + + // decrypt + nonce, ciphertext := buf[:r.key.NonceSize()], buf[r.key.NonceSize():] + plaintext, err := r.key.Open(ciphertext[:0], nonce, ciphertext, nil) + if err != nil { + return nil, errors.Errorf("decrypting blob %v failed: %v", blob.ID, err) + } + + // check hash + if !restic.Hash(plaintext).Equal(blob.ID) { + return nil, errors.Errorf("blob %v returned invalid hash", blob.ID) + } + + return plaintext, nil +} diff --git a/internal/restorer/filerestorer_test.go b/internal/restorer/filerestorer_test.go new file mode 100644 index 000000000..75d6e199d --- /dev/null +++ b/internal/restorer/filerestorer_test.go @@ -0,0 +1,226 @@ +package restorer + +import ( + "bytes" + "context" + "io" + "io/ioutil" + "testing" + + "github.com/restic/restic/internal/crypto" + "github.com/restic/restic/internal/errors" + "github.com/restic/restic/internal/restic" + rtest "github.com/restic/restic/internal/test" +) + +/////////////////////////////////////////////////////////////////////////////// +// test helpers (TODO move to a dedicated file?) +/////////////////////////////////////////////////////////////////////////////// + +type _Blob struct { + data string + pack string +} + +type _File struct { + name string + blobs []_Blob +} + +type _TestData struct { + key *crypto.Key + + // pack names and ids + packsNameToID map[string]restic.ID + packsIDToName map[restic.ID]string + packsIDToData map[restic.ID][]byte + + // blobs and files + blobs map[restic.ID][]restic.PackedBlob + files []*fileInfo + filesPathToContent map[string]string + + // + loader func(ctx context.Context, h restic.Handle, length int, offset int64, fn func(rd io.Reader) error) error + + // + idx filePackTraverser +} + +func (i *_TestData) Lookup(blobID restic.ID, _ restic.BlobType) ([]restic.PackedBlob, bool) { + packs, found := i.blobs[blobID] + return packs, found +} + +func (i *_TestData) packName(pack *packInfo) string { + return i.packsIDToName[pack.id] +} +func (i *_TestData) packID(name string) restic.ID { + return i.packsNameToID[name] +} + +func (i *_TestData) pack(queue *packQueue, name string) *packInfo { + id := i.packsNameToID[name] + return queue.packs[id] +} + +func (i *_TestData) fileContent(file *fileInfo) string { + return i.filesPathToContent[file.path] +} + +func _newTestData(_files []_File) *_TestData { + type _Pack struct { + name string + data []byte + blobs map[restic.ID]restic.Blob + } + _packs := make(map[string]_Pack) + + key := crypto.NewRandomKey() + seal := func(data []byte) []byte { + ciphertext := restic.NewBlobBuffer(len(data)) + ciphertext = ciphertext[:0] // TODO what does this actually do? + nonce := crypto.NewRandomNonce() + ciphertext = append(ciphertext, nonce...) + return key.Seal(ciphertext, nonce, data, nil) + } + + filesPathToContent := make(map[string]string) + + for _, _file := range _files { + var content string + for _, _blob := range _file.blobs { + content += _blob.data + + // get the pack, create as necessary + var _pack _Pack + var found bool // TODO is there more concise way of doing this in go? + if _pack, found = _packs[_blob.pack]; !found { + _pack = _Pack{name: _blob.pack, blobs: make(map[restic.ID]restic.Blob)} + } + + // calculate blob id and add to the pack as necessary + _blobID := restic.Hash([]byte(_blob.data)) + if _, found := _pack.blobs[_blobID]; !found { + _blobData := seal([]byte(_blob.data)) + _pack.blobs[_blobID] = restic.Blob{ + Type: restic.DataBlob, + ID: _blobID, + Length: uint(len(_blobData)), // XXX is Length encrypted or plaintext? + Offset: uint(len(_pack.data)), + } + _pack.data = append(_pack.data, _blobData...) + } + + _packs[_blob.pack] = _pack + } + filesPathToContent[_file.name] = content + } + + blobs := make(map[restic.ID][]restic.PackedBlob) + packsIDToName := make(map[restic.ID]string) + packsIDToData := make(map[restic.ID][]byte) + packsNameToID := make(map[string]restic.ID) + + for _, _pack := range _packs { + _packID := restic.Hash(_pack.data) + packsIDToName[_packID] = _pack.name + packsIDToData[_packID] = _pack.data + packsNameToID[_pack.name] = _packID + for blobID, blob := range _pack.blobs { + blobs[blobID] = append(blobs[blobID], restic.PackedBlob{Blob: blob, PackID: _packID}) + } + } + + var files []*fileInfo + for _, _file := range _files { + content := restic.IDs{} + for _, _blob := range _file.blobs { + content = append(content, restic.Hash([]byte(_blob.data))) + } + files = append(files, &fileInfo{path: _file.name, blobs: content}) + } + + _data := &_TestData{ + key: key, + packsIDToName: packsIDToName, + packsIDToData: packsIDToData, + packsNameToID: packsNameToID, + blobs: blobs, + files: files, + filesPathToContent: filesPathToContent, + } + _data.idx = filePackTraverser{lookup: _data.Lookup} + _data.loader = func(ctx context.Context, h restic.Handle, length int, offset int64, fn func(rd io.Reader) error) error { + packID, err := restic.ParseID(h.Name) + if err != nil { + return err + } + rd := bytes.NewReader(_data.packsIDToData[packID][int(offset) : int(offset)+length]) + return fn(rd) + } + + return _data +} + +func restoreAndVerify(t *testing.T, _files []_File) { + test := _newTestData(_files) + + r := newFileRestorer(test.loader, test.key, test.idx) + r.files = test.files + + r.restoreFiles(context.TODO(), func(path string, err error) { + rtest.OK(t, errors.Wrapf(err, "unexpected error")) + }) + + for _, file := range test.files { + data, err := ioutil.ReadFile(file.path) + if err != nil { + t.Errorf("unable to read file %v: %v", file.path, err) + continue + } + + rtest.Equals(t, false, r.filesWriter.writers.Contains(file.path)) + + content := test.fileContent(file) + if !bytes.Equal(data, []byte(content)) { + t.Errorf("file %v has wrong content: want %q, got %q", file.path, content, data) + } + } + + rtest.OK(t, nil) +} + +func TestFileRestorer_basic(t *testing.T) { + tempdir, cleanup := rtest.TempDir(t) + defer cleanup() + + restoreAndVerify(t, []_File{ + _File{ + name: tempdir + "/file1", + blobs: []_Blob{ + _Blob{"data1-1", "pack1-1"}, + _Blob{"data1-2", "pack1-2"}, + }, + }, + _File{ + name: tempdir + "/file2", + blobs: []_Blob{ + _Blob{"data2-1", "pack2-1"}, + _Blob{"data2-2", "pack2-2"}, + }, + }, + }) +} + +func TestFileRestorer_emptyFile(t *testing.T) { + tempdir, cleanup := rtest.TempDir(t) + defer cleanup() + + restoreAndVerify(t, []_File{ + _File{ + name: tempdir + "/empty", + blobs: []_Blob{}, + }, + }) +} diff --git a/internal/restorer/fileswriter.go b/internal/restorer/fileswriter.go new file mode 100644 index 000000000..206ddf2d5 --- /dev/null +++ b/internal/restorer/fileswriter.go @@ -0,0 +1,69 @@ +package restorer + +import ( + "io" + "os" + "sync" + + "github.com/hashicorp/golang-lru/simplelru" + "github.com/restic/restic/internal/debug" + "github.com/restic/restic/internal/errors" +) + +type filesWriter struct { + lock sync.Mutex // guards concurrent access + inprogress map[*fileInfo]struct{} // (logically) opened file writers + writers simplelru.LRUCache // key: *fileInfo, value: *os.File +} + +func newFilesWriter(count int) *filesWriter { + writers, _ := simplelru.NewLRU(count, func(key interface{}, value interface{}) { + value.(*os.File).Close() + debug.Log("Closed and purged cached writer for %v", key) + }) + return &filesWriter{inprogress: make(map[*fileInfo]struct{}), writers: writers} +} + +func (w *filesWriter) writeToFile(file *fileInfo, buf []byte) error { + acquireWriter := func() (io.Writer, error) { + w.lock.Lock() + defer w.lock.Unlock() + if wr, ok := w.writers.Get(file); ok { + debug.Log("Used cached writer for %s", file.path) + return wr.(*os.File), nil + } + var flags int + if _, append := w.inprogress[file]; append { + flags = os.O_APPEND | os.O_WRONLY + } else { + w.inprogress[file] = struct{}{} + flags = os.O_CREATE | os.O_TRUNC | os.O_WRONLY + } + wr, err := os.OpenFile(file.path, flags, 0600) + if err != nil { + return nil, err + } + w.writers.Add(file, wr) + debug.Log("Opened and cached writer for %s", file.path) + return wr, nil + } + + wr, err := acquireWriter() + if err != nil { + return err + } + n, err := wr.Write(buf) + if err != nil { + return err + } + if n != len(buf) { + return errors.Errorf("error writing file %v: wrong length written, want %d, got %d", file.path, len(buf), n) + } + return nil +} + +func (w *filesWriter) close(file *fileInfo) { + w.lock.Lock() + defer w.lock.Unlock() + w.writers.Remove(file) +} diff --git a/internal/restorer/packcache.go b/internal/restorer/packcache.go new file mode 100644 index 000000000..a40c44482 --- /dev/null +++ b/internal/restorer/packcache.go @@ -0,0 +1,238 @@ +package restorer + +import ( + "io" + "sync" + + "github.com/restic/restic/internal/debug" + "github.com/restic/restic/internal/errors" + "github.com/restic/restic/internal/restic" +) + +// packCache is thread safe in-memory cache of pack files required to restore +// one or more files. The cache is meant to hold pack files that cannot be +// fully used right away. This happens when pack files contains blobs from +// "head" of some files and "middle" of other files. "Middle" blobs cannot be +// written to their files until after blobs from some other packs are written +// to the files first. +// +// While the cache is thread safe, implementation assumes (and enforces) +// that individual entries are used by one client at a time. Clients must +// #Close() entry's reader to make the entry available for use by other +// clients. This limitation can be relaxed in the future if necessary. +type packCache struct { + // guards access to cache internal data structures + lock sync.Mutex + + // cache capacity + capacity int + reservedCapacity int + allocatedCapacity int + + // pack records currently being used by active restore worker + reservedPacks map[restic.ID]*packCacheRecord + + // unused allocated packs, can be deleted if necessary + cachedPacks map[restic.ID]*packCacheRecord +} + +type packCacheRecord struct { + master *packCacheRecord + cache *packCache + + id restic.ID // cached pack id + offset int64 // cached pack byte range + + data []byte +} + +type readerAtCloser interface { + io.Closer + io.ReaderAt +} + +type bytesWriteSeeker struct { + pos int + data []byte +} + +func (wr *bytesWriteSeeker) Write(p []byte) (n int, err error) { + if wr.pos+len(p) > len(wr.data) { + return -1, errors.Errorf("not enough space") + } + n = copy(wr.data[wr.pos:], p) + wr.pos += n + return n, nil +} + +func (wr *bytesWriteSeeker) Seek(offset int64, whence int) (int64, error) { + if offset != 0 || whence != io.SeekStart { + return -1, errors.Errorf("unsupported seek request") + } + wr.pos = 0 + return 0, nil +} + +func newPackCache(capacity int) *packCache { + return &packCache{ + capacity: capacity, + reservedPacks: make(map[restic.ID]*packCacheRecord), + cachedPacks: make(map[restic.ID]*packCacheRecord), + } +} + +func (c *packCache) reserve(packID restic.ID, offset int64, length int) (record *packCacheRecord, err error) { + c.lock.Lock() + defer c.lock.Unlock() + + if offset < 0 || length <= 0 { + return nil, errors.Errorf("illegal pack cache allocation range %s {offset: %d, length: %d}", packID.Str(), offset, length) + } + + if c.reservedCapacity+length > c.capacity { + return nil, errors.Errorf("not enough cache capacity: requested %d, available %d", length, c.capacity-c.reservedCapacity) + } + + if _, ok := c.reservedPacks[packID]; ok { + return nil, errors.Errorf("pack is already reserved %s", packID.Str()) + } + + // the pack is available in the cache and currently unused + if pack, ok := c.cachedPacks[packID]; ok { + // check if cached pack includes requested byte range + // the range can shrink, but it never grows bigger unless there is a bug elsewhere + if pack.offset > offset || (pack.offset+int64(len(pack.data))) < (offset+int64(length)) { + return nil, errors.Errorf("cached range %d-%d is smaller than requested range %d-%d for pack %s", pack.offset, pack.offset+int64(len(pack.data)), length, offset+int64(length), packID.Str()) + } + + // move the pack to the used map + delete(c.cachedPacks, packID) + c.reservedPacks[packID] = pack + c.reservedCapacity += len(pack.data) + + debug.Log("Using cached pack %s (%d bytes)", pack.id.Str(), len(pack.data)) + + if pack.offset != offset || len(pack.data) != length { + // restrict returned record to requested range + return &packCacheRecord{ + cache: c, + master: pack, + offset: offset, + data: pack.data[int(offset-pack.offset) : int(offset-pack.offset)+length], + }, nil + } + + return pack, nil + } + + for c.allocatedCapacity+length > c.capacity { + // all cached packs will be needed at some point + // so it does not matter which one to purge + for _, cached := range c.cachedPacks { + delete(c.cachedPacks, cached.id) + c.allocatedCapacity -= len(cached.data) + debug.Log("dropped cached pack %s (%d bytes)", cached.id.Str(), len(cached.data)) + break + } + } + + pack := &packCacheRecord{ + cache: c, + id: packID, + offset: offset, + } + c.reservedPacks[pack.id] = pack + c.allocatedCapacity += length + c.reservedCapacity += length + + return pack, nil +} + +// get returns reader of the specified cached pack. Uses provided load func +// to download pack content if necessary. +// The returned reader is only able to read pack within byte range specified +// by offset and length parameters, attempts to read outside that range will +// result in an error. +// The returned reader must be closed before the same packID can be requested +// from the cache again. +func (c *packCache) get(packID restic.ID, offset int64, length int, load func(offset int64, length int, wr io.WriteSeeker) error) (readerAtCloser, error) { + pack, err := c.reserve(packID, offset, length) + if err != nil { + return nil, err + } + + if pack.data == nil { + wr := &bytesWriteSeeker{data: make([]byte, length)} + err = load(offset, length, wr) + if err != nil { + return nil, err + } + if wr.pos != length { + delete(c.reservedPacks, pack.id) + c.reservedCapacity -= len(pack.data) + return nil, errors.Errorf("invalid read size") + } + pack.data = wr.data + debug.Log("Downloaded and cached pack %s (%d bytes)", pack.id.Str(), len(pack.data)) + } + + return pack, nil +} + +// releases the pack record back to the cache +func (c *packCache) release(pack *packCacheRecord) error { + c.lock.Lock() + defer c.lock.Unlock() + + if _, ok := c.reservedPacks[pack.id]; !ok { + return errors.Errorf("invalid pack release request") + } + + delete(c.reservedPacks, pack.id) + c.cachedPacks[pack.id] = pack + c.reservedCapacity -= len(pack.data) + + return nil +} + +// remove removes specified pack from the cache and frees +// corresponding cache space. should be called after the pack +// was fully used up by the restorer. +func (c *packCache) remove(packID restic.ID) error { + c.lock.Lock() + defer c.lock.Unlock() + + if _, ok := c.reservedPacks[packID]; ok { + return errors.Errorf("invalid pack remove request, pack %s is reserved", packID.Str()) + } + + pack, ok := c.cachedPacks[packID] + if !ok { + return errors.Errorf("invalid pack remove request, pack %s is not cached", packID.Str()) + } + + delete(c.cachedPacks, pack.id) + c.allocatedCapacity -= len(pack.data) + + return nil +} + +// ReadAt reads len(b) bytes from the pack starting at byte offset off. +// It returns the number of bytes read and the error, if any. +func (r *packCacheRecord) ReadAt(b []byte, off int64) (n int, err error) { + if off < r.offset || off+int64(len(b)) > r.offset+int64(len(r.data)) { + return -1, errors.Errorf("read outside available range") + } + return copy(b, r.data[off-r.offset:]), nil +} + +// Close closes the pack reader and releases corresponding cache record +// to the cache. Once closed, the record can be reused by subsequent +// requests for the same packID or it can be purged from the cache to make +// room for other packs +func (r *packCacheRecord) Close() (err error) { + if r.master != nil { + return r.cache.release(r.master) + } + return r.cache.release(r) +} diff --git a/internal/restorer/packcache_test.go b/internal/restorer/packcache_test.go new file mode 100644 index 000000000..ccf58cb9f --- /dev/null +++ b/internal/restorer/packcache_test.go @@ -0,0 +1,290 @@ +package restorer + +import ( + "io" + "testing" + + "github.com/restic/restic/internal/restic" + rtest "github.com/restic/restic/internal/test" +) + +func assertNotOK(t *testing.T, msg string, err error) { + rtest.Assert(t, err != nil, msg+" did not fail") +} + +func TestBytesWriterSeeker(t *testing.T) { + wr := &bytesWriteSeeker{data: make([]byte, 10)} + + n, err := wr.Write([]byte{1, 2}) + rtest.OK(t, err) + rtest.Equals(t, 2, n) + rtest.Equals(t, []byte{1, 2}, wr.data[0:2]) + + n64, err := wr.Seek(0, io.SeekStart) + rtest.OK(t, err) + rtest.Equals(t, int64(0), n64) + + n, err = wr.Write([]byte{0, 1, 2, 3, 4}) + rtest.OK(t, err) + rtest.Equals(t, 5, n) + n, err = wr.Write([]byte{5, 6, 7, 8, 9}) + rtest.OK(t, err) + rtest.Equals(t, 5, n) + rtest.Equals(t, []byte{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, wr.data) + + // negative tests + _, err = wr.Write([]byte{1}) + assertNotOK(t, "write overflow", err) + _, err = wr.Seek(1, io.SeekStart) + assertNotOK(t, "unsupported seek", err) +} + +func TestPackCache_basic(t *testing.T) { + assertReader := func(expected []byte, offset int64, rd io.ReaderAt) { + actual := make([]byte, len(expected)) + rd.ReadAt(actual, offset) + rtest.Equals(t, expected, actual) + } + + c := newPackCache(10) + + id := restic.NewRandomID() + + // load pack to the cache + rd, err := c.get(id, 10, 5, func(offset int64, length int, wr io.WriteSeeker) error { + rtest.Equals(t, int64(10), offset) + rtest.Equals(t, 5, length) + wr.Write([]byte{1, 2, 3, 4, 5}) + return nil + }) + rtest.OK(t, err) + assertReader([]byte{1, 2, 3, 4, 5}, 10, rd) + + // must close pack reader before can request it again + _, err = c.get(id, 10, 5, func(offset int64, length int, wr io.WriteSeeker) error { + t.Error("unexpected cache load call") + return nil + }) + assertNotOK(t, "double-reservation", err) + + // close the pack reader and get it from cache + rd.Close() + rd, err = c.get(id, 10, 5, func(offset int64, length int, wr io.WriteSeeker) error { + t.Error("unexpected cache load call") + return nil + }) + rtest.OK(t, err) + assertReader([]byte{1, 2, 3, 4, 5}, 10, rd) + + // close the pack reader and remove the pack from cache, assert the pack is loaded on request + rd.Close() + c.remove(id) + rd, err = c.get(id, 10, 5, func(offset int64, length int, wr io.WriteSeeker) error { + rtest.Equals(t, int64(10), offset) + rtest.Equals(t, 5, length) + wr.Write([]byte{1, 2, 3, 4, 5}) + return nil + }) + rtest.OK(t, err) + assertReader([]byte{1, 2, 3, 4, 5}, 10, rd) +} + +func TestPackCache_invalid_range(t *testing.T) { + c := newPackCache(10) + + id := restic.NewRandomID() + + _, err := c.get(id, -1, 1, func(offset int64, length int, wr io.WriteSeeker) error { + t.Error("unexpected cache load call") + return nil + }) + assertNotOK(t, "negative offset request", err) + + _, err = c.get(id, 0, 0, func(offset int64, length int, wr io.WriteSeeker) error { + t.Error("unexpected cache load call") + return nil + }) + assertNotOK(t, "zero length request", err) + + _, err = c.get(id, 0, -1, func(offset int64, length int, wr io.WriteSeeker) error { + t.Error("unexpected cache load call") + return nil + }) + assertNotOK(t, "negative length", err) +} + +func TestPackCache_capacity(t *testing.T) { + c := newPackCache(10) + + id1, id2, id3 := restic.NewRandomID(), restic.NewRandomID(), restic.NewRandomID() + + // load and reserve pack1 + rd1, err := c.get(id1, 0, 5, func(offset int64, length int, wr io.WriteSeeker) error { + wr.Write([]byte{1, 2, 3, 4, 5}) + return nil + }) + rtest.OK(t, err) + + // load and reserve pack2 + _, err = c.get(id2, 0, 5, func(offset int64, length int, wr io.WriteSeeker) error { + wr.Write([]byte{1, 2, 3, 4, 5}) + return nil + }) + rtest.OK(t, err) + + // can't load pack3 because not enough space in the cache + _, err = c.get(id3, 0, 5, func(offset int64, length int, wr io.WriteSeeker) error { + t.Error("unexpected cache load call") + return nil + }) + assertNotOK(t, "request over capacity", err) + + // release pack1 and try again + rd1.Close() + rd3, err := c.get(id3, 0, 5, func(offset int64, length int, wr io.WriteSeeker) error { + wr.Write([]byte{1, 2, 3, 4, 5}) + return nil + }) + rtest.OK(t, err) + + // release pack3 and load pack1 (should not come from cache) + rd3.Close() + loaded := false + rd1, err = c.get(id1, 0, 5, func(offset int64, length int, wr io.WriteSeeker) error { + wr.Write([]byte{1, 2, 3, 4, 5}) + loaded = true + return nil + }) + rtest.OK(t, err) + rtest.Equals(t, true, loaded) +} + +func TestPackCache_downsize_record(t *testing.T) { + c := newPackCache(10) + + id := restic.NewRandomID() + + // get bigger range first + rd, err := c.get(id, 5, 5, func(offset int64, length int, wr io.WriteSeeker) error { + wr.Write([]byte{1, 2, 3, 4, 5}) + return nil + }) + rtest.OK(t, err) + rd.Close() + + // invalid "resize" requests + _, err = c.get(id, 5, 10, func(offset int64, length int, wr io.WriteSeeker) error { + t.Error("unexpected pack load") + return nil + }) + assertNotOK(t, "resize cached record", err) + + // invalid before cached range request + _, err = c.get(id, 0, 5, func(offset int64, length int, wr io.WriteSeeker) error { + t.Error("unexpected pack load") + return nil + }) + assertNotOK(t, "before cached range request", err) + + // invalid after cached range request + _, err = c.get(id, 10, 5, func(offset int64, length int, wr io.WriteSeeker) error { + t.Error("unexpected pack load") + return nil + }) + assertNotOK(t, "after cached range request", err) + + // now get smaller "nested" range + rd, err = c.get(id, 7, 1, func(offset int64, length int, wr io.WriteSeeker) error { + t.Error("unexpected pack load") + return nil + }) + rtest.OK(t, err) + + // assert expected data + buf := make([]byte, 1) + rd.ReadAt(buf, 7) + rtest.Equals(t, byte(3), buf[0]) + _, err = rd.ReadAt(buf, 0) + assertNotOK(t, "read before downsized pack range", err) + _, err = rd.ReadAt(buf, 9) + assertNotOK(t, "read after downsized pack range", err) + + // can't request downsized record again + _, err = c.get(id, 7, 1, func(offset int64, length int, wr io.WriteSeeker) error { + t.Error("unexpected pack load") + return nil + }) + assertNotOK(t, "double-allocation of cache record subrange", err) + + // can't request another subrange of the original record + _, err = c.get(id, 6, 1, func(offset int64, length int, wr io.WriteSeeker) error { + t.Error("unexpected pack load") + return nil + }) + assertNotOK(t, "allocation of another subrange of cache record", err) + + // release downsized record and assert the original is back in the cache + rd.Close() + rd, err = c.get(id, 5, 5, func(offset int64, length int, wr io.WriteSeeker) error { + t.Error("unexpected pack load") + return nil + }) + rtest.OK(t, err) + rd.Close() +} + +func TestPackCache_wrong_load_size(t *testing.T) { + c := newPackCache(10) + + _, err := c.get(restic.NewRandomID(), 0, 5, func(offset int64, length int, wr io.WriteSeeker) error { + wr.Write([]byte{1}) + return nil + }) + assertNotOK(t, "not enough bytes read", err) + + _, err = c.get(restic.NewRandomID(), 0, 5, func(offset int64, length int, wr io.WriteSeeker) error { + wr.Write([]byte{1, 2, 3, 4, 5, 6}) + return nil + }) + assertNotOK(t, "too many bytes read", err) +} + +func TestPackCache_invalidRequests(t *testing.T) { + c := newPackCache(10) + + id := restic.NewRandomID() + + // + rd, _ := c.get(id, 0, 1, func(offset int64, length int, wr io.WriteSeeker) error { + wr.Write([]byte{1}) + return nil + }) + assertNotOK(t, "remove() reserved pack", c.remove(id)) + rtest.OK(t, rd.Close()) + assertNotOK(t, "multiple reader Close() calls)", rd.Close()) + + // + rtest.OK(t, c.remove(id)) + assertNotOK(t, "double remove() the same pack", c.remove(id)) +} + +func TestPackCacheRecord(t *testing.T) { + rd := &packCacheRecord{ + offset: 10, + data: []byte{1}, + } + buf := make([]byte, 1) + n, err := rd.ReadAt(buf, 10) + rtest.OK(t, err) + rtest.Equals(t, 1, n) + rtest.Equals(t, byte(1), buf[0]) + + _, err = rd.ReadAt(buf, 0) + assertNotOK(t, "read before loaded range", err) + + _, err = rd.ReadAt(buf, 11) + assertNotOK(t, "read after loaded range", err) + + _, err = rd.ReadAt(make([]byte, 2), 10) + assertNotOK(t, "read more than available data", err) +} diff --git a/internal/restorer/packheap.go b/internal/restorer/packheap.go new file mode 100644 index 000000000..9f8443d46 --- /dev/null +++ b/internal/restorer/packheap.go @@ -0,0 +1,51 @@ +package restorer + +// packHeap is a heap of packInfo references +// @see https://golang.org/pkg/container/heap/ +// @see https://en.wikipedia.org/wiki/Heap_(data_structure) +type packHeap struct { + elements []*packInfo + + // returns true if download of any of the files is in progress + inprogress func(files map[*fileInfo]struct{}) bool +} + +func (pq *packHeap) Len() int { return len(pq.elements) } + +func (pq *packHeap) Less(a, b int) bool { + packA, packB := pq.elements[a], pq.elements[b] + + ap := pq.inprogress(packA.files) + bp := pq.inprogress(packB.files) + if ap && !bp { + return true + } + + if packA.cost < packB.cost { + return true + } + + return false +} + +func (pq *packHeap) Swap(i, j int) { + pq.elements[i], pq.elements[j] = pq.elements[j], pq.elements[i] + pq.elements[i].index = i + pq.elements[j].index = j +} + +func (pq *packHeap) Push(x interface{}) { + n := len(pq.elements) + item := x.(*packInfo) + item.index = n + pq.elements = append(pq.elements, item) +} + +func (pq *packHeap) Pop() interface{} { + old := pq.elements + n := len(old) + item := old[n-1] + item.index = -1 // for safety + pq.elements = old[0 : n-1] + return item +} diff --git a/internal/restorer/packqueue.go b/internal/restorer/packqueue.go new file mode 100644 index 000000000..fb1517a17 --- /dev/null +++ b/internal/restorer/packqueue.go @@ -0,0 +1,224 @@ +package restorer + +import ( + "container/heap" + + "github.com/restic/restic/internal/debug" + "github.com/restic/restic/internal/restic" +) + +// packQueue tracks remaining file contents restore work and decides what pack +// to download and files to write next. +// +// The packs in the queue can be in one of three states: waiting, ready and +// in-progress. +// Waiting packs are the packs that only have blobs from the "middle" of their +// corresponding files and therefore cannot be used until blobs from some other +// packs are written to the files first. +// In-progress packs are the packs that were removed from the queue by #nextPack +// and must be first returned to the queue before they are considered again. +// Ready packs are the packs can be immediately used to restore at least one +// file. Internally ready packs are kept in a heap and are ordered according +// to these criteria: +// - Packs with "head" blobs of in-progress files are considered first. The +// idea is to complete restore of in-progress files before starting restore +// of other files. This is both more intuitive and also reduces number of +// open file handles needed during restore. +// - Packs with smallest cost are considered next. Pack cost is measured in +// number of other packs required before all blobs in the pack can be used +// and the pack can be removed from the pack cache. +// For example, consisder a file that requires two blobs, blob1 from pack1 +// and blob2 from pack2. The cost of pack2 is 1, because blob2 cannot be +// used before blob1 is available. The higher the cost, the longer the pack +// must be cached locally to avoid redownload. +// +// Pack queue implementation is NOT thread safe. All pack queue methods must +// be called from single gorouting AND packInfo and fileInfo instances must +// be updated synchronously from the same gorouting. +type packQueue struct { + idx filePackTraverser + + packs map[restic.ID]*packInfo // waiting and ready packs + inprogress map[*packInfo]struct{} // inprogress packs + + heap *packHeap // heap of ready packs +} + +func newPackQueue(idx filePackTraverser, files []*fileInfo, inprogress func(files map[*fileInfo]struct{}) bool) (*packQueue, error) { + packs := make(map[restic.ID]*packInfo) // all packs + + // create packInfo from fileInfo + for _, file := range files { + err := idx.forEachFilePack(file, func(packIdx int, packID restic.ID, _ []restic.Blob) bool { + pack, ok := packs[packID] + if !ok { + pack = &packInfo{ + id: packID, + index: -1, + files: make(map[*fileInfo]struct{}), + } + packs[packID] = pack + } + pack.files[file] = struct{}{} + pack.cost += packIdx + + return true // keep going + }) + if err != nil { + // repository index is messed up, can't do anything + return nil, err + } + } + + // create packInfo heap + pheap := &packHeap{inprogress: inprogress} + headPacks := restic.NewIDSet() + for _, file := range files { + idx.forEachFilePack(file, func(packIdx int, packID restic.ID, _ []restic.Blob) bool { + if !headPacks.Has(packID) { + headPacks.Insert(packID) + pack := packs[packID] + pack.index = len(pheap.elements) + pheap.elements = append(pheap.elements, pack) + } + return false // only first pack + }) + } + heap.Init(pheap) + + return &packQueue{idx: idx, packs: packs, heap: pheap, inprogress: make(map[*packInfo]struct{})}, nil +} + +// isEmpty returns true if the queue is empty, i.e. there are no more packs to +// download and files to write to. +func (h *packQueue) isEmpty() bool { + return len(h.packs) == 0 && len(h.inprogress) == 0 +} + +// nextPack returns next ready pack and corresponding files ready for download +// and processing. The returned pack and the files are marked as "in progress" +// internally and must be first returned to the queue before they are +// considered by #nextPack again. +func (h *packQueue) nextPack() (*packInfo, []*fileInfo) { + debug.Log("Ready packs %d, outstanding packs %d, inprogress packs %d", h.heap.Len(), len(h.packs), len(h.inprogress)) + + if h.heap.Len() == 0 { + return nil, nil + } + + pack := heap.Pop(h.heap).(*packInfo) + h.inprogress[pack] = struct{}{} + debug.Log("Popped pack %s (%d files), heap size=%d", pack.id.Str(), len(pack.files), len(h.heap.elements)) + var files []*fileInfo + for file := range pack.files { + h.idx.forEachFilePack(file, func(packIdx int, packID restic.ID, packBlobs []restic.Blob) bool { + debug.Log("Pack #%d %s (%d blobs) used by %s", packIdx, packID.Str(), len(packBlobs), file.path) + if pack.id == packID { + files = append(files, file) + } + return false // only interested in the fist pack here + }) + } + + return pack, files +} + +// requeuePack conditionally adds back to the queue pack previously returned by +// #nextPack. +// If the pack is needed to restore any incomplete files, adds the pack to the +// queue and adjusts order of all affected packs in the queue. Has no effect +// if the pack is not required to restore any files. +// Returns true if the pack was added to the queue, false otherwise. +func (h *packQueue) requeuePack(pack *packInfo, success []*fileInfo, failure []*fileInfo) bool { + debug.Log("Requeue pack %s (%d/%d/%d files/success/failure)", pack.id.Str(), len(pack.files), len(success), len(failure)) + + // maintain inprogress pack set + delete(h.inprogress, pack) + + affectedPacks := make(map[*packInfo]struct{}) + affectedPacks[pack] = struct{}{} // this pack is alwats affected + + // apply download success/failure to the packs + onFailure := func(file *fileInfo) { + h.idx.forEachFilePack(file, func(packInx int, packID restic.ID, _ []restic.Blob) bool { + pack := h.packs[packID] + delete(pack.files, file) + pack.cost -= packInx + affectedPacks[pack] = struct{}{} + return true // keep going + }) + } + for _, file := range failure { + onFailure(file) + } + onSuccess := func(pack *packInfo, file *fileInfo) { + remove := true + h.idx.forEachFilePack(file, func(packIdx int, packID restic.ID, _ []restic.Blob) bool { + if packID.Equal(pack.id) { + // the pack has more blobs required by the file + remove = false + } + otherPack := h.packs[packID] + otherPack.cost-- + affectedPacks[otherPack] = struct{}{} + return true // keep going + }) + if remove { + delete(pack.files, file) + } + } + for _, file := range success { + onSuccess(pack, file) + } + + // drop/update affected packs + isReady := func(affectedPack *packInfo) (ready bool) { + for file := range affectedPack.files { + h.idx.forEachFilePack(file, func(packIdx int, packID restic.ID, _ []restic.Blob) bool { + if packID.Equal(affectedPack.id) { + ready = true + } + return false // only file's first pack matters + }) + if ready { + break + } + } + return ready + } + for affectedPack := range affectedPacks { + if _, inprogress := h.inprogress[affectedPack]; !inprogress { + if len(affectedPack.files) == 0 { + // drop the pack if it isn't inprogress and has no files that need it + if affectedPack.index >= 0 { + // This can't happen unless there is a bug elsewhere: + // - "current" pack isn't in the heap, hence its index must be < 0 + // - "other" packs can't be ready (i.e. in heap) unless they have other files + // in which case len(affectedPack.files) must be > 0 + debug.Log("corrupted ready heap: removed unexpected ready pack %s", affectedPack.id.Str()) + heap.Remove(h.heap, affectedPack.index) + } + delete(h.packs, affectedPack.id) + } else { + ready := isReady(affectedPack) + switch { + case ready && affectedPack.index < 0: + heap.Push(h.heap, affectedPack) + case ready && affectedPack.index >= 0: + heap.Fix(h.heap, affectedPack.index) + case !ready && affectedPack.index >= 0: + // This can't happen unless there is a bug elsewhere: + // - "current" pack isn't in the heap, hence its index must be < 0 + // - "other" packs can't have same head blobs as the "current" pack, + // hence "other" packs can't change their readiness + debug.Log("corrupted ready heap: removed unexpected waiting pack %s", affectedPack.id.Str()) + heap.Remove(h.heap, affectedPack.index) + case !ready && affectedPack.index < 0: + // do nothing + } + } + } + } + + return len(pack.files) > 0 +} diff --git a/internal/restorer/packqueue_test.go b/internal/restorer/packqueue_test.go new file mode 100644 index 000000000..b874a7cb7 --- /dev/null +++ b/internal/restorer/packqueue_test.go @@ -0,0 +1,236 @@ +package restorer + +import ( + "testing" + + "github.com/restic/restic/internal/restic" + rtest "github.com/restic/restic/internal/test" +) + +func processPack(t *testing.T, data *_TestData, pack *packInfo, files []*fileInfo) { + for _, file := range files { + data.idx.forEachFilePack(file, func(packIdx int, packID restic.ID, packBlobs []restic.Blob) bool { + // assert file's head pack + rtest.Equals(t, pack.id, packID) + file.blobs = file.blobs[len(packBlobs):] + return false // only interested in the head pack + }) + } +} + +func TestPackQueue_basic(t *testing.T) { + data := _newTestData([]_File{ + _File{ + name: "file", + blobs: []_Blob{ + _Blob{"data1", "pack1"}, + _Blob{"data2", "pack2"}, + }, + }, + }) + + queue, err := newPackQueue(data.idx, data.files, func(_ map[*fileInfo]struct{}) bool { return false }) + rtest.OK(t, err) + + // assert initial queue state + rtest.Equals(t, false, queue.isEmpty()) + rtest.Equals(t, 0, queue.packs[data.packID("pack1")].cost) + rtest.Equals(t, 1, queue.packs[data.packID("pack2")].cost) + + // get first pack + pack, files := queue.nextPack() + rtest.Equals(t, "pack1", data.packName(pack)) + rtest.Equals(t, 1, len(files)) + rtest.Equals(t, false, queue.isEmpty()) + // TODO assert pack is inprogress + + // can't process the second pack until the first one is processed + { + pack, files := queue.nextPack() + rtest.Equals(t, true, pack == nil) + rtest.Equals(t, true, files == nil) + rtest.Equals(t, false, queue.isEmpty()) + } + + // requeue the pack without processing + rtest.Equals(t, true, queue.requeuePack(pack, []*fileInfo{}, []*fileInfo{})) + rtest.Equals(t, false, queue.isEmpty()) + rtest.Equals(t, 0, queue.packs[data.packID("pack1")].cost) + rtest.Equals(t, 1, queue.packs[data.packID("pack2")].cost) + + // get the first pack again + pack, files = queue.nextPack() + rtest.Equals(t, "pack1", data.packName(pack)) + rtest.Equals(t, 1, len(files)) + rtest.Equals(t, false, queue.isEmpty()) + + // process the first pack and return it to the queue + processPack(t, data, pack, files) + rtest.Equals(t, false, queue.requeuePack(pack, files, []*fileInfo{})) + rtest.Equals(t, 0, queue.packs[data.packID("pack2")].cost) + + // get the second pack + pack, files = queue.nextPack() + rtest.Equals(t, "pack2", data.packName(pack)) + rtest.Equals(t, 1, len(files)) + rtest.Equals(t, false, queue.isEmpty()) + + // process the second pack and return it to the queue + processPack(t, data, pack, files) + rtest.Equals(t, false, queue.requeuePack(pack, files, []*fileInfo{})) + + // all packs processed + rtest.Equals(t, true, queue.isEmpty()) +} + +func TestPackQueue_failedFile(t *testing.T) { + // point of this test is to assert that enqueuePack removes + // all references to files that failed restore + + data := _newTestData([]_File{ + _File{ + name: "file", + blobs: []_Blob{ + _Blob{"data1", "pack1"}, + _Blob{"data2", "pack2"}, + }, + }, + }) + + queue, err := newPackQueue(data.idx, data.files, func(_ map[*fileInfo]struct{}) bool { return false }) + rtest.OK(t, err) + + pack, files := queue.nextPack() + rtest.Equals(t, false, queue.requeuePack(pack, []*fileInfo{}, files /*failed*/)) + rtest.Equals(t, true, queue.isEmpty()) +} + +func TestPackQueue_ordering_cost(t *testing.T) { + // assert pack1 is selected before pack2: + // pack1 is ready to restore file1, pack2 is ready to restore file2 + // but pack2 cannot be immediately used to restore file1 + + data := _newTestData([]_File{ + _File{ + name: "file1", + blobs: []_Blob{ + _Blob{"data1", "pack1"}, + _Blob{"data2", "pack2"}, + }, + }, + _File{ + name: "file2", + blobs: []_Blob{ + _Blob{"data2", "pack2"}, + }, + }, + }) + + queue, err := newPackQueue(data.idx, data.files, func(_ map[*fileInfo]struct{}) bool { return false }) + rtest.OK(t, err) + + // assert initial pack costs + rtest.Equals(t, 0, data.pack(queue, "pack1").cost) + rtest.Equals(t, 0, data.pack(queue, "pack1").index) // head of the heap + rtest.Equals(t, 1, data.pack(queue, "pack2").cost) + rtest.Equals(t, 1, data.pack(queue, "pack2").index) + + pack, files := queue.nextPack() + // assert selected pack and queue state + rtest.Equals(t, "pack1", data.packName(pack)) + // process the pack + processPack(t, data, pack, files) + rtest.Equals(t, false, queue.requeuePack(pack, files, []*fileInfo{})) +} + +func TestPackQueue_ordering_inprogress(t *testing.T) { + // finish restoring one file before starting another + + data := _newTestData([]_File{ + _File{ + name: "file1", + blobs: []_Blob{ + _Blob{"data1-1", "pack1-1"}, + _Blob{"data1-2", "pack1-2"}, + }, + }, + _File{ + name: "file2", + blobs: []_Blob{ + _Blob{"data2-1", "pack2-1"}, + _Blob{"data2-2", "pack2-2"}, + }, + }, + }) + + var inprogress *fileInfo + queue, err := newPackQueue(data.idx, data.files, func(files map[*fileInfo]struct{}) bool { + _, found := files[inprogress] + return found + }) + rtest.OK(t, err) + + // first pack of a file + pack, files := queue.nextPack() + rtest.Equals(t, 1, len(files)) + file := files[0] + processPack(t, data, pack, files) + inprogress = files[0] + queue.requeuePack(pack, files, []*fileInfo{}) + + // second pack of the same file + pack, files = queue.nextPack() + rtest.Equals(t, 1, len(files)) + rtest.Equals(t, true, file == files[0]) // same file as before + processPack(t, data, pack, files) + inprogress = nil + queue.requeuePack(pack, files, []*fileInfo{}) + + // first pack of the second file + pack, files = queue.nextPack() + rtest.Equals(t, 1, len(files)) + rtest.Equals(t, false, file == files[0]) // different file as before +} + +func TestPackQueue_packMultiuse(t *testing.T) { + // the same pack is required multiple times to restore the same file + + data := _newTestData([]_File{ + _File{ + name: "file", + blobs: []_Blob{ + _Blob{"data1", "pack1"}, + _Blob{"data2", "pack2"}, + _Blob{"data3", "pack1"}, // pack1 reuse, new blob + _Blob{"data2", "pack2"}, // pack2 reuse, same blob + }, + }, + }) + + queue, err := newPackQueue(data.idx, data.files, func(_ map[*fileInfo]struct{}) bool { return false }) + rtest.OK(t, err) + + pack, files := queue.nextPack() + rtest.Equals(t, "pack1", data.packName(pack)) + rtest.Equals(t, 1, len(pack.files)) + processPack(t, data, pack, files) + rtest.Equals(t, true, queue.requeuePack(pack, files, []*fileInfo{})) + + pack, files = queue.nextPack() + rtest.Equals(t, "pack2", data.packName(pack)) + rtest.Equals(t, 1, len(pack.files)) + processPack(t, data, pack, files) + rtest.Equals(t, true, queue.requeuePack(pack, files, []*fileInfo{})) + + pack, files = queue.nextPack() + rtest.Equals(t, "pack1", data.packName(pack)) + processPack(t, data, pack, files) + rtest.Equals(t, false, queue.requeuePack(pack, files, []*fileInfo{})) + + pack, files = queue.nextPack() + rtest.Equals(t, "pack2", data.packName(pack)) + processPack(t, data, pack, files) + rtest.Equals(t, false, queue.requeuePack(pack, files, []*fileInfo{})) + + rtest.Equals(t, true, queue.isEmpty()) +} diff --git a/internal/restorer/restorer.go b/internal/restorer/restorer.go index 73e844ac0..8d35b015f 100644 --- a/internal/restorer/restorer.go +++ b/internal/restorer/restorer.go @@ -27,7 +27,8 @@ var restorerAbortOnAllErrors = func(str string, node *restic.Node, err error) er // NewRestorer creates a restorer preloaded with the content from the snapshot id. func NewRestorer(repo restic.Repository, id restic.ID) (*Restorer, error) { r := &Restorer{ - repo: repo, Error: restorerAbortOnAllErrors, + repo: repo, + Error: restorerAbortOnAllErrors, SelectFilter: func(string, string, *restic.Node) (bool, bool) { return true, true }, } @@ -164,7 +165,7 @@ func (res *Restorer) restoreNodeMetadataTo(node *restic.Node, target, location s // RestoreTo creates the directories and files in the snapshot below dst. // Before an item is created, res.Filter is called. -func (res *Restorer) RestoreTo(ctx context.Context, dst string) error { +func (res *Restorer) RestoreTo(ctx context.Context, dst string, singlethreaded bool) error { var err error if !filepath.IsAbs(dst) { dst, err = filepath.Abs(dst) @@ -173,35 +174,101 @@ func (res *Restorer) RestoreTo(ctx context.Context, dst string) error { } } - // make sure the target directory exists - err = fs.MkdirAll(dst, 0777) // umask takes care of dir permissions - if err != nil { - return errors.Wrap(err, "MkdirAll") + restoreNodeMetadata := func(node *restic.Node, target, location string) error { + return res.restoreNodeMetadataTo(node, target, location) } + noop := func(node *restic.Node, target, location string) error { return nil } idx := restic.NewHardlinkIndex() - return res.traverseTree(ctx, dst, string(filepath.Separator), *res.sn.Tree, treeVisitor{ + if singlethreaded { + return res.traverseTree(ctx, dst, string(filepath.Separator), *res.sn.Tree, treeVisitor{ + enterDir: func(node *restic.Node, target, location string) error { + // create dir with default permissions + // #leaveDir restores dir metadata after visiting all children + return fs.MkdirAll(target, 0700) + }, + + visitNode: func(node *restic.Node, target, location string) error { + // create parent dir with default permissions + // #leaveDir restores dir metadata after visiting all children + err := fs.MkdirAll(filepath.Dir(target), 0700) + if err != nil { + return err + } + + return res.restoreNodeTo(ctx, node, target, location, idx) + }, + + // Restore directory permissions and timestamp at the end. If we did it earlier + // - children restore could fail because of restictive directory permission + // - children restore could overwrite the timestamp of the directory they are in + leaveDir: restoreNodeMetadata, + }) + } + + filerestorer := newFileRestorer(res.repo.Backend().Load, res.repo.Key(), filePackTraverser{lookup: res.repo.Index().Lookup}) + + // path->node map, only needed to call res.Error, which uses the node during tests + nodes := make(map[string]*restic.Node) + + // first tree pass: create directories and collect all files to restore + err = res.traverseTree(ctx, dst, string(filepath.Separator), *res.sn.Tree, treeVisitor{ enterDir: func(node *restic.Node, target, location string) error { // create dir with default permissions // #leaveDir restores dir metadata after visiting all children return fs.MkdirAll(target, 0700) }, + visitNode: func(node *restic.Node, target, location string) error { // create parent dir with default permissions - // #leaveDir restores dir metadata after visiting all children + // second pass #leaveDir restores dir metadata after visiting/restoring all children err := fs.MkdirAll(filepath.Dir(target), 0700) if err != nil { return err } - return res.restoreNodeTo(ctx, node, target, location, idx) + if node.Type != "file" { + return nil + } + + if node.Links > 1 { + if idx.Has(node.Inode, node.DeviceID) { + return nil + } + idx.Add(node.Inode, node.DeviceID, target) + } + + nodes[target] = node + filerestorer.addFile(target, node.Content) + + return nil }, - leaveDir: func(node *restic.Node, target, location string) error { - // Restore directory permissions and timestamp at the end. If we did it earlier - // - children restore could fail because of restictive directory permission - // - children restore could overwrite the timestamp of the directory they are in - return res.restoreNodeMetadataTo(node, target, location) + leaveDir: noop, + }) + if err != nil { + return err + } + + err = filerestorer.restoreFiles(ctx, func(path string, err error) { res.Error(path, nodes[path], err) }) + if err != nil { + return err + } + + // second tree pass: restore special files and filesystem metadata + return res.traverseTree(ctx, dst, string(filepath.Separator), *res.sn.Tree, treeVisitor{ + enterDir: noop, + visitNode: func(node *restic.Node, target, location string) error { + isHardlink := func() bool { + return idx.Has(node.Inode, node.DeviceID) && idx.GetFilename(node.Inode, node.DeviceID) != target + } + + if node.Type != "file" || isHardlink() { + return res.restoreNodeTo(ctx, node, target, location, idx) + } + + return node.RestoreMetadata(target) }, + leaveDir: restoreNodeMetadata, }) } diff --git a/internal/restorer/restorer_test.go b/internal/restorer/restorer_test.go index a0194fa30..776dbf82e 100644 --- a/internal/restorer/restorer_test.go +++ b/internal/restorer/restorer_test.go @@ -340,7 +340,7 @@ func TestRestorer(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - err = res.RestoreTo(ctx, tempdir) + err = res.RestoreTo(ctx, tempdir, false) if err != nil { t.Fatal(err) } @@ -446,7 +446,7 @@ func TestRestorerRelative(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - err = res.RestoreTo(ctx, "restore") + err = res.RestoreTo(ctx, "restore", false) if err != nil { t.Fatal(err) } From da57302fca0d80586d6151f5eaf12dd9a61667c8 Mon Sep 17 00:00:00 2001 From: Igor Fedorenko Date: Fri, 11 May 2018 00:45:14 -0400 Subject: [PATCH 03/13] restore: Removed legacy restore implementation Signed-off-by: Igor Fedorenko --- changelog/unreleased/issue-1605 | 4 --- cmd/restic/cmd_restore.go | 18 +++++------ internal/restic/node.go | 21 ++---------- internal/restic/node_test.go | 4 +-- internal/restorer/filerestorer.go | 2 -- internal/restorer/restorer.go | 52 +++++++++++------------------- internal/restorer/restorer_test.go | 4 +-- 7 files changed, 33 insertions(+), 72 deletions(-) diff --git a/changelog/unreleased/issue-1605 b/changelog/unreleased/issue-1605 index ba93051ac..2d8597610 100644 --- a/changelog/unreleased/issue-1605 +++ b/changelog/unreleased/issue-1605 @@ -7,9 +7,5 @@ Implementation uses threads to download and process miltiple remote files concurrently. To further reduce restore time, each remote file is downloaded using single repository request. -Old restore implementation can be enabled with `--signethreaded` flag. -Use `--verify` restore flag to read restored files and verify their -content checksum. - https://github.com/restic/restic/issues/1605 https://github.com/restic/restic/pull/1719 diff --git a/cmd/restic/cmd_restore.go b/cmd/restic/cmd_restore.go index c538f33ae..4bf59c06f 100644 --- a/cmd/restic/cmd_restore.go +++ b/cmd/restic/cmd_restore.go @@ -28,14 +28,13 @@ repository. // RestoreOptions collects all options for the restore command. type RestoreOptions struct { - Exclude []string - Include []string - Target string - Host string - Paths []string - Tags restic.TagLists - Verify bool - SingleThreaded bool + Exclude []string + Include []string + Target string + Host string + Paths []string + Tags restic.TagLists + Verify bool } var restoreOptions RestoreOptions @@ -52,7 +51,6 @@ func init() { flags.Var(&restoreOptions.Tags, "tag", "only consider snapshots which include this `taglist` for snapshot ID \"latest\"") flags.StringArrayVar(&restoreOptions.Paths, "path", nil, "only consider snapshots which include this (absolute) `path` for snapshot ID \"latest\"") flags.BoolVar(&restoreOptions.Verify, "verify", false, "verify restored files content") - flags.BoolVar(&restoreOptions.SingleThreaded, "singlethreaded", false, "use single-threaded (legacy) restore implementation") } func runRestore(opts RestoreOptions, gopts GlobalOptions, args []string) error { @@ -157,7 +155,7 @@ func runRestore(opts RestoreOptions, gopts GlobalOptions, args []string) error { Verbosef("restoring %s to %s\n", res.Snapshot(), opts.Target) - err = res.RestoreTo(ctx, opts.Target, opts.SingleThreaded) + err = res.RestoreTo(ctx, opts.Target) if err == nil && opts.Verify { Verbosef("verifying files in %s\n", opts.Target) var count int diff --git a/internal/restic/node.go b/internal/restic/node.go index c8b089791..638306eac 100644 --- a/internal/restic/node.go +++ b/internal/restic/node.go @@ -135,7 +135,7 @@ func (node Node) GetExtendedAttribute(a string) []byte { } // CreateAt creates the node at the given path but does NOT restore node meta data. -func (node *Node) CreateAt(ctx context.Context, path string, repo Repository, idx *HardlinkIndex) error { +func (node *Node) CreateAt(ctx context.Context, path string, repo Repository) error { debug.Log("create node %v at %v", node.Name, path) switch node.Type { @@ -144,7 +144,7 @@ func (node *Node) CreateAt(ctx context.Context, path string, repo Repository, id return err } case "file": - if err := node.createFileAt(ctx, path, repo, idx); err != nil { + if err := node.createFileAt(ctx, path, repo); err != nil { return err } case "symlink": @@ -259,18 +259,7 @@ func (node Node) createDirAt(path string) error { return nil } -func (node Node) createFileAt(ctx context.Context, path string, repo Repository, idx *HardlinkIndex) error { - if node.Links > 1 && idx.Has(node.Inode, node.DeviceID) { - if err := fs.Remove(path); !os.IsNotExist(err) { - return errors.Wrap(err, "RemoveCreateHardlink") - } - err := fs.Link(idx.GetFilename(node.Inode, node.DeviceID), path) - if err != nil { - return errors.Wrap(err, "CreateHardlink") - } - return nil - } - +func (node Node) createFileAt(ctx context.Context, path string, repo Repository) error { f, err := fs.OpenFile(path, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0600) if err != nil { return errors.Wrap(err, "OpenFile") @@ -287,10 +276,6 @@ func (node Node) createFileAt(ctx context.Context, path string, repo Repository, return errors.Wrap(closeErr, "Close") } - if node.Links > 1 { - idx.Add(node.Inode, node.DeviceID, path) - } - return nil } diff --git a/internal/restic/node_test.go b/internal/restic/node_test.go index e262a36c6..f12353a0a 100644 --- a/internal/restic/node_test.go +++ b/internal/restic/node_test.go @@ -177,11 +177,9 @@ func TestNodeRestoreAt(t *testing.T) { } }() - idx := restic.NewHardlinkIndex() - for _, test := range nodeTests { nodePath := filepath.Join(tempdir, test.Name) - rtest.OK(t, test.CreateAt(context.TODO(), nodePath, nil, idx)) + rtest.OK(t, test.CreateAt(context.TODO(), nodePath, nil)) rtest.OK(t, test.RestoreMetadata(nodePath)) if test.Type == "symlink" && runtime.GOOS == "windows" { diff --git a/internal/restorer/filerestorer.go b/internal/restorer/filerestorer.go index 702f5a12e..14cf9117a 100644 --- a/internal/restorer/filerestorer.go +++ b/internal/restorer/filerestorer.go @@ -18,8 +18,6 @@ import ( // TODO evaluate memory footprint for larger repositories, say 10M packs/10M files // TODO consider replacing pack file cache with blob cache // TODO avoid decrypting the same blob multiple times -// TODO remove `restore --singlethreaded` and review/simplify hardlink support -// (node.CreateAt shouldn't take HardlinkIndex) // TODO evaluate disabled debug logging overhead for large repositories // TODO consider logging snapshot-relative path to reduce log clutter diff --git a/internal/restorer/restorer.go b/internal/restorer/restorer.go index 8d35b015f..6986c8549 100644 --- a/internal/restorer/restorer.go +++ b/internal/restorer/restorer.go @@ -140,10 +140,10 @@ func (res *Restorer) traverseTree(ctx context.Context, target, location string, return nil } -func (res *Restorer) restoreNodeTo(ctx context.Context, node *restic.Node, target, location string, idx *restic.HardlinkIndex) error { +func (res *Restorer) restoreNodeTo(ctx context.Context, node *restic.Node, target, location string) error { debug.Log("restoreNode %v %v %v", node.Name, target, location) - err := node.CreateAt(ctx, target, res.repo, idx) + err := node.CreateAt(ctx, target, res.repo) if err != nil { debug.Log("node.CreateAt(%s) error %v", target, err) } @@ -163,9 +163,20 @@ func (res *Restorer) restoreNodeMetadataTo(node *restic.Node, target, location s return err } +func (res *Restorer) restoreHardlinkAt(node *restic.Node, target, path, location string) error { + if err := fs.Remove(path); !os.IsNotExist(err) { + return errors.Wrap(err, "RemoveCreateHardlink") + } + err := fs.Link(target, path) + if err != nil { + return errors.Wrap(err, "CreateHardlink") + } + return res.restoreNodeMetadataTo(node, target, location) +} + // RestoreTo creates the directories and files in the snapshot below dst. // Before an item is created, res.Filter is called. -func (res *Restorer) RestoreTo(ctx context.Context, dst string, singlethreaded bool) error { +func (res *Restorer) RestoreTo(ctx context.Context, dst string) error { var err error if !filepath.IsAbs(dst) { dst, err = filepath.Abs(dst) @@ -180,31 +191,6 @@ func (res *Restorer) RestoreTo(ctx context.Context, dst string, singlethreaded b noop := func(node *restic.Node, target, location string) error { return nil } idx := restic.NewHardlinkIndex() - if singlethreaded { - return res.traverseTree(ctx, dst, string(filepath.Separator), *res.sn.Tree, treeVisitor{ - enterDir: func(node *restic.Node, target, location string) error { - // create dir with default permissions - // #leaveDir restores dir metadata after visiting all children - return fs.MkdirAll(target, 0700) - }, - - visitNode: func(node *restic.Node, target, location string) error { - // create parent dir with default permissions - // #leaveDir restores dir metadata after visiting all children - err := fs.MkdirAll(filepath.Dir(target), 0700) - if err != nil { - return err - } - - return res.restoreNodeTo(ctx, node, target, location, idx) - }, - - // Restore directory permissions and timestamp at the end. If we did it earlier - // - children restore could fail because of restictive directory permission - // - children restore could overwrite the timestamp of the directory they are in - leaveDir: restoreNodeMetadata, - }) - } filerestorer := newFileRestorer(res.repo.Backend().Load, res.repo.Key(), filePackTraverser{lookup: res.repo.Index().Lookup}) @@ -258,15 +244,15 @@ func (res *Restorer) RestoreTo(ctx context.Context, dst string, singlethreaded b return res.traverseTree(ctx, dst, string(filepath.Separator), *res.sn.Tree, treeVisitor{ enterDir: noop, visitNode: func(node *restic.Node, target, location string) error { - isHardlink := func() bool { - return idx.Has(node.Inode, node.DeviceID) && idx.GetFilename(node.Inode, node.DeviceID) != target + if idx.Has(node.Inode, node.DeviceID) && idx.GetFilename(node.Inode, node.DeviceID) != target { + return res.restoreHardlinkAt(node, idx.GetFilename(node.Inode, node.DeviceID), target, location) } - if node.Type != "file" || isHardlink() { - return res.restoreNodeTo(ctx, node, target, location, idx) + if node.Type != "file" { + return res.restoreNodeTo(ctx, node, target, location) } - return node.RestoreMetadata(target) + return res.restoreNodeMetadataTo(node, target, location) }, leaveDir: restoreNodeMetadata, }) diff --git a/internal/restorer/restorer_test.go b/internal/restorer/restorer_test.go index 776dbf82e..a0194fa30 100644 --- a/internal/restorer/restorer_test.go +++ b/internal/restorer/restorer_test.go @@ -340,7 +340,7 @@ func TestRestorer(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - err = res.RestoreTo(ctx, tempdir, false) + err = res.RestoreTo(ctx, tempdir) if err != nil { t.Fatal(err) } @@ -446,7 +446,7 @@ func TestRestorerRelative(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - err = res.RestoreTo(ctx, "restore", false) + err = res.RestoreTo(ctx, "restore") if err != nil { t.Fatal(err) } From 2b3420820bba9c8dccbcdde8a30c20cc0439805d Mon Sep 17 00:00:00 2001 From: Igor Fedorenko Date: Fri, 7 Sep 2018 23:56:14 -0400 Subject: [PATCH 04/13] go mod vendor Signed-off-by: Igor Fedorenko --- go.mod | 1 + go.sum | 2 + .../github.com/hashicorp/golang-lru/LICENSE | 362 ++++++++++++++++++ .../hashicorp/golang-lru/simplelru/lru.go | 161 ++++++++ .../golang-lru/simplelru/lru_interface.go | 36 ++ vendor/modules.txt | 2 + 6 files changed, 564 insertions(+) create mode 100644 vendor/github.com/hashicorp/golang-lru/LICENSE create mode 100644 vendor/github.com/hashicorp/golang-lru/simplelru/lru.go create mode 100644 vendor/github.com/hashicorp/golang-lru/simplelru/lru_interface.go diff --git a/go.mod b/go.mod index d2e323005..936375e55 100644 --- a/go.mod +++ b/go.mod @@ -15,6 +15,7 @@ require ( github.com/golang/protobuf v1.2.0 // indirect github.com/google/go-cmp v0.2.0 github.com/gopherjs/gopherjs v0.0.0-20180825215210-0210a2f0f73c // indirect + github.com/hashicorp/golang-lru v0.5.0 github.com/inconshreveable/mousetrap v1.0.0 // indirect github.com/jtolds/gls v4.2.1+incompatible // indirect github.com/juju/ratelimit v1.0.1 diff --git a/go.sum b/go.sum index 40d16e72c..e49fc4ca5 100644 --- a/go.sum +++ b/go.sum @@ -26,6 +26,8 @@ github.com/google/go-cmp v0.2.0 h1:+dTQ8DZQJz0Mb/HjFlkptS1FeQ4cWSnN941F8aEG4SQ= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= github.com/gopherjs/gopherjs v0.0.0-20180825215210-0210a2f0f73c h1:16eHWuMGvCjSfgRJKqIzapE78onvvTbdi1rMkU00lZw= github.com/gopherjs/gopherjs v0.0.0-20180825215210-0210a2f0f73c/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= +github.com/hashicorp/golang-lru v0.5.0 h1:CL2msUPvZTLb5O648aiLNJw3hnBxN2+1Jq8rCOH9wdo= +github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM= github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= github.com/jtolds/gls v4.2.1+incompatible h1:fSuqC+Gmlu6l/ZYAoZzx2pyucC8Xza35fpRVWLVmUEE= diff --git a/vendor/github.com/hashicorp/golang-lru/LICENSE b/vendor/github.com/hashicorp/golang-lru/LICENSE new file mode 100644 index 000000000..be2cc4dfb --- /dev/null +++ b/vendor/github.com/hashicorp/golang-lru/LICENSE @@ -0,0 +1,362 @@ +Mozilla Public License, version 2.0 + +1. Definitions + +1.1. "Contributor" + + means each individual or legal entity that creates, contributes to the + creation of, or owns Covered Software. + +1.2. "Contributor Version" + + means the combination of the Contributions of others (if any) used by a + Contributor and that particular Contributor's Contribution. + +1.3. "Contribution" + + means Covered Software of a particular Contributor. + +1.4. "Covered Software" + + means Source Code Form to which the initial Contributor has attached the + notice in Exhibit A, the Executable Form of such Source Code Form, and + Modifications of such Source Code Form, in each case including portions + thereof. + +1.5. "Incompatible With Secondary Licenses" + means + + a. that the initial Contributor has attached the notice described in + Exhibit B to the Covered Software; or + + b. that the Covered Software was made available under the terms of + version 1.1 or earlier of the License, but not also under the terms of + a Secondary License. + +1.6. "Executable Form" + + means any form of the work other than Source Code Form. + +1.7. "Larger Work" + + means a work that combines Covered Software with other material, in a + separate file or files, that is not Covered Software. + +1.8. "License" + + means this document. + +1.9. "Licensable" + + means having the right to grant, to the maximum extent possible, whether + at the time of the initial grant or subsequently, any and all of the + rights conveyed by this License. + +1.10. "Modifications" + + means any of the following: + + a. any file in Source Code Form that results from an addition to, + deletion from, or modification of the contents of Covered Software; or + + b. any new file in Source Code Form that contains any Covered Software. + +1.11. "Patent Claims" of a Contributor + + means any patent claim(s), including without limitation, method, + process, and apparatus claims, in any patent Licensable by such + Contributor that would be infringed, but for the grant of the License, + by the making, using, selling, offering for sale, having made, import, + or transfer of either its Contributions or its Contributor Version. + +1.12. "Secondary License" + + means either the GNU General Public License, Version 2.0, the GNU Lesser + General Public License, Version 2.1, the GNU Affero General Public + License, Version 3.0, or any later versions of those licenses. + +1.13. "Source Code Form" + + means the form of the work preferred for making modifications. + +1.14. "You" (or "Your") + + means an individual or a legal entity exercising rights under this + License. For legal entities, "You" includes any entity that controls, is + controlled by, or is under common control with You. For purposes of this + definition, "control" means (a) the power, direct or indirect, to cause + the direction or management of such entity, whether by contract or + otherwise, or (b) ownership of more than fifty percent (50%) of the + outstanding shares or beneficial ownership of such entity. + + +2. License Grants and Conditions + +2.1. Grants + + Each Contributor hereby grants You a world-wide, royalty-free, + non-exclusive license: + + a. under intellectual property rights (other than patent or trademark) + Licensable by such Contributor to use, reproduce, make available, + modify, display, perform, distribute, and otherwise exploit its + Contributions, either on an unmodified basis, with Modifications, or + as part of a Larger Work; and + + b. under Patent Claims of such Contributor to make, use, sell, offer for + sale, have made, import, and otherwise transfer either its + Contributions or its Contributor Version. + +2.2. Effective Date + + The licenses granted in Section 2.1 with respect to any Contribution + become effective for each Contribution on the date the Contributor first + distributes such Contribution. + +2.3. Limitations on Grant Scope + + The licenses granted in this Section 2 are the only rights granted under + this License. No additional rights or licenses will be implied from the + distribution or licensing of Covered Software under this License. + Notwithstanding Section 2.1(b) above, no patent license is granted by a + Contributor: + + a. for any code that a Contributor has removed from Covered Software; or + + b. for infringements caused by: (i) Your and any other third party's + modifications of Covered Software, or (ii) the combination of its + Contributions with other software (except as part of its Contributor + Version); or + + c. under Patent Claims infringed by Covered Software in the absence of + its Contributions. + + This License does not grant any rights in the trademarks, service marks, + or logos of any Contributor (except as may be necessary to comply with + the notice requirements in Section 3.4). + +2.4. Subsequent Licenses + + No Contributor makes additional grants as a result of Your choice to + distribute the Covered Software under a subsequent version of this + License (see Section 10.2) or under the terms of a Secondary License (if + permitted under the terms of Section 3.3). + +2.5. Representation + + Each Contributor represents that the Contributor believes its + Contributions are its original creation(s) or it has sufficient rights to + grant the rights to its Contributions conveyed by this License. + +2.6. Fair Use + + This License is not intended to limit any rights You have under + applicable copyright doctrines of fair use, fair dealing, or other + equivalents. + +2.7. Conditions + + Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted in + Section 2.1. + + +3. Responsibilities + +3.1. Distribution of Source Form + + All distribution of Covered Software in Source Code Form, including any + Modifications that You create or to which You contribute, must be under + the terms of this License. You must inform recipients that the Source + Code Form of the Covered Software is governed by the terms of this + License, and how they can obtain a copy of this License. You may not + attempt to alter or restrict the recipients' rights in the Source Code + Form. + +3.2. Distribution of Executable Form + + If You distribute Covered Software in Executable Form then: + + a. such Covered Software must also be made available in Source Code Form, + as described in Section 3.1, and You must inform recipients of the + Executable Form how they can obtain a copy of such Source Code Form by + reasonable means in a timely manner, at a charge no more than the cost + of distribution to the recipient; and + + b. You may distribute such Executable Form under the terms of this + License, or sublicense it under different terms, provided that the + license for the Executable Form does not attempt to limit or alter the + recipients' rights in the Source Code Form under this License. + +3.3. Distribution of a Larger Work + + You may create and distribute a Larger Work under terms of Your choice, + provided that You also comply with the requirements of this License for + the Covered Software. If the Larger Work is a combination of Covered + Software with a work governed by one or more Secondary Licenses, and the + Covered Software is not Incompatible With Secondary Licenses, this + License permits You to additionally distribute such Covered Software + under the terms of such Secondary License(s), so that the recipient of + the Larger Work may, at their option, further distribute the Covered + Software under the terms of either this License or such Secondary + License(s). + +3.4. Notices + + You may not remove or alter the substance of any license notices + (including copyright notices, patent notices, disclaimers of warranty, or + limitations of liability) contained within the Source Code Form of the + Covered Software, except that You may alter any license notices to the + extent required to remedy known factual inaccuracies. + +3.5. Application of Additional Terms + + You may choose to offer, and to charge a fee for, warranty, support, + indemnity or liability obligations to one or more recipients of Covered + Software. However, You may do so only on Your own behalf, and not on + behalf of any Contributor. You must make it absolutely clear that any + such warranty, support, indemnity, or liability obligation is offered by + You alone, and You hereby agree to indemnify every Contributor for any + liability incurred by such Contributor as a result of warranty, support, + indemnity or liability terms You offer. You may include additional + disclaimers of warranty and limitations of liability specific to any + jurisdiction. + +4. Inability to Comply Due to Statute or Regulation + + If it is impossible for You to comply with any of the terms of this License + with respect to some or all of the Covered Software due to statute, + judicial order, or regulation then You must: (a) comply with the terms of + this License to the maximum extent possible; and (b) describe the + limitations and the code they affect. Such description must be placed in a + text file included with all distributions of the Covered Software under + this License. Except to the extent prohibited by statute or regulation, + such description must be sufficiently detailed for a recipient of ordinary + skill to be able to understand it. + +5. Termination + +5.1. The rights granted under this License will terminate automatically if You + fail to comply with any of its terms. However, if You become compliant, + then the rights granted under this License from a particular Contributor + are reinstated (a) provisionally, unless and until such Contributor + explicitly and finally terminates Your grants, and (b) on an ongoing + basis, if such Contributor fails to notify You of the non-compliance by + some reasonable means prior to 60 days after You have come back into + compliance. Moreover, Your grants from a particular Contributor are + reinstated on an ongoing basis if such Contributor notifies You of the + non-compliance by some reasonable means, this is the first time You have + received notice of non-compliance with this License from such + Contributor, and You become compliant prior to 30 days after Your receipt + of the notice. + +5.2. If You initiate litigation against any entity by asserting a patent + infringement claim (excluding declaratory judgment actions, + counter-claims, and cross-claims) alleging that a Contributor Version + directly or indirectly infringes any patent, then the rights granted to + You by any and all Contributors for the Covered Software under Section + 2.1 of this License shall terminate. + +5.3. In the event of termination under Sections 5.1 or 5.2 above, all end user + license agreements (excluding distributors and resellers) which have been + validly granted by You or Your distributors under this License prior to + termination shall survive termination. + +6. Disclaimer of Warranty + + Covered Software is provided under this License on an "as is" basis, + without warranty of any kind, either expressed, implied, or statutory, + including, without limitation, warranties that the Covered Software is free + of defects, merchantable, fit for a particular purpose or non-infringing. + The entire risk as to the quality and performance of the Covered Software + is with You. Should any Covered Software prove defective in any respect, + You (not any Contributor) assume the cost of any necessary servicing, + repair, or correction. This disclaimer of warranty constitutes an essential + part of this License. No use of any Covered Software is authorized under + this License except under this disclaimer. + +7. Limitation of Liability + + Under no circumstances and under no legal theory, whether tort (including + negligence), contract, or otherwise, shall any Contributor, or anyone who + distributes Covered Software as permitted above, be liable to You for any + direct, indirect, special, incidental, or consequential damages of any + character including, without limitation, damages for lost profits, loss of + goodwill, work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses, even if such party shall have been + informed of the possibility of such damages. This limitation of liability + shall not apply to liability for death or personal injury resulting from + such party's negligence to the extent applicable law prohibits such + limitation. Some jurisdictions do not allow the exclusion or limitation of + incidental or consequential damages, so this exclusion and limitation may + not apply to You. + +8. Litigation + + Any litigation relating to this License may be brought only in the courts + of a jurisdiction where the defendant maintains its principal place of + business and such litigation shall be governed by laws of that + jurisdiction, without reference to its conflict-of-law provisions. Nothing + in this Section shall prevent a party's ability to bring cross-claims or + counter-claims. + +9. Miscellaneous + + This License represents the complete agreement concerning the subject + matter hereof. If any provision of this License is held to be + unenforceable, such provision shall be reformed only to the extent + necessary to make it enforceable. Any law or regulation which provides that + the language of a contract shall be construed against the drafter shall not + be used to construe this License against a Contributor. + + +10. Versions of the License + +10.1. New Versions + + Mozilla Foundation is the license steward. Except as provided in Section + 10.3, no one other than the license steward has the right to modify or + publish new versions of this License. Each version will be given a + distinguishing version number. + +10.2. Effect of New Versions + + You may distribute the Covered Software under the terms of the version + of the License under which You originally received the Covered Software, + or under the terms of any subsequent version published by the license + steward. + +10.3. Modified Versions + + If you create software not governed by this License, and you want to + create a new license for such software, you may create and use a + modified version of this License if you rename the license and remove + any references to the name of the license steward (except to note that + such modified license differs from this License). + +10.4. Distributing Source Code Form that is Incompatible With Secondary + Licenses If You choose to distribute Source Code Form that is + Incompatible With Secondary Licenses under the terms of this version of + the License, the notice described in Exhibit B of this License must be + attached. + +Exhibit A - Source Code Form License Notice + + This Source Code Form is subject to the + terms of the Mozilla Public License, v. + 2.0. If a copy of the MPL was not + distributed with this file, You can + obtain one at + http://mozilla.org/MPL/2.0/. + +If it is not possible or desirable to put the notice in a particular file, +then You may include the notice in a location (such as a LICENSE file in a +relevant directory) where a recipient would be likely to look for such a +notice. + +You may add additional accurate notices of copyright ownership. + +Exhibit B - "Incompatible With Secondary Licenses" Notice + + This Source Code Form is "Incompatible + With Secondary Licenses", as defined by + the Mozilla Public License, v. 2.0. diff --git a/vendor/github.com/hashicorp/golang-lru/simplelru/lru.go b/vendor/github.com/hashicorp/golang-lru/simplelru/lru.go new file mode 100644 index 000000000..5673773b2 --- /dev/null +++ b/vendor/github.com/hashicorp/golang-lru/simplelru/lru.go @@ -0,0 +1,161 @@ +package simplelru + +import ( + "container/list" + "errors" +) + +// EvictCallback is used to get a callback when a cache entry is evicted +type EvictCallback func(key interface{}, value interface{}) + +// LRU implements a non-thread safe fixed size LRU cache +type LRU struct { + size int + evictList *list.List + items map[interface{}]*list.Element + onEvict EvictCallback +} + +// entry is used to hold a value in the evictList +type entry struct { + key interface{} + value interface{} +} + +// NewLRU constructs an LRU of the given size +func NewLRU(size int, onEvict EvictCallback) (*LRU, error) { + if size <= 0 { + return nil, errors.New("Must provide a positive size") + } + c := &LRU{ + size: size, + evictList: list.New(), + items: make(map[interface{}]*list.Element), + onEvict: onEvict, + } + return c, nil +} + +// Purge is used to completely clear the cache. +func (c *LRU) Purge() { + for k, v := range c.items { + if c.onEvict != nil { + c.onEvict(k, v.Value.(*entry).value) + } + delete(c.items, k) + } + c.evictList.Init() +} + +// Add adds a value to the cache. Returns true if an eviction occurred. +func (c *LRU) Add(key, value interface{}) (evicted bool) { + // Check for existing item + if ent, ok := c.items[key]; ok { + c.evictList.MoveToFront(ent) + ent.Value.(*entry).value = value + return false + } + + // Add new item + ent := &entry{key, value} + entry := c.evictList.PushFront(ent) + c.items[key] = entry + + evict := c.evictList.Len() > c.size + // Verify size not exceeded + if evict { + c.removeOldest() + } + return evict +} + +// Get looks up a key's value from the cache. +func (c *LRU) Get(key interface{}) (value interface{}, ok bool) { + if ent, ok := c.items[key]; ok { + c.evictList.MoveToFront(ent) + return ent.Value.(*entry).value, true + } + return +} + +// Contains checks if a key is in the cache, without updating the recent-ness +// or deleting it for being stale. +func (c *LRU) Contains(key interface{}) (ok bool) { + _, ok = c.items[key] + return ok +} + +// Peek returns the key value (or undefined if not found) without updating +// the "recently used"-ness of the key. +func (c *LRU) Peek(key interface{}) (value interface{}, ok bool) { + var ent *list.Element + if ent, ok = c.items[key]; ok { + return ent.Value.(*entry).value, true + } + return nil, ok +} + +// Remove removes the provided key from the cache, returning if the +// key was contained. +func (c *LRU) Remove(key interface{}) (present bool) { + if ent, ok := c.items[key]; ok { + c.removeElement(ent) + return true + } + return false +} + +// RemoveOldest removes the oldest item from the cache. +func (c *LRU) RemoveOldest() (key interface{}, value interface{}, ok bool) { + ent := c.evictList.Back() + if ent != nil { + c.removeElement(ent) + kv := ent.Value.(*entry) + return kv.key, kv.value, true + } + return nil, nil, false +} + +// GetOldest returns the oldest entry +func (c *LRU) GetOldest() (key interface{}, value interface{}, ok bool) { + ent := c.evictList.Back() + if ent != nil { + kv := ent.Value.(*entry) + return kv.key, kv.value, true + } + return nil, nil, false +} + +// Keys returns a slice of the keys in the cache, from oldest to newest. +func (c *LRU) Keys() []interface{} { + keys := make([]interface{}, len(c.items)) + i := 0 + for ent := c.evictList.Back(); ent != nil; ent = ent.Prev() { + keys[i] = ent.Value.(*entry).key + i++ + } + return keys +} + +// Len returns the number of items in the cache. +func (c *LRU) Len() int { + return c.evictList.Len() +} + +// removeOldest removes the oldest item from the cache. +func (c *LRU) removeOldest() { + ent := c.evictList.Back() + if ent != nil { + c.removeElement(ent) + } +} + +// removeElement is used to remove a given list element from the cache +func (c *LRU) removeElement(e *list.Element) { + c.evictList.Remove(e) + kv := e.Value.(*entry) + delete(c.items, kv.key) + if c.onEvict != nil { + c.onEvict(kv.key, kv.value) + } +} diff --git a/vendor/github.com/hashicorp/golang-lru/simplelru/lru_interface.go b/vendor/github.com/hashicorp/golang-lru/simplelru/lru_interface.go new file mode 100644 index 000000000..74c707744 --- /dev/null +++ b/vendor/github.com/hashicorp/golang-lru/simplelru/lru_interface.go @@ -0,0 +1,36 @@ +package simplelru + +// LRUCache is the interface for simple LRU cache. +type LRUCache interface { + // Adds a value to the cache, returns true if an eviction occurred and + // updates the "recently used"-ness of the key. + Add(key, value interface{}) bool + + // Returns key's value from the cache and + // updates the "recently used"-ness of the key. #value, isFound + Get(key interface{}) (value interface{}, ok bool) + + // Check if a key exsists in cache without updating the recent-ness. + Contains(key interface{}) (ok bool) + + // Returns key's value without updating the "recently used"-ness of the key. + Peek(key interface{}) (value interface{}, ok bool) + + // Removes a key from the cache. + Remove(key interface{}) bool + + // Removes the oldest entry from cache. + RemoveOldest() (interface{}, interface{}, bool) + + // Returns the oldest entry from the cache. #key, value, isFound + GetOldest() (interface{}, interface{}, bool) + + // Returns a slice of the keys in the cache, from oldest to newest. + Keys() []interface{} + + // Returns the number of items in the cache. + Len() int + + // Clear all cache entries + Purge() +} diff --git a/vendor/modules.txt b/vendor/modules.txt index 436383773..aa1154aa9 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -32,6 +32,8 @@ github.com/google/go-cmp/cmp/cmpopts github.com/google/go-cmp/cmp/internal/diff github.com/google/go-cmp/cmp/internal/function github.com/google/go-cmp/cmp/internal/value +# github.com/hashicorp/golang-lru v0.5.0 +github.com/hashicorp/golang-lru/simplelru # github.com/inconshreveable/mousetrap v1.0.0 github.com/inconshreveable/mousetrap # github.com/juju/ratelimit v1.0.1 From c021ad2334df1621ac8610fe998e27acfea9a826 Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Wed, 11 Jul 2018 21:18:01 +0200 Subject: [PATCH 05/13] restore: Move documentation to godoc --- RESTORE_concurrent-pack-order.md | 30 ----------------------------- internal/restorer/doc.go | 33 ++++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 30 deletions(-) delete mode 100644 RESTORE_concurrent-pack-order.md create mode 100644 internal/restorer/doc.go diff --git a/RESTORE_concurrent-pack-order.md b/RESTORE_concurrent-pack-order.md deleted file mode 100644 index 52837b162..000000000 --- a/RESTORE_concurrent-pack-order.md +++ /dev/null @@ -1,30 +0,0 @@ -The main goal of this POC is two fold - -1. Reduce number of backend request, specifically - * download all required blobs of a pack with single backend request - * avoid repeated pack downloads when possible -2. Download multiple pack files concurrently - -Here is high-level pseudo-code of the how the POC attempts to achieve these goals - -``` -while there are packs to process - choose a pack to process [1] - get the pack from the backend or cache [2] - write pack blobs to the files that need them [3] - if not all pack blobs were used - cache the pack for future use [4] -``` - -Pack download and processing (steps [2] - [4]) runs on multiple concurrent goroutings. The POC runs all steps [2]-[4] sequentially on the same gorouting, but it is possible to split the work differently. For example, one pool of workers can handle download (step [2]) while the other pool can handle pack processing (steps [3] and [4]). - -Before a pack is downloaded (step [2]), the required space is "reserved" in the pack cache, which may purge some cached packed to make room for the reservation (well, that's the plan but purging isn't implemented in the POC). Actual download uses single backend request to get all required pack blobs. This may download blobs that are not needed, but I assume it'll still be faster than getting individual blobs. We should be able to optimize this further by changing `Backend.Load(...)` to support multiple byte ranges, for example. - -Target files are written (step [3]) in the "right" order, first file blob first, then second, then third and so on. Blob write order implies that some pack blobs may not be immediately used, i.e. they are "out of order" for their respective target files. Packs with unused blobs are cached (step [4]). The cache has capacity limit and may purge packs before they are fully used, in which case the purged packs will need to be redownloaded. - -Choosing which pack to process next (step [1]) is little convoluted in the POC. The code avoids processing of any given pack and any given target file by multiple workers concurrently. It also tries to reduce likelihook a pack will be purged from the cache by counting how many other packs will need to be processed before the pack is fully used up. Packs that need fewer other packs are processed first, everything else being equal. - ----- -Other ideas to consider - -* Allow out-of-order target file writes. Inprogress restore will be somewhat confusing to observe, but this will eliminate the need to cache packs and should simplify implimentation. On the other hand, crashed/killed restore will be harder to recover. diff --git a/internal/restorer/doc.go b/internal/restorer/doc.go new file mode 100644 index 000000000..b3583c728 --- /dev/null +++ b/internal/restorer/doc.go @@ -0,0 +1,33 @@ +// Package restorer contains code to restore data from a repository. +// +// The Restorer tries to keep the number of backend requests minimal. It does +// this by downloading all required blobs of a pack file with a single backend +// request and avoiding repeated downloads of the same pack. In addition, +// several pack files are fetched concurrently. +// +// Here is high-level pseudo-code of the how the Restorer attempts to achieve +// these goals: +// +// while there are packs to process +// choose a pack to process [1] +// get the pack from the backend or cache [2] +// write pack blobs to the files that need them [3] +// if not all pack blobs were used +// cache the pack for future use [4] +// +// Pack download and processing (steps [2] - [4]) runs on multiple concurrent +// Goroutines. The Restorer runs all steps [2]-[4] sequentially on the same +// Goroutine. +// +// Before a pack is downloaded (step [2]), the required space is "reserved" in +// the pack cache. Actual download uses single backend request to get all +// required pack blobs. This may download blobs that are not needed, but we +// assume it'll still be faster than getting individual blobs. +// +// Target files are written (step [3]) in the "right" order, first file blob +// first, then second, then third and so on. Blob write order implies that some +// pack blobs may not be immediately used, i.e. they are "out of order" for +// their respective target files. Packs with unused blobs are cached (step +// [4]). The cache has capacity limit and may purge packs before they are fully +// used, in which case the purged packs will need to be re-downloaded. +package restorer From 941202c1194216b52bcde8a331e76a25aaa9f5e3 Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Wed, 11 Jul 2018 21:33:36 +0200 Subject: [PATCH 06/13] restore: Rename things, match the rest of the project --- internal/restorer/filerestorer_test.go | 137 ++++++++++++------------- internal/restorer/packcache_test.go | 12 +-- internal/restorer/packqueue_test.go | 80 +++++++-------- 3 files changed, 113 insertions(+), 116 deletions(-) diff --git a/internal/restorer/filerestorer_test.go b/internal/restorer/filerestorer_test.go index 75d6e199d..f40c12a3a 100644 --- a/internal/restorer/filerestorer_test.go +++ b/internal/restorer/filerestorer_test.go @@ -13,21 +13,17 @@ import ( rtest "github.com/restic/restic/internal/test" ) -/////////////////////////////////////////////////////////////////////////////// -// test helpers (TODO move to a dedicated file?) -/////////////////////////////////////////////////////////////////////////////// - -type _Blob struct { +type TestBlob struct { data string pack string } -type _File struct { +type TestFile struct { name string - blobs []_Blob + blobs []TestBlob } -type _TestData struct { +type TestRepo struct { key *crypto.Key // pack names and ids @@ -47,39 +43,40 @@ type _TestData struct { idx filePackTraverser } -func (i *_TestData) Lookup(blobID restic.ID, _ restic.BlobType) ([]restic.PackedBlob, bool) { +func (i *TestRepo) Lookup(blobID restic.ID, _ restic.BlobType) ([]restic.PackedBlob, bool) { packs, found := i.blobs[blobID] return packs, found } -func (i *_TestData) packName(pack *packInfo) string { +func (i *TestRepo) packName(pack *packInfo) string { return i.packsIDToName[pack.id] } -func (i *_TestData) packID(name string) restic.ID { + +func (i *TestRepo) packID(name string) restic.ID { return i.packsNameToID[name] } -func (i *_TestData) pack(queue *packQueue, name string) *packInfo { +func (i *TestRepo) pack(queue *packQueue, name string) *packInfo { id := i.packsNameToID[name] return queue.packs[id] } -func (i *_TestData) fileContent(file *fileInfo) string { +func (i *TestRepo) fileContent(file *fileInfo) string { return i.filesPathToContent[file.path] } -func _newTestData(_files []_File) *_TestData { - type _Pack struct { +func newTestRepo(content []TestFile) *TestRepo { + type Pack struct { name string data []byte blobs map[restic.ID]restic.Blob } - _packs := make(map[string]_Pack) + packs := make(map[string]Pack) key := crypto.NewRandomKey() seal := func(data []byte) []byte { ciphertext := restic.NewBlobBuffer(len(data)) - ciphertext = ciphertext[:0] // TODO what does this actually do? + ciphertext = ciphertext[:0] // truncate the slice nonce := crypto.NewRandomNonce() ciphertext = append(ciphertext, nonce...) return key.Seal(ciphertext, nonce, data, nil) @@ -87,34 +84,34 @@ func _newTestData(_files []_File) *_TestData { filesPathToContent := make(map[string]string) - for _, _file := range _files { + for _, file := range content { var content string - for _, _blob := range _file.blobs { - content += _blob.data + for _, blob := range file.blobs { + content += blob.data // get the pack, create as necessary - var _pack _Pack - var found bool // TODO is there more concise way of doing this in go? - if _pack, found = _packs[_blob.pack]; !found { - _pack = _Pack{name: _blob.pack, blobs: make(map[restic.ID]restic.Blob)} + var pack Pack + var found bool + if pack, found = packs[blob.pack]; !found { + pack = Pack{name: blob.pack, blobs: make(map[restic.ID]restic.Blob)} } // calculate blob id and add to the pack as necessary - _blobID := restic.Hash([]byte(_blob.data)) - if _, found := _pack.blobs[_blobID]; !found { - _blobData := seal([]byte(_blob.data)) - _pack.blobs[_blobID] = restic.Blob{ + blobID := restic.Hash([]byte(blob.data)) + if _, found := pack.blobs[blobID]; !found { + blobData := seal([]byte(blob.data)) + pack.blobs[blobID] = restic.Blob{ Type: restic.DataBlob, - ID: _blobID, - Length: uint(len(_blobData)), // XXX is Length encrypted or plaintext? - Offset: uint(len(_pack.data)), + ID: blobID, + Length: uint(len(blobData)), + Offset: uint(len(pack.data)), } - _pack.data = append(_pack.data, _blobData...) + pack.data = append(pack.data, blobData...) } - _packs[_blob.pack] = _pack + packs[blob.pack] = pack } - filesPathToContent[_file.name] = content + filesPathToContent[file.name] = content } blobs := make(map[restic.ID][]restic.PackedBlob) @@ -122,26 +119,26 @@ func _newTestData(_files []_File) *_TestData { packsIDToData := make(map[restic.ID][]byte) packsNameToID := make(map[string]restic.ID) - for _, _pack := range _packs { - _packID := restic.Hash(_pack.data) - packsIDToName[_packID] = _pack.name - packsIDToData[_packID] = _pack.data - packsNameToID[_pack.name] = _packID - for blobID, blob := range _pack.blobs { - blobs[blobID] = append(blobs[blobID], restic.PackedBlob{Blob: blob, PackID: _packID}) + for _, pack := range packs { + packID := restic.Hash(pack.data) + packsIDToName[packID] = pack.name + packsIDToData[packID] = pack.data + packsNameToID[pack.name] = packID + for blobID, blob := range pack.blobs { + blobs[blobID] = append(blobs[blobID], restic.PackedBlob{Blob: blob, PackID: packID}) } } var files []*fileInfo - for _, _file := range _files { + for _, file := range content { content := restic.IDs{} - for _, _blob := range _file.blobs { - content = append(content, restic.Hash([]byte(_blob.data))) + for _, blob := range file.blobs { + content = append(content, restic.Hash([]byte(blob.data))) } - files = append(files, &fileInfo{path: _file.name, blobs: content}) + files = append(files, &fileInfo{path: file.name, blobs: content}) } - _data := &_TestData{ + repo := &TestRepo{ key: key, packsIDToName: packsIDToName, packsIDToData: packsIDToData, @@ -150,30 +147,30 @@ func _newTestData(_files []_File) *_TestData { files: files, filesPathToContent: filesPathToContent, } - _data.idx = filePackTraverser{lookup: _data.Lookup} - _data.loader = func(ctx context.Context, h restic.Handle, length int, offset int64, fn func(rd io.Reader) error) error { + repo.idx = filePackTraverser{lookup: repo.Lookup} + repo.loader = func(ctx context.Context, h restic.Handle, length int, offset int64, fn func(rd io.Reader) error) error { packID, err := restic.ParseID(h.Name) if err != nil { return err } - rd := bytes.NewReader(_data.packsIDToData[packID][int(offset) : int(offset)+length]) + rd := bytes.NewReader(repo.packsIDToData[packID][int(offset) : int(offset)+length]) return fn(rd) } - return _data + return repo } -func restoreAndVerify(t *testing.T, _files []_File) { - test := _newTestData(_files) +func restoreAndVerify(t *testing.T, content []TestFile) { + repo := newTestRepo(content) - r := newFileRestorer(test.loader, test.key, test.idx) - r.files = test.files + r := newFileRestorer(repo.loader, repo.key, repo.idx) + r.files = repo.files r.restoreFiles(context.TODO(), func(path string, err error) { rtest.OK(t, errors.Wrapf(err, "unexpected error")) }) - for _, file := range test.files { + for _, file := range repo.files { data, err := ioutil.ReadFile(file.path) if err != nil { t.Errorf("unable to read file %v: %v", file.path, err) @@ -182,7 +179,7 @@ func restoreAndVerify(t *testing.T, _files []_File) { rtest.Equals(t, false, r.filesWriter.writers.Contains(file.path)) - content := test.fileContent(file) + content := repo.fileContent(file) if !bytes.Equal(data, []byte(content)) { t.Errorf("file %v has wrong content: want %q, got %q", file.path, content, data) } @@ -191,36 +188,36 @@ func restoreAndVerify(t *testing.T, _files []_File) { rtest.OK(t, nil) } -func TestFileRestorer_basic(t *testing.T) { +func TestFileRestorerBasic(t *testing.T) { tempdir, cleanup := rtest.TempDir(t) defer cleanup() - restoreAndVerify(t, []_File{ - _File{ + restoreAndVerify(t, []TestFile{ + TestFile{ name: tempdir + "/file1", - blobs: []_Blob{ - _Blob{"data1-1", "pack1-1"}, - _Blob{"data1-2", "pack1-2"}, + blobs: []TestBlob{ + TestBlob{"data1-1", "pack1-1"}, + TestBlob{"data1-2", "pack1-2"}, }, }, - _File{ + TestFile{ name: tempdir + "/file2", - blobs: []_Blob{ - _Blob{"data2-1", "pack2-1"}, - _Blob{"data2-2", "pack2-2"}, + blobs: []TestBlob{ + TestBlob{"data2-1", "pack2-1"}, + TestBlob{"data2-2", "pack2-2"}, }, }, }) } -func TestFileRestorer_emptyFile(t *testing.T) { +func TestFileRestorerEmptyFile(t *testing.T) { tempdir, cleanup := rtest.TempDir(t) defer cleanup() - restoreAndVerify(t, []_File{ - _File{ + restoreAndVerify(t, []TestFile{ + TestFile{ name: tempdir + "/empty", - blobs: []_Blob{}, + blobs: []TestBlob{}, }, }) } diff --git a/internal/restorer/packcache_test.go b/internal/restorer/packcache_test.go index ccf58cb9f..05a9eca2b 100644 --- a/internal/restorer/packcache_test.go +++ b/internal/restorer/packcache_test.go @@ -39,7 +39,7 @@ func TestBytesWriterSeeker(t *testing.T) { assertNotOK(t, "unsupported seek", err) } -func TestPackCache_basic(t *testing.T) { +func TestPackCacheBasic(t *testing.T) { assertReader := func(expected []byte, offset int64, rd io.ReaderAt) { actual := make([]byte, len(expected)) rd.ReadAt(actual, offset) @@ -89,7 +89,7 @@ func TestPackCache_basic(t *testing.T) { assertReader([]byte{1, 2, 3, 4, 5}, 10, rd) } -func TestPackCache_invalid_range(t *testing.T) { +func TestPackCacheInvalidRange(t *testing.T) { c := newPackCache(10) id := restic.NewRandomID() @@ -113,7 +113,7 @@ func TestPackCache_invalid_range(t *testing.T) { assertNotOK(t, "negative length", err) } -func TestPackCache_capacity(t *testing.T) { +func TestPackCacheCapacity(t *testing.T) { c := newPackCache(10) id1, id2, id3 := restic.NewRandomID(), restic.NewRandomID(), restic.NewRandomID() @@ -159,7 +159,7 @@ func TestPackCache_capacity(t *testing.T) { rtest.Equals(t, true, loaded) } -func TestPackCache_downsize_record(t *testing.T) { +func TestPackCacheDownsizeRecord(t *testing.T) { c := newPackCache(10) id := restic.NewRandomID() @@ -233,7 +233,7 @@ func TestPackCache_downsize_record(t *testing.T) { rd.Close() } -func TestPackCache_wrong_load_size(t *testing.T) { +func TestPackCacheWrongLoadSize(t *testing.T) { c := newPackCache(10) _, err := c.get(restic.NewRandomID(), 0, 5, func(offset int64, length int, wr io.WriteSeeker) error { @@ -249,7 +249,7 @@ func TestPackCache_wrong_load_size(t *testing.T) { assertNotOK(t, "too many bytes read", err) } -func TestPackCache_invalidRequests(t *testing.T) { +func TestPackCacheInvalidRequests(t *testing.T) { c := newPackCache(10) id := restic.NewRandomID() diff --git a/internal/restorer/packqueue_test.go b/internal/restorer/packqueue_test.go index b874a7cb7..880f7037a 100644 --- a/internal/restorer/packqueue_test.go +++ b/internal/restorer/packqueue_test.go @@ -7,7 +7,7 @@ import ( rtest "github.com/restic/restic/internal/test" ) -func processPack(t *testing.T, data *_TestData, pack *packInfo, files []*fileInfo) { +func processPack(t *testing.T, data *TestRepo, pack *packInfo, files []*fileInfo) { for _, file := range files { data.idx.forEachFilePack(file, func(packIdx int, packID restic.ID, packBlobs []restic.Blob) bool { // assert file's head pack @@ -18,13 +18,13 @@ func processPack(t *testing.T, data *_TestData, pack *packInfo, files []*fileInf } } -func TestPackQueue_basic(t *testing.T) { - data := _newTestData([]_File{ - _File{ +func TestPackQueueBasic(t *testing.T) { + data := newTestRepo([]TestFile{ + TestFile{ name: "file", - blobs: []_Blob{ - _Blob{"data1", "pack1"}, - _Blob{"data2", "pack2"}, + blobs: []TestBlob{ + TestBlob{"data1", "pack1"}, + TestBlob{"data2", "pack2"}, }, }, }) @@ -83,16 +83,16 @@ func TestPackQueue_basic(t *testing.T) { rtest.Equals(t, true, queue.isEmpty()) } -func TestPackQueue_failedFile(t *testing.T) { +func TestPackQueueFailedFile(t *testing.T) { // point of this test is to assert that enqueuePack removes // all references to files that failed restore - data := _newTestData([]_File{ - _File{ + data := newTestRepo([]TestFile{ + TestFile{ name: "file", - blobs: []_Blob{ - _Blob{"data1", "pack1"}, - _Blob{"data2", "pack2"}, + blobs: []TestBlob{ + TestBlob{"data1", "pack1"}, + TestBlob{"data2", "pack2"}, }, }, }) @@ -105,23 +105,23 @@ func TestPackQueue_failedFile(t *testing.T) { rtest.Equals(t, true, queue.isEmpty()) } -func TestPackQueue_ordering_cost(t *testing.T) { +func TestPackQueueOrderingCost(t *testing.T) { // assert pack1 is selected before pack2: // pack1 is ready to restore file1, pack2 is ready to restore file2 // but pack2 cannot be immediately used to restore file1 - data := _newTestData([]_File{ - _File{ + data := newTestRepo([]TestFile{ + TestFile{ name: "file1", - blobs: []_Blob{ - _Blob{"data1", "pack1"}, - _Blob{"data2", "pack2"}, + blobs: []TestBlob{ + TestBlob{"data1", "pack1"}, + TestBlob{"data2", "pack2"}, }, }, - _File{ + TestFile{ name: "file2", - blobs: []_Blob{ - _Blob{"data2", "pack2"}, + blobs: []TestBlob{ + TestBlob{"data2", "pack2"}, }, }, }) @@ -143,22 +143,22 @@ func TestPackQueue_ordering_cost(t *testing.T) { rtest.Equals(t, false, queue.requeuePack(pack, files, []*fileInfo{})) } -func TestPackQueue_ordering_inprogress(t *testing.T) { +func TestPackQueueOrderingInprogress(t *testing.T) { // finish restoring one file before starting another - data := _newTestData([]_File{ - _File{ + data := newTestRepo([]TestFile{ + TestFile{ name: "file1", - blobs: []_Blob{ - _Blob{"data1-1", "pack1-1"}, - _Blob{"data1-2", "pack1-2"}, + blobs: []TestBlob{ + TestBlob{"data1-1", "pack1-1"}, + TestBlob{"data1-2", "pack1-2"}, }, }, - _File{ + TestFile{ name: "file2", - blobs: []_Blob{ - _Blob{"data2-1", "pack2-1"}, - _Blob{"data2-2", "pack2-2"}, + blobs: []TestBlob{ + TestBlob{"data2-1", "pack2-1"}, + TestBlob{"data2-2", "pack2-2"}, }, }, }) @@ -192,17 +192,17 @@ func TestPackQueue_ordering_inprogress(t *testing.T) { rtest.Equals(t, false, file == files[0]) // different file as before } -func TestPackQueue_packMultiuse(t *testing.T) { +func TestPackQueuePackMultiuse(t *testing.T) { // the same pack is required multiple times to restore the same file - data := _newTestData([]_File{ - _File{ + data := newTestRepo([]TestFile{ + TestFile{ name: "file", - blobs: []_Blob{ - _Blob{"data1", "pack1"}, - _Blob{"data2", "pack2"}, - _Blob{"data3", "pack1"}, // pack1 reuse, new blob - _Blob{"data2", "pack2"}, // pack2 reuse, same blob + blobs: []TestBlob{ + TestBlob{"data1", "pack1"}, + TestBlob{"data2", "pack2"}, + TestBlob{"data3", "pack1"}, // pack1 reuse, new blob + TestBlob{"data2", "pack2"}, // pack2 reuse, same blob }, }, }) From e010f3b8846a9e065bb040fe4c25652001292fd7 Mon Sep 17 00:00:00 2001 From: Igor Fedorenko Date: Tue, 11 Sep 2018 23:12:19 -0400 Subject: [PATCH 07/13] restore: Fix small memory leak in filesWriter, add tests Signed-off-by: Igor Fedorenko --- internal/restorer/fileswriter.go | 1 + internal/restorer/fileswriter_test.go | 44 +++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) create mode 100644 internal/restorer/fileswriter_test.go diff --git a/internal/restorer/fileswriter.go b/internal/restorer/fileswriter.go index 206ddf2d5..98fb810fb 100644 --- a/internal/restorer/fileswriter.go +++ b/internal/restorer/fileswriter.go @@ -66,4 +66,5 @@ func (w *filesWriter) close(file *fileInfo) { w.lock.Lock() defer w.lock.Unlock() w.writers.Remove(file) + delete(w.inprogress, file) } diff --git a/internal/restorer/fileswriter_test.go b/internal/restorer/fileswriter_test.go new file mode 100644 index 000000000..fd8fa9c92 --- /dev/null +++ b/internal/restorer/fileswriter_test.go @@ -0,0 +1,44 @@ +package restorer + +import ( + "io/ioutil" + "testing" + + rtest "github.com/restic/restic/internal/test" +) + +func TestFilesWriterBasic(t *testing.T) { + dir, cleanup := rtest.TempDir(t) + defer cleanup() + + w := newFilesWriter(1) + + f1 := &fileInfo{path: dir + "/f1"} + f2 := &fileInfo{path: dir + "/f2"} + + rtest.OK(t, w.writeToFile(f1, []byte{1})) + rtest.Equals(t, 1, w.writers.Len()) + rtest.Equals(t, 1, len(w.inprogress)) + + rtest.OK(t, w.writeToFile(f2, []byte{2})) + rtest.Equals(t, 1, w.writers.Len()) + rtest.Equals(t, 2, len(w.inprogress)) + + rtest.OK(t, w.writeToFile(f1, []byte{1})) + w.close(f1) + rtest.Equals(t, 0, w.writers.Len()) + rtest.Equals(t, 1, len(w.inprogress)) + + rtest.OK(t, w.writeToFile(f2, []byte{2})) + w.close(f2) + rtest.Equals(t, 0, w.writers.Len()) + rtest.Equals(t, 0, len(w.inprogress)) + + buf, err := ioutil.ReadFile(f1.path) + rtest.OK(t, err) + rtest.Equals(t, []byte{1, 1}, buf) + + buf, err = ioutil.ReadFile(f2.path) + rtest.OK(t, err) + rtest.Equals(t, []byte{2, 2}, buf) +} From 32d5ceba87fcd105f20d43bb39a1b606c15db2ae Mon Sep 17 00:00:00 2001 From: Igor Fedorenko Date: Wed, 12 Sep 2018 21:48:58 -0400 Subject: [PATCH 08/13] restore: Fix packcache capacity math with failed downloads Signed-off-by: Igor Fedorenko --- internal/restorer/packcache.go | 9 +++++++-- internal/restorer/packcache_test.go | 17 ++++++++++++++++- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/internal/restorer/packcache.go b/internal/restorer/packcache.go index a40c44482..1eaad63bf 100644 --- a/internal/restorer/packcache.go +++ b/internal/restorer/packcache.go @@ -162,14 +162,19 @@ func (c *packCache) get(packID restic.ID, offset int64, length int, load func(of } if pack.data == nil { + releasePack := func() { + delete(c.reservedPacks, pack.id) + c.reservedCapacity -= length + c.allocatedCapacity -= length + } wr := &bytesWriteSeeker{data: make([]byte, length)} err = load(offset, length, wr) if err != nil { + releasePack() return nil, err } if wr.pos != length { - delete(c.reservedPacks, pack.id) - c.reservedCapacity -= len(pack.data) + releasePack() return nil, errors.Errorf("invalid read size") } pack.data = wr.data diff --git a/internal/restorer/packcache_test.go b/internal/restorer/packcache_test.go index 05a9eca2b..3a5f18cf5 100644 --- a/internal/restorer/packcache_test.go +++ b/internal/restorer/packcache_test.go @@ -4,6 +4,7 @@ import ( "io" "testing" + "github.com/restic/restic/internal/errors" "github.com/restic/restic/internal/restic" rtest "github.com/restic/restic/internal/test" ) @@ -233,20 +234,34 @@ func TestPackCacheDownsizeRecord(t *testing.T) { rd.Close() } -func TestPackCacheWrongLoadSize(t *testing.T) { +func TestPackCacheFailedDownload(t *testing.T) { c := newPackCache(10) + assertEmpty := func() { + rtest.Equals(t, 0, len(c.cachedPacks)) + rtest.Equals(t, 10, c.capacity) + rtest.Equals(t, 0, c.reservedCapacity) + rtest.Equals(t, 0, c.allocatedCapacity) + } _, err := c.get(restic.NewRandomID(), 0, 5, func(offset int64, length int, wr io.WriteSeeker) error { + return errors.Errorf("expected induced test error") + }) + assertNotOK(t, "not enough bytes read", err) + assertEmpty() + + _, err = c.get(restic.NewRandomID(), 0, 5, func(offset int64, length int, wr io.WriteSeeker) error { wr.Write([]byte{1}) return nil }) assertNotOK(t, "not enough bytes read", err) + assertEmpty() _, err = c.get(restic.NewRandomID(), 0, 5, func(offset int64, length int, wr io.WriteSeeker) error { wr.Write([]byte{1, 2, 3, 4, 5, 6}) return nil }) assertNotOK(t, "too many bytes read", err) + assertEmpty() } func TestPackCacheInvalidRequests(t *testing.T) { From 9f3ca97ee887599a7eb3ba8c0f473f8f5d96c402 Mon Sep 17 00:00:00 2001 From: Igor Fedorenko Date: Fri, 14 Sep 2018 20:18:37 -0400 Subject: [PATCH 09/13] restore: Chang fileInfo to use snapshot location instead of target path * uses less memory as common prefix is only stored once * stepping stone for simpler error callback api, which will allow further memory footprint reduction Signed-off-by: Igor Fedorenko --- internal/restorer/filerestorer.go | 37 ++++++++++++++++---------- internal/restorer/filerestorer_test.go | 27 ++++++++++--------- internal/restorer/fileswriter.go | 32 +++++++++++----------- internal/restorer/fileswriter_test.go | 8 +++--- internal/restorer/packqueue.go | 2 +- internal/restorer/restorer.go | 10 +++---- 6 files changed, 63 insertions(+), 53 deletions(-) diff --git a/internal/restorer/filerestorer.go b/internal/restorer/filerestorer.go index 14cf9117a..c8ce43d60 100644 --- a/internal/restorer/filerestorer.go +++ b/internal/restorer/filerestorer.go @@ -4,6 +4,7 @@ import ( "context" "io" "os" + "path/filepath" "github.com/restic/restic/internal/crypto" "github.com/restic/restic/internal/debug" @@ -19,7 +20,6 @@ import ( // TODO consider replacing pack file cache with blob cache // TODO avoid decrypting the same blob multiple times // TODO evaluate disabled debug logging overhead for large repositories -// TODO consider logging snapshot-relative path to reduce log clutter const ( workerCount = 8 @@ -37,8 +37,8 @@ const ( // information about regular file being restored type fileInfo struct { - path string // full path to the file on local filesystem - blobs []restic.ID // remaining blobs of the file + location string // file on local filesystem relative to restorer basedir + blobs []restic.ID // remaining blobs of the file } // information about a data pack required to restore one or more files @@ -65,21 +65,27 @@ type fileRestorer struct { packCache *packCache // pack cache filesWriter *filesWriter // file write + dst string files []*fileInfo } -func newFileRestorer(packLoader func(ctx context.Context, h restic.Handle, length int, offset int64, fn func(rd io.Reader) error) error, key *crypto.Key, idx filePackTraverser) *fileRestorer { +func newFileRestorer(dst string, packLoader func(ctx context.Context, h restic.Handle, length int, offset int64, fn func(rd io.Reader) error) error, key *crypto.Key, idx filePackTraverser) *fileRestorer { return &fileRestorer{ packLoader: packLoader, key: key, idx: idx, filesWriter: newFilesWriter(filesWriterCount), packCache: newPackCache(packCacheCapacity), + dst: dst, } } -func (r *fileRestorer) addFile(path string, content restic.IDs) { - r.files = append(r.files, &fileInfo{path: path, blobs: content}) +func (r *fileRestorer) addFile(location string, content restic.IDs) { + r.files = append(r.files, &fileInfo{location: location, blobs: content}) +} + +func (r *fileRestorer) targetPath(location string) string { + return filepath.Join(r.dst, location) } // used to pass information among workers (wish golang channels allowed multivalues) @@ -90,7 +96,7 @@ type processingInfo struct { func (r *fileRestorer) restoreFiles(ctx context.Context, onError func(path string, err error)) error { for _, file := range r.files { - dbgmsg := file.path + ": " + dbgmsg := file.location + ": " r.idx.forEachFilePack(file, func(packIdx int, packID restic.ID, packBlobs []restic.Blob) bool { if packIdx > 0 { dbgmsg += ", " @@ -110,12 +116,13 @@ func (r *fileRestorer) restoreFiles(ctx context.Context, onError func(path strin // synchronously create empty files (empty files need no packs and are ignored by packQueue) for _, file := range r.files { + fullpath := r.targetPath(file.location) if len(file.blobs) == 0 { - wr, err := os.OpenFile(file.path, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0600) + wr, err := os.OpenFile(fullpath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0600) if err == nil { wr.Close() } else { - onError(file.path, err) + onError(file.location, err) } } } @@ -172,9 +179,10 @@ func (r *fileRestorer) restoreFiles(ctx context.Context, onError func(path strin var success []*fileInfo var failure []*fileInfo for file, ferr := range ferrors { + target := r.targetPath(file.location) if ferr != nil { - onError(file.path, ferr) - r.filesWriter.close(file) + onError(file.location, ferr) + r.filesWriter.close(target) delete(inprogress, file) failure = append(failure, file) } else { @@ -183,7 +191,7 @@ func (r *fileRestorer) restoreFiles(ctx context.Context, onError func(path strin return false // only interesed in the first pack }) if len(file.blobs) == 0 { - r.filesWriter.close(file) + r.filesWriter.close(target) delete(inprogress, file) } success = append(success, file) @@ -281,12 +289,13 @@ func (r *fileRestorer) processPack(ctx context.Context, request processingInfo, defer rd.Close() for file := range request.files { + target := r.targetPath(file.location) r.idx.forEachFilePack(file, func(packIdx int, packID restic.ID, packBlobs []restic.Blob) bool { for _, blob := range packBlobs { - debug.Log("Writing blob %s (%d bytes) from pack %s to %s", blob.ID.Str(), blob.Length, packID.Str(), file.path) + debug.Log("Writing blob %s (%d bytes) from pack %s to %s", blob.ID.Str(), blob.Length, packID.Str(), file.location) buf, err := r.loadBlob(rd, blob) if err == nil { - err = r.filesWriter.writeToFile(file, buf) + err = r.filesWriter.writeToFile(target, buf) } if err != nil { request.files[file] = err diff --git a/internal/restorer/filerestorer_test.go b/internal/restorer/filerestorer_test.go index f40c12a3a..9584fabf7 100644 --- a/internal/restorer/filerestorer_test.go +++ b/internal/restorer/filerestorer_test.go @@ -62,7 +62,7 @@ func (i *TestRepo) pack(queue *packQueue, name string) *packInfo { } func (i *TestRepo) fileContent(file *fileInfo) string { - return i.filesPathToContent[file.path] + return i.filesPathToContent[file.location] } func newTestRepo(content []TestFile) *TestRepo { @@ -135,7 +135,7 @@ func newTestRepo(content []TestFile) *TestRepo { for _, blob := range file.blobs { content = append(content, restic.Hash([]byte(blob.data))) } - files = append(files, &fileInfo{path: file.name, blobs: content}) + files = append(files, &fileInfo{location: file.name, blobs: content}) } repo := &TestRepo{ @@ -160,10 +160,10 @@ func newTestRepo(content []TestFile) *TestRepo { return repo } -func restoreAndVerify(t *testing.T, content []TestFile) { +func restoreAndVerify(t *testing.T, tempdir string, content []TestFile) { repo := newTestRepo(content) - r := newFileRestorer(repo.loader, repo.key, repo.idx) + r := newFileRestorer(tempdir, repo.loader, repo.key, repo.idx) r.files = repo.files r.restoreFiles(context.TODO(), func(path string, err error) { @@ -171,17 +171,18 @@ func restoreAndVerify(t *testing.T, content []TestFile) { }) for _, file := range repo.files { - data, err := ioutil.ReadFile(file.path) + target := r.targetPath(file.location) + data, err := ioutil.ReadFile(target) if err != nil { - t.Errorf("unable to read file %v: %v", file.path, err) + t.Errorf("unable to read file %v: %v", file.location, err) continue } - rtest.Equals(t, false, r.filesWriter.writers.Contains(file.path)) + rtest.Equals(t, false, r.filesWriter.writers.Contains(target)) content := repo.fileContent(file) if !bytes.Equal(data, []byte(content)) { - t.Errorf("file %v has wrong content: want %q, got %q", file.path, content, data) + t.Errorf("file %v has wrong content: want %q, got %q", file.location, content, data) } } @@ -192,16 +193,16 @@ func TestFileRestorerBasic(t *testing.T) { tempdir, cleanup := rtest.TempDir(t) defer cleanup() - restoreAndVerify(t, []TestFile{ + restoreAndVerify(t, tempdir, []TestFile{ TestFile{ - name: tempdir + "/file1", + name: "file1", blobs: []TestBlob{ TestBlob{"data1-1", "pack1-1"}, TestBlob{"data1-2", "pack1-2"}, }, }, TestFile{ - name: tempdir + "/file2", + name: "file2", blobs: []TestBlob{ TestBlob{"data2-1", "pack2-1"}, TestBlob{"data2-2", "pack2-2"}, @@ -214,9 +215,9 @@ func TestFileRestorerEmptyFile(t *testing.T) { tempdir, cleanup := rtest.TempDir(t) defer cleanup() - restoreAndVerify(t, []TestFile{ + restoreAndVerify(t, tempdir, []TestFile{ TestFile{ - name: tempdir + "/empty", + name: "empty", blobs: []TestBlob{}, }, }) diff --git a/internal/restorer/fileswriter.go b/internal/restorer/fileswriter.go index 98fb810fb..af7ea8428 100644 --- a/internal/restorer/fileswriter.go +++ b/internal/restorer/fileswriter.go @@ -11,9 +11,9 @@ import ( ) type filesWriter struct { - lock sync.Mutex // guards concurrent access - inprogress map[*fileInfo]struct{} // (logically) opened file writers - writers simplelru.LRUCache // key: *fileInfo, value: *os.File + lock sync.Mutex // guards concurrent access + inprogress map[string]struct{} // (logically) opened file writers + writers simplelru.LRUCache // key: string, value: *os.File } func newFilesWriter(count int) *filesWriter { @@ -21,30 +21,30 @@ func newFilesWriter(count int) *filesWriter { value.(*os.File).Close() debug.Log("Closed and purged cached writer for %v", key) }) - return &filesWriter{inprogress: make(map[*fileInfo]struct{}), writers: writers} + return &filesWriter{inprogress: make(map[string]struct{}), writers: writers} } -func (w *filesWriter) writeToFile(file *fileInfo, buf []byte) error { +func (w *filesWriter) writeToFile(path string, buf []byte) error { acquireWriter := func() (io.Writer, error) { w.lock.Lock() defer w.lock.Unlock() - if wr, ok := w.writers.Get(file); ok { - debug.Log("Used cached writer for %s", file.path) + if wr, ok := w.writers.Get(path); ok { + debug.Log("Used cached writer for %s", path) return wr.(*os.File), nil } var flags int - if _, append := w.inprogress[file]; append { + if _, append := w.inprogress[path]; append { flags = os.O_APPEND | os.O_WRONLY } else { - w.inprogress[file] = struct{}{} + w.inprogress[path] = struct{}{} flags = os.O_CREATE | os.O_TRUNC | os.O_WRONLY } - wr, err := os.OpenFile(file.path, flags, 0600) + wr, err := os.OpenFile(path, flags, 0600) if err != nil { return nil, err } - w.writers.Add(file, wr) - debug.Log("Opened and cached writer for %s", file.path) + w.writers.Add(path, wr) + debug.Log("Opened and cached writer for %s", path) return wr, nil } @@ -57,14 +57,14 @@ func (w *filesWriter) writeToFile(file *fileInfo, buf []byte) error { return err } if n != len(buf) { - return errors.Errorf("error writing file %v: wrong length written, want %d, got %d", file.path, len(buf), n) + return errors.Errorf("error writing file %v: wrong length written, want %d, got %d", path, len(buf), n) } return nil } -func (w *filesWriter) close(file *fileInfo) { +func (w *filesWriter) close(path string) { w.lock.Lock() defer w.lock.Unlock() - w.writers.Remove(file) - delete(w.inprogress, file) + w.writers.Remove(path) + delete(w.inprogress, path) } diff --git a/internal/restorer/fileswriter_test.go b/internal/restorer/fileswriter_test.go index fd8fa9c92..45c2a88fb 100644 --- a/internal/restorer/fileswriter_test.go +++ b/internal/restorer/fileswriter_test.go @@ -13,8 +13,8 @@ func TestFilesWriterBasic(t *testing.T) { w := newFilesWriter(1) - f1 := &fileInfo{path: dir + "/f1"} - f2 := &fileInfo{path: dir + "/f2"} + f1 := dir + "/f1" + f2 := dir + "/f2" rtest.OK(t, w.writeToFile(f1, []byte{1})) rtest.Equals(t, 1, w.writers.Len()) @@ -34,11 +34,11 @@ func TestFilesWriterBasic(t *testing.T) { rtest.Equals(t, 0, w.writers.Len()) rtest.Equals(t, 0, len(w.inprogress)) - buf, err := ioutil.ReadFile(f1.path) + buf, err := ioutil.ReadFile(f1) rtest.OK(t, err) rtest.Equals(t, []byte{1, 1}, buf) - buf, err = ioutil.ReadFile(f2.path) + buf, err = ioutil.ReadFile(f2) rtest.OK(t, err) rtest.Equals(t, []byte{2, 2}, buf) } diff --git a/internal/restorer/packqueue.go b/internal/restorer/packqueue.go index fb1517a17..fe8259846 100644 --- a/internal/restorer/packqueue.go +++ b/internal/restorer/packqueue.go @@ -112,7 +112,7 @@ func (h *packQueue) nextPack() (*packInfo, []*fileInfo) { var files []*fileInfo for file := range pack.files { h.idx.forEachFilePack(file, func(packIdx int, packID restic.ID, packBlobs []restic.Blob) bool { - debug.Log("Pack #%d %s (%d blobs) used by %s", packIdx, packID.Str(), len(packBlobs), file.path) + debug.Log("Pack #%d %s (%d blobs) used by %s", packIdx, packID.Str(), len(packBlobs), file.location) if pack.id == packID { files = append(files, file) } diff --git a/internal/restorer/restorer.go b/internal/restorer/restorer.go index 6986c8549..ea61d3862 100644 --- a/internal/restorer/restorer.go +++ b/internal/restorer/restorer.go @@ -192,7 +192,7 @@ func (res *Restorer) RestoreTo(ctx context.Context, dst string) error { idx := restic.NewHardlinkIndex() - filerestorer := newFileRestorer(res.repo.Backend().Load, res.repo.Key(), filePackTraverser{lookup: res.repo.Index().Lookup}) + filerestorer := newFileRestorer(dst, res.repo.Backend().Load, res.repo.Key(), filePackTraverser{lookup: res.repo.Index().Lookup}) // path->node map, only needed to call res.Error, which uses the node during tests nodes := make(map[string]*restic.Node) @@ -221,11 +221,11 @@ func (res *Restorer) RestoreTo(ctx context.Context, dst string) error { if idx.Has(node.Inode, node.DeviceID) { return nil } - idx.Add(node.Inode, node.DeviceID, target) + idx.Add(node.Inode, node.DeviceID, location) } nodes[target] = node - filerestorer.addFile(target, node.Content) + filerestorer.addFile(location, node.Content) return nil }, @@ -244,8 +244,8 @@ func (res *Restorer) RestoreTo(ctx context.Context, dst string) error { return res.traverseTree(ctx, dst, string(filepath.Separator), *res.sn.Tree, treeVisitor{ enterDir: noop, visitNode: func(node *restic.Node, target, location string) error { - if idx.Has(node.Inode, node.DeviceID) && idx.GetFilename(node.Inode, node.DeviceID) != target { - return res.restoreHardlinkAt(node, idx.GetFilename(node.Inode, node.DeviceID), target, location) + if idx.Has(node.Inode, node.DeviceID) && idx.GetFilename(node.Inode, node.DeviceID) != location { + return res.restoreHardlinkAt(node, filerestorer.targetPath(idx.GetFilename(node.Inode, node.DeviceID)), target, location) } if node.Type != "file" { From 9e24154ec9f6f33a9ec0f7dd1b9c6345c0027f3d Mon Sep 17 00:00:00 2001 From: Igor Fedorenko Date: Fri, 14 Sep 2018 20:55:30 -0400 Subject: [PATCH 10/13] restore: significantly reduce memory footprint reworked restore error callback to use file location path instead of much heavier Node. this reduced restore memory usage by as much as 50% in some of my tests. Signed-off-by: Igor Fedorenko --- cmd/restic/cmd_restore.go | 4 +- internal/restorer/restorer.go | 18 +++----- internal/restorer/restorer_test.go | 70 +++++++++++++++--------------- 3 files changed, 45 insertions(+), 47 deletions(-) diff --git a/cmd/restic/cmd_restore.go b/cmd/restic/cmd_restore.go index 4bf59c06f..477192eab 100644 --- a/cmd/restic/cmd_restore.go +++ b/cmd/restic/cmd_restore.go @@ -113,8 +113,8 @@ func runRestore(opts RestoreOptions, gopts GlobalOptions, args []string) error { } totalErrors := 0 - res.Error = func(dir string, node *restic.Node, err error) error { - Warnf("ignoring error for %s: %s\n", dir, err) + res.Error = func(location string, err error) error { + Warnf("ignoring error for %s: %s\n", location, err) totalErrors++ return nil } diff --git a/internal/restorer/restorer.go b/internal/restorer/restorer.go index ea61d3862..e328402c3 100644 --- a/internal/restorer/restorer.go +++ b/internal/restorer/restorer.go @@ -18,11 +18,11 @@ type Restorer struct { repo restic.Repository sn *restic.Snapshot - Error func(dir string, node *restic.Node, err error) error + Error func(location string, err error) error SelectFilter func(item string, dstpath string, node *restic.Node) (selectedForRestore bool, childMayBeSelected bool) } -var restorerAbortOnAllErrors = func(str string, node *restic.Node, err error) error { return err } +var restorerAbortOnAllErrors = func(location string, err error) error { return err } // NewRestorer creates a restorer preloaded with the content from the snapshot id. func NewRestorer(repo restic.Repository, id restic.ID) (*Restorer, error) { @@ -55,7 +55,7 @@ func (res *Restorer) traverseTree(ctx context.Context, target, location string, tree, err := res.repo.LoadTree(ctx, treeID) if err != nil { debug.Log("error loading tree %v: %v", treeID, err) - return res.Error(location, nil, err) + return res.Error(location, err) } for _, node := range tree.Nodes { @@ -65,7 +65,7 @@ func (res *Restorer) traverseTree(ctx context.Context, target, location string, nodeName := filepath.Base(filepath.Join(string(filepath.Separator), node.Name)) if nodeName != node.Name { debug.Log("node %q has invalid name %q", node.Name, nodeName) - err := res.Error(location, node, errors.New("node has invalid name")) + err := res.Error(location, errors.Errorf("invalid child node name %s", node.Name)) if err != nil { return err } @@ -78,7 +78,7 @@ func (res *Restorer) traverseTree(ctx context.Context, target, location string, if target == nodeTarget || !fs.HasPathPrefix(target, nodeTarget) { debug.Log("target: %v %v", target, nodeTarget) debug.Log("node %q has invalid target path %q", node.Name, nodeTarget) - err := res.Error(nodeLocation, node, errors.New("node has invalid path")) + err := res.Error(nodeLocation, errors.New("node has invalid path")) if err != nil { return err } @@ -95,7 +95,7 @@ func (res *Restorer) traverseTree(ctx context.Context, target, location string, sanitizeError := func(err error) error { if err != nil { - err = res.Error(nodeTarget, node, err) + err = res.Error(nodeLocation, err) } return err } @@ -194,9 +194,6 @@ func (res *Restorer) RestoreTo(ctx context.Context, dst string) error { filerestorer := newFileRestorer(dst, res.repo.Backend().Load, res.repo.Key(), filePackTraverser{lookup: res.repo.Index().Lookup}) - // path->node map, only needed to call res.Error, which uses the node during tests - nodes := make(map[string]*restic.Node) - // first tree pass: create directories and collect all files to restore err = res.traverseTree(ctx, dst, string(filepath.Separator), *res.sn.Tree, treeVisitor{ enterDir: func(node *restic.Node, target, location string) error { @@ -224,7 +221,6 @@ func (res *Restorer) RestoreTo(ctx context.Context, dst string) error { idx.Add(node.Inode, node.DeviceID, location) } - nodes[target] = node filerestorer.addFile(location, node.Content) return nil @@ -235,7 +231,7 @@ func (res *Restorer) RestoreTo(ctx context.Context, dst string) error { return err } - err = filerestorer.restoreFiles(ctx, func(path string, err error) { res.Error(path, nodes[path], err) }) + err = filerestorer.restoreFiles(ctx, func(location string, err error) { res.Error(location, err) }) if err != nil { return err } diff --git a/internal/restorer/restorer_test.go b/internal/restorer/restorer_test.go index a0194fa30..1e73674cd 100644 --- a/internal/restorer/restorer_test.go +++ b/internal/restorer/restorer_test.go @@ -134,8 +134,8 @@ func TestRestorer(t *testing.T) { var tests = []struct { Snapshot Files map[string]string - ErrorsMust map[string]string - ErrorsMay map[string]string + ErrorsMust map[string]map[string]struct{} + ErrorsMay map[string]map[string]struct{} Select func(item string, dstpath string, node *restic.Node) (selectForRestore bool, childMayBeSelected bool) }{ // valid test cases @@ -249,9 +249,11 @@ func TestRestorer(t *testing.T) { `..\..\foo\..\bar\..\xx\test2`: File{"test2\n"}, }, }, - ErrorsMay: map[string]string{ - `/#..\test`: "node has invalid name", - `/#..\..\foo\..\bar\..\xx\test2`: "node has invalid name", + ErrorsMay: map[string]map[string]struct{}{ + `/`: { + `invalid child node name ..\test`: struct{}{}, + `invalid child node name ..\..\foo\..\bar\..\xx\test2`: struct{}{}, + }, }, }, { @@ -261,9 +263,11 @@ func TestRestorer(t *testing.T) { `../../foo/../bar/../xx/test2`: File{"test2\n"}, }, }, - ErrorsMay: map[string]string{ - `/#../test`: "node has invalid name", - `/#../../foo/../bar/../xx/test2`: "node has invalid name", + ErrorsMay: map[string]map[string]struct{}{ + `/`: { + `invalid child node name ../test`: struct{}{}, + `invalid child node name ../../foo/../bar/../xx/test2`: struct{}{}, + }, }, }, { @@ -290,8 +294,10 @@ func TestRestorer(t *testing.T) { Files: map[string]string{ "top": "toplevel file", }, - ErrorsMust: map[string]string{ - `/x#..`: "node has invalid name", + ErrorsMust: map[string]map[string]struct{}{ + `/x`: { + `invalid child node name ..`: struct{}{}, + }, }, }, } @@ -329,11 +335,14 @@ func TestRestorer(t *testing.T) { return true, true } - errors := make(map[string]string) - res.Error = func(dir string, node *restic.Node, err error) error { - t.Logf("restore returned error for %q in dir %v: %v", node.Name, dir, err) - dir = toSlash(dir) - errors[dir+"#"+node.Name] = err.Error() + errors := make(map[string]map[string]struct{}) + res.Error = func(location string, err error) error { + location = toSlash(location) + t.Logf("restore returned error for %q: %v", location, err) + if errors[location] == nil { + errors[location] = make(map[string]struct{}) + } + errors[location][err.Error()] = struct{}{} return nil } @@ -345,33 +354,27 @@ func TestRestorer(t *testing.T) { t.Fatal(err) } - for filename, errorMessage := range test.ErrorsMust { - msg, ok := errors[filename] + for location, expectedErrors := range test.ErrorsMust { + actualErrors, ok := errors[location] if !ok { - t.Errorf("expected error for %v, found none", filename) + t.Errorf("expected error(s) for %v, found none", location) continue } - if msg != "" && msg != errorMessage { - t.Errorf("wrong error message for %v: got %q, want %q", - filename, msg, errorMessage) - } + rtest.Equals(t, expectedErrors, actualErrors) - delete(errors, filename) + delete(errors, location) } - for filename, errorMessage := range test.ErrorsMay { - msg, ok := errors[filename] + for location, expectedErrors := range test.ErrorsMay { + actualErrors, ok := errors[location] if !ok { continue } - if msg != "" && msg != errorMessage { - t.Errorf("wrong error message for %v: got %q, want %q", - filename, msg, errorMessage) - } + rtest.Equals(t, expectedErrors, actualErrors) - delete(errors, filename) + delete(errors, location) } for filename, err := range errors { @@ -436,10 +439,9 @@ func TestRestorerRelative(t *testing.T) { defer cleanup() errors := make(map[string]string) - res.Error = func(dir string, node *restic.Node, err error) error { - t.Logf("restore returned error for %q in dir %v: %v", node.Name, dir, err) - dir = toSlash(dir) - errors[dir+"#"+node.Name] = err.Error() + res.Error = func(location string, err error) error { + t.Logf("restore returned error for %q: %v", location, err) + errors[location] = err.Error() return nil } From c2bcb764cd249f31e1bfab06844198ad131ea358 Mon Sep 17 00:00:00 2001 From: Igor Fedorenko Date: Thu, 27 Sep 2018 10:35:34 -0400 Subject: [PATCH 11/13] restorer: Add a note on hardlink metadata Signed-off-by: Igor Fedorenko --- internal/restorer/restorer.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/internal/restorer/restorer.go b/internal/restorer/restorer.go index e328402c3..291ede287 100644 --- a/internal/restorer/restorer.go +++ b/internal/restorer/restorer.go @@ -171,7 +171,8 @@ func (res *Restorer) restoreHardlinkAt(node *restic.Node, target, path, location if err != nil { return errors.Wrap(err, "CreateHardlink") } - return res.restoreNodeMetadataTo(node, target, location) + // TODO investigate if hardlinks have separate metadata on any supported system + return res.restoreNodeMetadataTo(node, path, location) } // RestoreTo creates the directories and files in the snapshot below dst. From bda8d7722edf054392f2dfce5877754757f80982 Mon Sep 17 00:00:00 2001 From: Igor Fedorenko Date: Thu, 27 Sep 2018 08:59:33 -0400 Subject: [PATCH 12/13] restorer: Optimize empty file restore don't create fileInfo structs for empty files. this saves memory. this also avoids extra serial scan of all fileInfo, which should make restore faster and more consistent. Signed-off-by: Igor Fedorenko --- internal/restorer/filerestorer.go | 51 +++++-------- internal/restorer/filerestorer_test.go | 12 ---- internal/restorer/restorer.go | 33 +++++++-- internal/restorer/restorer_test.go | 95 ++++++++++++++----------- internal/restorer/restorer_unix_test.go | 61 ++++++++++++++++ 5 files changed, 163 insertions(+), 89 deletions(-) create mode 100644 internal/restorer/restorer_unix_test.go diff --git a/internal/restorer/filerestorer.go b/internal/restorer/filerestorer.go index c8ce43d60..4baf9b567 100644 --- a/internal/restorer/filerestorer.go +++ b/internal/restorer/filerestorer.go @@ -3,7 +3,6 @@ package restorer import ( "context" "io" - "os" "path/filepath" "github.com/restic/restic/internal/crypto" @@ -95,37 +94,25 @@ type processingInfo struct { } func (r *fileRestorer) restoreFiles(ctx context.Context, onError func(path string, err error)) error { - for _, file := range r.files { - dbgmsg := file.location + ": " - r.idx.forEachFilePack(file, func(packIdx int, packID restic.ID, packBlobs []restic.Blob) bool { - if packIdx > 0 { - dbgmsg += ", " - } - dbgmsg += "pack{id=" + packID.Str() + ", blobs: " - for blobIdx, blob := range packBlobs { - if blobIdx > 0 { - dbgmsg += ", " - } - dbgmsg += blob.ID.Str() - } - dbgmsg += "}" - return true // keep going - }) - debug.Log(dbgmsg) - } - - // synchronously create empty files (empty files need no packs and are ignored by packQueue) - for _, file := range r.files { - fullpath := r.targetPath(file.location) - if len(file.blobs) == 0 { - wr, err := os.OpenFile(fullpath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0600) - if err == nil { - wr.Close() - } else { - onError(file.location, err) - } - } - } + // TODO conditionally enable when debug log is on + // for _, file := range r.files { + // dbgmsg := file.location + ": " + // r.idx.forEachFilePack(file, func(packIdx int, packID restic.ID, packBlobs []restic.Blob) bool { + // if packIdx > 0 { + // dbgmsg += ", " + // } + // dbgmsg += "pack{id=" + packID.Str() + ", blobs: " + // for blobIdx, blob := range packBlobs { + // if blobIdx > 0 { + // dbgmsg += ", " + // } + // dbgmsg += blob.ID.Str() + // } + // dbgmsg += "}" + // return true // keep going + // }) + // debug.Log(dbgmsg) + // } inprogress := make(map[*fileInfo]struct{}) queue, err := newPackQueue(r.idx, r.files, func(files map[*fileInfo]struct{}) bool { diff --git a/internal/restorer/filerestorer_test.go b/internal/restorer/filerestorer_test.go index 9584fabf7..dd022e9d4 100644 --- a/internal/restorer/filerestorer_test.go +++ b/internal/restorer/filerestorer_test.go @@ -210,15 +210,3 @@ func TestFileRestorerBasic(t *testing.T) { }, }) } - -func TestFileRestorerEmptyFile(t *testing.T) { - tempdir, cleanup := rtest.TempDir(t) - defer cleanup() - - restoreAndVerify(t, tempdir, []TestFile{ - TestFile{ - name: "empty", - blobs: []TestBlob{}, - }, - }) -} diff --git a/internal/restorer/restorer.go b/internal/restorer/restorer.go index 291ede287..b6fb2b60e 100644 --- a/internal/restorer/restorer.go +++ b/internal/restorer/restorer.go @@ -175,6 +175,19 @@ func (res *Restorer) restoreHardlinkAt(node *restic.Node, target, path, location return res.restoreNodeMetadataTo(node, path, location) } +func (res *Restorer) restoreEmptyFileAt(node *restic.Node, target, location string) error { + wr, err := os.OpenFile(target, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0600) + if err != nil { + return err + } + err = wr.Close() + if err != nil { + return err + } + + return res.restoreNodeMetadataTo(node, target, location) +} + // RestoreTo creates the directories and files in the snapshot below dst. // Before an item is created, res.Filter is called. func (res *Restorer) RestoreTo(ctx context.Context, dst string) error { @@ -215,6 +228,10 @@ func (res *Restorer) RestoreTo(ctx context.Context, dst string) error { return nil } + if node.Size == 0 { + return nil // deal with empty files later + } + if node.Links > 1 { if idx.Has(node.Inode, node.DeviceID) { return nil @@ -241,14 +258,22 @@ func (res *Restorer) RestoreTo(ctx context.Context, dst string) error { return res.traverseTree(ctx, dst, string(filepath.Separator), *res.sn.Tree, treeVisitor{ enterDir: noop, visitNode: func(node *restic.Node, target, location string) error { - if idx.Has(node.Inode, node.DeviceID) && idx.GetFilename(node.Inode, node.DeviceID) != location { - return res.restoreHardlinkAt(node, filerestorer.targetPath(idx.GetFilename(node.Inode, node.DeviceID)), target, location) - } - if node.Type != "file" { return res.restoreNodeTo(ctx, node, target, location) } + // create empty files, but not hardlinks to empty files + if node.Size == 0 && (node.Links < 2 || !idx.Has(node.Inode, node.DeviceID)) { + if node.Links > 1 { + idx.Add(node.Inode, node.DeviceID, location) + } + return res.restoreEmptyFileAt(node, target, location) + } + + if idx.Has(node.Inode, node.DeviceID) && idx.GetFilename(node.Inode, node.DeviceID) != location { + return res.restoreHardlinkAt(node, filerestorer.targetPath(idx.GetFilename(node.Inode, node.DeviceID)), target, location) + } + return res.restoreNodeMetadataTo(node, target, location) }, leaveDir: restoreNodeMetadata, diff --git a/internal/restorer/restorer_test.go b/internal/restorer/restorer_test.go index 1e73674cd..70136bfe3 100644 --- a/internal/restorer/restorer_test.go +++ b/internal/restorer/restorer_test.go @@ -24,7 +24,9 @@ type Snapshot struct { } type File struct { - Data string + Data string + Links uint64 + Inode uint64 } type Dir struct { @@ -51,22 +53,33 @@ func saveDir(t testing.TB, repo restic.Repository, nodes map[string]Node, inode tree := &restic.Tree{} for name, n := range nodes { inode++ - var id restic.ID switch node := n.(type) { case File: - id = saveFile(t, repo, node) + fi := n.(File).Inode + if fi == 0 { + fi = inode + } + lc := n.(File).Links + if lc == 0 { + lc = 1 + } + fc := []restic.ID{} + if len(n.(File).Data) > 0 { + fc = append(fc, saveFile(t, repo, node)) + } tree.Insert(&restic.Node{ Type: "file", Mode: 0644, Name: name, UID: uint32(os.Getuid()), GID: uint32(os.Getgid()), - Content: []restic.ID{id}, + Content: fc, Size: uint64(len(n.(File).Data)), - Inode: inode, + Inode: fi, + Links: lc, }) case Dir: - id = saveDir(t, repo, node.Nodes, inode) + id := saveDir(t, repo, node.Nodes, inode) mode := node.Mode if mode == 0 { @@ -98,7 +111,7 @@ func saveSnapshot(t testing.TB, repo restic.Repository, snapshot Snapshot) (*res ctx, cancel := context.WithCancel(context.Background()) defer cancel() - treeID := saveDir(t, repo, snapshot.Nodes, 0) + treeID := saveDir(t, repo, snapshot.Nodes, 1000) err := repo.Flush(ctx) if err != nil { @@ -142,10 +155,10 @@ func TestRestorer(t *testing.T) { { Snapshot: Snapshot{ Nodes: map[string]Node{ - "foo": File{"content: foo\n"}, + "foo": File{Data: "content: foo\n"}, "dirtest": Dir{ Nodes: map[string]Node{ - "file": File{"content: file\n"}, + "file": File{Data: "content: file\n"}, }, }, }, @@ -158,13 +171,13 @@ func TestRestorer(t *testing.T) { { Snapshot: Snapshot{ Nodes: map[string]Node{ - "top": File{"toplevel file"}, + "top": File{Data: "toplevel file"}, "dir": Dir{ Nodes: map[string]Node{ - "file": File{"file in dir"}, + "file": File{Data: "file in dir"}, "subdir": Dir{ Nodes: map[string]Node{ - "file": File{"file in subdir"}, + "file": File{Data: "file in subdir"}, }, }, }, @@ -183,7 +196,7 @@ func TestRestorer(t *testing.T) { "dir": Dir{ Mode: 0444, }, - "file": File{"top-level file"}, + "file": File{Data: "top-level file"}, }, }, Files: map[string]string{ @@ -196,7 +209,7 @@ func TestRestorer(t *testing.T) { "dir": Dir{ Mode: 0555, Nodes: map[string]Node{ - "file": File{"file in dir"}, + "file": File{Data: "file in dir"}, }, }, }, @@ -208,7 +221,7 @@ func TestRestorer(t *testing.T) { { Snapshot: Snapshot{ Nodes: map[string]Node{ - "topfile": File{"top-level file"}, + "topfile": File{Data: "top-level file"}, }, }, Files: map[string]string{ @@ -220,7 +233,7 @@ func TestRestorer(t *testing.T) { Nodes: map[string]Node{ "dir": Dir{ Nodes: map[string]Node{ - "file": File{"content: file\n"}, + "file": File{Data: "content: file\n"}, }, }, }, @@ -245,8 +258,8 @@ func TestRestorer(t *testing.T) { { Snapshot: Snapshot{ Nodes: map[string]Node{ - `..\test`: File{"foo\n"}, - `..\..\foo\..\bar\..\xx\test2`: File{"test2\n"}, + `..\test`: File{Data: "foo\n"}, + `..\..\foo\..\bar\..\xx\test2`: File{Data: "test2\n"}, }, }, ErrorsMay: map[string]map[string]struct{}{ @@ -259,8 +272,8 @@ func TestRestorer(t *testing.T) { { Snapshot: Snapshot{ Nodes: map[string]Node{ - `../test`: File{"foo\n"}, - `../../foo/../bar/../xx/test2`: File{"test2\n"}, + `../test`: File{Data: "foo\n"}, + `../../foo/../bar/../xx/test2`: File{Data: "test2\n"}, }, }, ErrorsMay: map[string]map[string]struct{}{ @@ -273,16 +286,16 @@ func TestRestorer(t *testing.T) { { Snapshot: Snapshot{ Nodes: map[string]Node{ - "top": File{"toplevel file"}, + "top": File{Data: "toplevel file"}, "x": Dir{ Nodes: map[string]Node{ - "file1": File{"file1"}, + "file1": File{Data: "file1"}, "..": Dir{ Nodes: map[string]Node{ - "file2": File{"file2"}, + "file2": File{Data: "file2"}, "..": Dir{ Nodes: map[string]Node{ - "file2": File{"file2"}, + "file2": File{Data: "file2"}, }, }, }, @@ -404,10 +417,10 @@ func TestRestorerRelative(t *testing.T) { { Snapshot: Snapshot{ Nodes: map[string]Node{ - "foo": File{"content: foo\n"}, + "foo": File{Data: "content: foo\n"}, "dirtest": Dir{ Nodes: map[string]Node{ - "file": File{"content: file\n"}, + "file": File{Data: "content: file\n"}, }, }, }, @@ -526,12 +539,12 @@ func TestRestorerTraverseTree(t *testing.T) { Snapshot: Snapshot{ Nodes: map[string]Node{ "dir": Dir{Nodes: map[string]Node{ - "otherfile": File{"x"}, + "otherfile": File{Data: "x"}, "subdir": Dir{Nodes: map[string]Node{ - "file": File{"content: file\n"}, + "file": File{Data: "content: file\n"}, }}, }}, - "foo": File{"content: foo\n"}, + "foo": File{Data: "content: foo\n"}, }, }, Select: func(item string, dstpath string, node *restic.Node) (selectForRestore bool, childMayBeSelected bool) { @@ -553,12 +566,12 @@ func TestRestorerTraverseTree(t *testing.T) { Snapshot: Snapshot{ Nodes: map[string]Node{ "dir": Dir{Nodes: map[string]Node{ - "otherfile": File{"x"}, + "otherfile": File{Data: "x"}, "subdir": Dir{Nodes: map[string]Node{ - "file": File{"content: file\n"}, + "file": File{Data: "content: file\n"}, }}, }}, - "foo": File{"content: foo\n"}, + "foo": File{Data: "content: foo\n"}, }, }, Select: func(item string, dstpath string, node *restic.Node) (selectForRestore bool, childMayBeSelected bool) { @@ -574,11 +587,11 @@ func TestRestorerTraverseTree(t *testing.T) { { Snapshot: Snapshot{ Nodes: map[string]Node{ - "aaa": File{"content: foo\n"}, + "aaa": File{Data: "content: foo\n"}, "dir": Dir{Nodes: map[string]Node{ - "otherfile": File{"x"}, + "otherfile": File{Data: "x"}, "subdir": Dir{Nodes: map[string]Node{ - "file": File{"content: file\n"}, + "file": File{Data: "content: file\n"}, }}, }}, }, @@ -599,12 +612,12 @@ func TestRestorerTraverseTree(t *testing.T) { Snapshot: Snapshot{ Nodes: map[string]Node{ "dir": Dir{Nodes: map[string]Node{ - "otherfile": File{"x"}, + "otherfile": File{Data: "x"}, "subdir": Dir{Nodes: map[string]Node{ - "file": File{"content: file\n"}, + "file": File{Data: "content: file\n"}, }}, }}, - "foo": File{"content: foo\n"}, + "foo": File{Data: "content: foo\n"}, }, }, Select: func(item string, dstpath string, node *restic.Node) (selectForRestore bool, childMayBeSelected bool) { @@ -628,12 +641,12 @@ func TestRestorerTraverseTree(t *testing.T) { Snapshot: Snapshot{ Nodes: map[string]Node{ "dir": Dir{Nodes: map[string]Node{ - "otherfile": File{"x"}, + "otherfile": File{Data: "x"}, "subdir": Dir{Nodes: map[string]Node{ - "file": File{"content: file\n"}, + "file": File{Data: "content: file\n"}, }}, }}, - "foo": File{"content: foo\n"}, + "foo": File{Data: "content: foo\n"}, }, }, Select: func(item string, dstpath string, node *restic.Node) (selectForRestore bool, childMayBeSelected bool) { diff --git a/internal/restorer/restorer_unix_test.go b/internal/restorer/restorer_unix_test.go new file mode 100644 index 000000000..fc80015c1 --- /dev/null +++ b/internal/restorer/restorer_unix_test.go @@ -0,0 +1,61 @@ +//+build !windows + +package restorer + +import ( + "context" + "os" + "path/filepath" + "syscall" + "testing" + + "github.com/restic/restic/internal/repository" + "github.com/restic/restic/internal/restic" + rtest "github.com/restic/restic/internal/test" +) + +func TestRestorerRestoreEmptyHardlinkedFileds(t *testing.T) { + repo, cleanup := repository.TestRepository(t) + defer cleanup() + + _, id := saveSnapshot(t, repo, Snapshot{ + Nodes: map[string]Node{ + "dirtest": Dir{ + Nodes: map[string]Node{ + "file1": File{Links: 2, Inode: 1}, + "file2": File{Links: 2, Inode: 1}, + }, + }, + }, + }) + + res, err := NewRestorer(repo, id) + rtest.OK(t, err) + + res.SelectFilter = func(item string, dstpath string, node *restic.Node) (selectedForRestore bool, childMayBeSelected bool) { + return true, true + } + + tempdir, cleanup := rtest.TempDir(t) + defer cleanup() + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + err = res.RestoreTo(ctx, tempdir) + rtest.OK(t, err) + + f1, err := os.Stat(filepath.Join(tempdir, "dirtest/file1")) + rtest.OK(t, err) + rtest.Equals(t, int64(0), f1.Size()) + s1, ok1 := f1.Sys().(*syscall.Stat_t) + + f2, err := os.Stat(filepath.Join(tempdir, "dirtest/file2")) + rtest.OK(t, err) + rtest.Equals(t, int64(0), f2.Size()) + s2, ok2 := f2.Sys().(*syscall.Stat_t) + + if ok1 && ok2 { + rtest.Equals(t, s1.Ino, s2.Ino) + } +} From 21a3486ebbf37d56379a10b01464f5549060f474 Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Sun, 14 Oct 2018 17:38:12 +0200 Subject: [PATCH 13/13] Rework changelog entry --- changelog/unreleased/issue-1605 | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/changelog/unreleased/issue-1605 b/changelog/unreleased/issue-1605 index 2d8597610..bc78a6d98 100644 --- a/changelog/unreleased/issue-1605 +++ b/changelog/unreleased/issue-1605 @@ -1,11 +1,11 @@ Enhancement: Concurrent restore This change significantly improves restore performance, especially -when using high-latency remote repositories like B2. +when using high-latency remote repositories like B2. -Implementation uses threads to download and process miltiple remote -files concurrently. To further reduce restore time, each remote -file is downloaded using single repository request. +The implementation now uses several concurrent threads to download and process +multiple remote files concurrently. To further reduce restore time, each remote +file is downloaded using a single repository request. https://github.com/restic/restic/issues/1605 https://github.com/restic/restic/pull/1719