From 5d6ce59a8d34debbfd552cd01d7ecf96fc31443f Mon Sep 17 00:00:00 2001
From: Michael Eischer
Date: Sat, 20 Jul 2024 12:23:12 +0200
Subject: [PATCH 1/4] restorer: also truncate files if their content is already up to date

Files for which no blobs have to be restored still have to be truncated
to the correct size. Take a file with content "foobar" that a restore
should replace with content "foo": the first three bytes are already up
to date, so no data has to be written at all. As file truncation
normally happens while writing data, a special case is necessary. This
"no blobs written" special case is unified with the existing empty-file
special case.
---
 internal/restorer/filerestorer.go | 49 +++++++++++++++----------------
 1 file changed, 24 insertions(+), 25 deletions(-)

diff --git a/internal/restorer/filerestorer.go b/internal/restorer/filerestorer.go
index 648633361..1ea82cabb 100644
--- a/internal/restorer/filerestorer.go
+++ b/internal/restorer/filerestorer.go
@@ -120,26 +120,21 @@ func (r *fileRestorer) restoreFiles(ctx context.Context) error {
 	// create packInfo from fileInfo
 	for _, file := range r.files {
 		fileBlobs := file.blobs.(restic.IDs)
-		if len(fileBlobs) == 0 {
-			err := r.restoreEmptyFileAt(file.location)
-			if errFile := r.sanitizeError(file, err); errFile != nil {
-				return errFile
-			}
-		}
-
 		largeFile := len(fileBlobs) > largeFileBlobCount
 		var packsMap map[restic.ID][]fileBlobInfo
 		if largeFile {
 			packsMap = make(map[restic.ID][]fileBlobInfo)
 		}
 		fileOffset := int64(0)
+		restoredBlobs := false
 		err := r.forEachBlob(fileBlobs, func(packID restic.ID, blob restic.Blob, idx int) {
-			if largeFile {
-				if !file.state.HasMatchingBlob(idx) {
+			if !file.state.HasMatchingBlob(idx) {
+				if largeFile {
 					packsMap[packID] = append(packsMap[packID], fileBlobInfo{id: blob.ID, offset: fileOffset})
-				} else {
-					r.reportBlobProgress(file, uint64(blob.DataLength()))
 				}
+				restoredBlobs = true
+			} else {
+				r.reportBlobProgress(file, uint64(blob.DataLength()))
 			}
 			fileOffset += int64(blob.DataLength())
 			pack, ok := packs[packID]
 			if !ok {
 				pack = &packInfo{
@@ -175,6 +170,19 @@ func (r *fileRestorer) restoreFiles(ctx context.Context) error {
 		if largeFile {
 			file.blobs = packsMap
 		}
+
+		// empty file or one with already uptodate content. Make sure that the file size is correct
+		if !restoredBlobs {
+			err := r.truncateFileToSize(file.location, file.size)
+			if errFile := r.sanitizeError(file, err); errFile != nil {
+				return errFile
+			}
+
+			// the progress events were already sent for non-zero size files
+			if file.size == 0 {
+				r.reportBlobProgress(file, 0)
+			}
+		}
 	}
 	// drop no longer necessary file list
 	r.files = nil
@@ -214,17 +222,12 @@ func (r *fileRestorer) restoreFiles(ctx context.Context) error {
 	return wg.Wait()
 }
 
-func (r *fileRestorer) restoreEmptyFileAt(location string) error {
-	f, err := createFile(r.targetPath(location), 0, false, r.allowRecursiveDelete)
+func (r *fileRestorer) truncateFileToSize(location string, size int64) error {
+	f, err := createFile(r.targetPath(location), size, false, r.allowRecursiveDelete)
 	if err != nil {
 		return err
 	}
-	if err = f.Close(); err != nil {
-		return err
-	}
-
-	r.progress.AddProgress(location, restore.ActionFileRestored, 0, 0)
-	return nil
+	return f.Close()
 }
 
 type blobToFileOffsetsMapping map[restic.ID]struct {
@@ -248,12 +251,8 @@ func (r *fileRestorer) downloadPack(ctx context.Context, pack *packInfo) error {
 		if fileBlobs, ok := file.blobs.(restic.IDs); ok {
 			fileOffset := int64(0)
 			err := r.forEachBlob(fileBlobs, func(packID restic.ID, blob restic.Blob, idx int) {
-				if packID.Equal(pack.id) {
-					if !file.state.HasMatchingBlob(idx) {
-						addBlob(blob, fileOffset)
-					} else {
-						r.reportBlobProgress(file, uint64(blob.DataLength()))
-					}
+				if packID.Equal(pack.id) && !file.state.HasMatchingBlob(idx) {
+					addBlob(blob, fileOffset)
 				}
 				fileOffset += int64(blob.DataLength())
 			})

From 6f8e17a46351167f2bbdedba7f82b9147db07fd9 Mon Sep 17 00:00:00 2001
From: Michael Eischer
Date: Sat, 20 Jul 2024 12:32:08 +0200
Subject: [PATCH 2/4] restorer: minor code cleanups

---
 internal/restorer/filerestorer.go | 29 +++++++++++++----------------
 1 file changed, 13 insertions(+), 16 deletions(-)

diff --git a/internal/restorer/filerestorer.go b/internal/restorer/filerestorer.go
index 1ea82cabb..6d530b25c 100644
--- a/internal/restorer/filerestorer.go
+++ b/internal/restorer/filerestorer.go
@@ -93,17 +93,20 @@ func (r *fileRestorer) targetPath(location string) string {
 	return filepath.Join(r.dst, location)
 }
 
-func (r *fileRestorer) forEachBlob(blobIDs []restic.ID, fn func(packID restic.ID, packBlob restic.Blob, idx int)) error {
+func (r *fileRestorer) forEachBlob(blobIDs []restic.ID, fn func(packID restic.ID, packBlob restic.Blob, idx int, fileOffset int64)) error {
 	if len(blobIDs) == 0 {
 		return nil
 	}
 
+	fileOffset := int64(0)
 	for i, blobID := range blobIDs {
 		packs := r.idx(restic.DataBlob, blobID)
 		if len(packs) == 0 {
 			return errors.Errorf("Unknown blob %s", blobID.String())
 		}
-		fn(packs[0].PackID, packs[0].Blob, i)
+		pb := packs[0]
+		fn(pb.PackID, pb.Blob, i, fileOffset)
+		fileOffset += int64(pb.DataLength())
 	}
 
 	return nil
@@ -124,10 +127,10 @@ func (r *fileRestorer) restoreFiles(ctx context.Context) error {
 		var packsMap map[restic.ID][]fileBlobInfo
 		if largeFile {
 			packsMap = make(map[restic.ID][]fileBlobInfo)
+			file.blobs = packsMap
 		}
-		fileOffset := int64(0)
 		restoredBlobs := false
-		err := r.forEachBlob(fileBlobs, func(packID restic.ID, blob restic.Blob, idx int) {
+		err := r.forEachBlob(fileBlobs, func(packID restic.ID, blob restic.Blob, idx int, fileOffset int64) {
 			if !file.state.HasMatchingBlob(idx) {
 				if largeFile {
 					packsMap[packID] = append(packsMap[packID], fileBlobInfo{id: blob.ID, offset: fileOffset})
@@ -136,7 +139,6 @@ func (r *fileRestorer) restoreFiles(ctx context.Context) error {
 			} else {
 				r.reportBlobProgress(file, uint64(blob.DataLength()))
 			}
-			fileOffset += int64(blob.DataLength())
 			pack, ok := packs[packID]
 			if !ok {
 				pack = &packInfo{
@@ -151,6 +153,11 @@ func (r *fileRestorer) restoreFiles(ctx context.Context) error {
 				file.sparse = r.sparse
 			}
 		})
+		if err != nil {
+			// repository index is messed up, can't do anything
+			return err
+		}
+
 		if len(fileBlobs) == 1 {
 			// no need to preallocate files with a single block, thus we can always consider them to be sparse
 			// in addition, a short chunk will never match r.zeroChunk which would prevent sparseness for short files
@@ -163,14 +170,6 @@ func (r *fileRestorer) restoreFiles(ctx context.Context) error {
 			file.sparse = false
 		}
 
-		if err != nil {
-			// repository index is messed up, can't do anything
-			return err
-		}
-		if largeFile {
-			file.blobs = packsMap
-		}
-
 		// empty file or one with already uptodate content. Make sure that the file size is correct
 		if !restoredBlobs {
 			err := r.truncateFileToSize(file.location, file.size)
@@ -249,12 +248,10 @@ func (r *fileRestorer) downloadPack(ctx context.Context, pack *packInfo) error {
 			blobInfo.files[file] = append(blobInfo.files[file], fileOffset)
 		}
 		if fileBlobs, ok := file.blobs.(restic.IDs); ok {
-			fileOffset := int64(0)
-			err := r.forEachBlob(fileBlobs, func(packID restic.ID, blob restic.Blob, idx int) {
+			err := r.forEachBlob(fileBlobs, func(packID restic.ID, blob restic.Blob, idx int, fileOffset int64) {
 				if packID.Equal(pack.id) && !file.state.HasMatchingBlob(idx) {
 					addBlob(blob, fileOffset)
 				}
-				fileOffset += int64(blob.DataLength())
 			})
 			if err != nil {
 				// restoreFiles should have caught this error before

From 10efa771038c451fd1752edf633589ea60c14e56 Mon Sep 17 00:00:00 2001
From: Michael Eischer
Date: Sat, 20 Jul 2024 12:32:27 +0200
Subject: [PATCH 3/4] restorer: add test for file truncation case

---
 internal/restorer/restorer_test.go | 30 ++++++++++++++++++------------
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/internal/restorer/restorer_test.go b/internal/restorer/restorer_test.go
index f8f6f92c0..9c02afe68 100644
--- a/internal/restorer/restorer_test.go
+++ b/internal/restorer/restorer_test.go
@@ -1003,6 +1003,7 @@ func TestRestorerOverwriteBehavior(t *testing.T) {
 		"dirtest": Dir{
 			Nodes: map[string]Node{
 				"file": File{Data: "content: file\n", ModTime: baseTime},
+				"foo":  File{Data: "content: foobar", ModTime: baseTime},
 			},
 			ModTime: baseTime,
 		},
@@ -1014,6 +1015,7 @@ func TestRestorerOverwriteBehavior(t *testing.T) {
 		"dirtest": Dir{
 			Nodes: map[string]Node{
 				"file": File{Data: "content: file2\n", ModTime: baseTime.Add(-time.Second)},
+				"foo":  File{Data: "content: foo", ModTime: baseTime},
 			},
 		},
 	},
@@ -1029,13 +1031,14 @@ func TestRestorerOverwriteBehavior(t *testing.T) {
 			Files: map[string]string{
 				"foo":          "content: new\n",
 				"dirtest/file": "content: file2\n",
+				"dirtest/foo":  "content: foo",
 			},
 			Progress: restoreui.State{
-				FilesFinished:   3,
-				FilesTotal:      3,
+				FilesFinished:   4,
+				FilesTotal:      4,
 				FilesSkipped:    0,
-				AllBytesWritten: 28,
-				AllBytesTotal:   28,
+				AllBytesWritten: 40,
+				AllBytesTotal:   40,
 				AllBytesSkipped: 0,
 			},
 		},
@@ -1044,13 +1047,14 @@ func TestRestorerOverwriteBehavior(t *testing.T) {
 			Files: map[string]string{
 				"foo":          "content: new\n",
 				"dirtest/file": "content: file2\n",
+				"dirtest/foo":  "content: foo",
 			},
 			Progress: restoreui.State{
-				FilesFinished:   3,
-				FilesTotal:      3,
+				FilesFinished:   4,
+				FilesTotal:      4,
 				FilesSkipped:    0,
-				AllBytesWritten: 28,
-				AllBytesTotal:   28,
+				AllBytesWritten: 40,
+				AllBytesTotal:   40,
 				AllBytesSkipped: 0,
 			},
 		},
@@ -1059,14 +1063,15 @@ func TestRestorerOverwriteBehavior(t *testing.T) {
 			Files: map[string]string{
 				"foo":          "content: new\n",
 				"dirtest/file": "content: file\n",
+				"dirtest/foo":  "content: foobar",
 			},
 			Progress: restoreui.State{
 				FilesFinished:   2,
 				FilesTotal:      2,
-				FilesSkipped:    1,
+				FilesSkipped:    2,
 				AllBytesWritten: 13,
 				AllBytesTotal:   13,
-				AllBytesSkipped: 15,
+				AllBytesSkipped: 27,
 			},
 		},
 		{
@@ -1074,14 +1079,15 @@ func TestRestorerOverwriteBehavior(t *testing.T) {
 			Files: map[string]string{
 				"foo":          "content: foo\n",
 				"dirtest/file": "content: file\n",
+				"dirtest/foo":  "content: foobar",
 			},
 			Progress: restoreui.State{
 				FilesFinished:   1,
 				FilesTotal:      1,
-				FilesSkipped:    2,
+				FilesSkipped:    3,
 				AllBytesWritten: 0,
 				AllBytesTotal:   0,
-				AllBytesSkipped: 28,
+				AllBytesSkipped: 40,
 			},
 		},
 	}

From 0dcac90bea39ad8f763922e82d2716290dbbcb97 Mon Sep 17 00:00:00 2001
From: Michael Eischer
Date: Sun, 21 Jul 2024 12:03:28 +0200
Subject: [PATCH 4/4] restorer: don't track already up-to-date blobs

---
 internal/restorer/filerestorer.go | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/internal/restorer/filerestorer.go b/internal/restorer/filerestorer.go
index 6d530b25c..e517e6284 100644
--- a/internal/restorer/filerestorer.go
+++ b/internal/restorer/filerestorer.go
@@ -138,6 +138,8 @@ func (r *fileRestorer) restoreFiles(ctx context.Context) error {
 				restoredBlobs = true
 			} else {
 				r.reportBlobProgress(file, uint64(blob.DataLength()))
+				// completely ignore blob
+				return
 			}
 			pack, ok := packs[packID]
 			if !ok {
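
Illustrative sketch (not part of the patch series): the standalone Go program
below reproduces the failure mode that PATCH 1/4 addresses. When the on-disk
prefix of a file already matches the snapshot content, the restorer writes no
blobs at all, so without an explicit truncation the stale tail of the old file
would survive. The path and contents are made up for the demonstration and only
the Go standard library is used; in the patches themselves the truncation goes
through truncateFileToSize/createFile rather than a bare os.Truncate.

// truncate_demo.go - illustration only
package main

import (
	"fmt"
	"log"
	"os"
	"path/filepath"
)

func main() {
	dir, err := os.MkdirTemp("", "restore-truncate-demo")
	if err != nil {
		log.Fatal(err)
	}
	defer os.RemoveAll(dir)

	path := filepath.Join(dir, "file")

	// On-disk state left behind by an earlier restore: "foobar".
	if err := os.WriteFile(path, []byte("foobar"), 0o600); err != nil {
		log.Fatal(err)
	}

	// The snapshot now records "foo". Its blob matches the first three bytes
	// on disk, so a restore writes no data for this file at all.
	data, _ := os.ReadFile(path)
	fmt.Printf("before truncation: %q\n", data) // "foobar" - stale tail still present

	// The step PATCH 1/4 adds for the "no blobs written" case: force the file
	// to the size recorded in the snapshot.
	if err := os.Truncate(path, int64(len("foo"))); err != nil {
		log.Fatal(err)
	}

	data, _ = os.ReadFile(path)
	fmt.Printf("after truncation:  %q\n", data) // "foo"
}

Running the sketch prints before truncation: "foobar" and after truncation: "foo".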