restore: separately restore blobs that are frequently referenced
Writing these blobs to their files can take a long time and consequently cause the backend connection to time out. Avoid that by retrieving these blobs separately.
This commit is contained in:
parent
2267910418
commit
e78be75d1e
2 changed files with 47 additions and 1 deletions
internal/restorer
|
@ -242,8 +242,33 @@ func (r *fileRestorer) downloadPack(ctx context.Context, pack *packInfo) error {
|
|||
|
||||
// track already processed blobs for precise error reporting
|
||||
processedBlobs := restic.NewBlobSet()
|
||||
err := r.downloadBlobs(ctx, pack.id, blobs, processedBlobs)
|
||||
for _, entry := range blobs {
|
||||
occurrences := 0
|
||||
for _, offsets := range entry.files {
|
||||
occurrences += len(offsets)
|
||||
}
|
||||
// With a maximum blob size of 8MB, the normal blob streaming has to write
|
||||
// at most 800MB for a single blob. This should be short enough to avoid
|
||||
// network connection timeouts. Based on a quick test, a limit of 100 only
|
||||
// selects a very small number of blobs (the number of references per blob
|
||||
// - aka. `count` - seems to follow an exponential distribution)
|
||||
if occurrences > 100 {
|
||||
// process frequently referenced blobs first as these can take a long time to write
|
||||
// which can cause backend connections to time out
|
||||
delete(blobs, entry.blob.ID)
|
||||
partialBlobs := blobToFileOffsetsMapping{entry.blob.ID: entry}
|
||||
err := r.downloadBlobs(ctx, pack.id, partialBlobs, processedBlobs)
|
||||
if err := r.reportError(blobs, processedBlobs, err); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if len(blobs) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
err := r.downloadBlobs(ctx, pack.id, blobs, processedBlobs)
|
||||
return r.reportError(blobs, processedBlobs, err)
|
||||
}
|
||||
|
||||
|
|
|
@ -248,6 +248,27 @@ func TestFileRestorerPackSkip(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
// TestFileRestorerFrequentBlob restores a single file whose blob list
// references the same blob ("a") 10000 times, framed by one leading and one
// trailing blob, and verifies the result via restoreAndVerify. The scenario is
// run twice: once with sparse=false and once with sparse=true.
//
// NOTE(review): this presumably exercises the "frequently referenced blob"
// path in downloadPack (blobs with more than 100 occurrences are downloaded
// separately) — confirm that repeated entries in one file map to multiple
// offsets of the same blob.
func TestFileRestorerFrequentBlob(t *testing.T) {
|
||||
tempdir := rtest.TempDir(t)
|
||||
|
||||
for _, sparse := range []bool{false, true} {
|
||||
// first blob of the file; the second field is presumably the pack name — TODO confirm
blobs := []TestBlob{
|
||||
{"data1-1", "pack1-1"},
|
||||
}
|
||||
// append the same blob 10000 times so that it is referenced far more often than any other
for i := 0; i < 10000; i++ {
|
||||
blobs = append(blobs, TestBlob{"a", "pack1-1"})
|
||||
}
|
||||
// distinct trailing blob after the long run of repeated blobs
blobs = append(blobs, TestBlob{"end", "pack1-1"})
|
||||
|
||||
restoreAndVerify(t, tempdir, []TestFile{
|
||||
{
|
||||
name: "file1",
|
||||
blobs: blobs,
|
||||
},
|
||||
}, nil, sparse)
|
||||
}
|
||||
}
|
||||
|
||||
func TestErrorRestoreFiles(t *testing.T) {
|
||||
tempdir := rtest.TempDir(t)
|
||||
content := []TestFile{
|
||||
|
|
Loading…
Reference in a new issue