repository: Add StreamPacks function
The function supports efficiently loading a specified list of blobs from a single pack in a streaming fashion. That is there's no need for temporary files independent of the pack size.
This commit is contained in:
parent
153e2ba859
commit
c4a2bfcb39
1 changed files with 85 additions and 0 deletions
|
@ -1,12 +1,14 @@
|
||||||
package repository
|
package repository
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bufio"
|
||||||
"bytes"
|
"bytes"
|
||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"os"
|
"os"
|
||||||
|
"sort"
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
"github.com/restic/chunker"
|
"github.com/restic/chunker"
|
||||||
|
@ -25,6 +27,8 @@ import (
|
||||||
"golang.org/x/sync/errgroup"
|
"golang.org/x/sync/errgroup"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
const maxStreamBufferSize = 4 * 1024 * 1024
|
||||||
|
|
||||||
// Repository is used to access a repository in a backend.
|
// Repository is used to access a repository in a backend.
|
||||||
type Repository struct {
|
type Repository struct {
|
||||||
be restic.Backend
|
be restic.Backend
|
||||||
|
@ -782,3 +786,84 @@ func DownloadAndHash(ctx context.Context, be Loader, h restic.Handle) (tmpfile *
|
||||||
|
|
||||||
return tmpfile, hash, size, err
|
return tmpfile, hash, size, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type BackendLoadFn func(ctx context.Context, h restic.Handle, length int, offset int64, fn func(rd io.Reader) error) error
|
||||||
|
|
||||||
|
func StreamPack(ctx context.Context, beLoad BackendLoadFn, key *crypto.Key, packID restic.ID, blobs []restic.Blob, handleBlobFn func(blob restic.BlobHandle, buf []byte, err error) error) error {
|
||||||
|
if len(blobs) == 0 {
|
||||||
|
// nothing to do
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
sort.Slice(blobs, func(i, j int) bool {
|
||||||
|
return blobs[i].Offset < blobs[j].Offset
|
||||||
|
})
|
||||||
|
h := restic.Handle{Type: restic.PackFile, Name: packID.String()}
|
||||||
|
|
||||||
|
dataStart := blobs[0].Offset
|
||||||
|
dataEnd := blobs[len(blobs)-1].Offset + blobs[len(blobs)-1].Length
|
||||||
|
|
||||||
|
debug.Log("streaming pack %v (%d to %d bytes), blobs: %v", packID, dataStart, dataEnd, len(blobs))
|
||||||
|
|
||||||
|
// stream blobs in pack
|
||||||
|
err := beLoad(ctx, h, int(dataEnd-dataStart), int64(dataStart), func(rd io.Reader) error {
|
||||||
|
bufferSize := int(dataEnd - dataStart)
|
||||||
|
if bufferSize > maxStreamBufferSize {
|
||||||
|
bufferSize = maxStreamBufferSize
|
||||||
|
}
|
||||||
|
bufRd := bufio.NewReaderSize(rd, bufferSize)
|
||||||
|
currentBlobEnd := dataStart
|
||||||
|
var buf []byte
|
||||||
|
for _, entry := range blobs {
|
||||||
|
skipBytes := int(entry.Offset - currentBlobEnd)
|
||||||
|
if skipBytes < 0 {
|
||||||
|
return errors.Errorf("overlapping blobs in pack %v", packID)
|
||||||
|
}
|
||||||
|
|
||||||
|
_, err := bufRd.Discard(skipBytes)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
h := restic.BlobHandle{ID: entry.ID, Type: entry.Type}
|
||||||
|
debug.Log(" process blob %v, skipped %d, %v", h, skipBytes, entry)
|
||||||
|
|
||||||
|
if uint(cap(buf)) < entry.Length {
|
||||||
|
buf = make([]byte, entry.Length)
|
||||||
|
}
|
||||||
|
buf = buf[:entry.Length]
|
||||||
|
|
||||||
|
n, err := io.ReadFull(bufRd, buf)
|
||||||
|
if err != nil {
|
||||||
|
debug.Log(" read error %v", err)
|
||||||
|
return errors.Wrap(err, "ReadFull")
|
||||||
|
}
|
||||||
|
|
||||||
|
if n != len(buf) {
|
||||||
|
return errors.Errorf("read blob %v from %v: not enough bytes read, want %v, got %v",
|
||||||
|
h, packID.Str(), len(buf), n)
|
||||||
|
}
|
||||||
|
currentBlobEnd = entry.Offset + entry.Length
|
||||||
|
|
||||||
|
// decryption errors are likely permanent, give the caller a chance to skip them
|
||||||
|
nonce, ciphertext := buf[:key.NonceSize()], buf[key.NonceSize():]
|
||||||
|
plaintext, err := key.Open(ciphertext[:0], nonce, ciphertext, nil)
|
||||||
|
if err == nil {
|
||||||
|
id := restic.Hash(plaintext)
|
||||||
|
if !id.Equal(entry.ID) {
|
||||||
|
debug.Log("read blob %v/%v from %v: wrong data returned, hash is %v",
|
||||||
|
h.Type, h.ID, packID.Str(), id)
|
||||||
|
err = errors.Errorf("read blob %v from %v: wrong data returned, hash is %v",
|
||||||
|
h, packID.Str(), id)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
err = handleBlobFn(entry.BlobHandle, plaintext, err)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
return errors.Wrap(err, "StreamPack")
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in a new issue