repository: store oversized blobs in separate pack files

Store each oversized blob in a separate pack file, as such a blob is
large enough to warrant its own pack file. This simplifies the garbage
collection of such blobs and keeps the cache smaller, as oversized (tree)
blobs only have to be downloaded if they are actually used.
This commit is contained in:
Michael Eischer 2023-09-28 20:58:45 +02:00
parent c635e30e3f
commit cb9cbe55d9
2 changed files with 30 additions and 5 deletions

View file

@ -70,14 +70,19 @@ func (r *packerManager) SaveBlob(ctx context.Context, t restic.BlobType, id rest
var err error
packer := r.packer
if r.packer == nil {
// use separate packer if compressed length is larger than the packsize
// this speeds up the garbage collection of oversized blobs and reduces the cache size
// as the oversize blobs are only downloaded if necessary
if len(ciphertext) >= int(r.packSize) || r.packer == nil {
packer, err = r.newPacker()
if err != nil {
return 0, err
}
// don't store packer for oversized blob
if r.packer == nil {
r.packer = packer
}
}
// remember packer
r.packer = packer
// save ciphertext
// Add only appends bytes in memory to avoid being a scaling bottleneck
@ -91,8 +96,10 @@ func (r *packerManager) SaveBlob(ctx context.Context, t restic.BlobType, id rest
debug.Log("pack is not full enough (%d bytes)", packer.Size())
return size, nil
}
// forget full packer
r.packer = nil
if packer == r.packer {
// forget full packer
r.packer = nil
}
// call while holding lock to prevent findPacker from creating new packers if the uploaders are busy
// else write the pack to the backend

View file

@ -89,6 +89,24 @@ func testPackerManager(t testing.TB) int64 {
return int64(bytes)
}
func TestPackerManagerWithOversizeBlob(t *testing.T) {
packFiles := int(0)
sizeLimit := uint(512 * 1024)
pm := newPackerManager(crypto.NewRandomKey(), restic.DataBlob, sizeLimit, func(ctx context.Context, tp restic.BlobType, p *Packer) error {
packFiles++
return nil
})
for _, i := range []uint{sizeLimit / 2, sizeLimit, sizeLimit / 3} {
_, err := pm.SaveBlob(context.TODO(), restic.DataBlob, restic.ID{}, make([]byte, i), 0)
test.OK(t, err)
}
test.OK(t, pm.Flush(context.TODO()))
// oversized blob must be stored in a separate packfile
test.Equals(t, packFiles, 2)
}
func BenchmarkPackerManager(t *testing.B) {
// Run testPackerManager if it hasn't run already, to set totalSize.
once.Do(func() {