repository: store oversized blobs in separate pack files

Store oversized blobs in separate pack files, as each such blob is large
enough to warrant its own pack file. This simplifies the garbage
collection of such blobs and keeps the cache smaller, as oversized (tree)
blobs only have to be downloaded if they are actually used.
This commit is contained in:
Michael Eischer 2023-09-28 20:58:45 +02:00
parent c635e30e3f
commit cb9cbe55d9
2 changed files with 30 additions and 5 deletions

View file

@ -70,14 +70,19 @@ func (r *packerManager) SaveBlob(ctx context.Context, t restic.BlobType, id rest
var err error var err error
packer := r.packer packer := r.packer
if r.packer == nil { // use separate packer if compressed length is larger than the packsize
// this speeds up the garbage collection of oversized blobs and reduces the cache size
// as the oversize blobs are only downloaded if necessary
if len(ciphertext) >= int(r.packSize) || r.packer == nil {
packer, err = r.newPacker() packer, err = r.newPacker()
if err != nil { if err != nil {
return 0, err return 0, err
} }
} // don't store packer for oversized blob
// remember packer if r.packer == nil {
r.packer = packer r.packer = packer
}
}
// save ciphertext // save ciphertext
// Add only appends bytes in memory to avoid being a scaling bottleneck // Add only appends bytes in memory to avoid being a scaling bottleneck
@ -91,8 +96,10 @@ func (r *packerManager) SaveBlob(ctx context.Context, t restic.BlobType, id rest
debug.Log("pack is not full enough (%d bytes)", packer.Size()) debug.Log("pack is not full enough (%d bytes)", packer.Size())
return size, nil return size, nil
} }
if packer == r.packer {
// forget full packer // forget full packer
r.packer = nil r.packer = nil
}
// call while holding lock to prevent findPacker from creating new packers if the uploaders are busy // call while holding lock to prevent findPacker from creating new packers if the uploaders are busy
// else write the pack to the backend // else write the pack to the backend

View file

@ -89,6 +89,24 @@ func testPackerManager(t testing.TB) int64 {
return int64(bytes) return int64(bytes)
} }
func TestPackerManagerWithOversizeBlob(t *testing.T) {
packFiles := int(0)
sizeLimit := uint(512 * 1024)
pm := newPackerManager(crypto.NewRandomKey(), restic.DataBlob, sizeLimit, func(ctx context.Context, tp restic.BlobType, p *Packer) error {
packFiles++
return nil
})
for _, i := range []uint{sizeLimit / 2, sizeLimit, sizeLimit / 3} {
_, err := pm.SaveBlob(context.TODO(), restic.DataBlob, restic.ID{}, make([]byte, i), 0)
test.OK(t, err)
}
test.OK(t, pm.Flush(context.TODO()))
// oversized blob must be stored in a separate packfile
test.Equals(t, packFiles, 2)
}
func BenchmarkPackerManager(t *testing.B) { func BenchmarkPackerManager(t *testing.B) {
// Run testPackerManager if it hasn't run already, to set totalSize. // Run testPackerManager if it hasn't run already, to set totalSize.
once.Do(func() { once.Do(func() {