add changelog, benchmark, memory calculation
This commit is contained in:
parent
dd7b4f54f5
commit
7419844885
3 changed files with 42 additions and 8 deletions
6
changelog/unreleased/pull-2781
Normal file
6
changelog/unreleased/pull-2781
Normal file
|
@ -0,0 +1,6 @@
|
||||||
|
Enhancement: Reduce memory consumption of in-memory index
|
||||||
|
|
||||||
|
We've improved how the index is stored in memory.
|
||||||
|
This change reduces memory usage for large repositories by about 30-40%.
|
||||||
|
|
||||||
|
https://github.com/restic/restic/pull/2781
|
|
@ -13,6 +13,28 @@ import (
|
||||||
"github.com/restic/restic/internal/debug"
|
"github.com/restic/restic/internal/debug"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// In large repositories, millions of blobs are stored in the repository
|
||||||
|
// and restic needs to store an index entry for each blob in memory for
|
||||||
|
// most operations.
|
||||||
|
// Hence the index data structure defined here is one of the main contributors
|
||||||
|
// to the total memory requirements of restic.
|
||||||
|
//
|
||||||
|
// We use a map to store each index entry.
|
||||||
|
// The key of the map is a BlobHandle.
|
||||||
|
// The values of the map are slices which contain the actual index entries.
|
||||||
|
//
|
||||||
|
// To compute the needed amount of memory, we need some assumptions.
|
||||||
|
// Maps need an overhead of allocated but not needed elements.
|
||||||
|
// For computations, we assume an overhead of 50% and use OF=1.5 (overhead factor).
|
||||||
|
//
|
||||||
|
// We have the following sizes:
|
||||||
|
// key: 32 + 1 = 33 bytes
|
||||||
|
// slice: 24 bytes (pointer, len and cap)
|
||||||
|
// indexEntry: 32 + 8 + 8 = 48 bytes
|
||||||
|
//
|
||||||
|
// To save N index entries, we therefore need:
|
||||||
|
// N * OF * (33 + 24) bytes + N * 48 bytes = N * 133.5 bytes, i.e. roughly N * 134 bytes
|
||||||
|
|
||||||
// Index holds a lookup table for id -> pack.
|
// Index holds a lookup table for id -> pack.
|
||||||
type Index struct {
|
type Index struct {
|
||||||
m sync.Mutex
|
m sync.Mutex
|
||||||
|
|
|
@ -398,18 +398,16 @@ func createRandomIndex(rng *rand.Rand) (idx *repository.Index, lookupID restic.I
|
||||||
// create index with 200k pack files
|
// create index with 200k pack files
|
||||||
for i := 0; i < 200000; i++ {
|
for i := 0; i < 200000; i++ {
|
||||||
packID := NewRandomTestID(rng)
|
packID := NewRandomTestID(rng)
|
||||||
|
var blobs []restic.Blob
|
||||||
offset := 0
|
offset := 0
|
||||||
for offset < maxPackSize {
|
for offset < maxPackSize {
|
||||||
size := 2000 + rand.Intn(4*1024*1024)
|
size := 2000 + rand.Intn(4*1024*1024)
|
||||||
id := NewRandomTestID(rng)
|
id := NewRandomTestID(rng)
|
||||||
idx.Store(restic.PackedBlob{
|
blobs = append(blobs, restic.Blob{
|
||||||
PackID: packID,
|
|
||||||
Blob: restic.Blob{
|
|
||||||
Type: restic.DataBlob,
|
Type: restic.DataBlob,
|
||||||
ID: id,
|
ID: id,
|
||||||
Length: uint(size),
|
Length: uint(size),
|
||||||
Offset: uint(offset),
|
Offset: uint(offset),
|
||||||
},
|
|
||||||
})
|
})
|
||||||
|
|
||||||
offset += size
|
offset += size
|
||||||
|
@ -418,6 +416,7 @@ func createRandomIndex(rng *rand.Rand) (idx *repository.Index, lookupID restic.I
|
||||||
lookupID = id
|
lookupID = id
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
idx.StorePack(packID, blobs)
|
||||||
}
|
}
|
||||||
|
|
||||||
return idx, lookupID
|
return idx, lookupID
|
||||||
|
@ -444,6 +443,13 @@ func BenchmarkIndexHasKnown(b *testing.B) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func BenchmarkIndexAlloc(b *testing.B) {
|
||||||
|
b.ReportAllocs()
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
createRandomIndex(rand.New(rand.NewSource(0)))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestIndexHas(t *testing.T) {
|
func TestIndexHas(t *testing.T) {
|
||||||
type testEntry struct {
|
type testEntry struct {
|
||||||
id restic.ID
|
id restic.ID
|
||||||
|
|
Loading…
Reference in a new issue