forked from TrueCloudLab/restic
7bda28f31f
These are faster to construct but slower to access. The allocation rate is halved, the peak memory usage almost halved compared to standard map. Benchmark results on linux/amd64, -benchtime=3s -count=20: name old time/op new time/op delta PackerManager-8 178ms ± 0% 178ms ± 0% ~ (p=0.231 n=20+20) DecodeIndex-8 4.54s ± 0% 4.30s ± 0% -5.20% (p=0.000 n=18+17) DecodeIndexParallel-8 4.54s ± 0% 4.30s ± 0% -5.22% (p=0.000 n=19+18) IndexHasUnknown-8 44.4ns ± 5% 50.5ns ±11% +13.82% (p=0.000 n=19+17) IndexHasKnown-8 48.3ns ± 0% 51.5ns ±12% +6.68% (p=0.001 n=16+20) IndexAlloc-8 758ms ± 1% 616ms ± 1% -18.69% (p=0.000 n=19+19) IndexAllocParallel-8 234ms ± 3% 204ms ± 2% -12.60% (p=0.000 n=20+18) MasterIndexLookupSingleIndex-8 122ns ± 0% 145ns ± 9% +18.44% (p=0.000 n=14+20) MasterIndexLookupMultipleIndex-8 369ns ± 2% 429ns ± 8% +16.27% (p=0.000 n=20+20) MasterIndexLookupSingleIndexUnknown-8 68.4ns ± 5% 74.9ns ±13% +9.47% (p=0.000 n=20+20) MasterIndexLookupMultipleIndexUnknown-8 315ns ± 3% 369ns ±11% +17.14% (p=0.000 n=20+20) MasterIndexLookupParallel/known,indices=5-8 743ns ± 1% 816ns ± 2% +9.87% (p=0.000 n=17+17) MasterIndexLookupParallel/unknown,indices=5-8 238ns ± 1% 260ns ± 2% +9.14% (p=0.000 n=19+20) MasterIndexLookupParallel/known,indices=10-8 1.01µs ± 3% 1.11µs ± 2% +9.79% (p=0.000 n=19+20) MasterIndexLookupParallel/unknown,indices=10-8 222ns ± 0% 269ns ± 2% +20.83% (p=0.000 n=16+20) MasterIndexLookupParallel/known,indices=20-8 1.06µs ± 2% 1.19µs ± 2% +12.95% (p=0.000 n=19+18) MasterIndexLookupParallel/unknown,indices=20-8 413ns ± 1% 530ns ± 1% +28.19% (p=0.000 n=18+20) SaveAndEncrypt-8 30.2ms ± 1% 30.4ms ± 0% +0.71% (p=0.000 n=19+19) LoadTree-8 540µs ± 1% 576µs ± 1% +6.73% (p=0.000 n=20+20) LoadBlob-8 5.64ms ± 0% 5.64ms ± 0% ~ (p=0.883 n=18+17) LoadAndDecrypt-8 5.93ms ± 0% 5.95ms ± 1% ~ (p=0.247 n=20+19) LoadIndex-8 25.1ms ± 0% 24.5ms ± 1% -2.54% (p=0.000 n=18+17) name old speed new speed delta PackerManager-8 296MB/s ± 0% 296MB/s ± 0% ~ (p=0.229 n=20+20) SaveAndEncrypt-8 
139MB/s ± 1% 138MB/s ± 0% -0.71% (p=0.000 n=19+19) LoadBlob-8 177MB/s ± 0% 177MB/s ± 0% ~ (p=0.890 n=18+17) LoadAndDecrypt-8 169MB/s ± 0% 168MB/s ± 1% ~ (p=0.227 n=20+19) name old alloc/op new alloc/op delta PackerManager-8 91.8kB ± 0% 91.8kB ± 0% ~ (p=0.772 n=12+19) IndexAlloc-8 786MB ± 0% 400MB ± 0% -49.04% (p=0.000 n=20+18) IndexAllocParallel-8 786MB ± 0% 401MB ± 0% -49.04% (p=0.000 n=19+15) SaveAndEncrypt-8 21.0MB ± 0% 21.0MB ± 0% +0.00% (p=0.000 n=19+19) name old allocs/op new allocs/op delta PackerManager-8 1.41k ± 0% 1.41k ± 0% ~ (all equal) IndexAlloc-8 977k ± 0% 907k ± 0% -7.18% (p=0.000 n=20+20) IndexAllocParallel-8 977k ± 0% 907k ± 0% -7.17% (p=0.000 n=19+15) SaveAndEncrypt-8 73.0 ± 0% 73.0 ± 0% ~ (all equal)
168 lines
4 KiB
Go
168 lines
4 KiB
Go
package repository
|
|
|
|
import (
|
|
"crypto/rand"
|
|
"encoding/binary"
|
|
|
|
"github.com/restic/restic/internal/restic"
|
|
|
|
"github.com/dchest/siphash"
|
|
)
|
|
|
|
// An indexMap is a chained hash table that maps blob IDs to indexEntries.
|
|
// It allows storing multiple entries with the same key.
|
|
//
|
|
// IndexMap uses some optimizations that are not compatible with supporting
|
|
// deletions.
|
|
//
|
|
// The buckets in this hash table contain only pointers, rather than inlined
|
|
// key-value pairs like the standard Go map. This way, only a pointer array
|
|
// needs to be resized when the table grows, preventing memory usage spikes.
|
|
type indexMap struct {
|
|
// The number of buckets is always a power of two and never zero.
|
|
buckets []*indexEntry
|
|
numentries uint
|
|
|
|
key0, key1 uint64 // Key for hash randomization.
|
|
|
|
free *indexEntry // Free list.
|
|
}
|
|
|
|
// Tuning parameters of indexMap.
const (
	// growthFactor is the factor by which the number of buckets is
	// multiplied when the table grows. Must be a power of 2.
	growthFactor = 2

	// maxLoad is the max. number of entries per bucket, averaged over all
	// buckets; add grows the table once this load is reached.
	maxLoad = 4
)
|
|
|
|
// add inserts an indexEntry for the given arguments into the map,
|
|
// using id as the key.
|
|
func (m *indexMap) add(id restic.ID, packIdx int, offset, length uint32) {
|
|
switch {
|
|
case m.numentries == 0: // Lazy initialization.
|
|
m.init()
|
|
case m.numentries >= maxLoad*uint(len(m.buckets)):
|
|
m.grow()
|
|
}
|
|
|
|
h := m.hash(id)
|
|
e := m.newEntry()
|
|
e.id = id
|
|
e.next = m.buckets[h] // Prepend to existing chain.
|
|
e.packIndex = packIdx
|
|
e.offset = offset
|
|
e.length = length
|
|
|
|
m.buckets[h] = e
|
|
m.numentries++
|
|
}
|
|
|
|
// foreach calls fn for all entries in the map, until fn returns false.
|
|
func (m *indexMap) foreach(fn func(*indexEntry) bool) {
|
|
for _, e := range m.buckets {
|
|
for e != nil {
|
|
if !fn(e) {
|
|
return
|
|
}
|
|
e = e.next
|
|
}
|
|
}
|
|
}
|
|
|
|
// foreachWithID calls fn for all entries with the given id.
|
|
func (m *indexMap) foreachWithID(id restic.ID, fn func(*indexEntry)) {
|
|
if len(m.buckets) == 0 {
|
|
return
|
|
}
|
|
|
|
h := m.hash(id)
|
|
for e := m.buckets[h]; e != nil; e = e.next {
|
|
if e.id != id {
|
|
continue
|
|
}
|
|
fn(e)
|
|
}
|
|
}
|
|
|
|
// get returns the first entry for the given id.
|
|
func (m *indexMap) get(id restic.ID) *indexEntry {
|
|
if len(m.buckets) == 0 {
|
|
return nil
|
|
}
|
|
|
|
h := m.hash(id)
|
|
for e := m.buckets[h]; e != nil; e = e.next {
|
|
if e.id == id {
|
|
return e
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (m *indexMap) grow() {
|
|
old := m.buckets
|
|
m.buckets = make([]*indexEntry, growthFactor*len(m.buckets))
|
|
|
|
for _, e := range old {
|
|
for e != nil {
|
|
h := m.hash(e.id)
|
|
next := e.next
|
|
e.next = m.buckets[h]
|
|
m.buckets[h] = e
|
|
e = next
|
|
}
|
|
}
|
|
}
|
|
|
|
func (m *indexMap) hash(id restic.ID) uint {
|
|
// We use siphash with a randomly generated 128-bit key, to prevent
|
|
// backups of specially crafted inputs from degrading performance.
|
|
// While SHA-256 should be collision-resistant, for hash table indices
|
|
// we use only a few bits of it and finding collisions for those is
|
|
// much easier than breaking the whole algorithm.
|
|
h := uint(siphash.Hash(m.key0, m.key1, id[:]))
|
|
return h & uint(len(m.buckets)-1)
|
|
}
|
|
|
|
func (m *indexMap) init() {
|
|
const initialBuckets = 64
|
|
m.buckets = make([]*indexEntry, initialBuckets)
|
|
|
|
var buf [16]byte
|
|
if _, err := rand.Read(buf[:]); err != nil {
|
|
panic(err) // Very little we can do here.
|
|
}
|
|
m.key0 = binary.LittleEndian.Uint64(buf[:8])
|
|
m.key1 = binary.LittleEndian.Uint64(buf[8:])
|
|
}
|
|
|
|
// len reports the number of entries stored in the map.
func (m *indexMap) len() uint {
	return m.numentries
}
|
|
|
|
func (m *indexMap) newEntry() *indexEntry {
|
|
// Allocating in batches means that we get closer to optimal space usage,
|
|
// as Go's malloc will overallocate for structures of size 56 (indexEntry
|
|
// on amd64).
|
|
//
|
|
// 256*56 and 256*48 both have minimal malloc overhead among reasonable sizes.
|
|
// See src/runtime/sizeclasses.go in the standard library.
|
|
const entryAllocBatch = 256
|
|
|
|
if m.free == nil {
|
|
free := new([entryAllocBatch]indexEntry)
|
|
for i := range free[:len(free)-1] {
|
|
free[i].next = &free[i+1]
|
|
}
|
|
m.free = &free[0]
|
|
}
|
|
|
|
e := m.free
|
|
m.free = m.free.next
|
|
|
|
return e
|
|
}
|
|
|
|
type indexEntry struct {
|
|
id restic.ID
|
|
next *indexEntry
|
|
packIndex int // Position in containing Index's packs field.
|
|
offset uint32
|
|
length uint32
|
|
}
|