fc05e35a08
Iterating through the indexmap according to the bucket order has the problem that all indexEntries are accessed in random order which is rather cache inefficient. As we already keep a list of all allocated blocks, just iterate through it. This allows iterating through a batch of indexEntries without random memory accesses. In addition, the packID will likely remain similar across multiple blobs as all blobs of a pack file are added as a single batch.
233 lines
5.1 KiB
Go
233 lines
5.1 KiB
Go
package index
|
|
|
|
import (
|
|
"hash/maphash"
|
|
"math"
|
|
|
|
"github.com/restic/restic/internal/restic"
|
|
)
|
|
|
|
// An indexMap is a chained hash table that maps blob IDs to indexEntries.
|
|
// It allows storing multiple entries with the same key.
|
|
//
|
|
// IndexMap uses some optimizations that are not compatible with supporting
|
|
// deletions.
|
|
//
|
|
// The buckets in this hash table contain only pointers, rather than inlined
|
|
// key-value pairs like the standard Go map. This way, only a pointer array
|
|
// needs to be resized when the table grows, preventing memory usage spikes.
|
|
type indexMap struct {
|
|
// The number of buckets is always a power of two and never zero.
|
|
buckets []uint
|
|
|
|
mh maphash.Hash
|
|
|
|
blockList hashedArrayTree
|
|
}
|
|
|
|
const (
	// growthFactor is the factor by which the number of buckets is
	// multiplied when the table grows. Must be a power of 2.
	growthFactor = 2

	// maxLoad is the maximum average number of entries per bucket; the
	// table grows once this load is reached.
	maxLoad = 4
)
|
|
|
|
// add inserts an indexEntry for the given arguments into the map,
|
|
// using id as the key.
|
|
func (m *indexMap) add(id restic.ID, packIdx int, offset, length uint32, uncompressedLength uint32) {
|
|
switch {
|
|
case m.len() == math.MaxUint: // Lazy initialization.
|
|
m.init()
|
|
case m.len() >= maxLoad*uint(len(m.buckets)):
|
|
m.grow()
|
|
}
|
|
|
|
h := m.hash(id)
|
|
e, idx := m.newEntry()
|
|
e.id = id
|
|
e.next = m.buckets[h] // Prepend to existing chain.
|
|
e.packIndex = packIdx
|
|
e.offset = offset
|
|
e.length = length
|
|
e.uncompressedLength = uncompressedLength
|
|
|
|
m.buckets[h] = idx
|
|
}
|
|
|
|
// foreach calls fn for all entries in the map, until fn returns false.
|
|
func (m *indexMap) foreach(fn func(*indexEntry) bool) {
|
|
blockCount := m.blockList.Size()
|
|
for i := uint(1); i < blockCount; i++ {
|
|
if !fn(m.resolve(i)) {
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
// foreachWithID calls fn for all entries with the given id.
|
|
func (m *indexMap) foreachWithID(id restic.ID, fn func(*indexEntry)) {
|
|
if len(m.buckets) == 0 {
|
|
return
|
|
}
|
|
|
|
h := m.hash(id)
|
|
ei := m.buckets[h]
|
|
for ei != 0 {
|
|
e := m.resolve(ei)
|
|
ei = e.next
|
|
if e.id != id {
|
|
continue
|
|
}
|
|
fn(e)
|
|
}
|
|
}
|
|
|
|
// get returns the first entry for the given id.
|
|
func (m *indexMap) get(id restic.ID) *indexEntry {
|
|
if len(m.buckets) == 0 {
|
|
return nil
|
|
}
|
|
|
|
h := m.hash(id)
|
|
ei := m.buckets[h]
|
|
for ei != 0 {
|
|
e := m.resolve(ei)
|
|
if e.id == id {
|
|
return e
|
|
}
|
|
ei = e.next
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (m *indexMap) grow() {
|
|
old := m.buckets
|
|
m.buckets = make([]uint, growthFactor*len(m.buckets))
|
|
|
|
for _, ei := range old {
|
|
for ei != 0 {
|
|
e := m.resolve(ei)
|
|
h := m.hash(e.id)
|
|
next := e.next
|
|
e.next = m.buckets[h]
|
|
m.buckets[h] = ei
|
|
ei = next
|
|
}
|
|
}
|
|
}
|
|
|
|
func (m *indexMap) hash(id restic.ID) uint {
|
|
// We use maphash to prevent backups of specially crafted inputs
|
|
// from degrading performance.
|
|
// While SHA-256 should be collision-resistant, for hash table indices
|
|
// we use only a few bits of it and finding collisions for those is
|
|
// much easier than breaking the whole algorithm.
|
|
m.mh.Reset()
|
|
_, _ = m.mh.Write(id[:])
|
|
h := uint(m.mh.Sum64())
|
|
return h & uint(len(m.buckets)-1)
|
|
}
|
|
|
|
func (m *indexMap) init() {
|
|
const initialBuckets = 64
|
|
m.buckets = make([]uint, initialBuckets)
|
|
// first entry in blockList serves as null byte
|
|
m.blockList = *newHAT()
|
|
m.newEntry()
|
|
}
|
|
|
|
// len returns the number of entries in the map, i.e. the number of allocated
// block list positions minus the reserved position 0.
//
// For a map that has not been initialized the block list is empty and the
// unsigned subtraction wraps around to math.MaxUint; add relies on this
// value to detect that init still has to run.
func (m *indexMap) len() uint {
	allocated := m.blockList.Size()
	return allocated - 1
}
|
|
|
|
func (m *indexMap) newEntry() (*indexEntry, uint) {
|
|
return m.blockList.Alloc()
|
|
}
|
|
|
|
func (m *indexMap) resolve(idx uint) *indexEntry {
|
|
return m.blockList.Ref(idx)
|
|
}
|
|
|
|
type indexEntry struct {
|
|
id restic.ID
|
|
next uint
|
|
packIndex int // Position in containing Index's packs field.
|
|
offset uint32
|
|
length uint32
|
|
uncompressedLength uint32
|
|
}
|
|
|
|
type hashedArrayTree struct {
|
|
mask uint
|
|
maskShift uint
|
|
blockSize uint
|
|
|
|
size uint
|
|
blockList [][]indexEntry
|
|
}
|
|
|
|
func newHAT() *hashedArrayTree {
|
|
// start with a small block size
|
|
blockSizePower := uint(2)
|
|
blockSize := uint(1 << blockSizePower)
|
|
|
|
return &hashedArrayTree{
|
|
mask: blockSize - 1,
|
|
maskShift: blockSizePower,
|
|
blockSize: blockSize,
|
|
size: 0,
|
|
blockList: make([][]indexEntry, blockSize),
|
|
}
|
|
}
|
|
|
|
func (h *hashedArrayTree) Alloc() (*indexEntry, uint) {
|
|
h.grow()
|
|
size := h.size
|
|
idx, subIdx := h.index(size)
|
|
h.size++
|
|
return &h.blockList[idx][subIdx], size
|
|
}
|
|
|
|
func (h *hashedArrayTree) index(pos uint) (idx uint, subIdx uint) {
|
|
subIdx = pos & h.mask
|
|
idx = pos >> h.maskShift
|
|
return
|
|
}
|
|
|
|
func (h *hashedArrayTree) Ref(pos uint) *indexEntry {
|
|
if pos >= h.size {
|
|
panic("array index out of bounds")
|
|
}
|
|
|
|
idx, subIdx := h.index(pos)
|
|
return &h.blockList[idx][subIdx]
|
|
}
|
|
|
|
func (h *hashedArrayTree) Size() uint {
|
|
return h.size
|
|
}
|
|
|
|
func (h *hashedArrayTree) grow() {
|
|
idx, subIdx := h.index(h.size)
|
|
if int(idx) == len(h.blockList) {
|
|
// blockList is too small -> double list and block size
|
|
oldBlocks := h.blockList
|
|
h.blockList = make([][]indexEntry, h.blockSize)
|
|
|
|
h.blockSize *= 2
|
|
h.mask = h.mask*2 + 1
|
|
h.maskShift++
|
|
idx = idx / 2
|
|
|
|
// pairwise merging of blocks
|
|
for i := 0; i < len(oldBlocks); i += 2 {
|
|
block := make([]indexEntry, 0, h.blockSize)
|
|
block = append(block, oldBlocks[i]...)
|
|
block = append(block, oldBlocks[i+1]...)
|
|
h.blockList[i/2] = block
|
|
// allow GC
|
|
oldBlocks[i] = nil
|
|
oldBlocks[i+1] = nil
|
|
}
|
|
}
|
|
if subIdx == 0 {
|
|
// new index entry batch
|
|
h.blockList[idx] = make([]indexEntry, h.blockSize)
|
|
}
|
|
}
|