forked from TrueCloudLab/restic
9f3ca97ee8
* uses less memory as common prefix is only stored once * stepping stone for simpler error callback api, which will allow further memory footprint reduction Signed-off-by: Igor Fedorenko <igor@ifedorenko.com>
224 lines
8 KiB
Go
224 lines
8 KiB
Go
package restorer
|
|
|
|
import (
|
|
"container/heap"
|
|
|
|
"github.com/restic/restic/internal/debug"
|
|
"github.com/restic/restic/internal/restic"
|
|
)
|
|
|
|
// packQueue tracks remaining file contents restore work and decides what pack
|
|
// to download and files to write next.
|
|
//
|
|
// The packs in the queue can be in one of three states: waiting, ready and
|
|
// in-progress.
|
|
// Waiting packs are the packs that only have blobs from the "middle" of their
|
|
// corresponding files and therefore cannot be used until blobs from some other
|
|
// packs are written to the files first.
|
|
// In-progress packs are the packs that were removed from the queue by #nextPack
|
|
// and must be first returned to the queue before they are considered again.
|
|
// Ready packs are the packs can be immediately used to restore at least one
|
|
// file. Internally ready packs are kept in a heap and are ordered according
|
|
// to these criteria:
|
|
// - Packs with "head" blobs of in-progress files are considered first. The
|
|
// idea is to complete restore of in-progress files before starting restore
|
|
// of other files. This is both more intuitive and also reduces number of
|
|
// open file handles needed during restore.
|
|
// - Packs with smallest cost are considered next. Pack cost is measured in
|
|
// number of other packs required before all blobs in the pack can be used
|
|
// and the pack can be removed from the pack cache.
|
|
// For example, consisder a file that requires two blobs, blob1 from pack1
|
|
// and blob2 from pack2. The cost of pack2 is 1, because blob2 cannot be
|
|
// used before blob1 is available. The higher the cost, the longer the pack
|
|
// must be cached locally to avoid redownload.
|
|
//
|
|
// Pack queue implementation is NOT thread safe. All pack queue methods must
|
|
// be called from single gorouting AND packInfo and fileInfo instances must
|
|
// be updated synchronously from the same gorouting.
|
|
type packQueue struct {
|
|
idx filePackTraverser
|
|
|
|
packs map[restic.ID]*packInfo // waiting and ready packs
|
|
inprogress map[*packInfo]struct{} // inprogress packs
|
|
|
|
heap *packHeap // heap of ready packs
|
|
}
|
|
|
|
func newPackQueue(idx filePackTraverser, files []*fileInfo, inprogress func(files map[*fileInfo]struct{}) bool) (*packQueue, error) {
|
|
packs := make(map[restic.ID]*packInfo) // all packs
|
|
|
|
// create packInfo from fileInfo
|
|
for _, file := range files {
|
|
err := idx.forEachFilePack(file, func(packIdx int, packID restic.ID, _ []restic.Blob) bool {
|
|
pack, ok := packs[packID]
|
|
if !ok {
|
|
pack = &packInfo{
|
|
id: packID,
|
|
index: -1,
|
|
files: make(map[*fileInfo]struct{}),
|
|
}
|
|
packs[packID] = pack
|
|
}
|
|
pack.files[file] = struct{}{}
|
|
pack.cost += packIdx
|
|
|
|
return true // keep going
|
|
})
|
|
if err != nil {
|
|
// repository index is messed up, can't do anything
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
// create packInfo heap
|
|
pheap := &packHeap{inprogress: inprogress}
|
|
headPacks := restic.NewIDSet()
|
|
for _, file := range files {
|
|
idx.forEachFilePack(file, func(packIdx int, packID restic.ID, _ []restic.Blob) bool {
|
|
if !headPacks.Has(packID) {
|
|
headPacks.Insert(packID)
|
|
pack := packs[packID]
|
|
pack.index = len(pheap.elements)
|
|
pheap.elements = append(pheap.elements, pack)
|
|
}
|
|
return false // only first pack
|
|
})
|
|
}
|
|
heap.Init(pheap)
|
|
|
|
return &packQueue{idx: idx, packs: packs, heap: pheap, inprogress: make(map[*packInfo]struct{})}, nil
|
|
}
|
|
|
|
// isEmpty returns true if the queue is empty, i.e. there are no more packs to
|
|
// download and files to write to.
|
|
func (h *packQueue) isEmpty() bool {
|
|
return len(h.packs) == 0 && len(h.inprogress) == 0
|
|
}
|
|
|
|
// nextPack returns next ready pack and corresponding files ready for download
|
|
// and processing. The returned pack and the files are marked as "in progress"
|
|
// internally and must be first returned to the queue before they are
|
|
// considered by #nextPack again.
|
|
func (h *packQueue) nextPack() (*packInfo, []*fileInfo) {
|
|
debug.Log("Ready packs %d, outstanding packs %d, inprogress packs %d", h.heap.Len(), len(h.packs), len(h.inprogress))
|
|
|
|
if h.heap.Len() == 0 {
|
|
return nil, nil
|
|
}
|
|
|
|
pack := heap.Pop(h.heap).(*packInfo)
|
|
h.inprogress[pack] = struct{}{}
|
|
debug.Log("Popped pack %s (%d files), heap size=%d", pack.id.Str(), len(pack.files), len(h.heap.elements))
|
|
var files []*fileInfo
|
|
for file := range pack.files {
|
|
h.idx.forEachFilePack(file, func(packIdx int, packID restic.ID, packBlobs []restic.Blob) bool {
|
|
debug.Log("Pack #%d %s (%d blobs) used by %s", packIdx, packID.Str(), len(packBlobs), file.location)
|
|
if pack.id == packID {
|
|
files = append(files, file)
|
|
}
|
|
return false // only interested in the fist pack here
|
|
})
|
|
}
|
|
|
|
return pack, files
|
|
}
|
|
|
|
// requeuePack conditionally adds back to the queue pack previously returned by
|
|
// #nextPack.
|
|
// If the pack is needed to restore any incomplete files, adds the pack to the
|
|
// queue and adjusts order of all affected packs in the queue. Has no effect
|
|
// if the pack is not required to restore any files.
|
|
// Returns true if the pack was added to the queue, false otherwise.
|
|
func (h *packQueue) requeuePack(pack *packInfo, success []*fileInfo, failure []*fileInfo) bool {
|
|
debug.Log("Requeue pack %s (%d/%d/%d files/success/failure)", pack.id.Str(), len(pack.files), len(success), len(failure))
|
|
|
|
// maintain inprogress pack set
|
|
delete(h.inprogress, pack)
|
|
|
|
affectedPacks := make(map[*packInfo]struct{})
|
|
affectedPacks[pack] = struct{}{} // this pack is alwats affected
|
|
|
|
// apply download success/failure to the packs
|
|
onFailure := func(file *fileInfo) {
|
|
h.idx.forEachFilePack(file, func(packInx int, packID restic.ID, _ []restic.Blob) bool {
|
|
pack := h.packs[packID]
|
|
delete(pack.files, file)
|
|
pack.cost -= packInx
|
|
affectedPacks[pack] = struct{}{}
|
|
return true // keep going
|
|
})
|
|
}
|
|
for _, file := range failure {
|
|
onFailure(file)
|
|
}
|
|
onSuccess := func(pack *packInfo, file *fileInfo) {
|
|
remove := true
|
|
h.idx.forEachFilePack(file, func(packIdx int, packID restic.ID, _ []restic.Blob) bool {
|
|
if packID.Equal(pack.id) {
|
|
// the pack has more blobs required by the file
|
|
remove = false
|
|
}
|
|
otherPack := h.packs[packID]
|
|
otherPack.cost--
|
|
affectedPacks[otherPack] = struct{}{}
|
|
return true // keep going
|
|
})
|
|
if remove {
|
|
delete(pack.files, file)
|
|
}
|
|
}
|
|
for _, file := range success {
|
|
onSuccess(pack, file)
|
|
}
|
|
|
|
// drop/update affected packs
|
|
isReady := func(affectedPack *packInfo) (ready bool) {
|
|
for file := range affectedPack.files {
|
|
h.idx.forEachFilePack(file, func(packIdx int, packID restic.ID, _ []restic.Blob) bool {
|
|
if packID.Equal(affectedPack.id) {
|
|
ready = true
|
|
}
|
|
return false // only file's first pack matters
|
|
})
|
|
if ready {
|
|
break
|
|
}
|
|
}
|
|
return ready
|
|
}
|
|
for affectedPack := range affectedPacks {
|
|
if _, inprogress := h.inprogress[affectedPack]; !inprogress {
|
|
if len(affectedPack.files) == 0 {
|
|
// drop the pack if it isn't inprogress and has no files that need it
|
|
if affectedPack.index >= 0 {
|
|
// This can't happen unless there is a bug elsewhere:
|
|
// - "current" pack isn't in the heap, hence its index must be < 0
|
|
// - "other" packs can't be ready (i.e. in heap) unless they have other files
|
|
// in which case len(affectedPack.files) must be > 0
|
|
debug.Log("corrupted ready heap: removed unexpected ready pack %s", affectedPack.id.Str())
|
|
heap.Remove(h.heap, affectedPack.index)
|
|
}
|
|
delete(h.packs, affectedPack.id)
|
|
} else {
|
|
ready := isReady(affectedPack)
|
|
switch {
|
|
case ready && affectedPack.index < 0:
|
|
heap.Push(h.heap, affectedPack)
|
|
case ready && affectedPack.index >= 0:
|
|
heap.Fix(h.heap, affectedPack.index)
|
|
case !ready && affectedPack.index >= 0:
|
|
// This can't happen unless there is a bug elsewhere:
|
|
// - "current" pack isn't in the heap, hence its index must be < 0
|
|
// - "other" packs can't have same head blobs as the "current" pack,
|
|
// hence "other" packs can't change their readiness
|
|
debug.Log("corrupted ready heap: removed unexpected waiting pack %s", affectedPack.id.Str())
|
|
heap.Remove(h.heap, affectedPack.index)
|
|
case !ready && affectedPack.index < 0:
|
|
// do nothing
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return len(pack.files) > 0
|
|
}
|