restic/internal/archiver/tree_saver.go
Michael Eischer 2b88cd6eab archiver: Restructure SaveTree to work like SaveDir
SaveTree did not use the TreeSaver but rather managed the tree
collection and upload itself. This prevents using the parallelism
offered by the TreeSaver and duplicates all related code. Using the
TreeSaver can provide some speed-ups as all steps within the backup tree
now rely on FutureNodes. This can be especially relevant for backups
with large amounts of explicitly specified files.

The main difference between SaveTree and SaveDir is, that only the
former can save tree blobs in which nodes have a different name than the
actual file on disk. This is the result of resolving name conflicts
between multiple files with the same name. The filename that must be
used within the snapshot is now passed directly to
restic.NodeFromFileInfo. This ensures that a FutureNode already contains
the correct filename.
2022-10-08 21:28:39 +02:00

167 lines
3.6 KiB
Go

package archiver
import (
"context"
"github.com/restic/restic/internal/debug"
"github.com/restic/restic/internal/restic"
"golang.org/x/sync/errgroup"
)
// TreeSaver concurrently saves incoming trees to the repo.
type TreeSaver struct {
saveBlob func(ctx context.Context, t restic.BlobType, buf *Buffer) FutureBlob
errFn ErrorFunc
ch chan<- saveTreeJob
}
// NewTreeSaver returns a new tree saver. A worker pool with treeWorkers is
// started, it is stopped when ctx is cancelled.
func NewTreeSaver(ctx context.Context, wg *errgroup.Group, treeWorkers uint, saveBlob func(ctx context.Context, t restic.BlobType, buf *Buffer) FutureBlob, errFn ErrorFunc) *TreeSaver {
ch := make(chan saveTreeJob)
s := &TreeSaver{
ch: ch,
saveBlob: saveBlob,
errFn: errFn,
}
for i := uint(0); i < treeWorkers; i++ {
wg.Go(func() error {
return s.worker(ctx, ch)
})
}
return s
}
func (s *TreeSaver) TriggerShutdown() {
close(s.ch)
}
// Save stores the dir d and returns the data once it has been completed.
func (s *TreeSaver) Save(ctx context.Context, snPath string, target string, node *restic.Node, nodes []FutureNode, complete CompleteFunc) FutureNode {
fn, ch := newFutureNode()
job := saveTreeJob{
snPath: snPath,
target: target,
node: node,
nodes: nodes,
ch: ch,
complete: complete,
}
select {
case s.ch <- job:
case <-ctx.Done():
debug.Log("not saving tree, context is cancelled")
close(ch)
}
return fn
}
type saveTreeJob struct {
snPath string
target string
node *restic.Node
nodes []FutureNode
ch chan<- futureNodeResult
complete CompleteFunc
}
// save stores the nodes as a tree in the repo.
func (s *TreeSaver) save(ctx context.Context, job *saveTreeJob) (*restic.Node, ItemStats, error) {
var stats ItemStats
node := job.node
nodes := job.nodes
// allow GC of nodes array once the loop is finished
job.nodes = nil
builder := restic.NewTreeJSONBuilder()
for i, fn := range nodes {
// fn is a copy, so clear the original value explicitly
nodes[i] = FutureNode{}
fnr := fn.take(ctx)
// return the error if it wasn't ignored
if fnr.err != nil {
debug.Log("err for %v: %v", fnr.snPath, fnr.err)
fnr.err = s.errFn(fnr.target, fnr.err)
if fnr.err == nil {
// ignore error
continue
}
return nil, stats, fnr.err
}
// when the error is ignored, the node could not be saved, so ignore it
if fnr.node == nil {
debug.Log("%v excluded: %v", fnr.snPath, fnr.target)
continue
}
debug.Log("insert %v", fnr.node.Name)
err := builder.AddNode(fnr.node)
if err != nil {
return nil, stats, err
}
}
buf, err := builder.Finalize()
if err != nil {
return nil, stats, err
}
b := &Buffer{Data: buf}
res := s.saveBlob(ctx, restic.TreeBlob, b)
sbr := res.Take(ctx)
if !sbr.known {
stats.TreeBlobs++
stats.TreeSize += uint64(sbr.length)
stats.TreeSizeInRepo += uint64(sbr.sizeInRepo)
}
// The context was canceled in the meantime, id might be invalid
if ctx.Err() != nil {
return nil, stats, ctx.Err()
}
node.Subtree = &sbr.id
return node, stats, nil
}
func (s *TreeSaver) worker(ctx context.Context, jobs <-chan saveTreeJob) error {
for {
var job saveTreeJob
var ok bool
select {
case <-ctx.Done():
return nil
case job, ok = <-jobs:
if !ok {
return nil
}
}
node, stats, err := s.save(ctx, &job)
if err != nil {
debug.Log("error saving tree blob: %v", err)
close(job.ch)
return err
}
if job.complete != nil {
job.complete(node, stats)
}
job.ch <- futureNodeResult{
snPath: job.snPath,
target: job.target,
node: node,
stats: stats,
}
close(job.ch)
}
}