forked from TrueCloudLab/restic
Parallelize ForAllSnapshots
This commit is contained in:
parent
5b9ee56335
commit
26f85779be
2 changed files with 71 additions and 86 deletions
|
@ -298,86 +298,6 @@ func (e Error) Error() string {
|
||||||
return e.Err.Error()
|
return e.Err.Error()
|
||||||
}
|
}
|
||||||
|
|
||||||
func loadTreeFromSnapshot(ctx context.Context, repo restic.Repository, id restic.ID) (restic.ID, error) {
|
|
||||||
sn, err := restic.LoadSnapshot(ctx, repo, id)
|
|
||||||
if err != nil {
|
|
||||||
debug.Log("error loading snapshot %v: %v", id, err)
|
|
||||||
return restic.ID{}, err
|
|
||||||
}
|
|
||||||
|
|
||||||
if sn.Tree == nil {
|
|
||||||
debug.Log("snapshot %v has no tree", id)
|
|
||||||
return restic.ID{}, errors.Errorf("snapshot %v has no tree", id)
|
|
||||||
}
|
|
||||||
|
|
||||||
return *sn.Tree, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// loadSnapshotTreeIDs loads all snapshots from backend and returns the tree IDs.
|
|
||||||
func loadSnapshotTreeIDs(ctx context.Context, repo restic.Repository) (restic.IDs, []error) {
|
|
||||||
var trees struct {
|
|
||||||
IDs restic.IDs
|
|
||||||
sync.Mutex
|
|
||||||
}
|
|
||||||
|
|
||||||
var errs struct {
|
|
||||||
errs []error
|
|
||||||
sync.Mutex
|
|
||||||
}
|
|
||||||
|
|
||||||
// track spawned goroutines using wg, create a new context which is
|
|
||||||
// cancelled as soon as an error occurs.
|
|
||||||
wg, ctx := errgroup.WithContext(ctx)
|
|
||||||
|
|
||||||
ch := make(chan restic.ID)
|
|
||||||
|
|
||||||
// send list of index files through ch, which is closed afterwards
|
|
||||||
wg.Go(func() error {
|
|
||||||
defer close(ch)
|
|
||||||
return repo.List(ctx, restic.SnapshotFile, func(id restic.ID, size int64) error {
|
|
||||||
select {
|
|
||||||
case <-ctx.Done():
|
|
||||||
return nil
|
|
||||||
case ch <- id:
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
})
|
|
||||||
})
|
|
||||||
|
|
||||||
// a worker receives an index ID from ch, loads the snapshot and the tree,
|
|
||||||
// and adds the result to errs and trees.
|
|
||||||
worker := func() error {
|
|
||||||
for id := range ch {
|
|
||||||
debug.Log("load snapshot %v", id)
|
|
||||||
|
|
||||||
treeID, err := loadTreeFromSnapshot(ctx, repo, id)
|
|
||||||
if err != nil {
|
|
||||||
errs.Lock()
|
|
||||||
errs.errs = append(errs.errs, err)
|
|
||||||
errs.Unlock()
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
debug.Log("snapshot %v has tree %v", id, treeID)
|
|
||||||
trees.Lock()
|
|
||||||
trees.IDs = append(trees.IDs, treeID)
|
|
||||||
trees.Unlock()
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
for i := 0; i < defaultParallelism; i++ {
|
|
||||||
wg.Go(worker)
|
|
||||||
}
|
|
||||||
|
|
||||||
err := wg.Wait()
|
|
||||||
if err != nil {
|
|
||||||
errs.errs = append(errs.errs, err)
|
|
||||||
}
|
|
||||||
|
|
||||||
return trees.IDs, errs.errs
|
|
||||||
}
|
|
||||||
|
|
||||||
// TreeError collects several errors that occurred while processing a tree.
|
// TreeError collects several errors that occurred while processing a tree.
|
||||||
type TreeError struct {
|
type TreeError struct {
|
||||||
ID restic.ID
|
ID restic.ID
|
||||||
|
@ -586,6 +506,24 @@ func (c *Checker) filterTrees(ctx context.Context, backlog restic.IDs, loaderCha
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func loadSnapshotTreeIDs(ctx context.Context, repo restic.Repository) (ids restic.IDs, errs []error) {
|
||||||
|
err := restic.ForAllSnapshots(ctx, repo, nil, func(id restic.ID, sn *restic.Snapshot, err error) error {
|
||||||
|
if err != nil {
|
||||||
|
errs = append(errs, err)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
treeID := *sn.Tree
|
||||||
|
debug.Log("snapshot %v has tree %v", id, treeID)
|
||||||
|
ids = append(ids, treeID)
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
errs = append(errs, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return ids, errs
|
||||||
|
}
|
||||||
|
|
||||||
// Structure checks that for all snapshots all referenced data blobs and
|
// Structure checks that for all snapshots all referenced data blobs and
|
||||||
// subtrees are available in the index. errChan is closed after all trees have
|
// subtrees are available in the index. errChan is closed after all trees have
|
||||||
// been traversed.
|
// been traversed.
|
||||||
|
|
|
@ -5,8 +5,11 @@ import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"os/user"
|
"os/user"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"golang.org/x/sync/errgroup"
|
||||||
|
|
||||||
"github.com/restic/restic/internal/debug"
|
"github.com/restic/restic/internal/debug"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -66,17 +69,61 @@ func LoadSnapshot(ctx context.Context, repo Repository, id ID) (*Snapshot, error
|
||||||
return sn, nil
|
return sn, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// ForAllSnapshots reads all snapshots and calls the given function.
|
const loadSnapshotParallelism = 5
|
||||||
|
|
||||||
|
// ForAllSnapshots reads all snapshots in parallel and calls the
|
||||||
|
// given function. It is guaranteed that the function is not run concurrently.
|
||||||
|
// If the called function returns an error, this function is cancelled and
|
||||||
|
// also returns this error.
|
||||||
// If a snapshot ID is in excludeIDs, it will be ignored.
|
// If a snapshot ID is in excludeIDs, it will be ignored.
|
||||||
func ForAllSnapshots(ctx context.Context, repo Repository, excludeIDs IDSet, fn func(ID, *Snapshot, error) error) error {
|
func ForAllSnapshots(ctx context.Context, repo Repository, excludeIDs IDSet, fn func(ID, *Snapshot, error) error) error {
|
||||||
return repo.List(ctx, SnapshotFile, func(id ID, size int64) error {
|
var m sync.Mutex
|
||||||
if excludeIDs.Has(id) {
|
|
||||||
|
// track spawned goroutines using wg, create a new context which is
|
||||||
|
// cancelled as soon as an error occurs.
|
||||||
|
wg, ctx := errgroup.WithContext(ctx)
|
||||||
|
|
||||||
|
ch := make(chan ID)
|
||||||
|
|
||||||
|
// send list of snapshot files through ch, which is closed afterwards
|
||||||
|
wg.Go(func() error {
|
||||||
|
defer close(ch)
|
||||||
|
return repo.List(ctx, SnapshotFile, func(id ID, size int64) error {
|
||||||
|
if excludeIDs.Has(id) {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
return nil
|
||||||
|
case ch <- id:
|
||||||
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
})
|
||||||
sn, err := LoadSnapshot(ctx, repo, id)
|
|
||||||
return fn(id, sn, err)
|
|
||||||
})
|
})
|
||||||
|
|
||||||
|
// a worker receives an snapshot ID from ch, loads the snapshot
|
||||||
|
// and runs fn with id, the snapshot and the error
|
||||||
|
worker := func() error {
|
||||||
|
for id := range ch {
|
||||||
|
debug.Log("load snapshot %v", id)
|
||||||
|
sn, err := LoadSnapshot(ctx, repo, id)
|
||||||
|
|
||||||
|
m.Lock()
|
||||||
|
err = fn(id, sn, err)
|
||||||
|
m.Unlock()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := 0; i < loadSnapshotParallelism; i++ {
|
||||||
|
wg.Go(worker)
|
||||||
|
}
|
||||||
|
|
||||||
|
return wg.Wait()
|
||||||
}
|
}
|
||||||
|
|
||||||
// LoadAllSnapshots returns a list of all snapshots in the repo.
|
// LoadAllSnapshots returns a list of all snapshots in the repo.
|
||||||
|
|
Loading…
Reference in a new issue