restic: Actually parallelize FindUsedBlobs

This commit is contained in:
Michael Eischer 2020-11-07 01:12:07 +01:00 committed by Alexander Neumann
parent 6e03f80ca2
commit f2a1b125cb

View file

@ -1,6 +1,11 @@
package restic
import "context"
import (
"context"
"sync"
"golang.org/x/sync/errgroup"
)
// TreeLoader loads a tree from a repository.
type TreeLoader interface {
@ -10,30 +15,38 @@ type TreeLoader interface {
// FindUsedBlobs traverses the tree ID and adds all seen blobs (trees and data
// blobs) to the set blobs. Already seen tree blobs will not be visited again.
func FindUsedBlobs(ctx context.Context, repo TreeLoader, treeID ID, blobs BlobSet) error {
var lock sync.Mutex
wg, ctx := errgroup.WithContext(ctx)
treeStream := StreamTrees(ctx, wg, repo, IDs{treeID}, func(treeID ID) bool {
// locking is necessary the goroutine below concurrently adds data blobs
lock.Lock()
h := BlobHandle{ID: treeID, Type: TreeBlob}
if blobs.Has(h) {
return nil
}
blobReferenced := blobs.Has(h)
// noop if already referenced
blobs.Insert(h)
lock.Unlock()
return blobReferenced
})
tree, err := repo.LoadTree(ctx, treeID)
if err != nil {
return err
wg.Go(func() error {
for tree := range treeStream {
if tree.Error != nil {
return tree.Error
}
lock.Lock()
for _, node := range tree.Nodes {
switch node.Type {
case "file":
for _, blob := range node.Content {
blobs.Insert(BlobHandle{ID: blob, Type: DataBlob})
}
case "dir":
err := FindUsedBlobs(ctx, repo, *node.Subtree, blobs)
if err != nil {
return err
}
}
lock.Unlock()
}
return nil
})
return wg.Wait()
}