repository: let prune control data structure of usedBlobs set

This commit is contained in:
Michael Eischer 2024-05-20 11:47:53 +02:00
parent 2033c02b09
commit 77873f5a9d
4 changed files with 14 additions and 22 deletions

View file

@ -9,7 +9,6 @@ import (
"github.com/restic/restic/internal/debug"
"github.com/restic/restic/internal/errors"
"github.com/restic/restic/internal/index"
"github.com/restic/restic/internal/repository"
"github.com/restic/restic/internal/restic"
"github.com/restic/restic/internal/ui"
@ -189,8 +188,8 @@ func runPruneWithRepo(ctx context.Context, opts PruneOptions, gopts GlobalOption
RepackUncompressed: opts.RepackUncompressed,
}
plan, err := repository.PlanPrune(ctx, popts, repo, func(ctx context.Context, repo restic.Repository) (usedBlobs *index.AssociatedSet[uint8], err error) {
return getUsedBlobs(ctx, repo, ignoreSnapshots, printer)
plan, err := repository.PlanPrune(ctx, popts, repo, func(ctx context.Context, repo restic.Repository, usedBlobs restic.FindBlobSet) error {
return getUsedBlobs(ctx, repo, usedBlobs, ignoreSnapshots, printer)
}, printer)
if err != nil {
return err
@ -256,10 +255,10 @@ func printPruneStats(printer progress.Printer, stats repository.PruneStats) erro
return nil
}
func getUsedBlobs(ctx context.Context, repo restic.Repository, ignoreSnapshots restic.IDSet, printer progress.Printer) (usedBlobs *index.AssociatedSet[uint8], err error) {
func getUsedBlobs(ctx context.Context, repo restic.Repository, usedBlobs restic.FindBlobSet, ignoreSnapshots restic.IDSet, printer progress.Printer) error {
var snapshotTrees restic.IDs
printer.P("loading all snapshots...\n")
err = restic.ForAllSnapshots(ctx, repo, repo, ignoreSnapshots,
err := restic.ForAllSnapshots(ctx, repo, repo, ignoreSnapshots,
func(id restic.ID, sn *restic.Snapshot, err error) error {
if err != nil {
debug.Log("failed to load snapshot %v (error %v)", id, err)
@ -270,20 +269,14 @@ func getUsedBlobs(ctx context.Context, repo restic.Repository, ignoreSnapshots r
return nil
})
if err != nil {
return nil, errors.Fatalf("failed loading snapshot: %v", err)
return errors.Fatalf("failed loading snapshot: %v", err)
}
printer.P("finding data that is still in use for %d snapshots\n", len(snapshotTrees))
usedBlobs = index.NewAssociatedSet[uint8](repo.Index().(*index.MasterIndex))
bar := printer.NewCounter("snapshots")
bar.SetMax(uint64(len(snapshotTrees)))
defer bar.Done()
err = restic.FindUsedBlobs(ctx, repo, snapshotTrees, usedBlobs, bar)
if err != nil {
return nil, err
}
return usedBlobs, nil
return restic.FindUsedBlobs(ctx, repo, snapshotTrees, usedBlobs, bar)
}

View file

@ -91,7 +91,7 @@ type packInfoWithID struct {
// PlanPrune selects which files to rewrite and which to delete and which blobs to keep.
// Also some summary statistics are returned.
func PlanPrune(ctx context.Context, opts PruneOptions, repo *Repository, getUsedBlobs func(ctx context.Context, repo restic.Repository) (usedBlobs *index.AssociatedSet[uint8], err error), printer progress.Printer) (*PrunePlan, error) {
func PlanPrune(ctx context.Context, opts PruneOptions, repo *Repository, getUsedBlobs func(ctx context.Context, repo restic.Repository, usedBlobs restic.FindBlobSet) error, printer progress.Printer) (*PrunePlan, error) {
var stats PruneStats
if opts.UnsafeRecovery {
@ -105,7 +105,8 @@ func PlanPrune(ctx context.Context, opts PruneOptions, repo *Repository, getUsed
return nil, fmt.Errorf("compression requires at least repository format version 2")
}
usedBlobs, err := getUsedBlobs(ctx, repo)
usedBlobs := index.NewAssociatedSet[uint8](repo.idx)
err := getUsedBlobs(ctx, repo, usedBlobs)
if err != nil {
return nil, err
}

View file

@ -6,7 +6,6 @@ import (
"testing"
"github.com/restic/restic/internal/checker"
"github.com/restic/restic/internal/index"
"github.com/restic/restic/internal/repository"
"github.com/restic/restic/internal/restic"
rtest "github.com/restic/restic/internal/test"
@ -31,12 +30,11 @@ func testPrune(t *testing.T, opts repository.PruneOptions, errOnUnused bool) {
}
rtest.OK(t, repo.Flush(context.TODO()))
plan, err := repository.PlanPrune(context.TODO(), opts, repo, func(ctx context.Context, repo restic.Repository) (usedBlobs *index.AssociatedSet[uint8], err error) {
idx := index.NewAssociatedSet[uint8](repo.Index().(*index.MasterIndex))
plan, err := repository.PlanPrune(context.TODO(), opts, repo, func(ctx context.Context, repo restic.Repository, usedBlobs restic.FindBlobSet) error {
for blob := range keep {
idx.Insert(blob)
usedBlobs.Insert(blob)
}
return idx, nil
return nil
}, &progress.NoopPrinter{})
rtest.OK(t, err)

View file

@ -15,14 +15,14 @@ type Loader interface {
Connections() uint
}
type findBlobSet interface {
type FindBlobSet interface {
Has(bh BlobHandle) bool
Insert(bh BlobHandle)
}
// FindUsedBlobs traverses the trees identified by treeIDs and adds all seen
// blobs (trees and data blobs) to the set blobs. Already seen tree blobs will
// not be visited again.
func FindUsedBlobs(ctx context.Context, repo Loader, treeIDs IDs, blobs findBlobSet, p *progress.Counter) error {
func FindUsedBlobs(ctx context.Context, repo Loader, treeIDs IDs, blobs FindBlobSet, p *progress.Counter) error {
var lock sync.Mutex
wg, ctx := errgroup.WithContext(ctx)