From 93098e9265d23ea6d4b9760bfb4a92d4193c04e2 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Fri, 2 Jun 2023 20:18:46 +0200 Subject: [PATCH] prune: hide implementation details of counted blob set --- cmd/restic/cmd_prune.go | 4 +- internal/repository/prune.go | 36 ++++++++--------- internal/repository/prune_test.go | 2 +- internal/restic/counted_blob_set.go | 61 +++++++++++++++++++---------- 4 files changed, 61 insertions(+), 42 deletions(-) diff --git a/cmd/restic/cmd_prune.go b/cmd/restic/cmd_prune.go index 7872589be..31f920be5 100644 --- a/cmd/restic/cmd_prune.go +++ b/cmd/restic/cmd_prune.go @@ -188,7 +188,7 @@ func runPruneWithRepo(ctx context.Context, opts PruneOptions, gopts GlobalOption RepackUncompressed: opts.RepackUncompressed, } - plan, err := repository.PlanPrune(ctx, popts, repo, func(ctx context.Context, repo restic.Repository) (usedBlobs restic.CountedBlobSet, err error) { + plan, err := repository.PlanPrune(ctx, popts, repo, func(ctx context.Context, repo restic.Repository) (usedBlobs *restic.CountedBlobSet, err error) { return getUsedBlobs(ctx, repo, ignoreSnapshots, printer) }, printer) if err != nil { @@ -255,7 +255,7 @@ func printPruneStats(printer progress.Printer, stats repository.PruneStats) erro return nil } -func getUsedBlobs(ctx context.Context, repo restic.Repository, ignoreSnapshots restic.IDSet, printer progress.Printer) (usedBlobs restic.CountedBlobSet, err error) { +func getUsedBlobs(ctx context.Context, repo restic.Repository, ignoreSnapshots restic.IDSet, printer progress.Printer) (usedBlobs *restic.CountedBlobSet, err error) { var snapshotTrees restic.IDs printer.P("loading all snapshots...\n") err = restic.ForAllSnapshots(ctx, repo, repo, ignoreSnapshots, diff --git a/internal/repository/prune.go b/internal/repository/prune.go index 49869fcac..25bf3621a 100644 --- a/internal/repository/prune.go +++ b/internal/repository/prune.go @@ -60,11 +60,11 @@ type PruneStats struct { } type PrunePlan struct { - removePacksFirst restic.IDSet // packs to remove first (unreferenced packs) - repackPacks restic.IDSet // packs to repack - keepBlobs restic.CountedBlobSet // blobs to keep during repacking - removePacks restic.IDSet // packs to remove - ignorePacks restic.IDSet // packs to ignore when rebuilding the index + removePacksFirst restic.IDSet // packs to remove first (unreferenced packs) + repackPacks restic.IDSet // packs to repack + keepBlobs *restic.CountedBlobSet // blobs to keep during repacking + removePacks restic.IDSet // packs to remove + ignorePacks restic.IDSet // packs to ignore when rebuilding the index repo *Repository stats PruneStats @@ -90,7 +90,7 @@ type packInfoWithID struct { // PlanPrune selects which files to rewrite and which to delete and which blobs to keep. // Also some summary statistics are returned. -func PlanPrune(ctx context.Context, opts PruneOptions, repo *Repository, getUsedBlobs func(ctx context.Context, repo restic.Repository) (usedBlobs restic.CountedBlobSet, err error), printer progress.Printer) (*PrunePlan, error) { +func PlanPrune(ctx context.Context, opts PruneOptions, repo *Repository, getUsedBlobs func(ctx context.Context, repo restic.Repository) (usedBlobs *restic.CountedBlobSet, err error), printer progress.Printer) (*PrunePlan, error) { var stats PruneStats if opts.UnsafeRecovery { @@ -152,13 +152,13 @@ func PlanPrune(ctx context.Context, opts PruneOptions, repo *Repository, getUsed return &plan, nil } -func packInfoFromIndex(ctx context.Context, idx restic.ListBlobser, usedBlobs restic.CountedBlobSet, stats *PruneStats, printer progress.Printer) (restic.CountedBlobSet, map[restic.ID]packInfo, error) { +func packInfoFromIndex(ctx context.Context, idx restic.ListBlobser, usedBlobs *restic.CountedBlobSet, stats *PruneStats, printer progress.Printer) (*restic.CountedBlobSet, map[restic.ID]packInfo, error) { // iterate over all blobs in index to find out which blobs are duplicates // The counter in usedBlobs describes how many instances of the blob exist in the repository index // Thus 0 == blob is missing, 1 == blob exists once, >= 2 == duplicates exist err := idx.ListBlobs(ctx, func(blob restic.PackedBlob) { bh := blob.BlobHandle - count, ok := usedBlobs[bh] + count, ok := usedBlobs.Get(bh) if ok { if count < math.MaxUint8 { // don't overflow, but saturate count at 255 @@ -167,7 +167,7 @@ func packInfoFromIndex(ctx context.Context, idx restic.ListBlobser, usedBlobs re count++ } - usedBlobs[bh] = count + usedBlobs.Set(bh, count) } }) if err != nil { @@ -176,12 +176,12 @@ func packInfoFromIndex(ctx context.Context, idx restic.ListBlobser, usedBlobs re // Check if all used blobs have been found in index missingBlobs := restic.NewBlobSet() - for bh, count := range usedBlobs { + usedBlobs.For(func(bh restic.BlobHandle, count uint8) { if count == 0 { // blob does not exist in any pack files missingBlobs.Insert(bh) } - } + }) if len(missingBlobs) != 0 { printer.E("%v not found in the index\n\n"+ @@ -221,7 +221,7 @@ func packInfoFromIndex(ctx context.Context, idx restic.ListBlobser, usedBlobs re bh := blob.BlobHandle size := uint64(blob.Length) - dupCount := usedBlobs[bh] + dupCount, _ := usedBlobs.Get(bh) switch { case dupCount >= 2: hasDuplicates = true @@ -266,7 +266,7 @@ func packInfoFromIndex(ctx context.Context, idx restic.ListBlobser, usedBlobs re // iterate again over all blobs in index (this is pretty cheap, all in-mem) err = idx.ListBlobs(ctx, func(blob restic.PackedBlob) { bh := blob.BlobHandle - count, ok := usedBlobs[bh] + count, ok := usedBlobs.Get(bh) // skip non-duplicate, aka. normal blobs // count == 0 is used to mark that this was a duplicate blob with only a single occurrence remaining if !ok || count == 1 { @@ -290,7 +290,7 @@ func packInfoFromIndex(ctx context.Context, idx restic.ListBlobser, usedBlobs re stats.Size.Duplicate -= size stats.Blobs.Duplicate-- // let other occurrences remain marked as unused - usedBlobs[bh] = 1 + usedBlobs.Set(bh, 1) default: // remain unused and decrease counter count-- @@ -299,7 +299,7 @@ func packInfoFromIndex(ctx context.Context, idx restic.ListBlobser, usedBlobs re // thus use the special value zero. This will select the last instance of the blob for keeping. count = 0 } - usedBlobs[bh] = count + usedBlobs.Set(bh, count) } // update indexPack indexPack[blob.PackID] = ip @@ -311,11 +311,11 @@ func packInfoFromIndex(ctx context.Context, idx restic.ListBlobser, usedBlobs re // Sanity check. If no duplicates exist, all blobs have value 1. After handling // duplicates, this also applies to duplicates. - for _, count := range usedBlobs { + usedBlobs.For(func(_ restic.BlobHandle, count uint8) { if count != 1 { panic("internal error during blob selection") } - } + }) return usedBlobs, indexPack, nil } @@ -567,7 +567,7 @@ func (plan *PrunePlan) Execute(ctx context.Context, printer progress.Printer) er // Also remove repacked packs plan.removePacks.Merge(plan.repackPacks) - if len(plan.keepBlobs) != 0 { + if plan.keepBlobs.Len() != 0 { printer.E("%v was not repacked\n\n"+ "Integrity check failed.\n"+ "Please report this error (along with the output of the 'prune' run) at\n"+ diff --git a/internal/repository/prune_test.go b/internal/repository/prune_test.go index dbf36ffd0..2b3b3b0cd 100644 --- a/internal/repository/prune_test.go +++ b/internal/repository/prune_test.go @@ -30,7 +30,7 @@ func testPrune(t *testing.T, opts repository.PruneOptions, errOnUnused bool) { } rtest.OK(t, repo.Flush(context.TODO())) - plan, err := repository.PlanPrune(context.TODO(), opts, repo, func(ctx context.Context, repo restic.Repository) (usedBlobs restic.CountedBlobSet, err error) { + plan, err := repository.PlanPrune(context.TODO(), opts, repo, func(ctx context.Context, repo restic.Repository) (usedBlobs *restic.CountedBlobSet, err error) { return restic.NewCountedBlobSet(keep.List()...), nil }, &progress.NoopPrinter{}) rtest.OK(t, err) diff --git a/internal/restic/counted_blob_set.go b/internal/restic/counted_blob_set.go index f965d3129..f0a43d5d6 100644 --- a/internal/restic/counted_blob_set.go +++ b/internal/restic/counted_blob_set.go @@ -5,42 +5,54 @@ import "sort" // CountedBlobSet is a set of blobs. For each blob it also stores a uint8 value // which can be used to track some information. The CountedBlobSet does not use // that value in any way. New entries are created with value 0. -type CountedBlobSet map[BlobHandle]uint8 +type CountedBlobSet struct { + m map[BlobHandle]uint8 +} // NewCountedBlobSet returns a new CountedBlobSet, populated with ids. -func NewCountedBlobSet(handles ...BlobHandle) CountedBlobSet { - m := make(CountedBlobSet) +func NewCountedBlobSet(handles ...BlobHandle) *CountedBlobSet { + m := CountedBlobSet{} + m.m = make(map[BlobHandle]uint8) for _, h := range handles { - m[h] = 0 + m.m[h] = 0 } - return m + return &m +} + +func (s *CountedBlobSet) Get(h BlobHandle) (uint8, bool) { + val, ok := s.m[h] + return val, ok +} + +func (s *CountedBlobSet) Set(h BlobHandle, value uint8) { + s.m[h] = value } // Has returns true iff id is contained in the set. -func (s CountedBlobSet) Has(h BlobHandle) bool { - _, ok := s[h] +func (s *CountedBlobSet) Has(h BlobHandle) bool { + _, ok := s.m[h] return ok } // Insert adds id to the set. -func (s CountedBlobSet) Insert(h BlobHandle) { - s[h] = 0 +func (s *CountedBlobSet) Insert(h BlobHandle) { + s.m[h] = 0 } // Delete removes id from the set. -func (s CountedBlobSet) Delete(h BlobHandle) { - delete(s, h) +func (s *CountedBlobSet) Delete(h BlobHandle) { + delete(s.m, h) } -func (s CountedBlobSet) Len() int { - return len(s) +func (s *CountedBlobSet) Len() int { + return len(s.m) } // List returns a sorted slice of all BlobHandle in the set. -func (s CountedBlobSet) List() BlobHandles { - list := make(BlobHandles, 0, len(s)) - for h := range s { +func (s *CountedBlobSet) List() BlobHandles { + list := make(BlobHandles, 0, len(s.m)) + for h := range s.m { list = append(list, h) } @@ -49,7 +61,7 @@ func (s CountedBlobSet) List() BlobHandles { return list } -func (s CountedBlobSet) String() string { +func (s *CountedBlobSet) String() string { str := s.List().String() if len(str) < 2 { return "{}" @@ -59,10 +71,17 @@ func (s CountedBlobSet) String() string { } // Copy returns a copy of the CountedBlobSet. -func (s CountedBlobSet) Copy() CountedBlobSet { - cp := make(CountedBlobSet, len(s)) - for k, v := range s { - cp[k] = v +func (s *CountedBlobSet) Copy() *CountedBlobSet { + cp := &CountedBlobSet{} + cp.m = make(map[BlobHandle]uint8, len(s.m)) + for k, v := range s.m { + cp.m[k] = v } return cp } + +func (s *CountedBlobSet) For(cb func(h BlobHandle, value uint8)) { + for k, v := range s.m { + cb(k, v) + } +}