prune: hide implementation details of counted blob set

2023-06-02 20:18:46 +02:00 · 2023-06-02 20:18:46 +02:00 · 93098e9265
commit 93098e9265
parent ff4775a15f
4 changed files with 61 additions and 42 deletions
--- a/cmd/restic/cmd_prune.go
+++ b/cmd/restic/cmd_prune.go
@@ -188,7 +188,7 @@ func runPruneWithRepo(ctx context.Context, opts PruneOptions, gopts GlobalOption
 		RepackUncompressed: opts.RepackUncompressed,
 	}

-	plan, err := repository.PlanPrune(ctx, popts, repo, func(ctx context.Context, repo restic.Repository) (usedBlobs restic.CountedBlobSet, err error) {
+	plan, err := repository.PlanPrune(ctx, popts, repo, func(ctx context.Context, repo restic.Repository) (usedBlobs *restic.CountedBlobSet, err error) {
 		return getUsedBlobs(ctx, repo, ignoreSnapshots, printer)
 	}, printer)
 	if err != nil {
@@ -255,7 +255,7 @@ func printPruneStats(printer progress.Printer, stats repository.PruneStats) erro
 	return nil
 }

-func getUsedBlobs(ctx context.Context, repo restic.Repository, ignoreSnapshots restic.IDSet, printer progress.Printer) (usedBlobs restic.CountedBlobSet, err error) {
+func getUsedBlobs(ctx context.Context, repo restic.Repository, ignoreSnapshots restic.IDSet, printer progress.Printer) (usedBlobs *restic.CountedBlobSet, err error) {
 	var snapshotTrees restic.IDs
 	printer.P("loading all snapshots...\n")
 	err = restic.ForAllSnapshots(ctx, repo, repo, ignoreSnapshots,
--- a/internal/repository/prune.go
+++ b/internal/repository/prune.go
@@ -60,11 +60,11 @@ type PruneStats struct {
 }

 type PrunePlan struct {
-	removePacksFirst restic.IDSet          // packs to remove first (unreferenced packs)
-	repackPacks      restic.IDSet          // packs to repack
-	keepBlobs        restic.CountedBlobSet // blobs to keep during repacking
-	removePacks      restic.IDSet          // packs to remove
-	ignorePacks      restic.IDSet          // packs to ignore when rebuilding the index
+	removePacksFirst restic.IDSet           // packs to remove first (unreferenced packs)
+	repackPacks      restic.IDSet           // packs to repack
+	keepBlobs        *restic.CountedBlobSet // blobs to keep during repacking
+	removePacks      restic.IDSet           // packs to remove
+	ignorePacks      restic.IDSet           // packs to ignore when rebuilding the index

 	repo  *Repository
 	stats PruneStats
@@ -90,7 +90,7 @@ type packInfoWithID struct {

 // PlanPrune selects which files to rewrite and which to delete and which blobs to keep.
 // Also some summary statistics are returned.
-func PlanPrune(ctx context.Context, opts PruneOptions, repo *Repository, getUsedBlobs func(ctx context.Context, repo restic.Repository) (usedBlobs restic.CountedBlobSet, err error), printer progress.Printer) (*PrunePlan, error) {
+func PlanPrune(ctx context.Context, opts PruneOptions, repo *Repository, getUsedBlobs func(ctx context.Context, repo restic.Repository) (usedBlobs *restic.CountedBlobSet, err error), printer progress.Printer) (*PrunePlan, error) {
 	var stats PruneStats

 	if opts.UnsafeRecovery {
@@ -152,13 +152,13 @@ func PlanPrune(ctx context.Context, opts PruneOptions, repo *Repository, getUsed
 	return &plan, nil
 }

-func packInfoFromIndex(ctx context.Context, idx restic.ListBlobser, usedBlobs restic.CountedBlobSet, stats *PruneStats, printer progress.Printer) (restic.CountedBlobSet, map[restic.ID]packInfo, error) {
+func packInfoFromIndex(ctx context.Context, idx restic.ListBlobser, usedBlobs *restic.CountedBlobSet, stats *PruneStats, printer progress.Printer) (*restic.CountedBlobSet, map[restic.ID]packInfo, error) {
 	// iterate over all blobs in index to find out which blobs are duplicates
 	// The counter in usedBlobs describes how many instances of the blob exist in the repository index
 	// Thus 0 == blob is missing, 1 == blob exists once, >= 2 == duplicates exist
 	err := idx.ListBlobs(ctx, func(blob restic.PackedBlob) {
 		bh := blob.BlobHandle
-		count, ok := usedBlobs[bh]
+		count, ok := usedBlobs.Get(bh)
 		if ok {
 			if count < math.MaxUint8 {
 				// don't overflow, but saturate count at 255
@@ -167,7 +167,7 @@ func packInfoFromIndex(ctx context.Context, idx restic.ListBlobser, usedBlobs re
 				count++
 			}

-			usedBlobs[bh] = count
+			usedBlobs.Set(bh, count)
 		}
 	})
 	if err != nil {
@@ -176,12 +176,12 @@ func packInfoFromIndex(ctx context.Context, idx restic.ListBlobser, usedBlobs re

 	// Check if all used blobs have been found in index
 	missingBlobs := restic.NewBlobSet()
-	for bh, count := range usedBlobs {
+	usedBlobs.For(func(bh restic.BlobHandle, count uint8) {
 		if count == 0 {
 			// blob does not exist in any pack files
 			missingBlobs.Insert(bh)
 		}
-	}
+	})

 	if len(missingBlobs) != 0 {
 		printer.E("%v not found in the index\n\n"+
@@ -221,7 +221,7 @@ func packInfoFromIndex(ctx context.Context, idx restic.ListBlobser, usedBlobs re

 		bh := blob.BlobHandle
 		size := uint64(blob.Length)
-		dupCount := usedBlobs[bh]
+		dupCount, _ := usedBlobs.Get(bh)
 		switch {
 		case dupCount >= 2:
 			hasDuplicates = true
@@ -266,7 +266,7 @@ func packInfoFromIndex(ctx context.Context, idx restic.ListBlobser, usedBlobs re
 		// iterate again over all blobs in index (this is pretty cheap, all in-mem)
 		err = idx.ListBlobs(ctx, func(blob restic.PackedBlob) {
 			bh := blob.BlobHandle
-			count, ok := usedBlobs[bh]
+			count, ok := usedBlobs.Get(bh)
 			// skip non-duplicate, aka. normal blobs
 			// count == 0 is used to mark that this was a duplicate blob with only a single occurrence remaining
 			if !ok || count == 1 {
@@ -290,7 +290,7 @@ func packInfoFromIndex(ctx context.Context, idx restic.ListBlobser, usedBlobs re
 				stats.Size.Duplicate -= size
 				stats.Blobs.Duplicate--
 				// let other occurrences remain marked as unused
-				usedBlobs[bh] = 1
+				usedBlobs.Set(bh, 1)
 			default:
 				// remain unused and decrease counter
 				count--
@@ -299,7 +299,7 @@ func packInfoFromIndex(ctx context.Context, idx restic.ListBlobser, usedBlobs re
 					// thus use the special value zero. This will select the last instance of the blob for keeping.
 					count = 0
 				}
-				usedBlobs[bh] = count
+				usedBlobs.Set(bh, count)
 			}
 			// update indexPack
 			indexPack[blob.PackID] = ip
@@ -311,11 +311,11 @@ func packInfoFromIndex(ctx context.Context, idx restic.ListBlobser, usedBlobs re

 	// Sanity check. If no duplicates exist, all blobs have value 1. After handling
 	// duplicates, this also applies to duplicates.
-	for _, count := range usedBlobs {
+	usedBlobs.For(func(_ restic.BlobHandle, count uint8) {
 		if count != 1 {
 			panic("internal error during blob selection")
 		}
-	}
+	})

 	return usedBlobs, indexPack, nil
 }
@@ -567,7 +567,7 @@ func (plan *PrunePlan) Execute(ctx context.Context, printer progress.Printer) er
 		// Also remove repacked packs
 		plan.removePacks.Merge(plan.repackPacks)

-		if len(plan.keepBlobs) != 0 {
+		if plan.keepBlobs.Len() != 0 {
 			printer.E("%v was not repacked\n\n"+
 				"Integrity check failed.\n"+
 				"Please report this error (along with the output of the 'prune' run) at\n"+
--- a/internal/repository/prune_test.go
+++ b/internal/repository/prune_test.go
@@ -30,7 +30,7 @@ func testPrune(t *testing.T, opts repository.PruneOptions, errOnUnused bool) {
 	}
 	rtest.OK(t, repo.Flush(context.TODO()))

-	plan, err := repository.PlanPrune(context.TODO(), opts, repo, func(ctx context.Context, repo restic.Repository) (usedBlobs restic.CountedBlobSet, err error) {
+	plan, err := repository.PlanPrune(context.TODO(), opts, repo, func(ctx context.Context, repo restic.Repository) (usedBlobs *restic.CountedBlobSet, err error) {
 		return restic.NewCountedBlobSet(keep.List()...), nil
 	}, &progress.NoopPrinter{})
 	rtest.OK(t, err)
--- a/internal/restic/counted_blob_set.go
+++ b/internal/restic/counted_blob_set.go
@@ -5,42 +5,54 @@ import "sort"
 // CountedBlobSet is a set of blobs. For each blob it also stores a uint8 value
 // which can be used to track some information. The CountedBlobSet does not use
 // that value in any way. New entries are created with value 0.
-type CountedBlobSet map[BlobHandle]uint8
+type CountedBlobSet struct {
+	m map[BlobHandle]uint8
+}

 // NewCountedBlobSet returns a new CountedBlobSet, populated with ids.
-func NewCountedBlobSet(handles ...BlobHandle) CountedBlobSet {
-	m := make(CountedBlobSet)
+func NewCountedBlobSet(handles ...BlobHandle) *CountedBlobSet {
+	m := CountedBlobSet{}
+	m.m = make(map[BlobHandle]uint8)
 	for _, h := range handles {
-		m[h] = 0
+		m.m[h] = 0
 	}

-	return m
+	return &m
+}
+
+func (s *CountedBlobSet) Get(h BlobHandle) (uint8, bool) {
+	val, ok := s.m[h]
+	return val, ok
+}
+
+func (s *CountedBlobSet) Set(h BlobHandle, value uint8) {
+	s.m[h] = value
 }

 // Has returns true iff id is contained in the set.
-func (s CountedBlobSet) Has(h BlobHandle) bool {
-	_, ok := s[h]
+func (s *CountedBlobSet) Has(h BlobHandle) bool {
+	_, ok := s.m[h]
 	return ok
 }

 // Insert adds id to the set.
-func (s CountedBlobSet) Insert(h BlobHandle) {
-	s[h] = 0
+func (s *CountedBlobSet) Insert(h BlobHandle) {
+	s.m[h] = 0
 }

 // Delete removes id from the set.
-func (s CountedBlobSet) Delete(h BlobHandle) {
-	delete(s, h)
+func (s *CountedBlobSet) Delete(h BlobHandle) {
+	delete(s.m, h)
 }

-func (s CountedBlobSet) Len() int {
-	return len(s)
+func (s *CountedBlobSet) Len() int {
+	return len(s.m)
 }

 // List returns a sorted slice of all BlobHandle in the set.
-func (s CountedBlobSet) List() BlobHandles {
-	list := make(BlobHandles, 0, len(s))
-	for h := range s {
+func (s *CountedBlobSet) List() BlobHandles {
+	list := make(BlobHandles, 0, len(s.m))
+	for h := range s.m {
 		list = append(list, h)
 	}

@@ -49,7 +61,7 @@ func (s CountedBlobSet) List() BlobHandles {
 	return list
 }

-func (s CountedBlobSet) String() string {
+func (s *CountedBlobSet) String() string {
 	str := s.List().String()
 	if len(str) < 2 {
 		return "{}"
@@ -59,10 +71,17 @@ func (s CountedBlobSet) String() string {
 }

 // Copy returns a copy of the CountedBlobSet.
-func (s CountedBlobSet) Copy() CountedBlobSet {
-	cp := make(CountedBlobSet, len(s))
-	for k, v := range s {
-		cp[k] = v
+func (s *CountedBlobSet) Copy() *CountedBlobSet {
+	cp := &CountedBlobSet{}
+	cp.m = make(map[BlobHandle]uint8, len(s.m))
+	for k, v := range s.m {
+		cp.m[k] = v
 	}
 	return cp
 }
+
+func (s *CountedBlobSet) For(cb func(h BlobHandle, value uint8)) {
+	for k, v := range s.m {
+		cb(k, v)
+	}
+}