forked from TrueCloudLab/restic
prune: hide implementation details of counted blob set
This commit is contained in:
parent
ff4775a15f
commit
93098e9265
4 changed files with 61 additions and 42 deletions
|
@ -188,7 +188,7 @@ func runPruneWithRepo(ctx context.Context, opts PruneOptions, gopts GlobalOption
|
||||||
RepackUncompressed: opts.RepackUncompressed,
|
RepackUncompressed: opts.RepackUncompressed,
|
||||||
}
|
}
|
||||||
|
|
||||||
plan, err := repository.PlanPrune(ctx, popts, repo, func(ctx context.Context, repo restic.Repository) (usedBlobs restic.CountedBlobSet, err error) {
|
plan, err := repository.PlanPrune(ctx, popts, repo, func(ctx context.Context, repo restic.Repository) (usedBlobs *restic.CountedBlobSet, err error) {
|
||||||
return getUsedBlobs(ctx, repo, ignoreSnapshots, printer)
|
return getUsedBlobs(ctx, repo, ignoreSnapshots, printer)
|
||||||
}, printer)
|
}, printer)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -255,7 +255,7 @@ func printPruneStats(printer progress.Printer, stats repository.PruneStats) erro
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func getUsedBlobs(ctx context.Context, repo restic.Repository, ignoreSnapshots restic.IDSet, printer progress.Printer) (usedBlobs restic.CountedBlobSet, err error) {
|
func getUsedBlobs(ctx context.Context, repo restic.Repository, ignoreSnapshots restic.IDSet, printer progress.Printer) (usedBlobs *restic.CountedBlobSet, err error) {
|
||||||
var snapshotTrees restic.IDs
|
var snapshotTrees restic.IDs
|
||||||
printer.P("loading all snapshots...\n")
|
printer.P("loading all snapshots...\n")
|
||||||
err = restic.ForAllSnapshots(ctx, repo, repo, ignoreSnapshots,
|
err = restic.ForAllSnapshots(ctx, repo, repo, ignoreSnapshots,
|
||||||
|
|
|
@ -62,7 +62,7 @@ type PruneStats struct {
|
||||||
type PrunePlan struct {
|
type PrunePlan struct {
|
||||||
removePacksFirst restic.IDSet // packs to remove first (unreferenced packs)
|
removePacksFirst restic.IDSet // packs to remove first (unreferenced packs)
|
||||||
repackPacks restic.IDSet // packs to repack
|
repackPacks restic.IDSet // packs to repack
|
||||||
keepBlobs restic.CountedBlobSet // blobs to keep during repacking
|
keepBlobs *restic.CountedBlobSet // blobs to keep during repacking
|
||||||
removePacks restic.IDSet // packs to remove
|
removePacks restic.IDSet // packs to remove
|
||||||
ignorePacks restic.IDSet // packs to ignore when rebuilding the index
|
ignorePacks restic.IDSet // packs to ignore when rebuilding the index
|
||||||
|
|
||||||
|
@ -90,7 +90,7 @@ type packInfoWithID struct {
|
||||||
|
|
||||||
// PlanPrune selects which files to rewrite and which to delete and which blobs to keep.
|
// PlanPrune selects which files to rewrite and which to delete and which blobs to keep.
|
||||||
// Also some summary statistics are returned.
|
// Also some summary statistics are returned.
|
||||||
func PlanPrune(ctx context.Context, opts PruneOptions, repo *Repository, getUsedBlobs func(ctx context.Context, repo restic.Repository) (usedBlobs restic.CountedBlobSet, err error), printer progress.Printer) (*PrunePlan, error) {
|
func PlanPrune(ctx context.Context, opts PruneOptions, repo *Repository, getUsedBlobs func(ctx context.Context, repo restic.Repository) (usedBlobs *restic.CountedBlobSet, err error), printer progress.Printer) (*PrunePlan, error) {
|
||||||
var stats PruneStats
|
var stats PruneStats
|
||||||
|
|
||||||
if opts.UnsafeRecovery {
|
if opts.UnsafeRecovery {
|
||||||
|
@ -152,13 +152,13 @@ func PlanPrune(ctx context.Context, opts PruneOptions, repo *Repository, getUsed
|
||||||
return &plan, nil
|
return &plan, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func packInfoFromIndex(ctx context.Context, idx restic.ListBlobser, usedBlobs restic.CountedBlobSet, stats *PruneStats, printer progress.Printer) (restic.CountedBlobSet, map[restic.ID]packInfo, error) {
|
func packInfoFromIndex(ctx context.Context, idx restic.ListBlobser, usedBlobs *restic.CountedBlobSet, stats *PruneStats, printer progress.Printer) (*restic.CountedBlobSet, map[restic.ID]packInfo, error) {
|
||||||
// iterate over all blobs in index to find out which blobs are duplicates
|
// iterate over all blobs in index to find out which blobs are duplicates
|
||||||
// The counter in usedBlobs describes how many instances of the blob exist in the repository index
|
// The counter in usedBlobs describes how many instances of the blob exist in the repository index
|
||||||
// Thus 0 == blob is missing, 1 == blob exists once, >= 2 == duplicates exist
|
// Thus 0 == blob is missing, 1 == blob exists once, >= 2 == duplicates exist
|
||||||
err := idx.ListBlobs(ctx, func(blob restic.PackedBlob) {
|
err := idx.ListBlobs(ctx, func(blob restic.PackedBlob) {
|
||||||
bh := blob.BlobHandle
|
bh := blob.BlobHandle
|
||||||
count, ok := usedBlobs[bh]
|
count, ok := usedBlobs.Get(bh)
|
||||||
if ok {
|
if ok {
|
||||||
if count < math.MaxUint8 {
|
if count < math.MaxUint8 {
|
||||||
// don't overflow, but saturate count at 255
|
// don't overflow, but saturate count at 255
|
||||||
|
@ -167,7 +167,7 @@ func packInfoFromIndex(ctx context.Context, idx restic.ListBlobser, usedBlobs re
|
||||||
count++
|
count++
|
||||||
}
|
}
|
||||||
|
|
||||||
usedBlobs[bh] = count
|
usedBlobs.Set(bh, count)
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -176,12 +176,12 @@ func packInfoFromIndex(ctx context.Context, idx restic.ListBlobser, usedBlobs re
|
||||||
|
|
||||||
// Check if all used blobs have been found in index
|
// Check if all used blobs have been found in index
|
||||||
missingBlobs := restic.NewBlobSet()
|
missingBlobs := restic.NewBlobSet()
|
||||||
for bh, count := range usedBlobs {
|
usedBlobs.For(func(bh restic.BlobHandle, count uint8) {
|
||||||
if count == 0 {
|
if count == 0 {
|
||||||
// blob does not exist in any pack files
|
// blob does not exist in any pack files
|
||||||
missingBlobs.Insert(bh)
|
missingBlobs.Insert(bh)
|
||||||
}
|
}
|
||||||
}
|
})
|
||||||
|
|
||||||
if len(missingBlobs) != 0 {
|
if len(missingBlobs) != 0 {
|
||||||
printer.E("%v not found in the index\n\n"+
|
printer.E("%v not found in the index\n\n"+
|
||||||
|
@ -221,7 +221,7 @@ func packInfoFromIndex(ctx context.Context, idx restic.ListBlobser, usedBlobs re
|
||||||
|
|
||||||
bh := blob.BlobHandle
|
bh := blob.BlobHandle
|
||||||
size := uint64(blob.Length)
|
size := uint64(blob.Length)
|
||||||
dupCount := usedBlobs[bh]
|
dupCount, _ := usedBlobs.Get(bh)
|
||||||
switch {
|
switch {
|
||||||
case dupCount >= 2:
|
case dupCount >= 2:
|
||||||
hasDuplicates = true
|
hasDuplicates = true
|
||||||
|
@ -266,7 +266,7 @@ func packInfoFromIndex(ctx context.Context, idx restic.ListBlobser, usedBlobs re
|
||||||
// iterate again over all blobs in index (this is pretty cheap, all in-mem)
|
// iterate again over all blobs in index (this is pretty cheap, all in-mem)
|
||||||
err = idx.ListBlobs(ctx, func(blob restic.PackedBlob) {
|
err = idx.ListBlobs(ctx, func(blob restic.PackedBlob) {
|
||||||
bh := blob.BlobHandle
|
bh := blob.BlobHandle
|
||||||
count, ok := usedBlobs[bh]
|
count, ok := usedBlobs.Get(bh)
|
||||||
// skip non-duplicate, aka. normal blobs
|
// skip non-duplicate, aka. normal blobs
|
||||||
// count == 0 is used to mark that this was a duplicate blob with only a single occurrence remaining
|
// count == 0 is used to mark that this was a duplicate blob with only a single occurrence remaining
|
||||||
if !ok || count == 1 {
|
if !ok || count == 1 {
|
||||||
|
@ -290,7 +290,7 @@ func packInfoFromIndex(ctx context.Context, idx restic.ListBlobser, usedBlobs re
|
||||||
stats.Size.Duplicate -= size
|
stats.Size.Duplicate -= size
|
||||||
stats.Blobs.Duplicate--
|
stats.Blobs.Duplicate--
|
||||||
// let other occurrences remain marked as unused
|
// let other occurrences remain marked as unused
|
||||||
usedBlobs[bh] = 1
|
usedBlobs.Set(bh, 1)
|
||||||
default:
|
default:
|
||||||
// remain unused and decrease counter
|
// remain unused and decrease counter
|
||||||
count--
|
count--
|
||||||
|
@ -299,7 +299,7 @@ func packInfoFromIndex(ctx context.Context, idx restic.ListBlobser, usedBlobs re
|
||||||
// thus use the special value zero. This will select the last instance of the blob for keeping.
|
// thus use the special value zero. This will select the last instance of the blob for keeping.
|
||||||
count = 0
|
count = 0
|
||||||
}
|
}
|
||||||
usedBlobs[bh] = count
|
usedBlobs.Set(bh, count)
|
||||||
}
|
}
|
||||||
// update indexPack
|
// update indexPack
|
||||||
indexPack[blob.PackID] = ip
|
indexPack[blob.PackID] = ip
|
||||||
|
@ -311,11 +311,11 @@ func packInfoFromIndex(ctx context.Context, idx restic.ListBlobser, usedBlobs re
|
||||||
|
|
||||||
// Sanity check. If no duplicates exist, all blobs have value 1. After handling
|
// Sanity check. If no duplicates exist, all blobs have value 1. After handling
|
||||||
// duplicates, this also applies to duplicates.
|
// duplicates, this also applies to duplicates.
|
||||||
for _, count := range usedBlobs {
|
usedBlobs.For(func(_ restic.BlobHandle, count uint8) {
|
||||||
if count != 1 {
|
if count != 1 {
|
||||||
panic("internal error during blob selection")
|
panic("internal error during blob selection")
|
||||||
}
|
}
|
||||||
}
|
})
|
||||||
|
|
||||||
return usedBlobs, indexPack, nil
|
return usedBlobs, indexPack, nil
|
||||||
}
|
}
|
||||||
|
@ -567,7 +567,7 @@ func (plan *PrunePlan) Execute(ctx context.Context, printer progress.Printer) er
|
||||||
// Also remove repacked packs
|
// Also remove repacked packs
|
||||||
plan.removePacks.Merge(plan.repackPacks)
|
plan.removePacks.Merge(plan.repackPacks)
|
||||||
|
|
||||||
if len(plan.keepBlobs) != 0 {
|
if plan.keepBlobs.Len() != 0 {
|
||||||
printer.E("%v was not repacked\n\n"+
|
printer.E("%v was not repacked\n\n"+
|
||||||
"Integrity check failed.\n"+
|
"Integrity check failed.\n"+
|
||||||
"Please report this error (along with the output of the 'prune' run) at\n"+
|
"Please report this error (along with the output of the 'prune' run) at\n"+
|
||||||
|
|
|
@ -30,7 +30,7 @@ func testPrune(t *testing.T, opts repository.PruneOptions, errOnUnused bool) {
|
||||||
}
|
}
|
||||||
rtest.OK(t, repo.Flush(context.TODO()))
|
rtest.OK(t, repo.Flush(context.TODO()))
|
||||||
|
|
||||||
plan, err := repository.PlanPrune(context.TODO(), opts, repo, func(ctx context.Context, repo restic.Repository) (usedBlobs restic.CountedBlobSet, err error) {
|
plan, err := repository.PlanPrune(context.TODO(), opts, repo, func(ctx context.Context, repo restic.Repository) (usedBlobs *restic.CountedBlobSet, err error) {
|
||||||
return restic.NewCountedBlobSet(keep.List()...), nil
|
return restic.NewCountedBlobSet(keep.List()...), nil
|
||||||
}, &progress.NoopPrinter{})
|
}, &progress.NoopPrinter{})
|
||||||
rtest.OK(t, err)
|
rtest.OK(t, err)
|
||||||
|
|
|
@ -5,42 +5,54 @@ import "sort"
|
||||||
// CountedBlobSet is a set of blobs. For each blob it also stores a uint8 value
|
// CountedBlobSet is a set of blobs. For each blob it also stores a uint8 value
|
||||||
// which can be used to track some information. The CountedBlobSet does not use
|
// which can be used to track some information. The CountedBlobSet does not use
|
||||||
// that value in any way. New entries are created with value 0.
|
// that value in any way. New entries are created with value 0.
|
||||||
type CountedBlobSet map[BlobHandle]uint8
|
type CountedBlobSet struct {
|
||||||
|
m map[BlobHandle]uint8
|
||||||
|
}
|
||||||
|
|
||||||
// NewCountedBlobSet returns a new CountedBlobSet, populated with ids.
|
// NewCountedBlobSet returns a new CountedBlobSet, populated with ids.
|
||||||
func NewCountedBlobSet(handles ...BlobHandle) CountedBlobSet {
|
func NewCountedBlobSet(handles ...BlobHandle) *CountedBlobSet {
|
||||||
m := make(CountedBlobSet)
|
m := CountedBlobSet{}
|
||||||
|
m.m = make(map[BlobHandle]uint8)
|
||||||
for _, h := range handles {
|
for _, h := range handles {
|
||||||
m[h] = 0
|
m.m[h] = 0
|
||||||
}
|
}
|
||||||
|
|
||||||
return m
|
return &m
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *CountedBlobSet) Get(h BlobHandle) (uint8, bool) {
|
||||||
|
val, ok := s.m[h]
|
||||||
|
return val, ok
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *CountedBlobSet) Set(h BlobHandle, value uint8) {
|
||||||
|
s.m[h] = value
|
||||||
}
|
}
|
||||||
|
|
||||||
// Has returns true iff id is contained in the set.
|
// Has returns true iff id is contained in the set.
|
||||||
func (s CountedBlobSet) Has(h BlobHandle) bool {
|
func (s *CountedBlobSet) Has(h BlobHandle) bool {
|
||||||
_, ok := s[h]
|
_, ok := s.m[h]
|
||||||
return ok
|
return ok
|
||||||
}
|
}
|
||||||
|
|
||||||
// Insert adds id to the set.
|
// Insert adds id to the set.
|
||||||
func (s CountedBlobSet) Insert(h BlobHandle) {
|
func (s *CountedBlobSet) Insert(h BlobHandle) {
|
||||||
s[h] = 0
|
s.m[h] = 0
|
||||||
}
|
}
|
||||||
|
|
||||||
// Delete removes id from the set.
|
// Delete removes id from the set.
|
||||||
func (s CountedBlobSet) Delete(h BlobHandle) {
|
func (s *CountedBlobSet) Delete(h BlobHandle) {
|
||||||
delete(s, h)
|
delete(s.m, h)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s CountedBlobSet) Len() int {
|
func (s *CountedBlobSet) Len() int {
|
||||||
return len(s)
|
return len(s.m)
|
||||||
}
|
}
|
||||||
|
|
||||||
// List returns a sorted slice of all BlobHandle in the set.
|
// List returns a sorted slice of all BlobHandle in the set.
|
||||||
func (s CountedBlobSet) List() BlobHandles {
|
func (s *CountedBlobSet) List() BlobHandles {
|
||||||
list := make(BlobHandles, 0, len(s))
|
list := make(BlobHandles, 0, len(s.m))
|
||||||
for h := range s {
|
for h := range s.m {
|
||||||
list = append(list, h)
|
list = append(list, h)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -49,7 +61,7 @@ func (s CountedBlobSet) List() BlobHandles {
|
||||||
return list
|
return list
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s CountedBlobSet) String() string {
|
func (s *CountedBlobSet) String() string {
|
||||||
str := s.List().String()
|
str := s.List().String()
|
||||||
if len(str) < 2 {
|
if len(str) < 2 {
|
||||||
return "{}"
|
return "{}"
|
||||||
|
@ -59,10 +71,17 @@ func (s CountedBlobSet) String() string {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Copy returns a copy of the CountedBlobSet.
|
// Copy returns a copy of the CountedBlobSet.
|
||||||
func (s CountedBlobSet) Copy() CountedBlobSet {
|
func (s *CountedBlobSet) Copy() *CountedBlobSet {
|
||||||
cp := make(CountedBlobSet, len(s))
|
cp := &CountedBlobSet{}
|
||||||
for k, v := range s {
|
cp.m = make(map[BlobHandle]uint8, len(s.m))
|
||||||
cp[k] = v
|
for k, v := range s.m {
|
||||||
|
cp.m[k] = v
|
||||||
}
|
}
|
||||||
return cp
|
return cp
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *CountedBlobSet) For(cb func(h BlobHandle, value uint8)) {
|
||||||
|
for k, v := range s.m {
|
||||||
|
cb(k, v)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in a new issue