Merge pull request #3980 from MichaelEischer/prune-compression-stats
prune: Correctly count used/duplicate blobs for partially compressed repos
This commit is contained in:
commit
66818a8f98
2 changed files with 28 additions and 13 deletions
12
changelog/unreleased/issue-3918
Normal file
12
changelog/unreleased/issue-3918
Normal file
|
@ -0,0 +1,12 @@
|
||||||
|
Bugfix: Correct prune statistics for partially compressed repositories
|
||||||
|
|
||||||
|
In a partially compressed repository, one data blob can exist in both an
|
||||||
|
uncompressed and a compressed version. This caused the prune statistics to
|
||||||
|
become inaccurate and, for example, report too high a value for the unused size:
|
||||||
|
|
||||||
|
> unused size after prune: 16777215.991 TiB
|
||||||
|
|
||||||
|
This has been fixed.
|
||||||
|
|
||||||
|
https://github.com/restic/restic/issues/3918
|
||||||
|
https://github.com/restic/restic/pull/3980
|
|
@ -307,7 +307,6 @@ func packInfoFromIndex(ctx context.Context, idx restic.MasterIndex, usedBlobs re
|
||||||
// Thus 0 == blob is missing, 1 == blob exists once, >= 2 == duplicates exist
|
// Thus 0 == blob is missing, 1 == blob exists once, >= 2 == duplicates exist
|
||||||
idx.Each(ctx, func(blob restic.PackedBlob) {
|
idx.Each(ctx, func(blob restic.PackedBlob) {
|
||||||
bh := blob.BlobHandle
|
bh := blob.BlobHandle
|
||||||
size := uint64(blob.Length)
|
|
||||||
count, ok := usedBlobs[bh]
|
count, ok := usedBlobs[bh]
|
||||||
if ok {
|
if ok {
|
||||||
if count < math.MaxUint8 {
|
if count < math.MaxUint8 {
|
||||||
|
@ -317,19 +316,7 @@ func packInfoFromIndex(ctx context.Context, idx restic.MasterIndex, usedBlobs re
|
||||||
count++
|
count++
|
||||||
}
|
}
|
||||||
|
|
||||||
if count == 1 {
|
|
||||||
stats.size.used += size
|
|
||||||
stats.blobs.used++
|
|
||||||
} else {
|
|
||||||
// duplicate if counted more than once
|
|
||||||
stats.size.duplicate += size
|
|
||||||
stats.blobs.duplicate++
|
|
||||||
}
|
|
||||||
|
|
||||||
usedBlobs[bh] = count
|
usedBlobs[bh] = count
|
||||||
} else {
|
|
||||||
stats.size.unused += size
|
|
||||||
stats.blobs.unused++
|
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
|
@ -383,12 +370,22 @@ func packInfoFromIndex(ctx context.Context, idx restic.MasterIndex, usedBlobs re
|
||||||
// mark as unused for now, we will later on select one copy
|
// mark as unused for now, we will later on select one copy
|
||||||
ip.unusedSize += size
|
ip.unusedSize += size
|
||||||
ip.unusedBlobs++
|
ip.unusedBlobs++
|
||||||
|
|
||||||
|
// count as duplicate, will later on change one copy to be counted as used
|
||||||
|
stats.size.duplicate += size
|
||||||
|
stats.blobs.duplicate++
|
||||||
case dupCount == 1: // used blob, not duplicate
|
case dupCount == 1: // used blob, not duplicate
|
||||||
ip.usedSize += size
|
ip.usedSize += size
|
||||||
ip.usedBlobs++
|
ip.usedBlobs++
|
||||||
|
|
||||||
|
stats.size.used += size
|
||||||
|
stats.blobs.used++
|
||||||
default: // unused blob
|
default: // unused blob
|
||||||
ip.unusedSize += size
|
ip.unusedSize += size
|
||||||
ip.unusedBlobs++
|
ip.unusedBlobs++
|
||||||
|
|
||||||
|
stats.size.unused += size
|
||||||
|
stats.blobs.unused++
|
||||||
}
|
}
|
||||||
if !blob.IsCompressed() {
|
if !blob.IsCompressed() {
|
||||||
ip.uncompressed = true
|
ip.uncompressed = true
|
||||||
|
@ -421,6 +418,11 @@ func packInfoFromIndex(ctx context.Context, idx restic.MasterIndex, usedBlobs re
|
||||||
ip.usedBlobs++
|
ip.usedBlobs++
|
||||||
ip.unusedSize -= size
|
ip.unusedSize -= size
|
||||||
ip.unusedBlobs--
|
ip.unusedBlobs--
|
||||||
|
// same for the global statistics
|
||||||
|
stats.size.used += size
|
||||||
|
stats.blobs.used++
|
||||||
|
stats.size.duplicate -= size
|
||||||
|
stats.blobs.duplicate--
|
||||||
// let other occurrences remain marked as unused
|
// let other occurrences remain marked as unused
|
||||||
usedBlobs[bh] = 1
|
usedBlobs[bh] = 1
|
||||||
default:
|
default:
|
||||||
|
@ -686,6 +688,7 @@ func printPruneStats(gopts GlobalOptions, stats pruneStats) error {
|
||||||
func doPrune(ctx context.Context, opts PruneOptions, gopts GlobalOptions, repo restic.Repository, plan prunePlan) (err error) {
|
func doPrune(ctx context.Context, opts PruneOptions, gopts GlobalOptions, repo restic.Repository, plan prunePlan) (err error) {
|
||||||
if opts.DryRun {
|
if opts.DryRun {
|
||||||
if !gopts.JSON && gopts.verbosity >= 2 {
|
if !gopts.JSON && gopts.verbosity >= 2 {
|
||||||
|
Printf("Repeated prune dry-runs can report slightly different amounts of data to keep or repack. This is expected behavior.\n\n")
|
||||||
if len(plan.removePacksFirst) > 0 {
|
if len(plan.removePacksFirst) > 0 {
|
||||||
Printf("Would have removed the following unreferenced packs:\n%v\n\n", plan.removePacksFirst)
|
Printf("Would have removed the following unreferenced packs:\n%v\n\n", plan.removePacksFirst)
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue