forked from TrueCloudLab/restic
Merge pull request #3980 from MichaelEischer/prune-compression-stats
prune: Correctly count used/duplicate blobs for partially compressed repos
This commit is contained in:
commit
66818a8f98
2 changed files with 28 additions and 13 deletions
12
changelog/unreleased/issue-3918
Normal file
12
changelog/unreleased/issue-3918
Normal file
|
@ -0,0 +1,12 @@
|
|||
Bugfix: Correct prune statistics for partially compressed repositories
|
||||
|
||||
In a partially compressed repository, one data blob can exist both in an
|
||||
uncompressed and a compressed version. This caused the prune statistics to
|
||||
become inaccurate and for example report a too high value for the unused size:
|
||||
|
||||
> unused size after prune: 16777215.991 TiB
|
||||
|
||||
This has been fixed.
|
||||
|
||||
https://github.com/restic/restic/issues/3918
|
||||
https://github.com/restic/restic/pull/3980
|
|
@ -307,7 +307,6 @@ func packInfoFromIndex(ctx context.Context, idx restic.MasterIndex, usedBlobs re
|
|||
// Thus 0 == blob is missing, 1 == blob exists once, >= 2 == duplicates exist
|
||||
idx.Each(ctx, func(blob restic.PackedBlob) {
|
||||
bh := blob.BlobHandle
|
||||
size := uint64(blob.Length)
|
||||
count, ok := usedBlobs[bh]
|
||||
if ok {
|
||||
if count < math.MaxUint8 {
|
||||
|
@ -317,19 +316,7 @@ func packInfoFromIndex(ctx context.Context, idx restic.MasterIndex, usedBlobs re
|
|||
count++
|
||||
}
|
||||
|
||||
if count == 1 {
|
||||
stats.size.used += size
|
||||
stats.blobs.used++
|
||||
} else {
|
||||
// duplicate if counted more than once
|
||||
stats.size.duplicate += size
|
||||
stats.blobs.duplicate++
|
||||
}
|
||||
|
||||
usedBlobs[bh] = count
|
||||
} else {
|
||||
stats.size.unused += size
|
||||
stats.blobs.unused++
|
||||
}
|
||||
})
|
||||
|
||||
|
@ -383,12 +370,22 @@ func packInfoFromIndex(ctx context.Context, idx restic.MasterIndex, usedBlobs re
|
|||
// mark as unused for now, we will later on select one copy
|
||||
ip.unusedSize += size
|
||||
ip.unusedBlobs++
|
||||
|
||||
// count as duplicate, will later on change one copy to be counted as used
|
||||
stats.size.duplicate += size
|
||||
stats.blobs.duplicate++
|
||||
case dupCount == 1: // used blob, not duplicate
|
||||
ip.usedSize += size
|
||||
ip.usedBlobs++
|
||||
|
||||
stats.size.used += size
|
||||
stats.blobs.used++
|
||||
default: // unused blob
|
||||
ip.unusedSize += size
|
||||
ip.unusedBlobs++
|
||||
|
||||
stats.size.unused += size
|
||||
stats.blobs.unused++
|
||||
}
|
||||
if !blob.IsCompressed() {
|
||||
ip.uncompressed = true
|
||||
|
@ -421,6 +418,11 @@ func packInfoFromIndex(ctx context.Context, idx restic.MasterIndex, usedBlobs re
|
|||
ip.usedBlobs++
|
||||
ip.unusedSize -= size
|
||||
ip.unusedBlobs--
|
||||
// same for the global statistics
|
||||
stats.size.used += size
|
||||
stats.blobs.used++
|
||||
stats.size.duplicate -= size
|
||||
stats.blobs.duplicate--
|
||||
// let other occurences remain marked as unused
|
||||
usedBlobs[bh] = 1
|
||||
default:
|
||||
|
@ -686,6 +688,7 @@ func printPruneStats(gopts GlobalOptions, stats pruneStats) error {
|
|||
func doPrune(ctx context.Context, opts PruneOptions, gopts GlobalOptions, repo restic.Repository, plan prunePlan) (err error) {
|
||||
if opts.DryRun {
|
||||
if !gopts.JSON && gopts.verbosity >= 2 {
|
||||
Printf("Repeated prune dry-runs can report slightly different amounts of data to keep or repack. This is expected behavior.\n\n")
|
||||
if len(plan.removePacksFirst) > 0 {
|
||||
Printf("Would have removed the following unreferenced packs:\n%v\n\n", plan.removePacksFirst)
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue