From 9be1bd2accbaadd7119d51f7a989ea37e2ee3b92 Mon Sep 17 00:00:00 2001
From: Michael Eischer <michael.eischer@fau.de>
Date: Sun, 17 Jul 2022 00:27:40 +0200
Subject: [PATCH] prune: handle very high duplication of some blobs

Suggested-By: Alexander Weiss <alex@weissfam.de>
---
 changelog/unreleased/issue-3114 | 12 ++++++------
 cmd/restic/cmd_prune.go         | 13 ++++++-------
 2 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/changelog/unreleased/issue-3114 b/changelog/unreleased/issue-3114
index 68b2556c8..c7cf8c7b9 100644
--- a/changelog/unreleased/issue-3114
+++ b/changelog/unreleased/issue-3114
@@ -1,10 +1,10 @@
-Enhancement: Improve `prune` in presence of duplicate blobs
+Enhancement: Optimize handling of duplicate blobs in `prune`
+
+Restic `prune` always used to repack all data files containing duplicate
+blobs. This effectively removed all duplicates during prune. However, as a
+consequence, all these data files were repacked even if the unused repository
+space threshold could be reached with less work.
 
-Restic `prune` always used to repack all pack files containing duplicate
-blobs. This effectively removed all duplicates during prune. However, one 
-of the consequences was that all those pack files were downloadeded and
-duplicate blobs did not contribute to the threshold for unused repository
-space.
 This is now changed and `prune` works nice and fast also if there are lots
 of duplicates.
 
diff --git a/cmd/restic/cmd_prune.go b/cmd/restic/cmd_prune.go
index 96e115b66..1c63e0755 100644
--- a/cmd/restic/cmd_prune.go
+++ b/cmd/restic/cmd_prune.go
@@ -258,12 +258,11 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedB
 			count, ok := duplicateBlobs[bh]
 			if !ok {
 				count = 2 // this one is already the second blob!
-			} else {
+			} else if count < math.MaxUint8 {
+				// don't overflow, but saturate count at 255
+				// this can lead to a non-optimal pack selection, but won't cause
+				// problems otherwise
 				count++
-				if count == 0 {
-					// catch uint8 overflow
-					panic("too many duplicates, prune can only handly up to 255!")
-				}
 			}
 			duplicateBlobs[bh] = count
 			stats.size.duplicate += size
@@ -326,9 +325,9 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedB
 	}
 
 	// if duplicate blobs exist, those will be set to either "used" or "unused":
-	// - mark only one occurency of duplicate blobs as used
+	// - mark only one occurrence of duplicate blobs as used
 	// - if there are already some used blobs in a pack, possibly mark duplicates in this pack as "used"
-	// - if there are no used blobs in a pack, possibly mark duplicates as "usused"
+	// - if there are no used blobs in a pack, possibly mark duplicates as "unused"
 	if len(duplicateBlobs) > 0 {
 		// iterate again over all blobs in index (this is pretty cheap, all in-mem)
 		for blob := range repo.Index().Each(ctx) {