From 51aff3ca571abb1abee27d0a2f3eee9908b68b04 Mon Sep 17 00:00:00 2001
From: Alexander Neumann <alexander@bumpern.de>
Date: Sun, 1 Nov 2015 22:45:52 +0100
Subject: [PATCH] Add FindBlobsForPacks()

---
 checker/repacker.go      | 26 +++++++++++----
 checker/repacker_test.go | 69 ++++++++++++++++++++++++++++++----------
 2 files changed, 73 insertions(+), 22 deletions(-)

diff --git a/checker/repacker.go b/checker/repacker.go
index 39fd47355..e2669fa03 100644
--- a/checker/repacker.go
+++ b/checker/repacker.go
@@ -11,14 +11,14 @@ import (
 // Repacker extracts still used blobs from packs with unused blobs and creates
 // new packs.
 type Repacker struct {
-	unusedBlobs []backend.ID
+	unusedBlobs backend.IDSet
 	src, dst    *repository.Repository
 }
 
 // NewRepacker returns a new repacker that (when Repack() in run) cleans up the
 // repository and creates new packs and indexs so that all blobs in unusedBlobs
 // aren't used any more.
-func NewRepacker(src, dst *repository.Repository, unusedBlobs []backend.ID) *Repacker {
+func NewRepacker(src, dst *repository.Repository, unusedBlobs backend.IDSet) *Repacker {
 	return &Repacker{
 		src:         src,
 		dst:         dst,
@@ -30,7 +30,8 @@ func NewRepacker(src, dst *repository.Repository, unusedBlobs []backend.ID) *Rep
 // blobs, extracts them and creates new packs with just the still-in-use blobs.
 func (r *Repacker) Repack() error {
 	debug.Log("Repacker.Repack", "searching packs for %v", r.unusedBlobs)
-	packs, err := FindPacksforBlobs(r.src, r.unusedBlobs)
+
+	packs, err := FindPacksForBlobs(r.src, r.unusedBlobs)
 	if err != nil {
 		return err
 	}
@@ -40,11 +41,11 @@ func (r *Repacker) Repack() error {
 	return nil
 }
 
-// FindPacksforBlobs returns the set of packs that contain the blobs.
-func FindPacksforBlobs(repo *repository.Repository, blobs []backend.ID) (backend.IDSet, error) {
+// FindPacksForBlobs returns the set of packs that contain the blobs.
+func FindPacksForBlobs(repo *repository.Repository, blobs backend.IDSet) (backend.IDSet, error) {
 	packs := backend.NewIDSet()
 	idx := repo.Index()
-	for _, id := range blobs {
+	for id := range blobs {
 		blob, err := idx.Lookup(id)
 		if err != nil {
 			return nil, err
@@ -56,6 +57,19 @@ func FindPacksforBlobs(repo *repository.Repository, blobs []backend.ID) (backend
 	return packs, nil
 }
 
+// FindBlobsForPacks returns the set of blobs contained in a set of packs.
+func FindBlobsForPacks(repo *repository.Repository, packs backend.IDSet) (backend.IDSet, error) {
+	blobs := backend.NewIDSet()
+
+	for packID := range packs {
+		for _, packedBlob := range repo.Index().ListPack(packID) {
+			blobs.Insert(packedBlob.ID)
+		}
+	}
+
+	return blobs, nil
+}
+
 // repackBlob loads a single blob from src and saves it in dst.
 func repackBlob(src, dst *repository.Repository, id backend.ID) error {
 	blob, err := src.Index().Lookup(id)
diff --git a/checker/repacker_test.go b/checker/repacker_test.go
index c0e70ae12..a2e9979ce 100644
--- a/checker/repacker_test.go
+++ b/checker/repacker_test.go
@@ -10,15 +10,15 @@ import (
 )
 
 var findPackTests = []struct {
-	blobIDs backend.IDs
+	blobIDs backend.IDSet
 	packIDs backend.IDSet
 }{
 	{
-		backend.IDs{
-			ParseID("534f211b4fc2cf5b362a24e8eba22db5372a75b7e974603ff9263f5a471760f4"),
-			ParseID("51aa04744b518c6a85b4e7643cfa99d58789c2a6ca2a3fda831fa3032f28535c"),
-			ParseID("454515bca5f4f60349a527bd814cc2681bc3625716460cc6310771c966d8a3bf"),
-			ParseID("c01952de4d91da1b1b80bc6e06eaa4ec21523f4853b69dc8231708b9b7ec62d8"),
+		backend.IDSet{
+			ParseID("534f211b4fc2cf5b362a24e8eba22db5372a75b7e974603ff9263f5a471760f4"): struct{}{},
+			ParseID("51aa04744b518c6a85b4e7643cfa99d58789c2a6ca2a3fda831fa3032f28535c"): struct{}{},
+			ParseID("454515bca5f4f60349a527bd814cc2681bc3625716460cc6310771c966d8a3bf"): struct{}{},
+			ParseID("c01952de4d91da1b1b80bc6e06eaa4ec21523f4853b69dc8231708b9b7ec62d8"): struct{}{},
 		},
 		backend.IDSet{
 			ParseID("19a731a515618ec8b75fc0ff3b887d8feb83aef1001c9899f6702761142ed068"): struct{}{},
@@ -27,6 +27,41 @@ var findPackTests = []struct {
 	},
 }
 
+var findBlobTests = []struct {
+	packIDs backend.IDSet
+	blobIDs backend.IDSet
+}{
+	{
+		backend.IDSet{
+			ParseID("60e0438dcb978ec6860cc1f8c43da648170ee9129af8f650f876bad19f8f788e"): struct{}{},
+		},
+		backend.IDSet{
+			ParseID("356493f0b00a614d36c698591bbb2b1d801932d85328c1f508019550034549fc"): struct{}{},
+			ParseID("b8a6bcdddef5c0f542b4648b2ef79bc0ed4377d4109755d2fb78aff11e042663"): struct{}{},
+			ParseID("5714f7274a8aa69b1692916739dc3835d09aac5395946b8ec4f58e563947199a"): struct{}{},
+			ParseID("b5bb9d8014a0f9b1d61e21e796d78dccdf1352f23cd32812f4850b878ae4944c"): struct{}{},
+			ParseID("08d0444e9987fa6e35ce4232b2b71473e1a8f66b2f9664cc44dc57aad3c5a63a"): struct{}{},
+		},
+	},
+	{
+		backend.IDSet{
+			ParseID("60e0438dcb978ec6860cc1f8c43da648170ee9129af8f650f876bad19f8f788e"): struct{}{},
+			ParseID("ff7e12cd66d896b08490e787d1915c641e678d7e6b4a00e60db5d13054f4def4"): struct{}{},
+		},
+		backend.IDSet{
+			ParseID("356493f0b00a614d36c698591bbb2b1d801932d85328c1f508019550034549fc"): struct{}{},
+			ParseID("b8a6bcdddef5c0f542b4648b2ef79bc0ed4377d4109755d2fb78aff11e042663"): struct{}{},
+			ParseID("5714f7274a8aa69b1692916739dc3835d09aac5395946b8ec4f58e563947199a"): struct{}{},
+			ParseID("b5bb9d8014a0f9b1d61e21e796d78dccdf1352f23cd32812f4850b878ae4944c"): struct{}{},
+			ParseID("08d0444e9987fa6e35ce4232b2b71473e1a8f66b2f9664cc44dc57aad3c5a63a"): struct{}{},
+			ParseID("aa79d596dbd4c863e5400deaca869830888fe1ce9f51b4a983f532c77f16a596"): struct{}{},
+			ParseID("b2396c92781307111accf2ebb1cd62b58134b744d90cb6f153ca456a98dc3e76"): struct{}{},
+			ParseID("5249af22d3b2acd6da8048ac37b2a87fa346fabde55ed23bb866f7618843c9fe"): struct{}{},
+			ParseID("f41c2089a9d58a4b0bf39369fa37588e6578c928aea8e90a4490a6315b9905c1"): struct{}{},
+		},
+	},
+}
+
 func TestRepackerFindPacks(t *testing.T) {
 	WithTestEnvironment(t, checkerTestData, func(repodir string) {
 		repo := OpenLocalRepo(t, repodir)
@@ -34,10 +69,19 @@ func TestRepackerFindPacks(t *testing.T) {
 		OK(t, repo.LoadIndex())
 
 		for _, test := range findPackTests {
-			packIDs, err := checker.FindPacksforBlobs(repo, test.blobIDs)
+			packIDs, err := checker.FindPacksForBlobs(repo, test.blobIDs)
 			OK(t, err)
 			Equals(t, test.packIDs, packIDs)
 		}
+
+		for _, test := range findBlobTests {
+			blobs, err := checker.FindBlobsForPacks(repo, test.packIDs)
+			OK(t, err)
+
+			Assert(t, test.blobIDs.Equals(blobs),
+				"list of blobs for packs %v does not match, expected:\n  %v\ngot:\n  %v",
+				test.packIDs, test.blobIDs, blobs)
+		}
 	})
 }
 
@@ -67,15 +111,8 @@ func TestRepackBlobs(t *testing.T) {
 			t.Fatalf("expected unused blobs:\n  %v\ngot:\n  %v", unusedBlobs, list)
 		}
 
-		// repacker := checker.NewRepacker(repo, repo, repackBlobIDs)
-		// OK(t, repacker.Repack())
-
-		// err := checker.RepackBlobs(repo, repo, repackBlobIDs)
-		// OK(t, err)
-
-		// newPackIDs, err := checker.FindPacksforBlobs(repo, repackBlobIDs)
-		// OK(t, err)
-		// fmt.Printf("new pack IDs: %v\n", newPackIDs)
+		repacker := checker.NewRepacker(repo, repo, unusedBlobs)
+		OK(t, repacker.Repack())
 
 		chkr = checker.New(repo)
 		_, errs = chkr.LoadIndex()