Add FindBlobsForPacks()

Alexander Neumann 2015-11-01 22:45:52 +01:00
parent 30cf002574
commit 51aff3ca57
2 changed files with 73 additions and 22 deletions

View file

@@ -11,14 +11,14 @@ import (
// Repacker extracts still used blobs from packs with unused blobs and creates
// new packs.
type Repacker struct {
unusedBlobs []backend.ID
unusedBlobs backend.IDSet
src, dst *repository.Repository
}
// NewRepacker returns a new repacker that (when Repack() is run) cleans up the
// repository and creates new packs and indexes so that all blobs in unusedBlobs
// aren't used any more.
func NewRepacker(src, dst *repository.Repository, unusedBlobs []backend.ID) *Repacker {
func NewRepacker(src, dst *repository.Repository, unusedBlobs backend.IDSet) *Repacker {
return &Repacker{
src: src,
dst: dst,
@@ -30,7 +30,8 @@ func NewRepacker(src, dst *repository.Repository, unusedBlobs []backend.ID) *Rep
// blobs, extracts them and creates new packs with just the still-in-use blobs.
func (r *Repacker) Repack() error {
debug.Log("Repacker.Repack", "searching packs for %v", r.unusedBlobs)
packs, err := FindPacksforBlobs(r.src, r.unusedBlobs)
packs, err := FindPacksForBlobs(r.src, r.unusedBlobs)
if err != nil {
return err
}
@@ -40,11 +41,11 @@ func (r *Repacker) Repack() error {
return nil
}
// FindPacksforBlobs returns the set of packs that contain the blobs.
func FindPacksforBlobs(repo *repository.Repository, blobs []backend.ID) (backend.IDSet, error) {
// FindPacksForBlobs returns the set of packs that contain the blobs.
func FindPacksForBlobs(repo *repository.Repository, blobs backend.IDSet) (backend.IDSet, error) {
packs := backend.NewIDSet()
idx := repo.Index()
for _, id := range blobs {
for id := range blobs {
blob, err := idx.Lookup(id)
if err != nil {
return nil, err
@@ -56,6 +57,19 @@ func FindPacksforBlobs(repo *repository.Repository, blobs []backend.ID) (backend
return packs, nil
}
// FindBlobsForPacks returns the set of blobs contained in the given set of packs.
func FindBlobsForPacks(repo *repository.Repository, packs backend.IDSet) (backend.IDSet, error) {
blobs := backend.NewIDSet()
for packID := range packs {
for _, packedBlob := range repo.Index().ListPack(packID) {
blobs.Insert(packedBlob.ID)
}
}
return blobs, nil
}
// repackBlob loads a single blob from src and saves it in dst.
func repackBlob(src, dst *repository.Repository, id backend.ID) error {
blob, err := src.Index().Lookup(id)
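
Taken together, the two helpers describe the planning step of a repack: FindPacksForBlobs maps a set of unused blobs to the packs that store them, and the new FindBlobsForPacks maps those packs back to every blob they contain, i.e. the full set of blobs a repack has to rewrite. A minimal sketch of how the two calls might be combined, assuming only the signatures shown in the hunks above (the function name blobsInAffectedPacks and the import paths are illustrative, not part of the commit):

package checker

import (
    // Import paths assumed from the restic layout of this period.
    "github.com/restic/restic/backend"
    "github.com/restic/restic/repository"
)

// blobsInAffectedPacks is an illustrative sketch, not part of the commit: it
// expands a set of unused blobs to the full set of blobs stored in the packs
// that contain them, i.e. everything a repack would have to touch.
func blobsInAffectedPacks(repo *repository.Repository, unusedBlobs backend.IDSet) (backend.IDSet, error) {
    // Find every pack that stores at least one unused blob.
    packs, err := FindPacksForBlobs(repo, unusedBlobs)
    if err != nil {
        return nil, err
    }

    // Collect all blobs in those packs, including the still-used ones that
    // have to be written into new packs before the old packs can be removed.
    return FindBlobsForPacks(repo, packs)
}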

View file

@@ -10,15 +10,15 @@ import (
)
var findPackTests = []struct {
blobIDs backend.IDs
blobIDs backend.IDSet
packIDs backend.IDSet
}{
{
backend.IDs{
ParseID("534f211b4fc2cf5b362a24e8eba22db5372a75b7e974603ff9263f5a471760f4"),
ParseID("51aa04744b518c6a85b4e7643cfa99d58789c2a6ca2a3fda831fa3032f28535c"),
ParseID("454515bca5f4f60349a527bd814cc2681bc3625716460cc6310771c966d8a3bf"),
ParseID("c01952de4d91da1b1b80bc6e06eaa4ec21523f4853b69dc8231708b9b7ec62d8"),
backend.IDSet{
ParseID("534f211b4fc2cf5b362a24e8eba22db5372a75b7e974603ff9263f5a471760f4"): struct{}{},
ParseID("51aa04744b518c6a85b4e7643cfa99d58789c2a6ca2a3fda831fa3032f28535c"): struct{}{},
ParseID("454515bca5f4f60349a527bd814cc2681bc3625716460cc6310771c966d8a3bf"): struct{}{},
ParseID("c01952de4d91da1b1b80bc6e06eaa4ec21523f4853b69dc8231708b9b7ec62d8"): struct{}{},
},
backend.IDSet{
ParseID("19a731a515618ec8b75fc0ff3b887d8feb83aef1001c9899f6702761142ed068"): struct{}{},
@@ -27,6 +27,41 @@ var findPackTests = []struct {
},
}
var findBlobTests = []struct {
packIDs backend.IDSet
blobIDs backend.IDSet
}{
{
backend.IDSet{
ParseID("60e0438dcb978ec6860cc1f8c43da648170ee9129af8f650f876bad19f8f788e"): struct{}{},
},
backend.IDSet{
ParseID("356493f0b00a614d36c698591bbb2b1d801932d85328c1f508019550034549fc"): struct{}{},
ParseID("b8a6bcdddef5c0f542b4648b2ef79bc0ed4377d4109755d2fb78aff11e042663"): struct{}{},
ParseID("5714f7274a8aa69b1692916739dc3835d09aac5395946b8ec4f58e563947199a"): struct{}{},
ParseID("b5bb9d8014a0f9b1d61e21e796d78dccdf1352f23cd32812f4850b878ae4944c"): struct{}{},
ParseID("08d0444e9987fa6e35ce4232b2b71473e1a8f66b2f9664cc44dc57aad3c5a63a"): struct{}{},
},
},
{
backend.IDSet{
ParseID("60e0438dcb978ec6860cc1f8c43da648170ee9129af8f650f876bad19f8f788e"): struct{}{},
ParseID("ff7e12cd66d896b08490e787d1915c641e678d7e6b4a00e60db5d13054f4def4"): struct{}{},
},
backend.IDSet{
ParseID("356493f0b00a614d36c698591bbb2b1d801932d85328c1f508019550034549fc"): struct{}{},
ParseID("b8a6bcdddef5c0f542b4648b2ef79bc0ed4377d4109755d2fb78aff11e042663"): struct{}{},
ParseID("5714f7274a8aa69b1692916739dc3835d09aac5395946b8ec4f58e563947199a"): struct{}{},
ParseID("b5bb9d8014a0f9b1d61e21e796d78dccdf1352f23cd32812f4850b878ae4944c"): struct{}{},
ParseID("08d0444e9987fa6e35ce4232b2b71473e1a8f66b2f9664cc44dc57aad3c5a63a"): struct{}{},
ParseID("aa79d596dbd4c863e5400deaca869830888fe1ce9f51b4a983f532c77f16a596"): struct{}{},
ParseID("b2396c92781307111accf2ebb1cd62b58134b744d90cb6f153ca456a98dc3e76"): struct{}{},
ParseID("5249af22d3b2acd6da8048ac37b2a87fa346fabde55ed23bb866f7618843c9fe"): struct{}{},
ParseID("f41c2089a9d58a4b0bf39369fa37588e6578c928aea8e90a4490a6315b9905c1"): struct{}{},
},
},
}
func TestRepackerFindPacks(t *testing.T) {
WithTestEnvironment(t, checkerTestData, func(repodir string) {
repo := OpenLocalRepo(t, repodir)
@@ -34,10 +69,19 @@ func TestRepackerFindPacks(t *testing.T) {
OK(t, repo.LoadIndex())
for _, test := range findPackTests {
packIDs, err := checker.FindPacksforBlobs(repo, test.blobIDs)
packIDs, err := checker.FindPacksForBlobs(repo, test.blobIDs)
OK(t, err)
Equals(t, test.packIDs, packIDs)
}
for _, test := range findBlobTests {
blobs, err := checker.FindBlobsForPacks(repo, test.packIDs)
OK(t, err)
Assert(t, test.blobIDs.Equals(blobs),
"list of blobs for packs %v does not match, expected:\n %v\ngot:\n %v",
test.packIDs, test.blobIDs, blobs)
}
})
}
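
Because backend.IDSet is a map keyed by ID, the test tables above now spell out struct{}{} values, and the blob comparison goes through IDSet.Equals via Assert: a set has no defined order, so the old comparison against an ordered backend.IDs slice no longer applies. A small illustrative helper, assuming the same backend package as in the sketch after the first file, that builds such a set from a plain slice:

// buildIDSet is illustrative only, not part of the commit: it converts a plain
// slice of IDs into a backend.IDSet using the zero-argument NewIDSet constructor
// and the Insert method that this commit already uses.
func buildIDSet(ids []backend.ID) backend.IDSet {
    set := backend.NewIDSet()
    for _, id := range ids {
        set.Insert(id)
    }
    return set
}

Used in a table, such a helper would keep the long ID lists readable while comparisons still operate on sets.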
@@ -67,15 +111,8 @@ func TestRepackBlobs(t *testing.T) {
t.Fatalf("expected unused blobs:\n %v\ngot:\n %v", unusedBlobs, list)
}
// repacker := checker.NewRepacker(repo, repo, repackBlobIDs)
// OK(t, repacker.Repack())
// err := checker.RepackBlobs(repo, repo, repackBlobIDs)
// OK(t, err)
// newPackIDs, err := checker.FindPacksforBlobs(repo, repackBlobIDs)
// OK(t, err)
// fmt.Printf("new pack IDs: %v\n", newPackIDs)
repacker := checker.NewRepacker(repo, repo, unusedBlobs)
OK(t, repacker.Repack())
chkr = checker.New(repo)
_, errs = chkr.LoadIndex()
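
Outside the test harness the same flow would be driven with ordinary error handling rather than OK(t, ...). A brief sketch, assuming only the NewRepacker signature and Repack method shown in the first file; repackUnused, the variable names and the checker import path are assumptions, not part of the commit:

// repackUnused is an illustrative wrapper, not part of the commit: it runs the
// Repacker over a single repository with a set of blobs that a checker run has
// found to be unused.
func repackUnused(repo *repository.Repository, unusedBlobs backend.IDSet) error {
    repacker := checker.NewRepacker(repo, repo, unusedBlobs)
    // Repack searches the packs containing the unused blobs, extracts the
    // still-in-use blobs and writes them into new packs.
    return repacker.Repack()
}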