From 484331cd8de3435345edef742cc475b535af96da Mon Sep 17 00:00:00 2001
From: Alexander Neumann
Date: Sat, 25 Jul 2015 14:20:02 +0200
Subject: [PATCH] Add repacker

---
 checker/checker.go       |   2 +-
 checker/repacker.go      | 108 +++++++++++++++++++++++++++++++++++++++
 checker/repacker_test.go |  90 ++++++++++++++++++++++++++++++++
 3 files changed, 199 insertions(+), 1 deletion(-)
 create mode 100644 checker/repacker.go
 create mode 100644 checker/repacker_test.go

diff --git a/checker/checker.go b/checker/checker.go
index 2c9536f14..26219e604 100644
--- a/checker/checker.go
+++ b/checker/checker.go
@@ -617,7 +617,7 @@ func (c *Checker) UnusedBlobs() (blobs backend.IDs) {
 	debug.Log("Checker.UnusedBlobs", "checking %d blobs", len(c.blobs))
 	for id := range c.blobs {
 		if c.blobRefs.M[id] == 0 {
-			debug.Log("Checker.UnusedBlobs", "blob %v not not referenced", id.Str())
+			debug.Log("Checker.UnusedBlobs", "blob %v not referenced", id.Str())
 			blobs = append(blobs, id)
 		}
 	}
diff --git a/checker/repacker.go b/checker/repacker.go
new file mode 100644
index 000000000..39fd47355
--- /dev/null
+++ b/checker/repacker.go
@@ -0,0 +1,108 @@
+package checker
+
+import (
+	"errors"
+
+	"github.com/restic/restic/backend"
+	"github.com/restic/restic/debug"
+	"github.com/restic/restic/repository"
+)
+
+// Repacker extracts still used blobs from packs with unused blobs and creates
+// new packs.
+type Repacker struct {
+	unusedBlobs []backend.ID
+	src, dst    *repository.Repository
+}
+
+// NewRepacker returns a new repacker that (when Repack() is run) cleans up the
+// repository and creates new packs and indexes so that all blobs in unusedBlobs
+// aren't used any more.
+func NewRepacker(src, dst *repository.Repository, unusedBlobs []backend.ID) *Repacker {
+	return &Repacker{
+		src:         src,
+		dst:         dst,
+		unusedBlobs: unusedBlobs,
+	}
+}
+
+// Repack is meant to move still used blobs out of packs with unused blobs into
+// new packs; as written it only looks up those packs in src and logs them.
+func (r *Repacker) Repack() error {
+	debug.Log("Repacker.Repack", "searching packs for %v", r.unusedBlobs)
+	packs, err := FindPacksforBlobs(r.src, r.unusedBlobs)
+	if err != nil {
+		return err
+	}
+
+	debug.Log("Repacker.Repack", "found packs: %v", packs)
+
+	return nil
+}
+
+// FindPacksforBlobs returns the set of IDs of the packs that, according to repo's index, contain the blobs; a failed lookup aborts with its error.
+func FindPacksforBlobs(repo *repository.Repository, blobs []backend.ID) (backend.IDSet, error) {
+	packs := backend.NewIDSet()
+	idx := repo.Index()
+	for _, id := range blobs {
+		blob, err := idx.Lookup(id)
+		if err != nil {
+			return nil, err
+		}
+
+		packs.Insert(blob.PackID)
+	}
+
+	return packs, nil
+}
+
+// repackBlob loads the blob id from src, checks the loaded length against its index entry in src and saves it in dst.
+func repackBlob(src, dst *repository.Repository, id backend.ID) error {
+	blob, err := src.Index().Lookup(id)
+	if err != nil {
+		return err
+	}
+
+	debug.Log("RepackBlobs", "repacking blob %v, len %v", id.Str(), blob.PlaintextLength())
+
+	buf := make([]byte, 0, blob.PlaintextLength())
+	buf, err = src.LoadBlob(blob.Type, id, buf)
+	if err != nil {
+		return err
+	}
+
+	if uint(len(buf)) != blob.PlaintextLength() {
+		debug.Log("RepackBlobs", "repack blob %v: len(buf) isn't equal to length: %v = %v", id.Str(), len(buf), blob.PlaintextLength())
+		return errors.New("LoadBlob returned wrong data, len() doesn't match")
+	}
+
+	_, err = dst.SaveAndEncrypt(blob.Type, buf, &id)
+	if err != nil {
+		return err
+	}
+
+	return nil
+}
+
+// RepackBlobs reads all blobs in blobIDs from src and saves them into new pack
+// files in dst, then flushes dst and saves its index. Source and destination repo may be the same.
+func RepackBlobs(src, dst *repository.Repository, blobIDs backend.IDs) (err error) {
+	for _, id := range blobIDs {
+		err = repackBlob(src, dst, id)
+		if err != nil {
+			return err
+		}
+	}
+
+	err = dst.Flush()
+	if err != nil {
+		return err
+	}
+
+	err = dst.SaveIndex()
+	if err != nil {
+		return err
+	}
+
+	return nil
+}
diff --git a/checker/repacker_test.go b/checker/repacker_test.go
new file mode 100644
index 000000000..c0e70ae12
--- /dev/null
+++ b/checker/repacker_test.go
@@ -0,0 +1,90 @@
+package checker_test
+
+import (
+	"testing"
+
+	"github.com/restic/restic/backend"
+	"github.com/restic/restic/checker"
+
+	. "github.com/restic/restic/test"
+)
+
+var findPackTests = []struct {
+	blobIDs backend.IDs
+	packIDs backend.IDSet
+}{
+	{
+		backend.IDs{
+			ParseID("534f211b4fc2cf5b362a24e8eba22db5372a75b7e974603ff9263f5a471760f4"),
+			ParseID("51aa04744b518c6a85b4e7643cfa99d58789c2a6ca2a3fda831fa3032f28535c"),
+			ParseID("454515bca5f4f60349a527bd814cc2681bc3625716460cc6310771c966d8a3bf"),
+			ParseID("c01952de4d91da1b1b80bc6e06eaa4ec21523f4853b69dc8231708b9b7ec62d8"),
+		},
+		backend.IDSet{
+			ParseID("19a731a515618ec8b75fc0ff3b887d8feb83aef1001c9899f6702761142ed068"): struct{}{},
+			ParseID("657f7fb64f6a854fff6fe9279998ee09034901eded4e6db9bcee0e59745bbce6"): struct{}{},
+		},
+	},
+}
+
+func TestRepackerFindPacks(t *testing.T) {
+	WithTestEnvironment(t, checkerTestData, func(repodir string) {
+		repo := OpenLocalRepo(t, repodir)
+
+		OK(t, repo.LoadIndex())
+
+		for _, test := range findPackTests {
+			packIDs, err := checker.FindPacksforBlobs(repo, test.blobIDs)
+			OK(t, err)
+			Equals(t, test.packIDs, packIDs)
+		}
+	})
+}
+
+func TestRepackBlobs(t *testing.T) {
+	WithTestEnvironment(t, checkerTestData, func(repodir string) {
+		repo := OpenLocalRepo(t, repodir)
+		OK(t, repo.LoadIndex())
+
+		OK(t, repo.Backend().Remove(backend.Snapshot, "c2b53c5e6a16db92fbb9aa08bd2794c58b379d8724d661ee30d20898bdfdff22"))
+
+		unusedBlobs := backend.IDSet{
+			ParseID("5714f7274a8aa69b1692916739dc3835d09aac5395946b8ec4f58e563947199a"): struct{}{},
+			ParseID("08d0444e9987fa6e35ce4232b2b71473e1a8f66b2f9664cc44dc57aad3c5a63a"): struct{}{},
+			ParseID("356493f0b00a614d36c698591bbb2b1d801932d85328c1f508019550034549fc"): struct{}{},
+			ParseID("b8a6bcdddef5c0f542b4648b2ef79bc0ed4377d4109755d2fb78aff11e042663"): struct{}{},
+		}
+
+		chkr := checker.New(repo)
+		_, errs := chkr.LoadIndex()
+		OKs(t, errs)
+
+		errs = checkStruct(chkr)
+		OKs(t, errs)
+
+		list := backend.NewIDSet(chkr.UnusedBlobs()...)
+		if !unusedBlobs.Equals(list) {
+			t.Fatalf("expected unused blobs:\n %v\ngot:\n %v", unusedBlobs, list)
+		}
+
+		// repacker := checker.NewRepacker(repo, repo, repackBlobIDs)
+		// OK(t, repacker.Repack())
+
+		// err := checker.RepackBlobs(repo, repo, repackBlobIDs)
+		// OK(t, err)
+
+		// newPackIDs, err := checker.FindPacksforBlobs(repo, repackBlobIDs)
+		// OK(t, err)
+		// fmt.Printf("new pack IDs: %v\n", newPackIDs)
+		// NOTE(review): the repack calls above are commented out, so nothing changes the repo before the re-check below; confirm the final zero-unused-blobs assertion can pass.
+		chkr = checker.New(repo)
+		_, errs = chkr.LoadIndex()
+		OKs(t, errs)
+		OKs(t, checkPacks(chkr))
+		OKs(t, checkStruct(chkr))
+
+		blobs := chkr.UnusedBlobs()
+		Assert(t, len(blobs) == 0,
+			"expected zero unused blobs, got %v", blobs)
+	})
+}