From 50c2f2e87f2ab6dc09f1793129dc35c4b777b097 Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Sat, 8 Aug 2015 18:50:39 +0200 Subject: [PATCH 01/21] cmd_cat: allow dumping raw tree blobs --- cmd/restic/cmd_cat.go | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/cmd/restic/cmd_cat.go b/cmd/restic/cmd_cat.go index ca5c24aed..c8d7bffd8 100644 --- a/cmd/restic/cmd_cat.go +++ b/cmd/restic/cmd_cat.go @@ -167,12 +167,8 @@ func (cmd CmdCat) Execute(args []string) error { return err } - if blob.Type != pack.Data { - return errors.New("wrong type for blob") - } - buf := make([]byte, blob.Length) - data, err := repo.LoadBlob(pack.Data, id, buf) + data, err := repo.LoadBlob(blob.Type, id, buf) if err != nil { return err } From 181963ba0826e8ee28684fc861eb8fcbf66ca71f Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Sun, 1 Nov 2015 22:14:44 +0100 Subject: [PATCH 02/21] Fix IDSet.String() --- backend/idset.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/idset.go b/backend/idset.go index 817f80fff..b34a69afb 100644 --- a/backend/idset.go +++ b/backend/idset.go @@ -66,5 +66,5 @@ func (s IDSet) String() string { return "{}" } - return "{" + str[1:len(str)-2] + "}" + return "{" + str[1:len(str)-1] + "}" } From 484331cd8de3435345edef742cc475b535af96da Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Sat, 25 Jul 2015 14:20:02 +0200 Subject: [PATCH 03/21] Add repacker --- checker/checker.go | 2 +- checker/repacker.go | 108 +++++++++++++++++++++++++++++++++++++++ checker/repacker_test.go | 90 ++++++++++++++++++++++++++++++++ 3 files changed, 199 insertions(+), 1 deletion(-) create mode 100644 checker/repacker.go create mode 100644 checker/repacker_test.go diff --git a/checker/checker.go b/checker/checker.go index 2c9536f14..26219e604 100644 --- a/checker/checker.go +++ b/checker/checker.go @@ -617,7 +617,7 @@ func (c *Checker) UnusedBlobs() (blobs backend.IDs) { debug.Log("Checker.UnusedBlobs", "checking %d blobs", 
len(c.blobs)) for id := range c.blobs { if c.blobRefs.M[id] == 0 { - debug.Log("Checker.UnusedBlobs", "blob %v not not referenced", id.Str()) + debug.Log("Checker.UnusedBlobs", "blob %v not referenced", id.Str()) blobs = append(blobs, id) } } diff --git a/checker/repacker.go b/checker/repacker.go new file mode 100644 index 000000000..39fd47355 --- /dev/null +++ b/checker/repacker.go @@ -0,0 +1,108 @@ +package checker + +import ( + "errors" + + "github.com/restic/restic/backend" + "github.com/restic/restic/debug" + "github.com/restic/restic/repository" +) + +// Repacker extracts still used blobs from packs with unused blobs and creates +// new packs. +type Repacker struct { + unusedBlobs []backend.ID + src, dst *repository.Repository +} + +// NewRepacker returns a new repacker that (when Repack() in run) cleans up the +// repository and creates new packs and indexs so that all blobs in unusedBlobs +// aren't used any more. +func NewRepacker(src, dst *repository.Repository, unusedBlobs []backend.ID) *Repacker { + return &Repacker{ + src: src, + dst: dst, + unusedBlobs: unusedBlobs, + } +} + +// Repack runs the process of finding still used blobs in packs with unused +// blobs, extracts them and creates new packs with just the still-in-use blobs. +func (r *Repacker) Repack() error { + debug.Log("Repacker.Repack", "searching packs for %v", r.unusedBlobs) + packs, err := FindPacksforBlobs(r.src, r.unusedBlobs) + if err != nil { + return err + } + + debug.Log("Repacker.Repack", "found packs: %v", packs) + + return nil +} + +// FindPacksforBlobs returns the set of packs that contain the blobs. +func FindPacksforBlobs(repo *repository.Repository, blobs []backend.ID) (backend.IDSet, error) { + packs := backend.NewIDSet() + idx := repo.Index() + for _, id := range blobs { + blob, err := idx.Lookup(id) + if err != nil { + return nil, err + } + + packs.Insert(blob.PackID) + } + + return packs, nil +} + +// repackBlob loads a single blob from src and saves it in dst. 
+func repackBlob(src, dst *repository.Repository, id backend.ID) error { + blob, err := src.Index().Lookup(id) + if err != nil { + return err + } + + debug.Log("RepackBlobs", "repacking blob %v, len %v", id.Str(), blob.PlaintextLength()) + + buf := make([]byte, 0, blob.PlaintextLength()) + buf, err = src.LoadBlob(blob.Type, id, buf) + if err != nil { + return err + } + + if uint(len(buf)) != blob.PlaintextLength() { + debug.Log("RepackBlobs", "repack blob %v: len(buf) isn't equal to length: %v = %v", id.Str(), len(buf), blob.PlaintextLength()) + return errors.New("LoadBlob returned wrong data, len() doesn't match") + } + + _, err = dst.SaveAndEncrypt(blob.Type, buf, &id) + if err != nil { + return err + } + + return nil +} + +// RepackBlobs reads all blobs in blobIDs from src and saves them into new pack +// files in dst. Source and destination repo may be the same. +func RepackBlobs(src, dst *repository.Repository, blobIDs backend.IDs) (err error) { + for _, id := range blobIDs { + err = repackBlob(src, dst, id) + if err != nil { + return err + } + } + + err = dst.Flush() + if err != nil { + return err + } + + err = dst.SaveIndex() + if err != nil { + return err + } + + return nil +} diff --git a/checker/repacker_test.go b/checker/repacker_test.go new file mode 100644 index 000000000..c0e70ae12 --- /dev/null +++ b/checker/repacker_test.go @@ -0,0 +1,90 @@ +package checker_test + +import ( + "testing" + + "github.com/restic/restic/backend" + "github.com/restic/restic/checker" + + . 
"github.com/restic/restic/test" +) + +var findPackTests = []struct { + blobIDs backend.IDs + packIDs backend.IDSet +}{ + { + backend.IDs{ + ParseID("534f211b4fc2cf5b362a24e8eba22db5372a75b7e974603ff9263f5a471760f4"), + ParseID("51aa04744b518c6a85b4e7643cfa99d58789c2a6ca2a3fda831fa3032f28535c"), + ParseID("454515bca5f4f60349a527bd814cc2681bc3625716460cc6310771c966d8a3bf"), + ParseID("c01952de4d91da1b1b80bc6e06eaa4ec21523f4853b69dc8231708b9b7ec62d8"), + }, + backend.IDSet{ + ParseID("19a731a515618ec8b75fc0ff3b887d8feb83aef1001c9899f6702761142ed068"): struct{}{}, + ParseID("657f7fb64f6a854fff6fe9279998ee09034901eded4e6db9bcee0e59745bbce6"): struct{}{}, + }, + }, +} + +func TestRepackerFindPacks(t *testing.T) { + WithTestEnvironment(t, checkerTestData, func(repodir string) { + repo := OpenLocalRepo(t, repodir) + + OK(t, repo.LoadIndex()) + + for _, test := range findPackTests { + packIDs, err := checker.FindPacksforBlobs(repo, test.blobIDs) + OK(t, err) + Equals(t, test.packIDs, packIDs) + } + }) +} + +func TestRepackBlobs(t *testing.T) { + WithTestEnvironment(t, checkerTestData, func(repodir string) { + repo := OpenLocalRepo(t, repodir) + OK(t, repo.LoadIndex()) + + repo.Backend().Remove(backend.Snapshot, "c2b53c5e6a16db92fbb9aa08bd2794c58b379d8724d661ee30d20898bdfdff22") + + unusedBlobs := backend.IDSet{ + ParseID("5714f7274a8aa69b1692916739dc3835d09aac5395946b8ec4f58e563947199a"): struct{}{}, + ParseID("08d0444e9987fa6e35ce4232b2b71473e1a8f66b2f9664cc44dc57aad3c5a63a"): struct{}{}, + ParseID("356493f0b00a614d36c698591bbb2b1d801932d85328c1f508019550034549fc"): struct{}{}, + ParseID("b8a6bcdddef5c0f542b4648b2ef79bc0ed4377d4109755d2fb78aff11e042663"): struct{}{}, + } + + chkr := checker.New(repo) + _, errs := chkr.LoadIndex() + OKs(t, errs) + + errs = checkStruct(chkr) + OKs(t, errs) + + list := backend.NewIDSet(chkr.UnusedBlobs()...) 
+ if !unusedBlobs.Equals(list) { + t.Fatalf("expected unused blobs:\n %v\ngot:\n %v", unusedBlobs, list) + } + + // repacker := checker.NewRepacker(repo, repo, repackBlobIDs) + // OK(t, repacker.Repack()) + + // err := checker.RepackBlobs(repo, repo, repackBlobIDs) + // OK(t, err) + + // newPackIDs, err := checker.FindPacksforBlobs(repo, repackBlobIDs) + // OK(t, err) + // fmt.Printf("new pack IDs: %v\n", newPackIDs) + + chkr = checker.New(repo) + _, errs = chkr.LoadIndex() + OKs(t, errs) + OKs(t, checkPacks(chkr)) + OKs(t, checkStruct(chkr)) + + blobs := chkr.UnusedBlobs() + Assert(t, len(blobs) == 0, + "expected zero unused blobs, got %v", blobs) + }) +} From 89a77ab2f95d98581d79448e1c690c98880dc2a4 Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Sun, 1 Nov 2015 22:32:28 +0100 Subject: [PATCH 04/21] Add Index.ListPack() --- repository/index.go | 20 ++++++++++++++++++++ repository/index_test.go | 23 +++++++++++++++++++++++ repository/master_index.go | 16 ++++++++++++++++ 3 files changed, 59 insertions(+) diff --git a/repository/index.go b/repository/index.go index 2ffac231c..1f9d65ad1 100644 --- a/repository/index.go +++ b/repository/index.go @@ -132,6 +132,26 @@ func (idx *Index) Lookup(id backend.ID) (pb PackedBlob, err error) { return PackedBlob{}, fmt.Errorf("id %v not found in index", id) } +// ListPack returns a list of blobs contained in a pack. +func (idx *Index) ListPack(id backend.ID) (list []PackedBlob) { + idx.m.Lock() + defer idx.m.Unlock() + + for blobID, entry := range idx.pack { + if entry.packID == id { + list = append(list, PackedBlob{ + ID: blobID, + Type: entry.tpe, + Length: entry.length, + Offset: entry.offset, + PackID: entry.packID, + }) + } + } + + return list +} + // Has returns true iff the id is listed in the index. 
func (idx *Index) Has(id backend.ID) bool { _, err := idx.Lookup(id) diff --git a/repository/index_test.go b/repository/index_test.go index 480619fa6..85674de77 100644 --- a/repository/index_test.go +++ b/repository/index_test.go @@ -240,6 +240,18 @@ var exampleTests = []struct { }, } +var exampleLookupTest = struct { + packID backend.ID + blobs backend.IDSet +}{ + ParseID("73d04e6125cf3c28a299cc2f3cca3b78ceac396e4fcf9575e34536b26782413c"), + backend.IDSet{ + ParseID("3ec79977ef0cf5de7b08cd12b874cd0f62bbaf7f07f3497a5b1bbcc8cb39b1ce"): struct{}{}, + ParseID("9ccb846e60d90d4eb915848add7aa7ea1e4bbabfc60e573db9f7bfb2789afbae"): struct{}{}, + ParseID("d3dc577b4ffd38cc4b32122cabf8655a0223ed22edfd93b353dc0c3f2b0fdf66"): struct{}{}, + }, +} + func TestIndexUnserialize(t *testing.T) { oldIdx := backend.IDs{ParseID("ed54ae36197f4745ebc4b54d10e0f623eaaaedd03013eb7ae90df881b7781452")} @@ -257,6 +269,17 @@ func TestIndexUnserialize(t *testing.T) { } Equals(t, oldIdx, idx.Supersedes()) + + blobs := idx.ListPack(exampleLookupTest.packID) + if len(blobs) != len(exampleLookupTest.blobs) { + t.Fatalf("expected %d blobs in pack, got %d", len(exampleLookupTest.blobs), len(blobs)) + } + + for _, blob := range blobs { + if !exampleLookupTest.blobs.Has(blob.ID) { + t.Errorf("unexpected blob %v found", blob.ID.Str()) + } + } } func TestIndexUnserializeOld(t *testing.T) { diff --git a/repository/master_index.go b/repository/master_index.go index 5a5e499e6..bc3dea768 100644 --- a/repository/master_index.go +++ b/repository/master_index.go @@ -67,6 +67,22 @@ func (mi *MasterIndex) LookupSize(id backend.ID) (uint, error) { return 0, fmt.Errorf("id %v not found in any index", id) } +// ListPack returns the list of blobs in a pack. The first matching index is +// returned, or nil if no index contains information about the pack id. 
+func (mi *MasterIndex) ListPack(id backend.ID) (list []PackedBlob) { + mi.idxMutex.RLock() + defer mi.idxMutex.RUnlock() + + for _, idx := range mi.idx { + list := idx.ListPack(id) + if len(list) > 0 { + return list + } + } + + return nil +} + // Has queries all known Indexes for the ID and returns the first match. func (mi *MasterIndex) Has(id backend.ID) bool { mi.idxMutex.RLock() From 30cf0025741af61757e21242b2711b519756918f Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Sun, 1 Nov 2015 22:45:10 +0100 Subject: [PATCH 05/21] Sort IDSet.List() --- backend/idset.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/backend/idset.go b/backend/idset.go index b34a69afb..de93f8754 100644 --- a/backend/idset.go +++ b/backend/idset.go @@ -1,5 +1,7 @@ package backend +import "sort" + // IDSet is a set of IDs. type IDSet map[ID]struct{} @@ -36,6 +38,8 @@ func (s IDSet) List() IDs { list = append(list, id) } + sort.Sort(list) + return list } From 51aff3ca571abb1abee27d0a2f3eee9908b68b04 Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Sun, 1 Nov 2015 22:45:52 +0100 Subject: [PATCH 06/21] Add FindBlobsForPacks() --- checker/repacker.go | 26 +++++++++++---- checker/repacker_test.go | 69 ++++++++++++++++++++++++++++++---------- 2 files changed, 73 insertions(+), 22 deletions(-) diff --git a/checker/repacker.go b/checker/repacker.go index 39fd47355..e2669fa03 100644 --- a/checker/repacker.go +++ b/checker/repacker.go @@ -11,14 +11,14 @@ import ( // Repacker extracts still used blobs from packs with unused blobs and creates // new packs. type Repacker struct { - unusedBlobs []backend.ID + unusedBlobs backend.IDSet src, dst *repository.Repository } // NewRepacker returns a new repacker that (when Repack() in run) cleans up the // repository and creates new packs and indexs so that all blobs in unusedBlobs // aren't used any more. 
-func NewRepacker(src, dst *repository.Repository, unusedBlobs []backend.ID) *Repacker { +func NewRepacker(src, dst *repository.Repository, unusedBlobs backend.IDSet) *Repacker { return &Repacker{ src: src, dst: dst, @@ -30,7 +30,8 @@ func NewRepacker(src, dst *repository.Repository, unusedBlobs []backend.ID) *Rep // blobs, extracts them and creates new packs with just the still-in-use blobs. func (r *Repacker) Repack() error { debug.Log("Repacker.Repack", "searching packs for %v", r.unusedBlobs) - packs, err := FindPacksforBlobs(r.src, r.unusedBlobs) + + packs, err := FindPacksForBlobs(r.src, r.unusedBlobs) if err != nil { return err } @@ -40,11 +41,11 @@ func (r *Repacker) Repack() error { return nil } -// FindPacksforBlobs returns the set of packs that contain the blobs. -func FindPacksforBlobs(repo *repository.Repository, blobs []backend.ID) (backend.IDSet, error) { +// FindPacksForBlobs returns the set of packs that contain the blobs. +func FindPacksForBlobs(repo *repository.Repository, blobs backend.IDSet) (backend.IDSet, error) { packs := backend.NewIDSet() idx := repo.Index() - for _, id := range blobs { + for id := range blobs { blob, err := idx.Lookup(id) if err != nil { return nil, err @@ -56,6 +57,19 @@ func FindPacksforBlobs(repo *repository.Repository, blobs []backend.ID) (backend return packs, nil } +// FindBlobsForPacks returns the set of blobs contained in a pack of packs. +func FindBlobsForPacks(repo *repository.Repository, packs backend.IDSet) (backend.IDSet, error) { + blobs := backend.NewIDSet() + + for packID := range packs { + for _, packedBlob := range repo.Index().ListPack(packID) { + blobs.Insert(packedBlob.ID) + } + } + + return blobs, nil +} + // repackBlob loads a single blob from src and saves it in dst. 
func repackBlob(src, dst *repository.Repository, id backend.ID) error { blob, err := src.Index().Lookup(id) diff --git a/checker/repacker_test.go b/checker/repacker_test.go index c0e70ae12..a2e9979ce 100644 --- a/checker/repacker_test.go +++ b/checker/repacker_test.go @@ -10,15 +10,15 @@ import ( ) var findPackTests = []struct { - blobIDs backend.IDs + blobIDs backend.IDSet packIDs backend.IDSet }{ { - backend.IDs{ - ParseID("534f211b4fc2cf5b362a24e8eba22db5372a75b7e974603ff9263f5a471760f4"), - ParseID("51aa04744b518c6a85b4e7643cfa99d58789c2a6ca2a3fda831fa3032f28535c"), - ParseID("454515bca5f4f60349a527bd814cc2681bc3625716460cc6310771c966d8a3bf"), - ParseID("c01952de4d91da1b1b80bc6e06eaa4ec21523f4853b69dc8231708b9b7ec62d8"), + backend.IDSet{ + ParseID("534f211b4fc2cf5b362a24e8eba22db5372a75b7e974603ff9263f5a471760f4"): struct{}{}, + ParseID("51aa04744b518c6a85b4e7643cfa99d58789c2a6ca2a3fda831fa3032f28535c"): struct{}{}, + ParseID("454515bca5f4f60349a527bd814cc2681bc3625716460cc6310771c966d8a3bf"): struct{}{}, + ParseID("c01952de4d91da1b1b80bc6e06eaa4ec21523f4853b69dc8231708b9b7ec62d8"): struct{}{}, }, backend.IDSet{ ParseID("19a731a515618ec8b75fc0ff3b887d8feb83aef1001c9899f6702761142ed068"): struct{}{}, @@ -27,6 +27,41 @@ var findPackTests = []struct { }, } +var findBlobTests = []struct { + packIDs backend.IDSet + blobIDs backend.IDSet +}{ + { + backend.IDSet{ + ParseID("60e0438dcb978ec6860cc1f8c43da648170ee9129af8f650f876bad19f8f788e"): struct{}{}, + }, + backend.IDSet{ + ParseID("356493f0b00a614d36c698591bbb2b1d801932d85328c1f508019550034549fc"): struct{}{}, + ParseID("b8a6bcdddef5c0f542b4648b2ef79bc0ed4377d4109755d2fb78aff11e042663"): struct{}{}, + ParseID("5714f7274a8aa69b1692916739dc3835d09aac5395946b8ec4f58e563947199a"): struct{}{}, + ParseID("b5bb9d8014a0f9b1d61e21e796d78dccdf1352f23cd32812f4850b878ae4944c"): struct{}{}, + ParseID("08d0444e9987fa6e35ce4232b2b71473e1a8f66b2f9664cc44dc57aad3c5a63a"): struct{}{}, + }, + }, + { + backend.IDSet{ + 
ParseID("60e0438dcb978ec6860cc1f8c43da648170ee9129af8f650f876bad19f8f788e"): struct{}{}, + ParseID("ff7e12cd66d896b08490e787d1915c641e678d7e6b4a00e60db5d13054f4def4"): struct{}{}, + }, + backend.IDSet{ + ParseID("356493f0b00a614d36c698591bbb2b1d801932d85328c1f508019550034549fc"): struct{}{}, + ParseID("b8a6bcdddef5c0f542b4648b2ef79bc0ed4377d4109755d2fb78aff11e042663"): struct{}{}, + ParseID("5714f7274a8aa69b1692916739dc3835d09aac5395946b8ec4f58e563947199a"): struct{}{}, + ParseID("b5bb9d8014a0f9b1d61e21e796d78dccdf1352f23cd32812f4850b878ae4944c"): struct{}{}, + ParseID("08d0444e9987fa6e35ce4232b2b71473e1a8f66b2f9664cc44dc57aad3c5a63a"): struct{}{}, + ParseID("aa79d596dbd4c863e5400deaca869830888fe1ce9f51b4a983f532c77f16a596"): struct{}{}, + ParseID("b2396c92781307111accf2ebb1cd62b58134b744d90cb6f153ca456a98dc3e76"): struct{}{}, + ParseID("5249af22d3b2acd6da8048ac37b2a87fa346fabde55ed23bb866f7618843c9fe"): struct{}{}, + ParseID("f41c2089a9d58a4b0bf39369fa37588e6578c928aea8e90a4490a6315b9905c1"): struct{}{}, + }, + }, +} + func TestRepackerFindPacks(t *testing.T) { WithTestEnvironment(t, checkerTestData, func(repodir string) { repo := OpenLocalRepo(t, repodir) @@ -34,10 +69,19 @@ func TestRepackerFindPacks(t *testing.T) { OK(t, repo.LoadIndex()) for _, test := range findPackTests { - packIDs, err := checker.FindPacksforBlobs(repo, test.blobIDs) + packIDs, err := checker.FindPacksForBlobs(repo, test.blobIDs) OK(t, err) Equals(t, test.packIDs, packIDs) } + + for _, test := range findBlobTests { + blobs, err := checker.FindBlobsForPacks(repo, test.packIDs) + OK(t, err) + + Assert(t, test.blobIDs.Equals(blobs), + "list of blobs for packs %v does not match, expected:\n %v\ngot:\n %v", + test.packIDs, test.blobIDs, blobs) + } }) } @@ -67,15 +111,8 @@ func TestRepackBlobs(t *testing.T) { t.Fatalf("expected unused blobs:\n %v\ngot:\n %v", unusedBlobs, list) } - // repacker := checker.NewRepacker(repo, repo, repackBlobIDs) - // OK(t, repacker.Repack()) - - // err := 
checker.RepackBlobs(repo, repo, repackBlobIDs) - // OK(t, err) - - // newPackIDs, err := checker.FindPacksforBlobs(repo, repackBlobIDs) - // OK(t, err) - // fmt.Printf("new pack IDs: %v\n", newPackIDs) + repacker := checker.NewRepacker(repo, repo, unusedBlobs) + OK(t, repacker.Repack()) chkr = checker.New(repo) _, errs = chkr.LoadIndex() From 266bc05edc69fa6aca4521059b8a147eaccfad3a Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Sun, 1 Nov 2015 22:57:54 +0100 Subject: [PATCH 07/21] Add mostly ready repacker --- checker/repacker.go | 31 +++++++++++++++++++++++++++++-- checker/repacker_test.go | 2 +- 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/checker/repacker.go b/checker/repacker.go index e2669fa03..be885c16f 100644 --- a/checker/repacker.go +++ b/checker/repacker.go @@ -38,6 +38,33 @@ func (r *Repacker) Repack() error { debug.Log("Repacker.Repack", "found packs: %v", packs) + blobs, err := FindBlobsForPacks(r.src, packs) + if err != nil { + return err + } + + debug.Log("Repacker.Repack", "found blobs: %v", blobs) + + for id := range r.unusedBlobs { + debug.Log("Repacker.Repack", "remove unused blob %v", id.Str()) + blobs.Delete(id) + } + + debug.Log("Repacker.Repack", "need to repack blobs: %v", blobs) + + err = RepackBlobs(r.src, r.dst, blobs) + if err != nil { + return err + } + + debug.Log("Repacker.Repack", "remove unneeded packs: %v", packs) + for packID := range packs { + err = r.src.Backend().Remove(backend.Data, packID.String()) + if err != nil { + return err + } + } + return nil } @@ -100,8 +127,8 @@ func repackBlob(src, dst *repository.Repository, id backend.ID) error { // RepackBlobs reads all blobs in blobIDs from src and saves them into new pack // files in dst. Source and destination repo may be the same. 
-func RepackBlobs(src, dst *repository.Repository, blobIDs backend.IDs) (err error) { - for _, id := range blobIDs { +func RepackBlobs(src, dst *repository.Repository, blobIDs backend.IDSet) (err error) { + for id := range blobIDs { err = repackBlob(src, dst, id) if err != nil { return err diff --git a/checker/repacker_test.go b/checker/repacker_test.go index a2e9979ce..f3e5bead8 100644 --- a/checker/repacker_test.go +++ b/checker/repacker_test.go @@ -85,7 +85,7 @@ func TestRepackerFindPacks(t *testing.T) { }) } -func TestRepackBlobs(t *testing.T) { +func TestRepacker(t *testing.T) { WithTestEnvironment(t, checkerTestData, func(repodir string) { repo := OpenLocalRepo(t, repodir) OK(t, repo.LoadIndex()) From 60a34087c914f38899b1df7c99f02ed217c40b4d Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Mon, 2 Nov 2015 18:51:24 +0100 Subject: [PATCH 08/21] Move LoadIndexWithDecoder to index.go --- repository/index.go | 26 ++++++++++++++++++++++++++ repository/repository.go | 19 ------------------- 2 files changed, 26 insertions(+), 19 deletions(-) diff --git a/repository/index.go b/repository/index.go index 1f9d65ad1..d74131b84 100644 --- a/repository/index.go +++ b/repository/index.go @@ -514,6 +514,32 @@ func ConvertIndexes(repo *Repository) error { return nil } +// LoadIndexWithDecoder loads the index and decodes it with fn. 
+func LoadIndexWithDecoder(repo *Repository, id string, fn func(io.Reader) (*Index, error)) (*Index, error) { + debug.Log("LoadIndexWithDecoder", "Loading index %v", id[:8]) + + idxID, err := backend.ParseID(id) + if err != nil { + return nil, err + } + + rd, err := repo.GetDecryptReader(backend.Index, idxID.String()) + if err != nil { + return nil, err + } + defer rd.Close() + + idx, err := fn(rd) + if err != nil { + debug.Log("LoadIndexWithDecoder", "error while decoding index %v: %v", id, err) + return nil, err + } + + idx.id = idxID + + return idx, nil +} + // ConvertIndex loads the given index from the repo and converts them to the new // format (if necessary). When the conversion is succcessful, the old index // is removed. Returned is either the old id (if no conversion was needed) or diff --git a/repository/repository.go b/repository/repository.go index 51e5613b4..d119d51d5 100644 --- a/repository/repository.go +++ b/repository/repository.go @@ -650,25 +650,6 @@ func (r *Repository) GetDecryptReader(t backend.Type, id string) (io.ReadCloser, return newDecryptReadCloser(r.key, rd) } -// LoadIndexWithDecoder loads the index and decodes it with fn. -func LoadIndexWithDecoder(repo *Repository, id string, fn func(io.Reader) (*Index, error)) (*Index, error) { - debug.Log("LoadIndexWithDecoder", "Loading index %v", id[:8]) - - rd, err := repo.GetDecryptReader(backend.Index, id) - if err != nil { - return nil, err - } - defer rd.Close() - - idx, err := fn(rd) - if err != nil { - debug.Log("LoadIndexWithDecoder", "error while decoding index %v: %v", id, err) - return nil, err - } - - return idx, nil -} - // SearchKey finds a key with the supplied password, afterwards the config is // read and parsed. 
func (r *Repository) SearchKey(password string) error { From f3f84b154441584750de7a2d89ecde2278430482 Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Mon, 2 Nov 2015 18:51:45 +0100 Subject: [PATCH 09/21] Add ID handling for index --- repository/index.go | 36 +++++++++++++++++++++++++++++++++++- repository/index_test.go | 6 ++++++ repository/repository.go | 3 ++- 3 files changed, 43 insertions(+), 2 deletions(-) diff --git a/repository/index.go b/repository/index.go index d74131b84..e7df3168e 100644 --- a/repository/index.go +++ b/repository/index.go @@ -19,7 +19,8 @@ type Index struct { m sync.Mutex pack map[backend.ID]indexEntry - final bool // set to true for all indexes read from the backend ("finalized") + final bool // set to true for all indexes read from the backend ("finalized") + id backend.ID // set to the ID of the index when it's finalized supersedes backend.IDs created time.Time } @@ -395,6 +396,39 @@ func (idx *Index) Finalize(w io.Writer) error { return idx.encode(w) } +// ID returns the ID of the index, if available. If the index is not yet +// finalized, an error is returned. +func (idx *Index) ID() (backend.ID, error) { + idx.m.Lock() + defer idx.m.Unlock() + + if !idx.final { + return backend.ID{}, errors.New("index not finalized") + } + + return idx.id, nil +} + +// SetID sets the ID the index has been written to. This requires that +// Finalize() has been called before, otherwise an error is returned. +func (idx *Index) SetID(id backend.ID) error { + idx.m.Lock() + defer idx.m.Unlock() + + if !idx.final { + return errors.New("indexs is not final") + } + + if !idx.id.IsNull() { + return errors.New("ID already set") + } + + debug.Log("Index.SetID", "ID set to %v", id.Str()) + idx.id = id + + return nil +} + // Dump writes the pretty-printed JSON representation of the index to w. 
func (idx *Index) Dump(w io.Writer) error { debug.Log("Index.Dump", "dumping index") diff --git a/repository/index_test.go b/repository/index_test.go index 85674de77..d403ed8ad 100644 --- a/repository/index_test.go +++ b/repository/index_test.go @@ -117,6 +117,12 @@ func TestIndexSerialize(t *testing.T) { Assert(t, idx.Final(), "index not final after encoding") + id := randomID() + idx.SetID(id) + id2, err := idx.ID() + Assert(t, id2.Equal(id), + "wrong ID returned: want %v, got %v", id, id2) + idx3, err := repository.DecodeIndex(wr3) OK(t, err) Assert(t, idx3 != nil, diff --git a/repository/repository.go b/repository/repository.go index d119d51d5..436212bcc 100644 --- a/repository/repository.go +++ b/repository/repository.go @@ -526,7 +526,8 @@ func SaveIndex(repo *Repository, index *Index) (backend.ID, error) { } sid := blob.ID() - return sid, nil + err = index.SetID(sid) + return sid, err } // saveIndex saves all indexes in the backend. From 1fc0d789136f33554bd901a6497b32580dea478d Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Mon, 2 Nov 2015 19:05:19 +0100 Subject: [PATCH 10/21] Refactor Index.Store() to take a PackedBlob --- cmd/restic/cmd_rebuild_index.go | 10 ++++++++-- repository/index.go | 35 ++++++++++++++++++++++----------- repository/index_test.go | 32 ++++++++++++++++++++++++++---- repository/repository.go | 8 +++++++- 4 files changed, 66 insertions(+), 19 deletions(-) diff --git a/cmd/restic/cmd_rebuild_index.go b/cmd/restic/cmd_rebuild_index.go index 3582079b1..7a550354b 100644 --- a/cmd/restic/cmd_rebuild_index.go +++ b/cmd/restic/cmd_rebuild_index.go @@ -90,7 +90,7 @@ func (cmd CmdRebuildIndex) RebuildIndex() error { } blobsDone[b] = struct{}{} - combinedIndex.Store(packedBlob.Type, packedBlob.ID, packedBlob.PackID, packedBlob.Offset, packedBlob.Length) + combinedIndex.Store(packedBlob) } combinedIndex.AddToSupersedes(indexID) @@ -162,7 +162,13 @@ func (cmd CmdRebuildIndex) RebuildIndex() error { for _, blob := range up.Entries { 
debug.Log("RebuildIndex.RebuildIndex", "pack %v: blob %v", packID.Str(), blob) - combinedIndex.Store(blob.Type, blob.ID, packID, blob.Offset, blob.Length) + combinedIndex.Store(repository.PackedBlob{ + Type: blob.Type, + ID: blob.ID, + PackID: packID, + Offset: blob.Offset, + Length: blob.Length, + }) } err = rd.Close() diff --git a/repository/index.go b/repository/index.go index e7df3168e..bb62f2ae9 100644 --- a/repository/index.go +++ b/repository/index.go @@ -40,12 +40,12 @@ func NewIndex() *Index { } } -func (idx *Index) store(t pack.BlobType, id backend.ID, pack backend.ID, offset, length uint) { - idx.pack[id] = indexEntry{ - tpe: t, - packID: pack, - offset: offset, - length: length, +func (idx *Index) store(blob PackedBlob) { + idx.pack[blob.ID] = indexEntry{ + tpe: blob.Type, + packID: blob.PackID, + offset: blob.Offset, + length: blob.Length, } } @@ -96,7 +96,7 @@ var IndexFull = func(idx *Index) bool { // Store remembers the id and pack in the index. An existing entry will be // silently overwritten. -func (idx *Index) Store(t pack.BlobType, id backend.ID, pack backend.ID, offset, length uint) { +func (idx *Index) Store(blob PackedBlob) { idx.m.Lock() defer idx.m.Unlock() @@ -104,10 +104,9 @@ func (idx *Index) Store(t pack.BlobType, id backend.ID, pack backend.ID, offset, panic("store new item in finalized index") } - debug.Log("Index.Store", "pack %v contains id %v (%v), offset %v, length %v", - pack.Str(), id.Str(), t, offset, length) + debug.Log("Index.Store", "%v", blob) - idx.store(t, id, pack, offset, length) + idx.store(blob) } // Lookup queries the index for the blob ID and returns a PackedBlob. 
@@ -489,7 +488,13 @@ func DecodeIndex(rd io.Reader) (idx *Index, err error) { idx = NewIndex() for _, pack := range idxJSON.Packs { for _, blob := range pack.Blobs { - idx.store(blob.Type, blob.ID, pack.ID, blob.Offset, blob.Length) + idx.store(PackedBlob{ + Type: blob.Type, + ID: blob.ID, + Offset: blob.Offset, + Length: blob.Length, + PackID: pack.ID, + }) } } idx.supersedes = idxJSON.Supersedes @@ -514,7 +519,13 @@ func DecodeOldIndex(rd io.Reader) (idx *Index, err error) { idx = NewIndex() for _, pack := range list { for _, blob := range pack.Blobs { - idx.store(blob.Type, blob.ID, pack.ID, blob.Offset, blob.Length) + idx.store(PackedBlob{ + Type: blob.Type, + ID: blob.ID, + PackID: pack.ID, + Offset: blob.Offset, + Length: blob.Length, + }) } } diff --git a/repository/index_test.go b/repository/index_test.go index d403ed8ad..db4d79345 100644 --- a/repository/index_test.go +++ b/repository/index_test.go @@ -41,7 +41,13 @@ func TestIndexSerialize(t *testing.T) { for j := 0; j < 20; j++ { id := randomID() length := uint(i*100 + j) - idx.Store(pack.Data, id, packID, pos, length) + idx.Store(repository.PackedBlob{ + Type: pack.Data, + ID: id, + PackID: packID, + Offset: pos, + Length: length, + }) tests = append(tests, testEntry{ id: id, @@ -95,7 +101,13 @@ func TestIndexSerialize(t *testing.T) { for j := 0; j < 10; j++ { id := randomID() length := uint(i*100 + j) - idx.Store(pack.Data, id, packID, pos, length) + idx.Store(repository.PackedBlob{ + Type: pack.Data, + ID: id, + PackID: packID, + Offset: pos, + Length: length, + }) newtests = append(newtests, testEntry{ id: id, @@ -154,7 +166,13 @@ func TestIndexSize(t *testing.T) { for j := 0; j < blobs; j++ { id := randomID() length := uint(i*100 + j) - idx.Store(pack.Data, id, packID, pos, length) + idx.Store(repository.PackedBlob{ + Type: pack.Data, + ID: id, + PackID: packID, + Offset: pos, + Length: length, + }) pos += length } @@ -361,7 +379,13 @@ func TestIndexPacks(t *testing.T) { for i := 0; i < 20; i++ { 
packID := randomID() - idx.Store(pack.Data, randomID(), packID, 0, 23) + idx.Store(repository.PackedBlob{ + Type: pack.Data, + ID: randomID(), + PackID: packID, + Offset: 0, + Length: 23, + }) packs.Insert(packID) } diff --git a/repository/repository.go b/repository/repository.go index 436212bcc..752ad6b7a 100644 --- a/repository/repository.go +++ b/repository/repository.go @@ -270,7 +270,13 @@ func (r *Repository) savePacker(p *pack.Packer) error { // update blobs in the index for _, b := range p.Blobs() { debug.Log("Repo.savePacker", " updating blob %v to pack %v", b.ID.Str(), sid.Str()) - r.idx.Current().Store(b.Type, b.ID, sid, b.Offset, uint(b.Length)) + r.idx.Current().Store(PackedBlob{ + Type: b.Type, + ID: b.ID, + PackID: sid, + Offset: b.Offset, + Length: uint(b.Length), + }) r.idx.RemoveFromInFlight(b.ID) } From db41102bfab02c7f8b9b77b883ebe7aaed79de18 Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Mon, 2 Nov 2015 19:28:30 +0100 Subject: [PATCH 11/21] Finalize repacker --- checker/repacker.go | 46 +++++++++++++++++++++++++------------- checker/repacker_test.go | 2 +- repository/index.go | 7 +++++- repository/master_index.go | 44 ++++++++++++++++++++++++++++++++++++ 4 files changed, 81 insertions(+), 18 deletions(-) diff --git a/checker/repacker.go b/checker/repacker.go index be885c16f..513a48352 100644 --- a/checker/repacker.go +++ b/checker/repacker.go @@ -12,16 +12,15 @@ import ( // new packs. type Repacker struct { unusedBlobs backend.IDSet - src, dst *repository.Repository + repo *repository.Repository } // NewRepacker returns a new repacker that (when Repack() in run) cleans up the // repository and creates new packs and indexs so that all blobs in unusedBlobs // aren't used any more. 
-func NewRepacker(src, dst *repository.Repository, unusedBlobs backend.IDSet) *Repacker { +func NewRepacker(repo *repository.Repository, unusedBlobs backend.IDSet) *Repacker { return &Repacker{ - src: src, - dst: dst, + repo: repo, unusedBlobs: unusedBlobs, } } @@ -31,14 +30,14 @@ func NewRepacker(src, dst *repository.Repository, unusedBlobs backend.IDSet) *Re func (r *Repacker) Repack() error { debug.Log("Repacker.Repack", "searching packs for %v", r.unusedBlobs) - packs, err := FindPacksForBlobs(r.src, r.unusedBlobs) + unneededPacks, err := FindPacksForBlobs(r.repo, r.unusedBlobs) if err != nil { return err } - debug.Log("Repacker.Repack", "found packs: %v", packs) + debug.Log("Repacker.Repack", "found packs: %v", unneededPacks) - blobs, err := FindBlobsForPacks(r.src, packs) + blobs, err := FindBlobsForPacks(r.repo, unneededPacks) if err != nil { return err } @@ -52,19 +51,39 @@ func (r *Repacker) Repack() error { debug.Log("Repacker.Repack", "need to repack blobs: %v", blobs) - err = RepackBlobs(r.src, r.dst, blobs) + err = RepackBlobs(r.repo, r.repo, blobs) if err != nil { return err } - debug.Log("Repacker.Repack", "remove unneeded packs: %v", packs) - for packID := range packs { - err = r.src.Backend().Remove(backend.Data, packID.String()) + debug.Log("Repacker.Repack", "remove unneeded packs: %v", unneededPacks) + for packID := range unneededPacks { + err = r.repo.Backend().Remove(backend.Data, packID.String()) if err != nil { return err } } + debug.Log("Repacker.Repack", "rebuild index") + idx, err := r.repo.Index().RebuildIndex(unneededPacks) + + newIndexID, err := repository.SaveIndex(r.repo, idx) + debug.Log("Repacker.Repack", "saved new index at %v, err %v", newIndexID, err) + if err != nil { + return err + } + + debug.Log("Repacker.Repack", "remove old indexes: %v", idx.Supersedes()) + for _, id := range idx.Supersedes() { + err = r.repo.Backend().Remove(backend.Index, id.String()) + if err != nil { + debug.Log("Repacker.Repack", "error removing index 
%v: %v", id.Str(), err) + return err + } + + debug.Log("Repacker.Repack", "removed index %v", id.Str()) + } + return nil } @@ -140,10 +159,5 @@ func RepackBlobs(src, dst *repository.Repository, blobIDs backend.IDSet) (err er return err } - err = dst.SaveIndex() - if err != nil { - return err - } - return nil } diff --git a/checker/repacker_test.go b/checker/repacker_test.go index f3e5bead8..dcf7b2040 100644 --- a/checker/repacker_test.go +++ b/checker/repacker_test.go @@ -111,7 +111,7 @@ func TestRepacker(t *testing.T) { t.Fatalf("expected unused blobs:\n %v\ngot:\n %v", unusedBlobs, list) } - repacker := checker.NewRepacker(repo, repo, unusedBlobs) + repacker := checker.NewRepacker(repo, unusedBlobs) OK(t, repacker.Repack()) chkr = checker.New(repo) diff --git a/repository/index.go b/repository/index.go index bb62f2ae9..f305a7bcb 100644 --- a/repository/index.go +++ b/repository/index.go @@ -439,7 +439,12 @@ func (idx *Index) Dump(w io.Writer) error { return err } - buf, err := json.MarshalIndent(list, "", " ") + outer := jsonIndex{ + Supersedes: idx.Supersedes(), + Packs: list, + } + + buf, err := json.MarshalIndent(outer, "", " ") if err != nil { return err } diff --git a/repository/master_index.go b/repository/master_index.go index bc3dea768..0de8e9c31 100644 --- a/repository/master_index.go +++ b/repository/master_index.go @@ -240,3 +240,47 @@ func (mi *MasterIndex) All() []*Index { return mi.idx } + +// RebuildIndex combines all known indexes to a new index, leaving out any +// packs whose ID is contained in packBlacklist. The new index contains the IDs +// of all known indexes in the "supersedes" field. 
+func (mi *MasterIndex) RebuildIndex(packBlacklist backend.IDSet) (*Index, error) { + mi.idxMutex.Lock() + defer mi.idxMutex.Unlock() + + debug.Log("MasterIndex.RebuildIndex", "start rebuilding index, blob blacklist: %v", packBlacklist) + + newIndex := NewIndex() + done := make(chan struct{}) + defer close(done) + + for i, idx := range mi.idx { + debug.Log("MasterIndex.RebuildIndex", "adding %d index ", i) + + for pb := range idx.Each(done) { + if packBlacklist.Has(pb.PackID) { + continue + } + + newIndex.Store(pb) + } + + if !idx.Final() { + continue + } + + id, err := idx.ID() + if err != nil { + return nil, err + } + + debug.Log("MasterIndex.RebuildIndex", "adding index id %v to supersedes field", id) + + err = newIndex.AddToSupersedes(id) + if err != nil { + return nil, err + } + } + + return newIndex, nil +} From cd948b56aca7bc79fc3532787a364b5638a541ee Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Sun, 8 Nov 2015 20:46:52 +0100 Subject: [PATCH 12/21] cmd_check: Don't display unused blobs by default --- cmd/restic/cmd_check.go | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/cmd/restic/cmd_check.go b/cmd/restic/cmd_check.go index 919568618..e8e54aa37 100644 --- a/cmd/restic/cmd_check.go +++ b/cmd/restic/cmd_check.go @@ -10,8 +10,9 @@ import ( ) type CmdCheck struct { - ReadData bool `long:"read-data" description:"Read data blobs" default:"false"` - RemoveOrphaned bool `long:"remove" description:"Remove data that isn't used" default:"false"` + ReadData bool `long:"read-data" description:"Read data blobs" default:"false"` + RemoveOrphaned bool `long:"remove" description:"Remove data that isn't used" default:"false"` + CheckUnused bool `long:"check-unused" description:"Check for unused blobs" default:"false"` global *GlobalOptions } @@ -106,8 +107,11 @@ func (cmd CmdCheck) Execute(args []string) error { } } - for _, id := range chkr.UnusedBlobs() { - cmd.global.Verbosef("unused blob %v\n", id.Str()) + if cmd.CheckUnused { + 
for _, id := range chkr.UnusedBlobs() { + cmd.global.Verbosef("unused blob %v\n", id.Str()) + errorsFound = true + } } if foundOrphanedPacks && cmd.RemoveOrphaned { From c4fc7b52ae06c17545692d4fdbc2c2071c849086 Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Sun, 8 Nov 2015 21:10:03 +0100 Subject: [PATCH 13/21] Add 'optimize' command that repacks blobs --- cmd/restic/cmd_optimize.go | 84 ++++++++++++++++++++++++++++++++++ cmd/restic/integration_test.go | 27 ++++++++++- 2 files changed, 109 insertions(+), 2 deletions(-) create mode 100644 cmd/restic/cmd_optimize.go diff --git a/cmd/restic/cmd_optimize.go b/cmd/restic/cmd_optimize.go new file mode 100644 index 000000000..1e29ce1d7 --- /dev/null +++ b/cmd/restic/cmd_optimize.go @@ -0,0 +1,84 @@ +package main + +import ( + "errors" + "fmt" + + "github.com/restic/restic/backend" + "github.com/restic/restic/checker" +) + +type CmdOptimize struct { + global *GlobalOptions +} + +func init() { + _, err := parser.AddCommand("optimize", + "optimize the repository", + "The optimize command reorganizes the repository and removes unneeded data", + &CmdOptimize{global: &globalOpts}) + if err != nil { + panic(err) + } +} + +func (cmd CmdOptimize) Usage() string { + return "[optimize-options]" +} + +func (cmd CmdOptimize) Execute(args []string) error { + if len(args) != 0 { + return errors.New("optimize has no arguments") + } + + repo, err := cmd.global.OpenRepository() + if err != nil { + return err + } + + cmd.global.Verbosef("Create exclusive lock for repository\n") + lock, err := lockRepoExclusive(repo) + defer unlockRepo(lock) + if err != nil { + return err + } + + chkr := checker.New(repo) + + cmd.global.Verbosef("Load indexes\n") + _, errs := chkr.LoadIndex() + + if len(errs) > 0 { + for _, err := range errs { + cmd.global.Warnf("error: %v\n", err) + } + return fmt.Errorf("LoadIndex returned errors") + } + + done := make(chan struct{}) + errChan := make(chan error) + go chkr.Structure(errChan, done) + + for err := 
range errChan { + if e, ok := err.(checker.TreeError); ok { + cmd.global.Warnf("error for tree %v:\n", e.ID.Str()) + for _, treeErr := range e.Errors { + cmd.global.Warnf(" %v\n", treeErr) + } + } else { + cmd.global.Warnf("error: %v\n", err) + } + } + + unusedBlobs := backend.NewIDSet(chkr.UnusedBlobs()...) + cmd.global.Verbosef("%d unused blobs found, repacking...\n", len(unusedBlobs)) + + repacker := checker.NewRepacker(repo, unusedBlobs) + err = repacker.Repack() + if err != nil { + return err + } + + cmd.global.Verbosef("repacking done\n") + return nil +} diff --git a/cmd/restic/integration_test.go b/cmd/restic/integration_test.go index fa95eca92..5c5a196d1 100644 --- a/cmd/restic/integration_test.go +++ b/cmd/restic/integration_test.go @@ -61,7 +61,7 @@ func cmdBackupExcludes(t testing.TB, global GlobalOptions, target []string, pare OK(t, cmd.Execute(target)) } -func cmdList(t testing.TB, global GlobalOptions, tpe string) []backend.ID { +func cmdList(t testing.TB, global GlobalOptions, tpe string) backend.IDs { var buf bytes.Buffer global.stdout = &buf cmd := &CmdList{global: &global} @@ -87,7 +87,11 @@ func cmdRestoreIncludes(t testing.TB, global GlobalOptions, dir string, snapshot } func cmdCheck(t testing.TB, global GlobalOptions) { - cmd := &CmdCheck{global: &global, ReadData: true} + cmd := &CmdCheck{ + global: &global, + ReadData: true, + CheckUnused: true, + } OK(t, cmd.Execute(nil)) } @@ -105,6 +109,11 @@ func cmdRebuildIndex(t testing.TB, global GlobalOptions) { OK(t, cmd.Execute(nil)) } +func cmdOptimize(t testing.TB, global GlobalOptions) { + cmd := &CmdOptimize{global: &global} + OK(t, cmd.Execute(nil)) +} + func cmdLs(t testing.TB, global GlobalOptions, snapshotID string) []string { var buf bytes.Buffer global.stdout = &buf @@ -689,3 +698,17 @@ func TestRebuildIndexAlwaysFull(t *testing.T) { repository.IndexFull = func(*repository.Index) bool { return true } TestRebuildIndex(t) } + +func TestOptimizeRemoveUnusedBlobs(t *testing.T) { + 
withTestEnvironment(t, func(env *testEnvironment, global GlobalOptions) { + datafile := filepath.Join("..", "..", "checker", "testdata", "checker-test-repo.tar.gz") + SetupTarTestFixture(t, env.base, datafile) + + // snapshotIDs := cmdList(t, global, "snapshots") + // t.Logf("snapshots: %v", snapshotIDs) + + OK(t, os.Remove(filepath.Join(env.repo, "snapshots", "a13c11e582b77a693dd75ab4e3a3ba96538a056594a4b9076e4cacebe6e06d43"))) + cmdOptimize(t, global) + cmdCheck(t, global) + }) +} From 43e2c9837ef3fd104b80737b71288730e0f85f48 Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Sun, 8 Nov 2015 21:24:51 +0100 Subject: [PATCH 14/21] check: removing orphaned packs is handled in 'optimize' --- cmd/restic/cmd_check.go | 24 ++---------------------- 1 file changed, 2 insertions(+), 22 deletions(-) diff --git a/cmd/restic/cmd_check.go b/cmd/restic/cmd_check.go index e8e54aa37..a94ad138b 100644 --- a/cmd/restic/cmd_check.go +++ b/cmd/restic/cmd_check.go @@ -5,14 +5,12 @@ import ( "fmt" "os" - "github.com/restic/restic/backend" "github.com/restic/restic/checker" ) type CmdCheck struct { - ReadData bool `long:"read-data" description:"Read data blobs" default:"false"` - RemoveOrphaned bool `long:"remove" description:"Remove data that isn't used" default:"false"` - CheckUnused bool `long:"check-unused" description:"Check for unused blobs" default:"false"` + ReadData bool `long:"read-data" description:"Read data blobs" default:"false"` + CheckUnused bool `long:"check-unused" description:"Check for unused blobs" default:"false"` global *GlobalOptions } @@ -81,14 +79,9 @@ func (cmd CmdCheck) Execute(args []string) error { cmd.global.Verbosef("Check all packs\n") go chkr.Packs(errChan, done) - foundOrphanedPacks := false for err := range errChan { errorsFound = true fmt.Fprintf(os.Stderr, "%v\n", err) - - if e, ok := err.(checker.PackError); ok && e.Orphaned { - foundOrphanedPacks = true - } } cmd.global.Verbosef("Check snapshots, trees and blobs\n") @@ -114,19 +107,6 @@ func 
(cmd CmdCheck) Execute(args []string) error { } } - if foundOrphanedPacks && cmd.RemoveOrphaned { - IDs := chkr.OrphanedPacks() - cmd.global.Verbosef("Remove %d orphaned packs... ", len(IDs)) - - for _, id := range IDs { - if err := repo.Backend().Remove(backend.Data, id.String()); err != nil { - fmt.Fprintf(os.Stderr, "%v\n", err) - } - } - - cmd.global.Verbosef("done\n") - } - if errorsFound { return errors.New("repository contains errors") } From 0222b1701eff500adc6ac354e00f80169edd5f5b Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Sun, 8 Nov 2015 21:35:48 +0100 Subject: [PATCH 15/21] Remove automatic index conversion --- checker/checker.go | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/checker/checker.go b/checker/checker.go index 26219e604..55639d69c 100644 --- a/checker/checker.go +++ b/checker/checker.go @@ -3,7 +3,6 @@ package checker import ( "errors" "fmt" - "os" "sync" "github.com/restic/restic" @@ -73,14 +72,8 @@ func (c *Checker) LoadIndex() (hints []error, errs []error) { debug.Log("LoadIndex", "worker got index %v", id) idx, err := repository.LoadIndexWithDecoder(c.repo, id.String(), repository.DecodeIndex) if err == repository.ErrOldIndexFormat { - debug.Log("LoadIndex", "old index format found, converting") - fmt.Fprintf(os.Stderr, "convert index %v to new format\n", id.Str()) - id, err = repository.ConvertIndex(c.repo, id) - if err != nil { - return err - } - - idx, err = repository.LoadIndexWithDecoder(c.repo, id.String(), repository.DecodeIndex) + debug.Log("LoadIndex", "index %v has old format", id.Str()) + idx, err = repository.LoadIndexWithDecoder(c.repo, id.String(), repository.DecodeOldIndex) } if err != nil { From c59b12c93957ef0539b1c6ba9ab157be291272c5 Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Sun, 8 Nov 2015 21:50:48 +0100 Subject: [PATCH 16/21] Show a hint when the checker finds an old index --- checker/checker.go | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git 
a/checker/checker.go b/checker/checker.go index 55639d69c..7064e56c9 100644 --- a/checker/checker.go +++ b/checker/checker.go @@ -58,6 +58,16 @@ func (e ErrDuplicatePacks) Error() string { return fmt.Sprintf("pack %v contained in several indexes: %v", e.PackID.Str(), e.Indexes) } +// ErrOldIndexFormat is returned when an index with the old format is +// found. +type ErrOldIndexFormat struct { + backend.ID +} + +func (err ErrOldIndexFormat) Error() string { + return fmt.Sprintf("index %v has old format", err.ID.Str()) +} + // LoadIndex loads all index files. func (c *Checker) LoadIndex() (hints []error, errs []error) { debug.Log("LoadIndex", "Start") @@ -73,6 +83,8 @@ func (c *Checker) LoadIndex() (hints []error, errs []error) { idx, err := repository.LoadIndexWithDecoder(c.repo, id.String(), repository.DecodeIndex) if err == repository.ErrOldIndexFormat { debug.Log("LoadIndex", "index %v has old format", id.Str()) + hints = append(hints, ErrOldIndexFormat{id}) + idx, err = repository.LoadIndexWithDecoder(c.repo, id.String(), repository.DecodeOldIndex) } From 2e6eee991d2c59e8189c4ef1d816eca0535d2b3b Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Sun, 8 Nov 2015 22:21:08 +0100 Subject: [PATCH 17/21] Add test for optimize command with old indexes --- cmd/restic/integration_test.go | 43 ++++++++++++++++++++++++---------- 1 file changed, 31 insertions(+), 12 deletions(-) diff --git a/cmd/restic/integration_test.go b/cmd/restic/integration_test.go index 5c5a196d1..2fafee92f 100644 --- a/cmd/restic/integration_test.go +++ b/cmd/restic/integration_test.go @@ -699,16 +699,35 @@ func TestRebuildIndexAlwaysFull(t *testing.T) { TestRebuildIndex(t) } -func TestOptimizeRemoveUnusedBlobs(t *testing.T) { - withTestEnvironment(t, func(env *testEnvironment, global GlobalOptions) { - datafile := filepath.Join("..", "..", "checker", "testdata", "checker-test-repo.tar.gz") - SetupTarTestFixture(t, env.base, datafile) - - // snapshotIDs := cmdList(t, global, "snapshots") - // 
t.Logf("snapshots: %v", snapshotIDs) - - OK(t, os.Remove(filepath.Join(env.repo, "snapshots", "a13c11e582b77a693dd75ab4e3a3ba96538a056594a4b9076e4cacebe6e06d43"))) - cmdOptimize(t, global) - cmdCheck(t, global) - }) +var optimizeTests = []struct { + testFilename string + snapshotID string +}{ + { + filepath.Join("..", "..", "checker", "testdata", "checker-test-repo.tar.gz"), + "a13c11e582b77a693dd75ab4e3a3ba96538a056594a4b9076e4cacebe6e06d43", + }, + { + filepath.Join("..", "..", "repository", "testdata", "old-index-repo.tar.gz"), + "", + }, +} + +func TestOptimizeRemoveUnusedBlobs(t *testing.T) { + for i, test := range optimizeTests { + withTestEnvironment(t, func(env *testEnvironment, global GlobalOptions) { + SetupTarTestFixture(t, env.base, test.testFilename) + + if test.snapshotID != "" { + OK(t, os.Remove(filepath.Join(env.repo, "snapshots", test.snapshotID))) + } + + cmdOptimize(t, global) + output := cmdCheckOutput(t, global) + + if len(output) > 0 { + t.Errorf("expected no output for check in test %d, got:\n%v", i, output) + } + }) + } } From 6c54d3fa82c49b57b7500daeaa903bfb191161bb Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Sun, 8 Nov 2015 22:21:29 +0100 Subject: [PATCH 18/21] index: also mark old index as final on decode --- checker/repacker.go | 4 ++-- repository/index.go | 1 + repository/master_index.go | 8 +++++--- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/checker/repacker.go b/checker/repacker.go index 513a48352..f3b158d0c 100644 --- a/checker/repacker.go +++ b/checker/repacker.go @@ -64,11 +64,11 @@ func (r *Repacker) Repack() error { } } - debug.Log("Repacker.Repack", "rebuild index") + debug.Log("Repacker.Repack", "rebuild index, unneeded packs: %v", unneededPacks) idx, err := r.repo.Index().RebuildIndex(unneededPacks) newIndexID, err := repository.SaveIndex(r.repo, idx) - debug.Log("Repacker.Repack", "saved new index at %v, err %v", newIndexID, err) + debug.Log("Repacker.Repack", "saved new index at %v, err %v", 
newIndexID.Str(), err) if err != nil { return err } diff --git a/repository/index.go b/repository/index.go index f305a7bcb..cd4e6e9dd 100644 --- a/repository/index.go +++ b/repository/index.go @@ -533,6 +533,7 @@ func DecodeOldIndex(rd io.Reader) (idx *Index, err error) { }) } } + idx.final = true debug.Log("Index.DecodeOldIndex", "done") return idx, err diff --git a/repository/master_index.go b/repository/master_index.go index 0de8e9c31..96cabe957 100644 --- a/repository/master_index.go +++ b/repository/master_index.go @@ -248,14 +248,14 @@ func (mi *MasterIndex) RebuildIndex(packBlacklist backend.IDSet) (*Index, error) mi.idxMutex.Lock() defer mi.idxMutex.Unlock() - debug.Log("MasterIndex.RebuildIndex", "start rebuilding index, blob blacklist: %v", packBlacklist) + debug.Log("MasterIndex.RebuildIndex", "start rebuilding index of %d indexes, pack blacklist: %v", len(mi.idx), packBlacklist) newIndex := NewIndex() done := make(chan struct{}) defer close(done) for i, idx := range mi.idx { - debug.Log("MasterIndex.RebuildIndex", "adding %d index ", i) + debug.Log("MasterIndex.RebuildIndex", "adding index %d", i) for pb := range idx.Each(done) { if packBlacklist.Has(pb.PackID) { @@ -266,15 +266,17 @@ func (mi *MasterIndex) RebuildIndex(packBlacklist backend.IDSet) (*Index, error) } if !idx.Final() { + debug.Log("MasterIndex.RebuildIndex", "index %d isn't final, don't add to supersedes field", i) continue } id, err := idx.ID() if err != nil { + debug.Log("MasterIndex.RebuildIndex", "index %d does not have an ID: %v", err) return nil, err } - debug.Log("MasterIndex.RebuildIndex", "adding index id %v to supersedes field", id) + debug.Log("MasterIndex.RebuildIndex", "adding index id %v to supersedes field", id.Str()) err = newIndex.AddToSupersedes(id) if err != nil { From 5776b8f01cf2bf8f7d2449f0408b247bd9ba188a Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Sun, 8 Nov 2015 22:27:13 +0100 Subject: [PATCH 19/21] remove ConvertIndex --- cmd/restic/integration_test.go 
| 2 +- .../restic}/testdata/old-index-repo.tar.gz | Bin repository/index.go | 26 --------- repository/index_test.go | 51 ------------------ 4 files changed, 1 insertion(+), 78 deletions(-) rename {repository => cmd/restic}/testdata/old-index-repo.tar.gz (100%) diff --git a/cmd/restic/integration_test.go b/cmd/restic/integration_test.go index 2fafee92f..01bb0b14a 100644 --- a/cmd/restic/integration_test.go +++ b/cmd/restic/integration_test.go @@ -708,7 +708,7 @@ var optimizeTests = []struct { "a13c11e582b77a693dd75ab4e3a3ba96538a056594a4b9076e4cacebe6e06d43", }, { - filepath.Join("..", "..", "repository", "testdata", "old-index-repo.tar.gz"), + filepath.Join("testdata", "old-index-repo.tar.gz"), "", }, } diff --git a/repository/testdata/old-index-repo.tar.gz b/cmd/restic/testdata/old-index-repo.tar.gz similarity index 100% rename from repository/testdata/old-index-repo.tar.gz rename to cmd/restic/testdata/old-index-repo.tar.gz diff --git a/repository/index.go b/repository/index.go index cd4e6e9dd..4694c5d13 100644 --- a/repository/index.go +++ b/repository/index.go @@ -539,32 +539,6 @@ func DecodeOldIndex(rd io.Reader) (idx *Index, err error) { return idx, err } -// ConvertIndexes loads all indexes from the repo and converts them to the new -// format (if necessary). When the conversion is succcessful, the old indexes -// are removed. 
-func ConvertIndexes(repo *Repository) error { - debug.Log("ConvertIndexes", "start") - done := make(chan struct{}) - defer close(done) - - for id := range repo.List(backend.Index, done) { - debug.Log("ConvertIndexes", "checking index %v", id.Str()) - - newID, err := ConvertIndex(repo, id) - if err != nil { - debug.Log("ConvertIndexes", "Converting index %v returns error: %v", id.Str(), err) - return err - } - - if id != newID { - debug.Log("ConvertIndexes", "index %v converted to new format as %v", id.Str(), newID.Str()) - } - } - - debug.Log("ConvertIndexes", "done") - return nil -} - // LoadIndexWithDecoder loads the index and decodes it with fn. func LoadIndexWithDecoder(repo *Repository, id string, fn func(io.Reader) (*Index, error)) (*Index, error) { debug.Log("LoadIndexWithDecoder", "Loading index %v", id[:8]) diff --git a/repository/index_test.go b/repository/index_test.go index db4d79345..4a6f270fd 100644 --- a/repository/index_test.go +++ b/repository/index_test.go @@ -4,7 +4,6 @@ import ( "bytes" "crypto/rand" "io" - "path/filepath" "testing" "github.com/restic/restic/backend" @@ -323,56 +322,6 @@ func TestIndexUnserializeOld(t *testing.T) { Equals(t, 0, len(idx.Supersedes())) } -var oldIndexTestRepo = filepath.Join("testdata", "old-index-repo.tar.gz") - -func TestConvertIndex(t *testing.T) { - WithTestEnvironment(t, oldIndexTestRepo, func(repodir string) { - repo := OpenLocalRepo(t, repodir) - - old := make(map[backend.ID]*repository.Index) - for id := range repo.List(backend.Index, nil) { - idx, err := repository.LoadIndex(repo, id.String()) - OK(t, err) - old[id] = idx - } - - OK(t, repository.ConvertIndexes(repo)) - - for id := range repo.List(backend.Index, nil) { - idx, err := repository.LoadIndexWithDecoder(repo, id.String(), repository.DecodeIndex) - OK(t, err) - - Assert(t, len(idx.Supersedes()) == 1, - "Expected index %v to supersed exactly one index, got %v", id, idx.Supersedes()) - - oldIndexID := idx.Supersedes()[0] - - oldIndex, ok := 
old[oldIndexID] - Assert(t, ok, - "Index %v superseds %v, but that wasn't found in the old index map", id.Str(), oldIndexID.Str()) - - Assert(t, idx.Count(pack.Data) == oldIndex.Count(pack.Data), - "Index %v count blobs %v: %v != %v", id.Str(), pack.Data, idx.Count(pack.Data), oldIndex.Count(pack.Data)) - Assert(t, idx.Count(pack.Tree) == oldIndex.Count(pack.Tree), - "Index %v count blobs %v: %v != %v", id.Str(), pack.Tree, idx.Count(pack.Tree), oldIndex.Count(pack.Tree)) - - for packedBlob := range idx.Each(nil) { - blob, err := oldIndex.Lookup(packedBlob.ID) - OK(t, err) - - Assert(t, blob.PackID == packedBlob.PackID, - "Check blob %v: pack ID %v != %v", packedBlob.ID, blob.PackID, packedBlob.PackID) - Assert(t, blob.Type == packedBlob.Type, - "Check blob %v: Type %v != %v", packedBlob.ID, blob.Type, packedBlob.Type) - Assert(t, blob.Offset == packedBlob.Offset, - "Check blob %v: Type %v != %v", packedBlob.ID, blob.Offset, packedBlob.Offset) - Assert(t, blob.Length == packedBlob.Length, - "Check blob %v: Type %v != %v", packedBlob.ID, blob.Length, packedBlob.Length) - } - } - }) -} - func TestIndexPacks(t *testing.T) { idx := repository.NewIndex() packs := backend.NewIDSet() From 742d69bf4d3913d459fe02ecd0adefbfaf4dd15c Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Sun, 8 Nov 2015 22:38:17 +0100 Subject: [PATCH 20/21] Add another test for optimizing unused blobs --- cmd/restic/integration_test.go | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/cmd/restic/integration_test.go b/cmd/restic/integration_test.go index 01bb0b14a..baf12fe02 100644 --- a/cmd/restic/integration_test.go +++ b/cmd/restic/integration_test.go @@ -701,15 +701,22 @@ func TestRebuildIndexAlwaysFull(t *testing.T) { var optimizeTests = []struct { testFilename string - snapshotID string + snapshots backend.IDSet }{ { filepath.Join("..", "..", "checker", "testdata", "checker-test-repo.tar.gz"), - 
"a13c11e582b77a693dd75ab4e3a3ba96538a056594a4b9076e4cacebe6e06d43", + backend.NewIDSet(ParseID("a13c11e582b77a693dd75ab4e3a3ba96538a056594a4b9076e4cacebe6e06d43")), }, { filepath.Join("testdata", "old-index-repo.tar.gz"), - "", + nil, + }, + { + filepath.Join("testdata", "old-index-repo.tar.gz"), + backend.NewIDSet( + ParseID("f7d83db709977178c9d1a09e4009355e534cde1a135b8186b8b118a3fc4fcd41"), + ParseID("51d249d28815200d59e4be7b3f21a157b864dc343353df9d8e498220c2499b02"), + ), }, } @@ -718,8 +725,8 @@ func TestOptimizeRemoveUnusedBlobs(t *testing.T) { withTestEnvironment(t, func(env *testEnvironment, global GlobalOptions) { SetupTarTestFixture(t, env.base, test.testFilename) - if test.snapshotID != "" { - OK(t, os.Remove(filepath.Join(env.repo, "snapshots", test.snapshotID))) + for id := range test.snapshots { + OK(t, os.Remove(filepath.Join(env.repo, "snapshots", id.String()))) } cmdOptimize(t, global) From 1f9aea9905307c4f082e1e7528ed40fd0e943038 Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Sun, 8 Nov 2015 22:41:45 +0100 Subject: [PATCH 21/21] fix test on windows, reset read-only flag --- cmd/restic/integration_helpers_test.go | 10 ++++++++++ cmd/restic/integration_test.go | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/cmd/restic/integration_helpers_test.go b/cmd/restic/integration_helpers_test.go index 857096c58..734d974c1 100644 --- a/cmd/restic/integration_helpers_test.go +++ b/cmd/restic/integration_helpers_test.go @@ -216,3 +216,13 @@ func withTestEnvironment(t testing.TB, f func(*testEnvironment, GlobalOptions)) RemoveAll(t, tempdir) } + +// removeFile resets the read-only flag and then deletes the file. 
+func removeFile(fn string) error { + err := os.Chmod(fn, 0666) + if err != nil { + return err + } + + return os.Remove(fn) +} diff --git a/cmd/restic/integration_test.go b/cmd/restic/integration_test.go index baf12fe02..263f6cd16 100644 --- a/cmd/restic/integration_test.go +++ b/cmd/restic/integration_test.go @@ -726,7 +726,7 @@ func TestOptimizeRemoveUnusedBlobs(t *testing.T) { SetupTarTestFixture(t, env.base, test.testFilename) for id := range test.snapshots { - OK(t, os.Remove(filepath.Join(env.repo, "snapshots", id.String()))) + OK(t, removeFile(filepath.Join(env.repo, "snapshots", id.String()))) } cmdOptimize(t, global)