From 9d1fb94c6c937a371a6be1437b831a7554072f75 Mon Sep 17 00:00:00 2001 From: Alexander Weiss Date: Sun, 14 Jun 2020 13:26:10 +0200 Subject: [PATCH] make Lookup() return all blobs + simplify syntax --- cmd/restic/cmd_cat.go | 3 +- cmd/restic/cmd_find.go | 4 +- internal/archiver/blob_saver_test.go | 4 +- internal/repository/index.go | 7 +- internal/repository/index_test.go | 20 +--- internal/repository/master_index.go | 11 +-- internal/repository/master_index_test.go | 111 +++++++++++++++++++---- internal/repository/repack_test.go | 10 +- internal/repository/repository.go | 4 +- internal/restic/repository.go | 2 +- internal/restorer/filerestorer.go | 20 ++-- internal/restorer/filerestorer_test.go | 6 +- internal/restorer/restorer.go | 4 +- 13 files changed, 134 insertions(+), 72 deletions(-) diff --git a/cmd/restic/cmd_cat.go b/cmd/restic/cmd_cat.go index 5c0e94d96..9ea094171 100644 --- a/cmd/restic/cmd_cat.go +++ b/cmd/restic/cmd_cat.go @@ -165,8 +165,7 @@ func runCat(gopts GlobalOptions, args []string) error { case "blob": for _, t := range []restic.BlobType{restic.DataBlob, restic.TreeBlob} { - _, found := repo.Index().Lookup(id, t) - if !found { + if !repo.Index().Has(id, t) { continue } diff --git a/cmd/restic/cmd_find.go b/cmd/restic/cmd_find.go index baeafb016..fbae49e75 100644 --- a/cmd/restic/cmd_find.go +++ b/cmd/restic/cmd_find.go @@ -465,8 +465,8 @@ func (f *Finder) findObjectPack(ctx context.Context, id string, t restic.BlobTyp return } - blobs, found := idx.Lookup(rid, t) - if !found { + blobs := idx.Lookup(rid, t) + if len(blobs) == 0 { Printf("Object %s not found in the index\n", rid.Str()) return } diff --git a/internal/archiver/blob_saver_test.go b/internal/archiver/blob_saver_test.go index 18832ae1b..b3b235283 100644 --- a/internal/archiver/blob_saver_test.go +++ b/internal/archiver/blob_saver_test.go @@ -40,7 +40,7 @@ func TestBlobSaver(t *testing.T) { tmb, ctx := tomb.WithContext(ctx) saver := &saveFail{ - idx: repository.NewIndex(), + idx: repository.NewMasterIndex(), } b := NewBlobSaver(ctx, tmb, saver, uint(runtime.NumCPU())) @@ -86,7 +86,7 @@ func TestBlobSaverError(t *testing.T) { tmb, ctx := tomb.WithContext(ctx) saver := &saveFail{ - idx: repository.NewIndex(), + idx: repository.NewMasterIndex(), failAt: int32(test.failAt), } diff --git a/internal/repository/index.go b/internal/repository/index.go index e6053fa72..cb1951d5a 100644 --- a/internal/repository/index.go +++ b/internal/repository/index.go @@ -174,8 +174,9 @@ func (idx *Index) toPackedBlob(e *indexEntry, typ restic.BlobType) restic.Packed } } -// Lookup queries the index for the blob ID and returns a restic.PackedBlob. -func (idx *Index) Lookup(id restic.ID, tpe restic.BlobType) (blobs []restic.PackedBlob, found bool) { +// Lookup queries the index for the blob ID and returns all entries including +// duplicates. Adds found entries to blobs and returns the result. +func (idx *Index) Lookup(id restic.ID, tpe restic.BlobType, blobs []restic.PackedBlob) []restic.PackedBlob { idx.m.Lock() defer idx.m.Unlock() @@ -183,7 +184,7 @@ func (idx *Index) Lookup(id restic.ID, tpe restic.BlobType) (blobs []restic.Pack blobs = append(blobs, idx.toPackedBlob(e, tpe)) }) - return blobs, len(blobs) > 0 + return blobs } // ListPack returns a list of blobs contained in a pack. diff --git a/internal/repository/index_test.go b/internal/repository/index_test.go index 7b95a13c2..e65e663e4 100644 --- a/internal/repository/index_test.go +++ b/internal/repository/index_test.go @@ -66,9 +66,7 @@ func TestIndexSerialize(t *testing.T) { rtest.OK(t, err) for _, testBlob := range tests { - list, found := idx.Lookup(testBlob.id, testBlob.tpe) - rtest.Assert(t, found, "Expected to find blob id %v", testBlob.id.Str()) - + list := idx.Lookup(testBlob.id, testBlob.tpe, nil) if len(list) != 1 { t.Errorf("expected one result for blob %v, got %v: %v", testBlob.id.Str(), len(list), list) } @@ -79,9 +77,7 @@ func TestIndexSerialize(t *testing.T) { rtest.Equals(t, testBlob.offset, result.Offset) rtest.Equals(t, testBlob.length, result.Length) - list2, found := idx2.Lookup(testBlob.id, testBlob.tpe) - rtest.Assert(t, found, "Expected to find blob id %v", testBlob.id) - + list2 := idx2.Lookup(testBlob.id, testBlob.tpe, nil) if len(list2) != 1 { t.Errorf("expected one result for blob %v, got %v: %v", testBlob.id.Str(), len(list2), list2) } @@ -148,9 +144,7 @@ func TestIndexSerialize(t *testing.T) { // all new blobs must be in the index for _, testBlob := range newtests { - list, found := idx3.Lookup(testBlob.id, testBlob.tpe) - rtest.Assert(t, found, "Expected to find blob id %v", testBlob.id.Str()) - + list := idx3.Lookup(testBlob.id, testBlob.tpe, nil) if len(list) != 1 { t.Errorf("expected one result for blob %v, got %v: %v", testBlob.id.Str(), len(list), list) } @@ -295,9 +289,7 @@ func TestIndexUnserialize(t *testing.T) { rtest.OK(t, err) for _, test := range exampleTests { - list, found := idx.Lookup(test.id, test.tpe) - rtest.Assert(t, found, "Expected to find blob id %v", test.id.Str()) - + list := idx.Lookup(test.id, test.tpe, nil) if len(list) != 1 { t.Errorf("expected one result for blob %v, got %v: %v", test.id.Str(), len(list), list) } @@ -368,9 +360,7 @@ func TestIndexUnserializeOld(t *testing.T) { rtest.OK(t, err) for _, test := range exampleTests { - list, found := idx.Lookup(test.id, test.tpe) - rtest.Assert(t, found, "Expected to find blob id %v", test.id.Str()) - + list := idx.Lookup(test.id, test.tpe, nil) if len(list) != 1 { t.Errorf("expected one result for blob %v, got %v: %v", test.id.Str(), len(list), list) } diff --git a/internal/repository/master_index.go b/internal/repository/master_index.go index 2705b5994..318bf7c31 100644 --- a/internal/repository/master_index.go +++ b/internal/repository/master_index.go @@ -26,19 +26,16 @@ func NewMasterIndex() *MasterIndex { return &MasterIndex{idx: idx, pendingBlobs: restic.NewBlobSet()} } -// Lookup queries all known Indexes for the ID and returns the first match. -func (mi *MasterIndex) Lookup(id restic.ID, tpe restic.BlobType) (blobs []restic.PackedBlob, found bool) { +// Lookup queries all known Indexes for the ID and returns all matches. +func (mi *MasterIndex) Lookup(id restic.ID, tpe restic.BlobType) (blobs []restic.PackedBlob) { mi.idxMutex.RLock() defer mi.idxMutex.RUnlock() for _, idx := range mi.idx { - blobs, found = idx.Lookup(id, tpe) - if found { - return - } + blobs = idx.Lookup(id, tpe, blobs) } - return nil, false + return blobs } // LookupSize queries all known Indexes for the ID and returns the first match. diff --git a/internal/repository/master_index_test.go b/internal/repository/master_index_test.go index e16d55bd5..61e2377ce 100644 --- a/internal/repository/master_index_test.go +++ b/internal/repository/master_index_test.go @@ -10,16 +10,17 @@ import ( rtest "github.com/restic/restic/internal/test" ) -func TestMasterIndexLookup(t *testing.T) { +func TestMasterIndex(t *testing.T) { idInIdx1 := restic.NewRandomID() idInIdx2 := restic.NewRandomID() + idInIdx12 := restic.NewRandomID() blob1 := restic.PackedBlob{ PackID: restic.NewRandomID(), Blob: restic.Blob{ Type: restic.DataBlob, ID: idInIdx1, - Length: 10, + Length: uint(restic.CiphertextLength(10)), Offset: 0, }, } @@ -29,32 +30,103 @@ func TestMasterIndexLookup(t *testing.T) { Blob: restic.Blob{ Type: restic.DataBlob, ID: idInIdx2, - Length: 100, + Length: uint(restic.CiphertextLength(100)), Offset: 10, }, } + blob12a := restic.PackedBlob{ + PackID: restic.NewRandomID(), + Blob: restic.Blob{ + Type: restic.TreeBlob, + ID: idInIdx12, + Length: uint(restic.CiphertextLength(123)), + Offset: 110, + }, + } + + blob12b := restic.PackedBlob{ + PackID: restic.NewRandomID(), + Blob: restic.Blob{ + Type: restic.TreeBlob, + ID: idInIdx12, + Length: uint(restic.CiphertextLength(123)), + Offset: 50, + }, + } + idx1 := repository.NewIndex() idx1.Store(blob1) + idx1.Store(blob12a) idx2 := repository.NewIndex() idx2.Store(blob2) + idx2.Store(blob12b) mIdx := repository.NewMasterIndex() mIdx.Insert(idx1) mIdx.Insert(idx2) - blobs, found := mIdx.Lookup(idInIdx1, restic.DataBlob) - rtest.Assert(t, found, "Expected to find blob id %v from index 1", idInIdx1) + // test idInIdx1 + found := mIdx.Has(idInIdx1, restic.DataBlob) + rtest.Equals(t, true, found) + + blobs := mIdx.Lookup(idInIdx1, restic.DataBlob) rtest.Equals(t, []restic.PackedBlob{blob1}, blobs) - blobs, found = mIdx.Lookup(idInIdx2, restic.DataBlob) - rtest.Assert(t, found, "Expected to find blob id %v from index 2", idInIdx2) + size, found := mIdx.LookupSize(idInIdx1, restic.DataBlob) + rtest.Equals(t, true, found) + rtest.Equals(t, uint(10), size) + + // test idInIdx2 + found = mIdx.Has(idInIdx2, restic.DataBlob) + rtest.Equals(t, true, found) + + blobs = mIdx.Lookup(idInIdx2, restic.DataBlob) rtest.Equals(t, []restic.PackedBlob{blob2}, blobs) - blobs, found = mIdx.Lookup(restic.NewRandomID(), restic.DataBlob) - rtest.Assert(t, !found, "Expected to not find a blob when fetching with a random id") + size, found = mIdx.LookupSize(idInIdx2, restic.DataBlob) + rtest.Equals(t, true, found) + rtest.Equals(t, uint(100), size) + + // test idInIdx12 + found = mIdx.Has(idInIdx12, restic.TreeBlob) + rtest.Equals(t, true, found) + + blobs = mIdx.Lookup(idInIdx12, restic.TreeBlob) + rtest.Equals(t, 2, len(blobs)) + + // test Lookup result for blob12a + found = false + if blobs[0] == blob12a || blobs[1] == blob12a { + found = true + } + rtest.Assert(t, found, "blob12a not found in result") + + // test Lookup result for blob12b + found = false + if blobs[0] == blob12b || blobs[1] == blob12b { + found = true + } + rtest.Assert(t, found, "blob12a not found in result") + + size, found = mIdx.LookupSize(idInIdx12, restic.TreeBlob) + rtest.Equals(t, true, found) + rtest.Equals(t, uint(123), size) + + // test not in index + found = mIdx.Has(restic.NewRandomID(), restic.TreeBlob) + rtest.Assert(t, !found, "Expected no blobs when fetching with a random id") + blobs = mIdx.Lookup(restic.NewRandomID(), restic.DataBlob) rtest.Assert(t, blobs == nil, "Expected no blobs when fetching with a random id") + size, found = mIdx.LookupSize(restic.NewRandomID(), restic.DataBlob) + rtest.Assert(t, !found, "Expected no blobs when fetching with a random id") + + // Test Count + num := mIdx.Count(restic.DataBlob) + rtest.Equals(t, uint(2), num) + num = mIdx.Count(restic.TreeBlob) + rtest.Equals(t, uint(2), num) } func TestMasterMergeFinalIndexes(t *testing.T) { @@ -96,16 +168,13 @@ func TestMasterMergeFinalIndexes(t *testing.T) { mIdx.MergeFinalIndexes() - blobs, found := mIdx.Lookup(idInIdx1, restic.DataBlob) - rtest.Assert(t, found, "Expected to find blob id %v from index 1", idInIdx1) + blobs := mIdx.Lookup(idInIdx1, restic.DataBlob) rtest.Equals(t, []restic.PackedBlob{blob1}, blobs) - blobs, found = mIdx.Lookup(idInIdx2, restic.DataBlob) - rtest.Assert(t, found, "Expected to find blob id %v from index 2", idInIdx2) + blobs = mIdx.Lookup(idInIdx2, restic.DataBlob) rtest.Equals(t, []restic.PackedBlob{blob2}, blobs) - blobs, found = mIdx.Lookup(restic.NewRandomID(), restic.DataBlob) - rtest.Assert(t, !found, "Expected to not find a blob when fetching with a random id") + blobs = mIdx.Lookup(restic.NewRandomID(), restic.DataBlob) rtest.Assert(t, blobs == nil, "Expected no blobs when fetching with a random id") } @@ -196,6 +265,16 @@ func BenchmarkMasterIndexLookupParallel(b *testing.B) { } }) }) - + } +} + +func BenchmarkMasterIndexLookupBlobSize(b *testing.B) { + rng := rand.New(rand.NewSource(0)) + mIdx, lookupID := createRandomMasterIndex(rand.New(rng), 5, 200000) + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + mIdx.LookupSize(lookupID, restic.DataBlob) } } diff --git a/internal/repository/repack_test.go b/internal/repository/repack_test.go index 0505c7211..76375723e 100644 --- a/internal/repository/repack_test.go +++ b/internal/repository/repack_test.go @@ -110,8 +110,8 @@ func findPacksForBlobs(t *testing.T, repo restic.Repository, blobs restic.BlobSe idx := repo.Index() for h := range blobs { - list, found := idx.Lookup(h.ID, h.Type) - if !found { + list := idx.Lookup(h.ID, h.Type) + if len(list) == 0 { t.Fatal("Failed to find blob", h.ID.Str(), "with type", h.Type) } @@ -215,8 +215,8 @@ func TestRepack(t *testing.T) { idx := repo.Index() for h := range keepBlobs { - list, found := idx.Lookup(h.ID, h.Type) - if !found { + list := idx.Lookup(h.ID, h.Type) + if len(list) == 0 { t.Errorf("unable to find blob %v in repo", h.ID.Str()) continue } @@ -234,7 +234,7 @@ func TestRepack(t *testing.T) { } for h := range removeBlobs { - if _, found := idx.Lookup(h.ID, h.Type); found { + if _, found := repo.LookupBlobSize(h.ID, h.Type); found { t.Errorf("blob %v still contained in the repo", h) } } diff --git a/internal/repository/repository.go b/internal/repository/repository.go index f5d07a08b..7e4f28d19 100644 --- a/internal/repository/repository.go +++ b/internal/repository/repository.go @@ -146,8 +146,8 @@ func (r *Repository) LoadBlob(ctx context.Context, t restic.BlobType, id restic. debug.Log("load %v with id %v (buf len %v, cap %d)", t, id, len(buf), cap(buf)) // lookup packs - blobs, found := r.idx.Lookup(id, t) - if !found { + blobs := r.idx.Lookup(id, t) + if len(blobs) == 0 { debug.Log("id %v not found in index", id) return nil, errors.Errorf("id %v not found in repository", id) } diff --git a/internal/restic/repository.go b/internal/restic/repository.go index 33299b13f..a38452bdc 100644 --- a/internal/restic/repository.go +++ b/internal/restic/repository.go @@ -60,7 +60,7 @@ type Lister interface { // Index keeps track of the blobs are stored within files. type Index interface { Has(ID, BlobType) bool - Lookup(ID, BlobType) ([]PackedBlob, bool) + Lookup(ID, BlobType) []PackedBlob Count(BlobType) uint // Each returns a channel that yields all blobs known to the index. When diff --git a/internal/restorer/filerestorer.go b/internal/restorer/filerestorer.go index f8c9881d0..edb2faf62 100644 --- a/internal/restorer/filerestorer.go +++ b/internal/restorer/filerestorer.go @@ -51,7 +51,7 @@ type packInfo struct { // fileRestorer restores set of files type fileRestorer struct { key *crypto.Key - idx func(restic.ID, restic.BlobType) ([]restic.PackedBlob, bool) + idx func(restic.ID, restic.BlobType) []restic.PackedBlob packLoader func(ctx context.Context, h restic.Handle, length int, offset int64, fn func(rd io.Reader) error) error filesWriter *filesWriter @@ -63,7 +63,7 @@ type fileRestorer struct { func newFileRestorer(dst string, packLoader func(ctx context.Context, h restic.Handle, length int, offset int64, fn func(rd io.Reader) error) error, key *crypto.Key, - idx func(restic.ID, restic.BlobType) ([]restic.PackedBlob, bool)) *fileRestorer { + idx func(restic.ID, restic.BlobType) []restic.PackedBlob) *fileRestorer { return &fileRestorer{ key: key, @@ -88,8 +88,8 @@ func (r *fileRestorer) forEachBlob(blobIDs []restic.ID, fn func(packID restic.ID } for _, blobID := range blobIDs { - packs, found := r.idx(blobID, restic.DataBlob) - if !found { + packs := r.idx(blobID, restic.DataBlob) + if len(packs) == 0 { return errors.Errorf("Unknown blob %s", blobID.String()) } fn(packs[0].PackID, packs[0].Blob) @@ -208,13 +208,11 @@ func (r *fileRestorer) downloadPack(ctx context.Context, pack *packInfo) { }) } else if packsMap, ok := file.blobs.(map[restic.ID][]fileBlobInfo); ok { for _, blob := range packsMap[pack.id] { - idxPacks, found := r.idx(blob.id, restic.DataBlob) - if found { - for _, idxPack := range idxPacks { - if idxPack.PackID.Equal(pack.id) { - addBlob(idxPack.Blob, blob.offset) - break - } + idxPacks := r.idx(blob.id, restic.DataBlob) + for _, idxPack := range idxPacks { + if idxPack.PackID.Equal(pack.id) { + addBlob(idxPack.Blob, blob.offset) + break } } } diff --git a/internal/restorer/filerestorer_test.go b/internal/restorer/filerestorer_test.go index 9d362b13c..a1b9b17f8 100644 --- a/internal/restorer/filerestorer_test.go +++ b/internal/restorer/filerestorer_test.go @@ -39,9 +39,9 @@ type TestRepo struct { loader func(ctx context.Context, h restic.Handle, length int, offset int64, fn func(rd io.Reader) error) error } -func (i *TestRepo) Lookup(blobID restic.ID, _ restic.BlobType) ([]restic.PackedBlob, bool) { - packs, found := i.blobs[blobID] - return packs, found +func (i *TestRepo) Lookup(blobID restic.ID, _ restic.BlobType) []restic.PackedBlob { + packs := i.blobs[blobID] + return packs } func (i *TestRepo) packName(pack *packInfo) string { diff --git a/internal/restorer/restorer.go b/internal/restorer/restorer.go index 7f497c5d6..415155de7 100644 --- a/internal/restorer/restorer.go +++ b/internal/restorer/restorer.go @@ -5,7 +5,6 @@ import ( "os" "path/filepath" - "github.com/restic/restic/internal/crypto" "github.com/restic/restic/internal/errors" "github.com/restic/restic/internal/debug" @@ -313,8 +312,7 @@ func (res *Restorer) VerifyFiles(ctx context.Context, dst string) (int, error) { offset := int64(0) for _, blobID := range node.Content { - blobs, _ := res.repo.Index().Lookup(blobID, restic.DataBlob) - length := blobs[0].Length - uint(crypto.Extension) + length, _ := res.repo.LookupBlobSize(blobID, restic.DataBlob) buf := make([]byte, length) // TODO do I want to reuse the buffer somehow? _, err = file.ReadAt(buf, offset) if err != nil {