repository/master_index: Optimize Index.Lookup()

When looking up a blob in the master index, with several
indexes present in the master index, a significant amount of time
is spent generating errors for each failed lookup.  However, these
errors are often used to check if a blob is present, but the contents
are not inspected making the overhead of the error not useful.

Instead, change Index.Lookup (and Index.LookupSize) to instead return
a boolean denoting if the blob was found instead of an error.  Also change
all the calls to these functions to handle the new function signature.

benchmark                                            old ns/op     new ns/op     delta
BenchmarkMasterIndexLookupSingleIndex-6              820           897           +9.39%
BenchmarkMasterIndexLookupMultipleIndex-6            12821         2001          -84.39%
BenchmarkMasterIndexLookupSingleIndexUnknown-6       5378          492           -90.85%
BenchmarkMasterIndexLookupMultipleIndexUnknown-6     17026         1649          -90.31%

benchmark                                            old allocs     new allocs     delta
BenchmarkMasterIndexLookupSingleIndex-6              9              9              +0.00%
BenchmarkMasterIndexLookupMultipleIndex-6            59             19             -67.80%
BenchmarkMasterIndexLookupSingleIndexUnknown-6       22             6              -72.73%
BenchmarkMasterIndexLookupMultipleIndexUnknown-6     72             16             -77.78%

benchmark                                            old bytes     new bytes     delta
BenchmarkMasterIndexLookupSingleIndex-6              160           160           +0.00%
BenchmarkMasterIndexLookupMultipleIndex-6            3200          240           -92.50%
BenchmarkMasterIndexLookupSingleIndexUnknown-6       1232          48            -96.10%
BenchmarkMasterIndexLookupMultipleIndexUnknown-6     4272          128           -97.00%
This commit is contained in:
Matthew Dawson 2018-01-12 01:20:12 -05:00
parent ebce4b2581
commit df2c03a6a4
No known key found for this signature in database
GPG key ID: 404D7F645F682028
15 changed files with 192 additions and 68 deletions

View file

@ -165,8 +165,8 @@ func runCat(gopts GlobalOptions, args []string) error {
case "blob":
for _, t := range []restic.BlobType{restic.DataBlob, restic.TreeBlob} {
list, err := repo.Index().Lookup(id, t)
if err != nil {
list, found := repo.Index().Lookup(id, t)
if !found {
continue
}
blob := list[0]

View file

@ -135,9 +135,9 @@ func updateBlobs(repo restic.Repository, blobs restic.BlobSet, stats *DiffStat)
stats.TreeBlobs++
}
size, err := repo.LookupBlobSize(h.ID, h.Type)
if err != nil {
Warnf("unable to find blob size for %v: %v\n", h, err)
size, found := repo.LookupBlobSize(h.ID, h.Type)
if !found {
Warnf("unable to find blob size for %v\n", h)
continue
}

View file

@ -59,9 +59,9 @@ func splitPath(path string) []string {
func dumpNode(ctx context.Context, repo restic.Repository, node *restic.Node) error {
var buf []byte
for _, id := range node.Content {
size, err := repo.LookupBlobSize(id, restic.DataBlob)
if err != nil {
return err
size, found := repo.LookupBlobSize(id, restic.DataBlob)
if !found {
return errors.Errorf("id %v not found in repository", id)
}
buf = buf[:cap(buf)]

View file

@ -43,9 +43,9 @@ func checkSavedFile(t *testing.T, repo restic.Repository, treeID restic.ID, name
// check blobs
for i, id := range node.Content {
size, err := repo.LookupBlobSize(id, restic.DataBlob)
if err != nil {
t.Fatal(err)
size, found := repo.LookupBlobSize(id, restic.DataBlob)
if !found {
t.Fatal("Failed to find blob", id.Str())
}
buf := restic.NewBlobBuffer(int(size))
@ -55,7 +55,7 @@ func checkSavedFile(t *testing.T, repo restic.Repository, treeID restic.ID, name
}
buf2 := make([]byte, int(size))
_, err = io.ReadFull(rd, buf2)
_, err := io.ReadFull(rd, buf2)
if err != nil {
t.Fatal(err)
}

View file

@ -86,8 +86,8 @@ func (arch *Archiver) isKnownBlob(id restic.ID, t restic.BlobType) bool {
arch.knownBlobs.Insert(id)
_, err := arch.repo.Index().Lookup(id, t)
if err == nil {
_, found := arch.repo.Index().Lookup(id, t)
if found {
return true
}

View file

@ -4,6 +4,7 @@
package fuse
import (
"github.com/restic/restic/internal/errors"
"github.com/restic/restic/internal/restic"
"github.com/restic/restic/internal/debug"
@ -36,9 +37,10 @@ func newFile(ctx context.Context, root *Root, inode uint64, node *restic.Node) (
for i, id := range node.Content {
size, ok := root.blobSizeCache.Lookup(id)
if !ok {
size, err = root.repo.LookupBlobSize(id, restic.DataBlob)
if err != nil {
return nil, err
var found bool
size, found = root.repo.LookupBlobSize(id, restic.DataBlob)
if !found {
return nil, errors.Errorf("id %v not found in repository", id)
}
}

View file

@ -81,8 +81,8 @@ func TestFuseFile(t *testing.T) {
memfile []byte
)
for _, id := range content {
size, err := repo.LookupBlobSize(id, restic.DataBlob)
rtest.OK(t, err)
size, found := repo.LookupBlobSize(id, restic.DataBlob)
rtest.Assert(t, found, "Expected to find blob id %v", id)
filesize += uint64(size)
buf := restic.NewBlobBuffer(int(size))

View file

@ -110,7 +110,7 @@ func (idx *Index) Store(blob restic.PackedBlob) {
}
// Lookup queries the index for the blob ID and returns a restic.PackedBlob.
func (idx *Index) Lookup(id restic.ID, tpe restic.BlobType) (blobs []restic.PackedBlob, err error) {
func (idx *Index) Lookup(id restic.ID, tpe restic.BlobType) (blobs []restic.PackedBlob, found bool) {
idx.m.Lock()
defer idx.m.Unlock()
@ -136,11 +136,11 @@ func (idx *Index) Lookup(id restic.ID, tpe restic.BlobType) (blobs []restic.Pack
blobs = append(blobs, blob)
}
return blobs, nil
return blobs, true
}
debug.Log("id %v not found", id.Str())
return nil, errors.Errorf("id %v not found in index", id)
return nil, false
}
// ListPack returns a list of blobs contained in a pack.
@ -180,13 +180,13 @@ func (idx *Index) Has(id restic.ID, tpe restic.BlobType) bool {
// LookupSize returns the length of the plaintext content of the blob with the
// given id.
func (idx *Index) LookupSize(id restic.ID, tpe restic.BlobType) (plaintextLength uint, err error) {
blobs, err := idx.Lookup(id, tpe)
if err != nil {
return 0, err
func (idx *Index) LookupSize(id restic.ID, tpe restic.BlobType) (plaintextLength uint, found bool) {
blobs, found := idx.Lookup(id, tpe)
if !found {
return 0, found
}
return uint(restic.PlaintextLength(int(blobs[0].Length))), nil
return uint(restic.PlaintextLength(int(blobs[0].Length))), true
}
// Supersedes returns the list of indexes this index supersedes, if any.

View file

@ -65,8 +65,8 @@ func TestIndexSerialize(t *testing.T) {
rtest.OK(t, err)
for _, testBlob := range tests {
list, err := idx.Lookup(testBlob.id, testBlob.tpe)
rtest.OK(t, err)
list, found := idx.Lookup(testBlob.id, testBlob.tpe)
rtest.Assert(t, found, "Expected to find blob id %v", testBlob.id.Str())
if len(list) != 1 {
t.Errorf("expected one result for blob %v, got %v: %v", testBlob.id.Str(), len(list), list)
@ -78,8 +78,8 @@ func TestIndexSerialize(t *testing.T) {
rtest.Equals(t, testBlob.offset, result.Offset)
rtest.Equals(t, testBlob.length, result.Length)
list2, err := idx2.Lookup(testBlob.id, testBlob.tpe)
rtest.OK(t, err)
list2, found := idx2.Lookup(testBlob.id, testBlob.tpe)
rtest.Assert(t, found, "Expected to find blob id %v", testBlob.id)
if len(list2) != 1 {
t.Errorf("expected one result for blob %v, got %v: %v", testBlob.id.Str(), len(list2), list2)
@ -146,8 +146,8 @@ func TestIndexSerialize(t *testing.T) {
// all new blobs must be in the index
for _, testBlob := range newtests {
list, err := idx3.Lookup(testBlob.id, testBlob.tpe)
rtest.OK(t, err)
list, found := idx3.Lookup(testBlob.id, testBlob.tpe)
rtest.Assert(t, found, "Expected to find blob id %v", testBlob.id.Str())
if len(list) != 1 {
t.Errorf("expected one result for blob %v, got %v: %v", testBlob.id.Str(), len(list), list)
@ -293,8 +293,8 @@ func TestIndexUnserialize(t *testing.T) {
rtest.OK(t, err)
for _, test := range exampleTests {
list, err := idx.Lookup(test.id, test.tpe)
rtest.OK(t, err)
list, found := idx.Lookup(test.id, test.tpe)
rtest.Assert(t, found, "Expected to find blob id %v", test.id.Str())
if len(list) != 1 {
t.Errorf("expected one result for blob %v, got %v: %v", test.id.Str(), len(list), list)
@ -341,8 +341,8 @@ func TestIndexUnserializeOld(t *testing.T) {
rtest.OK(t, err)
for _, test := range exampleTests {
list, err := idx.Lookup(test.id, test.tpe)
rtest.OK(t, err)
list, found := idx.Lookup(test.id, test.tpe)
rtest.Assert(t, found, "Expected to find blob id %v", test.id.Str())
if len(list) != 1 {
t.Errorf("expected one result for blob %v, got %v: %v", test.id.Str(), len(list), list)

View file

@ -4,7 +4,6 @@ import (
"context"
"sync"
"github.com/restic/restic/internal/errors"
"github.com/restic/restic/internal/restic"
"github.com/restic/restic/internal/debug"
@ -22,36 +21,36 @@ func NewMasterIndex() *MasterIndex {
}
// Lookup queries all known Indexes for the ID and returns the first match.
func (mi *MasterIndex) Lookup(id restic.ID, tpe restic.BlobType) (blobs []restic.PackedBlob, err error) {
func (mi *MasterIndex) Lookup(id restic.ID, tpe restic.BlobType) (blobs []restic.PackedBlob, found bool) {
mi.idxMutex.RLock()
defer mi.idxMutex.RUnlock()
debug.Log("looking up id %v, tpe %v", id.Str(), tpe)
for _, idx := range mi.idx {
blobs, err = idx.Lookup(id, tpe)
if err == nil {
blobs, found = idx.Lookup(id, tpe)
if found {
debug.Log("found id %v: %v", id.Str(), blobs)
return
}
}
debug.Log("id %v not found in any index", id.Str())
return nil, errors.Errorf("id %v not found in any index", id)
return nil, false
}
// LookupSize queries all known Indexes for the ID and returns the first match.
func (mi *MasterIndex) LookupSize(id restic.ID, tpe restic.BlobType) (uint, error) {
func (mi *MasterIndex) LookupSize(id restic.ID, tpe restic.BlobType) (uint, bool) {
mi.idxMutex.RLock()
defer mi.idxMutex.RUnlock()
for _, idx := range mi.idx {
if idx.Has(id, tpe) {
return idx.LookupSize(id, tpe)
if size, found := idx.LookupSize(id, tpe); found {
return size, found
}
}
return 0, errors.Errorf("id %v not found in any index", id)
return 0, false
}
// ListPack returns the list of blobs in a pack. The first matching index is

View file

@ -0,0 +1,123 @@
package repository_test
import (
"math/rand"
"testing"
"github.com/restic/restic/internal/repository"
"github.com/restic/restic/internal/restic"
rtest "github.com/restic/restic/internal/test"
)
func TestMasterIndexLookup(t *testing.T) {
idInIdx1 := restic.NewRandomID()
idInIdx2 := restic.NewRandomID()
blob1 := restic.PackedBlob{
PackID: restic.NewRandomID(),
Blob: restic.Blob{
Type: restic.DataBlob,
ID: idInIdx1,
Length: 10,
Offset: 0,
},
}
blob2 := restic.PackedBlob{
PackID: restic.NewRandomID(),
Blob: restic.Blob{
Type: restic.DataBlob,
ID: idInIdx2,
Length: 100,
Offset: 10,
},
}
idx1 := repository.NewIndex()
idx1.Store(blob1)
idx2 := repository.NewIndex()
idx2.Store(blob2)
mIdx := repository.NewMasterIndex()
mIdx.Insert(idx1)
mIdx.Insert(idx2)
blobs, found := mIdx.Lookup(idInIdx1, restic.DataBlob)
rtest.Assert(t, found, "Expected to find blob id %v from index 1", idInIdx1)
rtest.Equals(t, []restic.PackedBlob{blob1}, blobs)
blobs, found = mIdx.Lookup(idInIdx2, restic.DataBlob)
rtest.Assert(t, found, "Expected to find blob id %v from index 2", idInIdx2)
rtest.Equals(t, []restic.PackedBlob{blob2}, blobs)
blobs, found = mIdx.Lookup(restic.NewRandomID(), restic.DataBlob)
rtest.Assert(t, !found, "Expected to not find a blob when fetching with a random id")
rtest.Assert(t, blobs == nil, "Expected no blobs when fetching with a random id")
}
func BenchmarkMasterIndexLookupSingleIndex(b *testing.B) {
idx1, lookupID := createRandomIndex(rand.New(rand.NewSource(0)))
mIdx := repository.NewMasterIndex()
mIdx.Insert(idx1)
b.ResetTimer()
for i := 0; i < b.N; i++ {
mIdx.Lookup(lookupID, restic.DataBlob)
}
}
func BenchmarkMasterIndexLookupMultipleIndex(b *testing.B) {
rng := rand.New(rand.NewSource(0))
mIdx := repository.NewMasterIndex()
for i := 0; i < 5; i++ {
idx, _ := createRandomIndex(rand.New(rng))
mIdx.Insert(idx)
}
idx1, lookupID := createRandomIndex(rand.New(rng))
mIdx.Insert(idx1)
b.ResetTimer()
for i := 0; i < b.N; i++ {
mIdx.Lookup(lookupID, restic.DataBlob)
}
}
func BenchmarkMasterIndexLookupSingleIndexUnknown(b *testing.B) {
lookupID := restic.NewRandomID()
idx1, _ := createRandomIndex(rand.New(rand.NewSource(0)))
mIdx := repository.NewMasterIndex()
mIdx.Insert(idx1)
b.ResetTimer()
for i := 0; i < b.N; i++ {
mIdx.Lookup(lookupID, restic.DataBlob)
}
}
func BenchmarkMasterIndexLookupMultipleIndexUnknown(b *testing.B) {
rng := rand.New(rand.NewSource(0))
lookupID := restic.NewRandomID()
mIdx := repository.NewMasterIndex()
for i := 0; i < 5; i++ {
idx, _ := createRandomIndex(rand.New(rng))
mIdx.Insert(idx)
}
idx1, _ := createRandomIndex(rand.New(rng))
mIdx.Insert(idx1)
b.ResetTimer()
for i := 0; i < b.N; i++ {
mIdx.Lookup(lookupID, restic.DataBlob)
}
}

View file

@ -114,9 +114,9 @@ func findPacksForBlobs(t *testing.T, repo restic.Repository, blobs restic.BlobSe
idx := repo.Index()
for h := range blobs {
list, err := idx.Lookup(h.ID, h.Type)
if err != nil {
t.Fatal(err)
list, found := idx.Lookup(h.ID, h.Type)
if !found {
t.Fatal("Failed to find blob", h.ID.Str(), "with type", h.Type)
}
for _, pb := range list {
@ -215,8 +215,8 @@ func TestRepack(t *testing.T) {
idx := repo.Index()
for h := range keepBlobs {
list, err := idx.Lookup(h.ID, h.Type)
if err != nil {
list, found := idx.Lookup(h.ID, h.Type)
if !found {
t.Errorf("unable to find blob %v in repo", h.ID.Str())
continue
}
@ -234,7 +234,7 @@ func TestRepack(t *testing.T) {
}
for h := range removeBlobs {
if _, err := idx.Lookup(h.ID, h.Type); err == nil {
if _, found := idx.Lookup(h.ID, h.Type); found {
t.Errorf("blob %v still contained in the repo", h)
}
}

View file

@ -115,10 +115,10 @@ func (r *Repository) loadBlob(ctx context.Context, id restic.ID, t restic.BlobTy
debug.Log("load %v with id %v (buf len %v, cap %d)", t, id.Str(), len(plaintextBuf), cap(plaintextBuf))
// lookup packs
blobs, err := r.idx.Lookup(id, t)
if err != nil {
debug.Log("id %v not found in index: %v", id.Str(), err)
return 0, err
blobs, found := r.idx.Lookup(id, t)
if !found {
debug.Log("id %v not found in index", id.Str())
return 0, errors.Errorf("id %v not found in repository", id)
}
// try cached pack files first
@ -193,7 +193,7 @@ func (r *Repository) LoadJSONUnpacked(ctx context.Context, t restic.FileType, id
}
// LookupBlobSize returns the size of blob id.
func (r *Repository) LookupBlobSize(id restic.ID, tpe restic.BlobType) (uint, error) {
func (r *Repository) LookupBlobSize(id restic.ID, tpe restic.BlobType) (uint, bool) {
return r.idx.LookupSize(id, tpe)
}
@ -588,9 +588,9 @@ func (r *Repository) Close() error {
// space.
func (r *Repository) LoadBlob(ctx context.Context, t restic.BlobType, id restic.ID, buf []byte) (int, error) {
debug.Log("load blob %v into buf (len %v, cap %v)", id.Str(), len(buf), cap(buf))
size, err := r.idx.LookupSize(id, t)
if err != nil {
return 0, err
size, found := r.idx.LookupSize(id, t)
if !found {
return 0, errors.Errorf("id %v not found in repository", id)
}
if cap(buf) < restic.CiphertextLength(int(size)) {
@ -622,9 +622,9 @@ func (r *Repository) SaveBlob(ctx context.Context, t restic.BlobType, buf []byte
func (r *Repository) LoadTree(ctx context.Context, id restic.ID) (*restic.Tree, error) {
debug.Log("load tree %v", id.Str())
size, err := r.idx.LookupSize(id, restic.TreeBlob)
if err != nil {
return nil, err
size, found := r.idx.LookupSize(id, restic.TreeBlob)
if !found {
return nil, errors.Errorf("tree %v not found in repository", id)
}
debug.Log("size is %d, create buffer", size)

View file

@ -285,9 +285,9 @@ func (node Node) createFileAt(ctx context.Context, path string, repo Repository,
func (node Node) writeNodeContent(ctx context.Context, repo Repository, f *os.File) error {
var buf []byte
for _, id := range node.Content {
size, err := repo.LookupBlobSize(id, DataBlob)
if err != nil {
return err
size, found := repo.LookupBlobSize(id, DataBlob)
if !found {
return errors.Errorf("id %v not found in repository", id)
}
buf = buf[:cap(buf)]

View file

@ -24,7 +24,7 @@ type Repository interface {
Config() Config
LookupBlobSize(ID, BlobType) (uint, error)
LookupBlobSize(ID, BlobType) (uint, bool)
List(context.Context, FileType) <-chan ID
ListPack(context.Context, ID) ([]Blob, int64, error)
@ -52,7 +52,7 @@ type Lister interface {
// Index keeps track of the blobs are stored within files.
type Index interface {
Has(ID, BlobType) bool
Lookup(ID, BlobType) ([]PackedBlob, error)
Lookup(ID, BlobType) ([]PackedBlob, bool)
Count(BlobType) uint
// Each returns a channel that yields all blobs known to the index. When