repository/index: Optimize index.Has()
When backing up several million files (>14M tested here) with few changes, a large amount of time is spent failing to find an id in an index and creating an error to signify this. Since this is checked using the Has method, which doesn't use this error, this time creating the error is wasted. Instead, directly check if the given id and type are present in the index. This also avoids reporting all the packs containing this blob, further reducing cpu usage.
This commit is contained in:
parent
d77a326bb0
commit
539599d1f1
2 changed files with 55 additions and 5 deletions
|
@ -169,12 +169,13 @@ func (idx *Index) ListPack(id restic.ID) (list []restic.PackedBlob) {
|
|||
|
||||
// Has returns true iff the id is listed in the index.
|
||||
func (idx *Index) Has(id restic.ID, tpe restic.BlobType) bool {
|
||||
_, err := idx.Lookup(id, tpe)
|
||||
if err == nil {
|
||||
return true
|
||||
}
|
||||
idx.m.Lock()
|
||||
defer idx.m.Unlock()
|
||||
|
||||
return false
|
||||
h := restic.BlobHandle{ID: id, Type: tpe}
|
||||
|
||||
_, ok := idx.pack[h]
|
||||
return ok
|
||||
}
|
||||
|
||||
// LookupSize returns the length of the plaintext content of the blob with the
|
||||
|
|
|
@ -434,3 +434,52 @@ func BenchmarkIndexHasKnown(b *testing.B) {
|
|||
idx.Has(lookupID, restic.DataBlob)
|
||||
}
|
||||
}
|
||||
|
||||
func TestIndexHas(t *testing.T) {
|
||||
type testEntry struct {
|
||||
id restic.ID
|
||||
pack restic.ID
|
||||
tpe restic.BlobType
|
||||
offset, length uint
|
||||
}
|
||||
tests := []testEntry{}
|
||||
|
||||
idx := repository.NewIndex()
|
||||
|
||||
// create 50 packs with 20 blobs each
|
||||
for i := 0; i < 50; i++ {
|
||||
packID := restic.NewRandomID()
|
||||
|
||||
pos := uint(0)
|
||||
for j := 0; j < 20; j++ {
|
||||
id := restic.NewRandomID()
|
||||
length := uint(i*100 + j)
|
||||
idx.Store(restic.PackedBlob{
|
||||
Blob: restic.Blob{
|
||||
Type: restic.DataBlob,
|
||||
ID: id,
|
||||
Offset: pos,
|
||||
Length: length,
|
||||
},
|
||||
PackID: packID,
|
||||
})
|
||||
|
||||
tests = append(tests, testEntry{
|
||||
id: id,
|
||||
pack: packID,
|
||||
tpe: restic.DataBlob,
|
||||
offset: pos,
|
||||
length: length,
|
||||
})
|
||||
|
||||
pos += length
|
||||
}
|
||||
}
|
||||
|
||||
for _, testBlob := range tests {
|
||||
rtest.Assert(t, idx.Has(testBlob.id, testBlob.tpe), "Index reports not having data blob added to it")
|
||||
}
|
||||
|
||||
rtest.Assert(t, !idx.Has(restic.NewRandomID(), restic.DataBlob), "Index reports having a data blob not added to it")
|
||||
rtest.Assert(t, !idx.Has(tests[0].id, restic.TreeBlob), "Index reports having a tree blob added to it with the same id as a data blob")
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue