Merge pull request from greatroar/optimize-sortbycached

Optimize sorting blobs by cache status
This commit is contained in:
MichaelEischer 2020-07-25 12:35:42 +02:00 committed by GitHub
commit bbc960f957
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 87 additions and 9 deletions

View file

@ -111,29 +111,33 @@ func (r *Repository) LoadAndDecrypt(ctx context.Context, buf []byte, t restic.Fi
return plaintext, nil
}
// sortCachedPacks moves all cached pack files to the front of blobs.
func (r *Repository) sortCachedPacks(blobs []restic.PackedBlob) []restic.PackedBlob {
if r.Cache == nil {
return blobs
type haver interface {
Has(restic.Handle) bool
}
// sortCachedPacksFirst moves all cached pack files to the front of blobs.
func sortCachedPacksFirst(cache haver, blobs []restic.PackedBlob) {
if cache == nil {
return
}
// no need to sort a list with one element
if len(blobs) == 1 {
return blobs
return
}
cached := make([]restic.PackedBlob, 0, len(blobs)/2)
cached := blobs[:0]
noncached := make([]restic.PackedBlob, 0, len(blobs)/2)
for _, blob := range blobs {
if r.Cache.Has(restic.Handle{Type: restic.DataFile, Name: blob.PackID.String()}) {
if cache.Has(restic.Handle{Type: restic.DataFile, Name: blob.PackID.String()}) {
cached = append(cached, blob)
continue
}
noncached = append(noncached, blob)
}
return append(cached, noncached...)
copy(blobs[len(cached):], noncached)
}
// LoadBlob loads a blob of type t from the repository.
@ -149,7 +153,7 @@ func (r *Repository) LoadBlob(ctx context.Context, t restic.BlobType, id restic.
}
// try cached pack files first
blobs = r.sortCachedPacks(blobs)
sortCachedPacksFirst(r.Cache, blobs)
var lastError error
for _, blob := range blobs {

View file

@ -0,0 +1,74 @@
package repository
import (
"math/rand"
"sort"
"testing"
"github.com/restic/restic/internal/restic"
rtest "github.com/restic/restic/internal/test"
)
type mapcache map[restic.Handle]bool
func (c mapcache) Has(h restic.Handle) bool { return c[h] }
func TestSortCachedPacksFirst(t *testing.T) {
var (
blobs, sorted [100]restic.PackedBlob
cache = make(mapcache)
r = rand.New(rand.NewSource(1261))
)
for i := 0; i < len(blobs); i++ {
var id restic.ID
r.Read(id[:])
blobs[i] = restic.PackedBlob{PackID: id}
if i%3 == 0 {
h := restic.Handle{Name: id.String(), Type: restic.DataFile}
cache[h] = true
}
}
copy(sorted[:], blobs[:])
sort.SliceStable(sorted[:], func(i, j int) bool {
hi := restic.Handle{Type: restic.DataFile, Name: sorted[i].PackID.String()}
hj := restic.Handle{Type: restic.DataFile, Name: sorted[j].PackID.String()}
return cache.Has(hi) && !cache.Has(hj)
})
sortCachedPacksFirst(cache, blobs[:])
rtest.Equals(t, sorted, blobs)
}
func BenchmarkSortCachedPacksFirst(b *testing.B) {
const nblobs = 512 // Corresponds to a file of ca. 2GB.
var (
blobs [nblobs]restic.PackedBlob
cache = make(mapcache)
r = rand.New(rand.NewSource(1261))
)
for i := 0; i < nblobs; i++ {
var id restic.ID
r.Read(id[:])
blobs[i] = restic.PackedBlob{PackID: id}
if i%3 == 0 {
h := restic.Handle{Name: id.String(), Type: restic.DataFile}
cache[h] = true
}
}
var cpy [nblobs]restic.PackedBlob
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
copy(cpy[:], blobs[:])
sortCachedPacksFirst(cache, cpy[:])
}
}