Optimize Repository.ListPack()

Use pack file size returned by Backend.List() to avoid extra per-pack
Backend.Stat() requests

Signed-off-by: Igor Fedorenko <igor@ifedorenko.com>
This commit is contained in:
Igor Fedorenko 2018-01-23 21:43:21 -05:00
parent 7e6bfdae79
commit 0084e42cb6
6 changed files with 27 additions and 22 deletions

View file

@ -0,0 +1,7 @@
Enhancement: Reduce number of remote requests during Repository.ListPack()
This change eliminates redundant remote repository calls and improves
repository reindex and purge time.
https://github.com/restic/restic/issues/1567
https://github.com/restic/restic/pull/1575

View file

@ -49,23 +49,21 @@ func New(ctx context.Context, repo restic.Repository, ignorePacks restic.IDSet,
for job := range ch {
p.Report(restic.Stat{Blobs: 1})
packID := job.Data.(restic.ID)
j := job.Result.(list.Result)
if job.Error != nil {
cause := errors.Cause(job.Error)
if _, ok := cause.(pack.InvalidFileError); ok {
invalidFiles = append(invalidFiles, packID)
invalidFiles = append(invalidFiles, j.PackID())
continue
}
fmt.Fprintf(os.Stderr, "pack file cannot be listed %v: %v\n", packID.Str(), job.Error)
fmt.Fprintf(os.Stderr, "pack file cannot be listed %v: %v\n", j.PackID(), job.Error)
continue
}
j := job.Result.(list.Result)
debug.Log("pack %v contains %d blobs", j.PackID(), len(j.Entries()))
debug.Log("pack %v contains %d blobs", packID.Str(), len(j.Entries()))
err := idx.AddPack(packID, j.Size(), j.Entries())
err := idx.AddPack(j.PackID(), j.Size(), j.Entries())
if err != nil {
return nil, nil, err
}

View file

@ -12,7 +12,7 @@ const listPackWorkers = 10
// Lister combines lists packs in a repo and blobs in a pack.
type Lister interface {
List(context.Context, restic.FileType, func(restic.ID, int64) error) error
ListPack(context.Context, restic.ID) ([]restic.Blob, int64, error)
ListPack(context.Context, restic.ID, int64) ([]restic.Blob, int64, error)
}
// Result is returned in the channel from LoadBlobsFromAllPacks.
@ -39,12 +39,17 @@ func (l Result) Entries() []restic.Blob {
// AllPacks sends the contents of all packs to ch.
func AllPacks(ctx context.Context, repo Lister, ignorePacks restic.IDSet, ch chan<- worker.Job) {
type fileInfo struct {
id restic.ID
size int64
}
f := func(ctx context.Context, job worker.Job) (interface{}, error) {
packID := job.Data.(restic.ID)
entries, size, err := repo.ListPack(ctx, packID)
packInfo := job.Data.(fileInfo)
entries, size, err := repo.ListPack(ctx, packInfo.id, packInfo.size)
return Result{
packID: packID,
packID: packInfo.id,
size: size,
entries: entries,
}, err
@ -62,7 +67,7 @@ func AllPacks(ctx context.Context, repo Lister, ignorePacks restic.IDSet, ch cha
}
select {
case jobCh <- worker.Job{Data: id}:
case jobCh <- worker.Job{Data: fileInfo{id: id, size: size}, Result: Result{packID: id}}:
case <-ctx.Done():
return ctx.Err()
}

View file

@ -75,7 +75,7 @@ func selectBlobs(t *testing.T, repo restic.Repository, p float32) (list1, list2
blobs := restic.NewBlobSet()
err := repo.List(context.TODO(), restic.DataFile, func(id restic.ID, size int64) error {
entries, _, err := repo.ListPack(context.TODO(), id)
entries, _, err := repo.ListPack(context.TODO(), id, size)
if err != nil {
t.Fatalf("error listing pack %v: %v", id, err)
}

View file

@ -549,20 +549,15 @@ func (r *Repository) List(ctx context.Context, t restic.FileType, fn func(restic
// ListPack returns the list of blobs saved in the pack id and the length of
// the file as stored in the backend.
func (r *Repository) ListPack(ctx context.Context, id restic.ID) ([]restic.Blob, int64, error) {
func (r *Repository) ListPack(ctx context.Context, id restic.ID, size int64) ([]restic.Blob, int64, error) {
h := restic.Handle{Type: restic.DataFile, Name: id.String()}
blobInfo, err := r.Backend().Stat(ctx, h)
blobs, err := pack.List(r.Key(), restic.ReaderAt(r.Backend(), h), size)
if err != nil {
return nil, 0, err
}
blobs, err := pack.List(r.Key(), restic.ReaderAt(r.Backend(), h), blobInfo.Size)
if err != nil {
return nil, 0, err
}
return blobs, blobInfo.Size, nil
return blobs, size, nil
}
// Delete calls backend.Delete() if implemented, and returns an error

View file

@ -32,7 +32,7 @@ type Repository interface {
//
// The function fn is called in the same Goroutine List() was called from.
List(ctx context.Context, t FileType, fn func(ID, int64) error) error
ListPack(context.Context, ID) ([]Blob, int64, error)
ListPack(context.Context, ID, int64) ([]Blob, int64, error)
Flush(context.Context) error