Merge pull request #1575 from ifedorenko/1567_optimize-repository-ListPack

Optimize Repository.ListPack()
This commit is contained in:
Alexander Neumann 2018-01-24 20:01:15 +01:00
commit be205563be
6 changed files with 27 additions and 22 deletions

View file

@ -0,0 +1,7 @@
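Enhancement: Reduce the number of remote requests during Repository.ListPack()
This change eliminates redundant remote repository calls and reduces the
time needed to reindex and purge a repository. ListPack() now takes the
pack size already reported by List() instead of requesting it from the
backend again for every pack.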
https://github.com/restic/restic/issues/1567
https://github.com/restic/restic/pull/1575

View file

@ -49,23 +49,21 @@ func New(ctx context.Context, repo restic.Repository, ignorePacks restic.IDSet,
for job := range ch { for job := range ch {
p.Report(restic.Stat{Blobs: 1}) p.Report(restic.Stat{Blobs: 1})
packID := job.Data.(restic.ID) j := job.Result.(list.Result)
if job.Error != nil { if job.Error != nil {
cause := errors.Cause(job.Error) cause := errors.Cause(job.Error)
if _, ok := cause.(pack.InvalidFileError); ok { if _, ok := cause.(pack.InvalidFileError); ok {
invalidFiles = append(invalidFiles, packID) invalidFiles = append(invalidFiles, j.PackID())
continue continue
} }
fmt.Fprintf(os.Stderr, "pack file cannot be listed %v: %v\n", packID.Str(), job.Error) fmt.Fprintf(os.Stderr, "pack file cannot be listed %v: %v\n", j.PackID(), job.Error)
continue continue
} }
j := job.Result.(list.Result) debug.Log("pack %v contains %d blobs", j.PackID(), len(j.Entries()))
debug.Log("pack %v contains %d blobs", packID.Str(), len(j.Entries())) err := idx.AddPack(j.PackID(), j.Size(), j.Entries())
err := idx.AddPack(packID, j.Size(), j.Entries())
if err != nil { if err != nil {
return nil, nil, err return nil, nil, err
} }

View file

@ -12,7 +12,7 @@ const listPackWorkers = 10
// Lister combines listing packs in a repo and listing blobs in a pack. // Lister combines listing packs in a repo and listing blobs in a pack.
type Lister interface { type Lister interface {
List(context.Context, restic.FileType, func(restic.ID, int64) error) error List(context.Context, restic.FileType, func(restic.ID, int64) error) error
ListPack(context.Context, restic.ID) ([]restic.Blob, int64, error) ListPack(context.Context, restic.ID, int64) ([]restic.Blob, int64, error)
} }
// Result is returned in the channel from AllPacks. // Result is returned in the channel from AllPacks.
@ -39,12 +39,17 @@ func (l Result) Entries() []restic.Blob {
// AllPacks sends the contents of all packs to ch. // AllPacks sends the contents of all packs to ch.
func AllPacks(ctx context.Context, repo Lister, ignorePacks restic.IDSet, ch chan<- worker.Job) { func AllPacks(ctx context.Context, repo Lister, ignorePacks restic.IDSet, ch chan<- worker.Job) {
type fileInfo struct {
id restic.ID
size int64
}
f := func(ctx context.Context, job worker.Job) (interface{}, error) { f := func(ctx context.Context, job worker.Job) (interface{}, error) {
packID := job.Data.(restic.ID) packInfo := job.Data.(fileInfo)
entries, size, err := repo.ListPack(ctx, packID) entries, size, err := repo.ListPack(ctx, packInfo.id, packInfo.size)
return Result{ return Result{
packID: packID, packID: packInfo.id,
size: size, size: size,
entries: entries, entries: entries,
}, err }, err
@ -62,7 +67,7 @@ func AllPacks(ctx context.Context, repo Lister, ignorePacks restic.IDSet, ch cha
} }
select { select {
case jobCh <- worker.Job{Data: id}: case jobCh <- worker.Job{Data: fileInfo{id: id, size: size}, Result: Result{packID: id}}:
case <-ctx.Done(): case <-ctx.Done():
return ctx.Err() return ctx.Err()
} }

View file

@ -75,7 +75,7 @@ func selectBlobs(t *testing.T, repo restic.Repository, p float32) (list1, list2
blobs := restic.NewBlobSet() blobs := restic.NewBlobSet()
err := repo.List(context.TODO(), restic.DataFile, func(id restic.ID, size int64) error { err := repo.List(context.TODO(), restic.DataFile, func(id restic.ID, size int64) error {
entries, _, err := repo.ListPack(context.TODO(), id) entries, _, err := repo.ListPack(context.TODO(), id, size)
if err != nil { if err != nil {
t.Fatalf("error listing pack %v: %v", id, err) t.Fatalf("error listing pack %v: %v", id, err)
} }

View file

@ -549,20 +549,15 @@ func (r *Repository) List(ctx context.Context, t restic.FileType, fn func(restic
// ListPack returns the list of blobs saved in the pack id and the length of // ListPack returns the list of blobs saved in the pack id and the length of
// the file as stored in the backend. // the file as stored in the backend.
func (r *Repository) ListPack(ctx context.Context, id restic.ID) ([]restic.Blob, int64, error) { func (r *Repository) ListPack(ctx context.Context, id restic.ID, size int64) ([]restic.Blob, int64, error) {
h := restic.Handle{Type: restic.DataFile, Name: id.String()} h := restic.Handle{Type: restic.DataFile, Name: id.String()}
blobInfo, err := r.Backend().Stat(ctx, h) blobs, err := pack.List(r.Key(), restic.ReaderAt(r.Backend(), h), size)
if err != nil { if err != nil {
return nil, 0, err return nil, 0, err
} }
blobs, err := pack.List(r.Key(), restic.ReaderAt(r.Backend(), h), blobInfo.Size) return blobs, size, nil
if err != nil {
return nil, 0, err
}
return blobs, blobInfo.Size, nil
} }
// Delete calls backend.Delete() if implemented, and returns an error // Delete calls backend.Delete() if implemented, and returns an error

View file

@ -32,7 +32,7 @@ type Repository interface {
// //
// The function fn is called in the same Goroutine List() was called from. // The function fn is called in the same Goroutine List() was called from.
List(ctx context.Context, t FileType, fn func(ID, int64) error) error List(ctx context.Context, t FileType, fn func(ID, int64) error) error
ListPack(context.Context, ID) ([]Blob, int64, error) ListPack(context.Context, ID, int64) ([]Blob, int64, error)
Flush(context.Context) error Flush(context.Context) error