From 0084e42cb6a77efba29a79ca31d5c83b9e891b5a Mon Sep 17 00:00:00 2001 From: Igor Fedorenko Date: Tue, 23 Jan 2018 21:43:21 -0500 Subject: [PATCH] Optimize Repository.ListPack() Use pack file size returned by Backend.List() to avoid extra per-pack Backend.Stat() requests Signed-off-by: Igor Fedorenko --- changelog/0.8.2/pull-1575 | 7 +++++++ internal/index/index.go | 12 +++++------- internal/list/list.go | 15 ++++++++++----- internal/repository/repack_test.go | 2 +- internal/repository/repository.go | 11 +++-------- internal/restic/repository.go | 2 +- 6 files changed, 27 insertions(+), 22 deletions(-) create mode 100644 changelog/0.8.2/pull-1575 diff --git a/changelog/0.8.2/pull-1575 b/changelog/0.8.2/pull-1575 new file mode 100644 index 000000000..40a069917 --- /dev/null +++ b/changelog/0.8.2/pull-1575 @@ -0,0 +1,7 @@ +Enhancement: Reduce number of remote requests during Repository.ListPack() + +This change eliminates redundant remote repository calls and improves +repository reindex and purge time. + +https://github.com/restic/restic/issues/1567 +https://github.com/restic/restic/pull/1575 diff --git a/internal/index/index.go b/internal/index/index.go index 732dc4b9c..1b7e2797d 100644 --- a/internal/index/index.go +++ b/internal/index/index.go @@ -49,23 +49,21 @@ func New(ctx context.Context, repo restic.Repository, ignorePacks restic.IDSet, for job := range ch { p.Report(restic.Stat{Blobs: 1}) - packID := job.Data.(restic.ID) + j := job.Result.(list.Result) if job.Error != nil { cause := errors.Cause(job.Error) if _, ok := cause.(pack.InvalidFileError); ok { - invalidFiles = append(invalidFiles, packID) + invalidFiles = append(invalidFiles, j.PackID()) continue } - fmt.Fprintf(os.Stderr, "pack file cannot be listed %v: %v\n", packID.Str(), job.Error) + fmt.Fprintf(os.Stderr, "pack file cannot be listed %v: %v\n", j.PackID(), job.Error) continue } - j := job.Result.(list.Result) + debug.Log("pack %v contains %d blobs", j.PackID(), len(j.Entries())) - debug.Log("pack %v contains %d blobs", packID.Str(), len(j.Entries())) - - err := idx.AddPack(packID, j.Size(), j.Entries()) + err := idx.AddPack(j.PackID(), j.Size(), j.Entries()) if err != nil { return nil, nil, err } diff --git a/internal/list/list.go b/internal/list/list.go index ffcc08729..9332f520f 100644 --- a/internal/list/list.go +++ b/internal/list/list.go @@ -12,7 +12,7 @@ const listPackWorkers = 10 // Lister combines lists packs in a repo and blobs in a pack. type Lister interface { List(context.Context, restic.FileType, func(restic.ID, int64) error) error - ListPack(context.Context, restic.ID) ([]restic.Blob, int64, error) + ListPack(context.Context, restic.ID, int64) ([]restic.Blob, int64, error) } // Result is returned in the channel from LoadBlobsFromAllPacks. @@ -39,12 +39,17 @@ func (l Result) Entries() []restic.Blob { // AllPacks sends the contents of all packs to ch. func AllPacks(ctx context.Context, repo Lister, ignorePacks restic.IDSet, ch chan<- worker.Job) { + type fileInfo struct { + id restic.ID + size int64 + } + f := func(ctx context.Context, job worker.Job) (interface{}, error) { - packID := job.Data.(restic.ID) - entries, size, err := repo.ListPack(ctx, packID) + packInfo := job.Data.(fileInfo) + entries, size, err := repo.ListPack(ctx, packInfo.id, packInfo.size) return Result{ - packID: packID, + packID: packInfo.id, size: size, entries: entries, }, err @@ -62,7 +67,7 @@ func AllPacks(ctx context.Context, repo Lister, ignorePacks restic.IDSet, ch cha } select { - case jobCh <- worker.Job{Data: id}: + case jobCh <- worker.Job{Data: fileInfo{id: id, size: size}, Result: Result{packID: id}}: case <-ctx.Done(): return ctx.Err() } diff --git a/internal/repository/repack_test.go b/internal/repository/repack_test.go index 458362c59..d0b1d907d 100644 --- a/internal/repository/repack_test.go +++ b/internal/repository/repack_test.go @@ -75,7 +75,7 @@ func selectBlobs(t *testing.T, repo restic.Repository, p float32) (list1, list2 blobs := restic.NewBlobSet() err := repo.List(context.TODO(), restic.DataFile, func(id restic.ID, size int64) error { - entries, _, err := repo.ListPack(context.TODO(), id) + entries, _, err := repo.ListPack(context.TODO(), id, size) if err != nil { t.Fatalf("error listing pack %v: %v", id, err) } diff --git a/internal/repository/repository.go b/internal/repository/repository.go index e772cd8ef..d28a9ddd7 100644 --- a/internal/repository/repository.go +++ b/internal/repository/repository.go @@ -549,20 +549,15 @@ func (r *Repository) List(ctx context.Context, t restic.FileType, fn func(restic // ListPack returns the list of blobs saved in the pack id and the length of // the file as stored in the backend. -func (r *Repository) ListPack(ctx context.Context, id restic.ID) ([]restic.Blob, int64, error) { +func (r *Repository) ListPack(ctx context.Context, id restic.ID, size int64) ([]restic.Blob, int64, error) { h := restic.Handle{Type: restic.DataFile, Name: id.String()} - blobInfo, err := r.Backend().Stat(ctx, h) + blobs, err := pack.List(r.Key(), restic.ReaderAt(r.Backend(), h), size) if err != nil { return nil, 0, err } - blobs, err := pack.List(r.Key(), restic.ReaderAt(r.Backend(), h), blobInfo.Size) - if err != nil { - return nil, 0, err - } - - return blobs, blobInfo.Size, nil + return blobs, size, nil } // Delete calls backend.Delete() if implemented, and returns an error diff --git a/internal/restic/repository.go b/internal/restic/repository.go index 1ab4a2766..21b36b2d8 100644 --- a/internal/restic/repository.go +++ b/internal/restic/repository.go @@ -32,7 +32,7 @@ type Repository interface { // // The function fn is called in the same Goroutine List() was called from. List(ctx context.Context, t FileType, fn func(ID, int64) error) error - ListPack(context.Context, ID) ([]Blob, int64, error) + ListPack(context.Context, ID, int64) ([]Blob, int64, error) Flush(context.Context) error