From 48959ebac04b54b74174eaeb6eef21e0afe9d26c Mon Sep 17 00:00:00 2001 From: James Hewitt Date: Sun, 10 Jul 2022 03:04:50 +0100 Subject: [PATCH] Pass the last paging flag to storage drivers Storage drivers may be able to take advantage of the hint to start their walk more efficiently. Start with s3, whose API takes a start-after parameter. Registries with many repositories can drastically reduce calls to s3 by telling s3 to only list results lexicographically after the last parameter. Signed-off-by: James Hewitt --- registry/storage/catalog.go | 2 +- registry/storage/driver/azure/azure.go | 7 +++++++ registry/storage/driver/filesystem/driver.go | 7 +++++++ registry/storage/driver/inmemory/driver.go | 7 +++++++ registry/storage/driver/s3-aws/s3.go | 19 ++++++++++++++----- registry/storage/driver/storagedriver.go | 5 +++++ 6 files changed, 41 insertions(+), 6 deletions(-) diff --git a/registry/storage/catalog.go b/registry/storage/catalog.go index 55500694d..1d1c58274 100644 --- a/registry/storage/catalog.go +++ b/registry/storage/catalog.go @@ -27,7 +27,7 @@ func (reg *registry) Repositories(ctx context.Context, repos []string, last stri return 0, err } - err = reg.blobStore.driver.Walk(ctx, root, func(fileInfo driver.FileInfo) error { + err = reg.blobStore.driver.WalkWithStartAfterHint(ctx, root, last, func(fileInfo driver.FileInfo) error { err := handleRepository(fileInfo, root, last, func(repoPath string) error { foundRepos = append(foundRepos, repoPath) return nil diff --git a/registry/storage/driver/azure/azure.go b/registry/storage/driver/azure/azure.go index f2cb11018..74b715a3d 100644 --- a/registry/storage/driver/azure/azure.go +++ b/registry/storage/driver/azure/azure.go @@ -389,6 +389,13 @@ func (d *driver) Walk(ctx context.Context, path string, f storagedriver.WalkFn) return storagedriver.WalkFallback(ctx, d, path, f) } +// WalkWithStartAfterHint traverses a filesystem defined within driver, starting +// from the given path, calling f on each file and 
directory. The hint is ignored +// because it is not yet implemented. +func (d *driver) WalkWithStartAfterHint(ctx context.Context, path string, _ string, f storagedriver.WalkFn) error { + return d.Walk(ctx, path, f) +} + // directDescendants will find direct descendants (blobs or virtual containers) // of from list of blob paths and will return their full paths. Elements in blobs // list must be prefixed with a "/" and diff --git a/registry/storage/driver/filesystem/driver.go b/registry/storage/driver/filesystem/driver.go index de7ba0966..154d78516 100644 --- a/registry/storage/driver/filesystem/driver.go +++ b/registry/storage/driver/filesystem/driver.go @@ -294,6 +294,13 @@ func (d *driver) Walk(ctx context.Context, path string, f storagedriver.WalkFn) return storagedriver.WalkFallback(ctx, d, path, f) } +// WalkWithStartAfterHint traverses a filesystem defined within driver, starting +// from the given path, calling f on each file and directory. The hint is ignored +// because it is not yet implemented. +func (d *driver) WalkWithStartAfterHint(ctx context.Context, path string, _ string, f storagedriver.WalkFn) error { + return d.Walk(ctx, path, f) +} + // fullPath returns the absolute path of a key within the Driver's storage. func (d *driver) fullPath(subPath string) string { return path.Join(d.rootDirectory, subPath) diff --git a/registry/storage/driver/inmemory/driver.go b/registry/storage/driver/inmemory/driver.go index 6370ca638..a1b6c9f69 100644 --- a/registry/storage/driver/inmemory/driver.go +++ b/registry/storage/driver/inmemory/driver.go @@ -248,6 +248,13 @@ func (d *driver) Walk(ctx context.Context, path string, f storagedriver.WalkFn) return storagedriver.WalkFallback(ctx, d, path, f) } +// WalkWithStartAfterHint traverses a filesystem defined within driver, starting +// from the given path, calling f on each file and directory. The hint is ignored +// because it is not yet implemented. 
+func (d *driver) WalkWithStartAfterHint(ctx context.Context, path string, _ string, f storagedriver.WalkFn) error { + return d.Walk(ctx, path, f) +} + type writer struct { d *driver f *file diff --git a/registry/storage/driver/s3-aws/s3.go b/registry/storage/driver/s3-aws/s3.go index 8753b3455..085304e9d 100644 --- a/registry/storage/driver/s3-aws/s3.go +++ b/registry/storage/driver/s3-aws/s3.go @@ -1039,6 +1039,14 @@ func (d *driver) URLFor(ctx context.Context, path string, options map[string]int // Walk traverses a filesystem defined within driver, starting // from the given path, calling f on each file func (d *driver) Walk(ctx context.Context, from string, f storagedriver.WalkFn) error { + return d.WalkWithStartAfterHint(ctx, from, "", f) +} + +// WalkWithStartAfterHint traverses a filesystem defined within driver, starting +// from the given path, calling f on each file and directory. The start after hint +// is passed to the ListObjectsV2 API so that AWS can pre-filter any paths that are +// lexicographically before the last paged item. 
+func (d *driver) WalkWithStartAfterHint(ctx context.Context, from string, startAfterHint string, f storagedriver.WalkFn) error { path := from if !strings.HasSuffix(path, "/") { path = path + "/" @@ -1050,7 +1058,7 @@ func (d *driver) Walk(ctx context.Context, from string, f storagedriver.WalkFn) } var objectCount int64 - if err := d.doWalk(ctx, &objectCount, d.s3Path(path), prefix, f); err != nil { + if err := d.doWalk(ctx, &objectCount, d.s3Path(path), prefix, startAfterHint, f); err != nil { return err } @@ -1062,7 +1070,7 @@ func (d *driver) Walk(ctx context.Context, from string, f storagedriver.WalkFn) return nil } -func (d *driver) doWalk(parentCtx context.Context, objectCount *int64, path, prefix string, f storagedriver.WalkFn) error { +func (d *driver) doWalk(parentCtx context.Context, objectCount *int64, path, prefix string, startAfter string, f storagedriver.WalkFn) error { var ( retError error // the most recent directory walked for de-duping @@ -1073,9 +1081,10 @@ func (d *driver) doWalk(parentCtx context.Context, objectCount *int64, path, pre prevDir = strings.Replace(path, d.s3Path(""), prefix, 1) listObjectsInput := &s3.ListObjectsV2Input{ - Bucket: aws.String(d.Bucket), - Prefix: aws.String(path), - MaxKeys: aws.Int64(listMax), + Bucket: aws.String(d.Bucket), + Prefix: aws.String(path), + MaxKeys: aws.Int64(listMax), + StartAfter: aws.String(path + startAfter), } ctx, done := dcontext.WithTrace(parentCtx) diff --git a/registry/storage/driver/storagedriver.go b/registry/storage/driver/storagedriver.go index e5d482c09..b6411b406 100644 --- a/registry/storage/driver/storagedriver.go +++ b/registry/storage/driver/storagedriver.go @@ -90,6 +90,11 @@ type StorageDriver interface { // to a directory, the directory will not be entered and Walk // will continue the traversal. 
If fileInfo refers to a normal file, processing stops Walk(ctx context.Context, path string, f WalkFn) error + + // WalkWithStartAfterHint traverses a filesystem defined within driver as in + // Walk. If startAfterHint is set, the walk may start with the first item lexicographically + // after the hint, but it is not guaranteed and drivers may start the walk from the path. + WalkWithStartAfterHint(ctx context.Context, path string, startAfterHint string, f WalkFn) error } // FileWriter provides an abstraction for an opened writable file-like object in