Pass the last paging flag to storage drivers

Storage drivers may be able to take advantage of the hint to start
their walk more efficiently. Start with s3, whose API takes a
start-after parameter. Registries with many repositories can drastically
reduce calls to s3 by telling s3 to only list results lexicographically
after the last parameter.

Signed-off-by: James Hewitt <james.hewitt@uk.ibm.com>
This commit is contained in:
James Hewitt 2022-07-10 03:04:50 +01:00
parent ed256e07f1
commit 48959ebac0
No known key found for this signature in database
GPG key ID: EA6C3C654B6193E4
6 changed files with 41 additions and 6 deletions

View file

@ -27,7 +27,7 @@ func (reg *registry) Repositories(ctx context.Context, repos []string, last stri
return 0, err return 0, err
} }
err = reg.blobStore.driver.Walk(ctx, root, func(fileInfo driver.FileInfo) error { err = reg.blobStore.driver.WalkWithStartAfterHint(ctx, root, last, func(fileInfo driver.FileInfo) error {
err := handleRepository(fileInfo, root, last, func(repoPath string) error { err := handleRepository(fileInfo, root, last, func(repoPath string) error {
foundRepos = append(foundRepos, repoPath) foundRepos = append(foundRepos, repoPath)
return nil return nil

View file

@ -389,6 +389,13 @@ func (d *driver) Walk(ctx context.Context, path string, f storagedriver.WalkFn)
return storagedriver.WalkFallback(ctx, d, path, f) return storagedriver.WalkFallback(ctx, d, path, f)
} }
// WalkWithStartAfterHint traverses a filesystem defined within driver, starting
// from the given path, calling f on each file and directory.
//
// This driver does not implement the start-after optimization yet: the hint
// (the third, unnamed string parameter) is discarded and the call is forwarded
// unchanged to Walk, so the traversal always begins at path. Any error returned
// by Walk is returned as-is.
func (d *driver) WalkWithStartAfterHint(ctx context.Context, path string, _ string, f storagedriver.WalkFn) error {
return d.Walk(ctx, path, f)
}
// directDescendants will find direct descendants (blobs or virtual containers) // directDescendants will find direct descendants (blobs or virtual containers)
// of from list of blob paths and will return their full paths. Elements in blobs // of from list of blob paths and will return their full paths. Elements in blobs
// list must be prefixed with a "/" and // list must be prefixed with a "/" and

View file

@ -294,6 +294,13 @@ func (d *driver) Walk(ctx context.Context, path string, f storagedriver.WalkFn)
return storagedriver.WalkFallback(ctx, d, path, f) return storagedriver.WalkFallback(ctx, d, path, f)
} }
// WalkWithStartAfterHint traverses a filesystem defined within driver, starting
// from the given path, calling f on each file and directory.
//
// This driver does not implement the start-after optimization yet: the hint
// (the third, unnamed string parameter) is discarded and the call is forwarded
// unchanged to Walk, so the traversal always begins at path. Any error returned
// by Walk is returned as-is.
func (d *driver) WalkWithStartAfterHint(ctx context.Context, path string, _ string, f storagedriver.WalkFn) error {
return d.Walk(ctx, path, f)
}
// fullPath returns the absolute path of a key within the Driver's storage. // fullPath returns the absolute path of a key within the Driver's storage.
func (d *driver) fullPath(subPath string) string { func (d *driver) fullPath(subPath string) string {
return path.Join(d.rootDirectory, subPath) return path.Join(d.rootDirectory, subPath)

View file

@ -248,6 +248,13 @@ func (d *driver) Walk(ctx context.Context, path string, f storagedriver.WalkFn)
return storagedriver.WalkFallback(ctx, d, path, f) return storagedriver.WalkFallback(ctx, d, path, f)
} }
// WalkWithStartAfterHint traverses a filesystem defined within driver, starting
// from the given path, calling f on each file and directory.
//
// This driver does not implement the start-after optimization yet: the hint
// (the third, unnamed string parameter) is discarded and the call is forwarded
// unchanged to Walk, so the traversal always begins at path. Any error returned
// by Walk is returned as-is.
func (d *driver) WalkWithStartAfterHint(ctx context.Context, path string, _ string, f storagedriver.WalkFn) error {
return d.Walk(ctx, path, f)
}
type writer struct { type writer struct {
d *driver d *driver
f *file f *file

View file

@ -1039,6 +1039,14 @@ func (d *driver) URLFor(ctx context.Context, path string, options map[string]int
// Walk traverses a filesystem defined within driver, starting // Walk traverses a filesystem defined within driver, starting
// from the given path, calling f on each file // from the given path, calling f on each file
func (d *driver) Walk(ctx context.Context, from string, f storagedriver.WalkFn) error { func (d *driver) Walk(ctx context.Context, from string, f storagedriver.WalkFn) error {
return d.WalkWithStartAfterHint(ctx, from, "", f)
}
// WalkWithStartAfterHint traverses a filesystem defined within driver, starting
// from the given path, calling f on each file and directory. The start after hint
// is passed to the ListObjectsV2 API so that AWS can pre-filter any paths that are
// lexicographically before the last paged item.
func (d *driver) WalkWithStartAfterHint(ctx context.Context, from string, startAfterHint string, f storagedriver.WalkFn) error {
path := from path := from
if !strings.HasSuffix(path, "/") { if !strings.HasSuffix(path, "/") {
path = path + "/" path = path + "/"
@ -1050,7 +1058,7 @@ func (d *driver) Walk(ctx context.Context, from string, f storagedriver.WalkFn)
} }
var objectCount int64 var objectCount int64
if err := d.doWalk(ctx, &objectCount, d.s3Path(path), prefix, f); err != nil { if err := d.doWalk(ctx, &objectCount, d.s3Path(path), prefix, startAfterHint, f); err != nil {
return err return err
} }
@ -1062,7 +1070,7 @@ func (d *driver) Walk(ctx context.Context, from string, f storagedriver.WalkFn)
return nil return nil
} }
func (d *driver) doWalk(parentCtx context.Context, objectCount *int64, path, prefix string, f storagedriver.WalkFn) error { func (d *driver) doWalk(parentCtx context.Context, objectCount *int64, path, prefix string, startAfter string, f storagedriver.WalkFn) error {
var ( var (
retError error retError error
// the most recent directory walked for de-duping // the most recent directory walked for de-duping
@ -1073,9 +1081,10 @@ func (d *driver) doWalk(parentCtx context.Context, objectCount *int64, path, pre
prevDir = strings.Replace(path, d.s3Path(""), prefix, 1) prevDir = strings.Replace(path, d.s3Path(""), prefix, 1)
listObjectsInput := &s3.ListObjectsV2Input{ listObjectsInput := &s3.ListObjectsV2Input{
Bucket: aws.String(d.Bucket), Bucket: aws.String(d.Bucket),
Prefix: aws.String(path), Prefix: aws.String(path),
MaxKeys: aws.Int64(listMax), MaxKeys: aws.Int64(listMax),
StartAfter: aws.String(path + startAfter),
} }
ctx, done := dcontext.WithTrace(parentCtx) ctx, done := dcontext.WithTrace(parentCtx)

View file

@ -90,6 +90,11 @@ type StorageDriver interface {
// to a directory, the directory will not be entered and Walk // to a directory, the directory will not be entered and Walk
// will continue the traversal. If fileInfo refers to a normal file, processing stops // will continue the traversal. If fileInfo refers to a normal file, processing stops
Walk(ctx context.Context, path string, f WalkFn) error Walk(ctx context.Context, path string, f WalkFn) error
// WalkWithStartAfterHint traverses a filesystem defined within driver as in
// Walk. If startAfterHint is set, the walk may start with the first item lexicographically
// after the hint, but it is not guaranteed and drivers may start the walk from the path.
WalkWithStartAfterHint(ctx context.Context, path string, startAfterHint string, f WalkFn) error
} }
// FileWriter provides an abstraction for an opened writable file-like object in // FileWriter provides an abstraction for an opened writable file-like object in