Pass the last paging flag to storage drivers

Storage drivers may be able to take advantage of the hint to start
their walk more efficiently. Start with S3, whose API takes a
start-after parameter. Registries with many repositories can drastically
reduce calls to S3 by telling S3 to only list results lexicographically
after the last parameter.

Signed-off-by: James Hewitt <james.hewitt@uk.ibm.com>
This commit is contained in:
James Hewitt 2022-07-10 03:04:50 +01:00
parent ed256e07f1
commit 48959ebac0
No known key found for this signature in database
GPG key ID: EA6C3C654B6193E4
6 changed files with 41 additions and 6 deletions

View file

@ -27,7 +27,7 @@ func (reg *registry) Repositories(ctx context.Context, repos []string, last stri
return 0, err
}
err = reg.blobStore.driver.Walk(ctx, root, func(fileInfo driver.FileInfo) error {
err = reg.blobStore.driver.WalkWithStartAfterHint(ctx, root, last, func(fileInfo driver.FileInfo) error {
err := handleRepository(fileInfo, root, last, func(repoPath string) error {
foundRepos = append(foundRepos, repoPath)
return nil

View file

@ -389,6 +389,13 @@ func (d *driver) Walk(ctx context.Context, path string, f storagedriver.WalkFn)
return storagedriver.WalkFallback(ctx, d, path, f)
}
// WalkWithStartAfterHint traverses a filesystem defined within driver, starting
// from the given path, calling f on each file and directory. The start-after
// hint (the unnamed third parameter) is ignored because this driver does not
// implement it yet; the call is forwarded unchanged to Walk.
func (d *driver) WalkWithStartAfterHint(ctx context.Context, path string, _ string, f storagedriver.WalkFn) error {
return d.Walk(ctx, path, f)
}
// directDescendants will find direct descendants (blobs or virtual containers)
// of from list of blob paths and will return their full paths. Elements in blobs
// list must be prefixed with a "/" and

View file

@ -294,6 +294,13 @@ func (d *driver) Walk(ctx context.Context, path string, f storagedriver.WalkFn)
return storagedriver.WalkFallback(ctx, d, path, f)
}
// WalkWithStartAfterHint traverses a filesystem defined within driver, starting
// from the given path, calling f on each file and directory. The start-after
// hint (the unnamed third parameter) is ignored because this driver does not
// implement it yet; the call is forwarded unchanged to Walk.
func (d *driver) WalkWithStartAfterHint(ctx context.Context, path string, _ string, f storagedriver.WalkFn) error {
return d.Walk(ctx, path, f)
}
// fullPath returns the absolute path of a key within the Driver's storage.
func (d *driver) fullPath(subPath string) string {
return path.Join(d.rootDirectory, subPath)

View file

@ -248,6 +248,13 @@ func (d *driver) Walk(ctx context.Context, path string, f storagedriver.WalkFn)
return storagedriver.WalkFallback(ctx, d, path, f)
}
// WalkWithStartAfterHint traverses a filesystem defined within driver, starting
// from the given path, calling f on each file and directory. The start-after
// hint (the unnamed third parameter) is ignored because this driver does not
// implement it yet; the call is forwarded unchanged to Walk.
func (d *driver) WalkWithStartAfterHint(ctx context.Context, path string, _ string, f storagedriver.WalkFn) error {
return d.Walk(ctx, path, f)
}
type writer struct {
d *driver
f *file

View file

@ -1039,6 +1039,14 @@ func (d *driver) URLFor(ctx context.Context, path string, options map[string]int
// Walk traverses a filesystem defined within driver, starting
// from the given path, calling f on each file. It is a thin wrapper that
// delegates to WalkWithStartAfterHint with an empty start-after hint.
func (d *driver) Walk(ctx context.Context, from string, f storagedriver.WalkFn) error {
return d.WalkWithStartAfterHint(ctx, from, "", f)
}
// WalkWithStartAfterHint traverses a filesystem defined within driver, starting
// from the given path, calling f on each file and directory. The start after hint
// is passed to the ListObjectsV2 API so that AWS can pre-filter any paths that are
// lexicographically before the last paged item.
func (d *driver) WalkWithStartAfterHint(ctx context.Context, from string, startAfterHint string, f storagedriver.WalkFn) error {
path := from
if !strings.HasSuffix(path, "/") {
path = path + "/"
@ -1050,7 +1058,7 @@ func (d *driver) Walk(ctx context.Context, from string, f storagedriver.WalkFn)
}
var objectCount int64
if err := d.doWalk(ctx, &objectCount, d.s3Path(path), prefix, f); err != nil {
if err := d.doWalk(ctx, &objectCount, d.s3Path(path), prefix, startAfterHint, f); err != nil {
return err
}
@ -1062,7 +1070,7 @@ func (d *driver) Walk(ctx context.Context, from string, f storagedriver.WalkFn)
return nil
}
func (d *driver) doWalk(parentCtx context.Context, objectCount *int64, path, prefix string, f storagedriver.WalkFn) error {
func (d *driver) doWalk(parentCtx context.Context, objectCount *int64, path, prefix string, startAfter string, f storagedriver.WalkFn) error {
var (
retError error
// the most recent directory walked for de-duping
@ -1073,9 +1081,10 @@ func (d *driver) doWalk(parentCtx context.Context, objectCount *int64, path, pre
prevDir = strings.Replace(path, d.s3Path(""), prefix, 1)
listObjectsInput := &s3.ListObjectsV2Input{
Bucket: aws.String(d.Bucket),
Prefix: aws.String(path),
MaxKeys: aws.Int64(listMax),
Bucket: aws.String(d.Bucket),
Prefix: aws.String(path),
MaxKeys: aws.Int64(listMax),
StartAfter: aws.String(path + startAfter),
}
ctx, done := dcontext.WithTrace(parentCtx)

View file

@ -90,6 +90,11 @@ type StorageDriver interface {
// to a directory, the directory will not be entered and Walk
// will continue the traversal. If fileInfo refers to a normal file, processing stops
Walk(ctx context.Context, path string, f WalkFn) error
// WalkWithStartAfterHint traverses a filesystem defined within driver as in
// Walk. If startAfterHint is set, the walk may start with the first item lexicographically
// after the hint, but it is not guaranteed and drivers may start the walk from the path.
WalkWithStartAfterHint(ctx context.Context, path string, startAfterHint string, f WalkFn) error
}
// FileWriter provides an abstraction for an opened writable file-like object in