forked from TrueCloudLab/distribution
e22f7cbc73
Storage drivers may be able to take advantage of the hint to start their walk more efficiently. For S3: The API takes a start-after parameter. Registries with many repositories can drastically reduce calls to s3 by telling s3 to only list results lexographically after the last parameter. For the fallback: We can start deeper in the tree and avoid statting the files and directories before the hint in a walk. For a filesystem this improves performance a little, but many of the API based drivers are currently treated like a filesystem, so this drastically improves the performance of GCP and Azure blob. Signed-off-by: James Hewitt <james.hewitt@uk.ibm.com>
125 lines
4.2 KiB
Go
125 lines
4.2 KiB
Go
package driver
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"path/filepath"
|
|
"sort"
|
|
"strings"
|
|
|
|
"github.com/sirupsen/logrus"
|
|
)
|
|
|
|
// ErrSkipDir is used as a return value from onFileFunc to indicate that
|
|
// the directory named in the call is to be skipped. It is not returned
|
|
// as an error by any function.
|
|
var ErrSkipDir = errors.New("skip this directory")
|
|
|
|
// ErrFilledBuffer is used as a return value from onFileFunc to indicate
|
|
// that the requested number of entries has been reached and the walk can
|
|
// stop.
|
|
var ErrFilledBuffer = errors.New("we have enough entries")
|
|
|
|
// WalkFn is called once per file by Walk
|
|
type WalkFn func(fileInfo FileInfo) error
|
|
|
|
// WalkFallback traverses a filesystem defined within driver, starting
|
|
// from the given path, calling f on each file. It uses the List method and Stat to drive itself.
|
|
// If the returned error from the WalkFn is ErrSkipDir the directory will not be entered and Walk
|
|
// will continue the traversal. If the returned error from the WalkFn is ErrFilledBuffer, the walk
|
|
// stops.
|
|
func WalkFallback(ctx context.Context, driver StorageDriver, from string, f WalkFn, options ...func(*WalkOptions)) error {
|
|
walkOptions := &WalkOptions{}
|
|
for _, o := range options {
|
|
o(walkOptions)
|
|
}
|
|
|
|
startAfterHint := walkOptions.StartAfterHint
|
|
// Ensure that we are checking the hint is contained within from by adding a "/".
|
|
// Add to both in case the hint and form are the same, which would still count.
|
|
rel, err := filepath.Rel(from, startAfterHint)
|
|
if err != nil || strings.HasPrefix(rel, "..") {
|
|
// The startAfterHint is outside from, so check if we even need to walk anything
|
|
// Replace any path separators with \x00 so that the sort works in a depth-first way
|
|
if strings.ReplaceAll(startAfterHint, "/", "\x00") < strings.ReplaceAll(from, "/", "\x00") {
|
|
_, err := doWalkFallback(ctx, driver, from, "", f)
|
|
return err
|
|
}
|
|
} else {
|
|
// The startAfterHint is within from.
|
|
// Walk up the tree until we hit from - we know it is contained.
|
|
// Ensure startAfterHint is never deeper than a child of the base
|
|
// directory so that doWalkFallback doesn't have to worry about
|
|
// depth-first comparisons
|
|
base := startAfterHint
|
|
for strings.HasPrefix(base, from) {
|
|
_, err = doWalkFallback(ctx, driver, base, startAfterHint, f)
|
|
switch err.(type) {
|
|
case nil:
|
|
// No error
|
|
case PathNotFoundError:
|
|
// dir doesn't exist, so nothing to walk
|
|
default:
|
|
return err
|
|
}
|
|
if base == from {
|
|
break
|
|
}
|
|
startAfterHint = base
|
|
base, _ = filepath.Split(startAfterHint)
|
|
if len(base) > 1 {
|
|
base = strings.TrimSuffix(base, "/")
|
|
}
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// doWalkFallback performs a depth first walk using recursion.
|
|
// from is the directory that this iteration of the function should walk.
|
|
// startAfterHint is the child within from to start the walk after. It should only ever be a child of from, or the empty string.
|
|
func doWalkFallback(ctx context.Context, driver StorageDriver, from string, startAfterHint string, f WalkFn) (bool, error) {
|
|
children, err := driver.List(ctx, from)
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
sort.Strings(children)
|
|
for _, child := range children {
|
|
// The startAfterHint has been sanitised in WalkFallback and will either be
|
|
// empty, or be suitable for an <= check for this _from_.
|
|
if child <= startAfterHint {
|
|
continue
|
|
}
|
|
|
|
// TODO(stevvooe): Calling driver.Stat for every entry is quite
|
|
// expensive when running against backends with a slow Stat
|
|
// implementation, such as GCS. This is very likely a serious
|
|
// performance bottleneck.
|
|
// Those backends should have custom walk functions. See S3.
|
|
fileInfo, err := driver.Stat(ctx, child)
|
|
if err != nil {
|
|
switch err.(type) {
|
|
case PathNotFoundError:
|
|
// repository was removed in between listing and enumeration. Ignore it.
|
|
logrus.WithField("path", child).Infof("ignoring deleted path")
|
|
continue
|
|
default:
|
|
return false, err
|
|
}
|
|
}
|
|
err = f(fileInfo)
|
|
if err == nil && fileInfo.IsDir() {
|
|
if ok, err := doWalkFallback(ctx, driver, child, startAfterHint, f); err != nil || !ok {
|
|
return ok, err
|
|
}
|
|
} else if err == ErrSkipDir {
|
|
// don't traverse into this directory
|
|
} else if err == ErrFilledBuffer {
|
|
return false, nil // no error but stop iteration
|
|
} else if err != nil {
|
|
return false, err
|
|
}
|
|
}
|
|
return true, nil
|
|
}
|