distribution/registry/storage/driver/walk.go
Collin Shoop cf81f67a16 storagedriver/s3: Optimized Walk implementation + bugfix
Optimized S3 Walk impl by no longer listing files recursively. Overall gives a huge performance increase both in terms of runtime and S3 calls (up to ~500x).

Fixed a bug in WalkFallback where ErrSkipDir for was not handled as documented for non-directory.

Signed-off-by: Collin Shoop <cshoop@digitalocean.com>
2021-08-16 16:07:25 -04:00

66 lines
2.1 KiB
Go

package driver
import (
"context"
"errors"
"sort"
"github.com/sirupsen/logrus"
)
// ErrSkipDir is used as a return value from onFileFunc to indicate that
// the directory named in the call is to be skipped. It is not returned
// as an error by any function.
var ErrSkipDir = errors.New("skip this directory")
// WalkFn is called once per file by Walk
type WalkFn func(fileInfo FileInfo) error
// WalkFallback traverses a filesystem defined within driver, starting
// from the given path, calling f on each file. It uses the List method and Stat to drive itself.
// If the returned error from the WalkFn is ErrSkipDir and fileInfo refers
// to a directory, the directory will not be entered and Walk
// will continue the traversal. If fileInfo refers to a normal file, processing stops
func WalkFallback(ctx context.Context, driver StorageDriver, from string, f WalkFn) error {
_, err := doWalkFallback(ctx, driver, from, f)
return err
}
func doWalkFallback(ctx context.Context, driver StorageDriver, from string, f WalkFn) (bool, error) {
children, err := driver.List(ctx, from)
if err != nil {
return false, err
}
sort.Stable(sort.StringSlice(children))
for _, child := range children {
// TODO(stevvooe): Calling driver.Stat for every entry is quite
// expensive when running against backends with a slow Stat
// implementation, such as s3. This is very likely a serious
// performance bottleneck.
fileInfo, err := driver.Stat(ctx, child)
if err != nil {
switch err.(type) {
case PathNotFoundError:
// repository was removed in between listing and enumeration. Ignore it.
logrus.WithField("path", child).Infof("ignoring deleted path")
continue
default:
return false, err
}
}
err = f(fileInfo)
if err == nil && fileInfo.IsDir() {
if ok, err := doWalkFallback(ctx, driver, child, f); err != nil || !ok {
return ok, err
}
} else if err == ErrSkipDir {
// noop for folders, will just skip
if !fileInfo.IsDir() {
return false, nil // no error but stop iteration
}
} else if err != nil {
return false, err
}
}
return true, nil
}