From 32ac4679925ccfe7e191e170d77dacfd88671464 Mon Sep 17 00:00:00 2001 From: Sargun Dhillon Date: Wed, 29 Nov 2017 11:17:39 -0800 Subject: [PATCH] Introduce Walk Method Per Storage Driver Move the Walk types into registry/storage/driver, and add a Walk method to each storage driver. Although this is yet another API to implement, there is a fall back implementation that relies on List and Stat. For some filesystems this is very slow. Also, this WalkDir Method conforms better do a traditional WalkDir (a la filepath). This change is in preparation for refactoring. Signed-off-by: Sargun Dhillon --- registry/storage/catalog.go | 4 +- registry/storage/driver/azure/azure.go | 6 +++ registry/storage/driver/base/base.go | 12 +++++ registry/storage/driver/filesystem/driver.go | 6 +++ registry/storage/driver/gcs/gcs.go | 6 +++ registry/storage/driver/inmemory/driver.go | 6 +++ registry/storage/driver/oss/oss.go | 6 +++ registry/storage/driver/s3-aws/s3.go | 6 +++ registry/storage/driver/s3-goamz/s3.go | 6 +++ registry/storage/driver/storagedriver.go | 7 +++ registry/storage/driver/swift/swift.go | 6 +++ registry/storage/driver/walk.go | 52 ++++++++++++++++++++ registry/storage/purgeuploads.go | 2 +- registry/storage/walk.go | 20 +++----- registry/storage/walk_test.go | 2 +- 15 files changed, 129 insertions(+), 18 deletions(-) create mode 100644 registry/storage/driver/walk.go diff --git a/registry/storage/catalog.go b/registry/storage/catalog.go index f3c6fe9e1..219509808 100644 --- a/registry/storage/catalog.go +++ b/registry/storage/catalog.go @@ -144,9 +144,9 @@ func handleRepository(fileInfo driver.FileInfo, root, last string, fn func(repoP return err } } - return ErrSkipDir + return driver.ErrSkipDir } else if strings.HasPrefix(file, "_") { - return ErrSkipDir + return driver.ErrSkipDir } return nil diff --git a/registry/storage/driver/azure/azure.go b/registry/storage/driver/azure/azure.go index a37a22bdc..728d8a858 100644 --- a/registry/storage/driver/azure/azure.go +++ b/registry/storage/driver/azure/azure.go @@ -336,6 +336,12 @@ func (d *driver) URLFor(ctx context.Context, path string, options map[string]int return d.client.GetBlobSASURI(d.container, path, expiresTime, "r") } +// Walk traverses a filesystem defined within driver, starting +// from the given path, calling f on each file +func (d *driver) Walk(ctx context.Context, path string, f storagedriver.WalkFn) error { + return storagedriver.WalkFallback(ctx, d, path, f) +} + // directDescendants will find direct descendants (blobs or virtual containers) // of from list of blob paths and will return their full paths. Elements in blobs // list must be prefixed with a "/" and diff --git a/registry/storage/driver/base/base.go b/registry/storage/driver/base/base.go index 1778c6c53..149cfcb2b 100644 --- a/registry/storage/driver/base/base.go +++ b/registry/storage/driver/base/base.go @@ -197,3 +197,15 @@ func (base *Base) URLFor(ctx context.Context, path string, options map[string]in str, e := base.StorageDriver.URLFor(ctx, path, options) return str, base.setDriverName(e) } + +// Walk wraps Walk of underlying storage driver. +func (base *Base) Walk(ctx context.Context, path string, f storagedriver.WalkFn) error { + ctx, done := dcontext.WithTrace(ctx) + defer done("%s.Walk(%q)", base.Name(), path) + + if !storagedriver.PathRegexp.MatchString(path) { + return storagedriver.InvalidPathError{Path: path, DriverName: base.StorageDriver.Name()} + } + + return base.setDriverName(base.StorageDriver.Walk(ctx, path, f)) +} diff --git a/registry/storage/driver/filesystem/driver.go b/registry/storage/driver/filesystem/driver.go index 71c75fba4..9a9c062a7 100644 --- a/registry/storage/driver/filesystem/driver.go +++ b/registry/storage/driver/filesystem/driver.go @@ -315,6 +315,12 @@ func (d *driver) URLFor(ctx context.Context, path string, options map[string]int return "", storagedriver.ErrUnsupportedMethod{} } +// Walk traverses a filesystem defined within driver, starting +// from the given path, calling f on each file +func (d *driver) Walk(ctx context.Context, path string, f storagedriver.WalkFn) error { + return storagedriver.WalkFallback(ctx, d, path, f) +} + // fullPath returns the absolute path of a key within the Driver's storage. func (d *driver) fullPath(subPath string) string { return path.Join(d.rootDirectory, subPath) diff --git a/registry/storage/driver/gcs/gcs.go b/registry/storage/driver/gcs/gcs.go index dadae79c2..d129bee77 100644 --- a/registry/storage/driver/gcs/gcs.go +++ b/registry/storage/driver/gcs/gcs.go @@ -779,6 +779,12 @@ func (d *driver) URLFor(context context.Context, path string, options map[string return storage.SignedURL(d.bucket, name, opts) } +// Walk traverses a filesystem defined within driver, starting +// from the given path, calling f on each file +func (d *driver) Walk(ctx context.Context, path string, f storagedriver.WalkFn) error { + return storagedriver.WalkFallback(ctx, d, path, f) +} + func startSession(client *http.Client, bucket string, name string) (uri string, err error) { u := &url.URL{ Scheme: "https", diff --git a/registry/storage/driver/inmemory/driver.go b/registry/storage/driver/inmemory/driver.go index 14bc3940b..e18e29339 100644 --- a/registry/storage/driver/inmemory/driver.go +++ b/registry/storage/driver/inmemory/driver.go @@ -240,6 +240,12 @@ func (d *driver) URLFor(ctx context.Context, path string, options map[string]int return "", storagedriver.ErrUnsupportedMethod{} } +// Walk traverses a filesystem defined within driver, starting +// from the given path, calling f on each file +func (d *driver) Walk(ctx context.Context, path string, f storagedriver.WalkFn) error { + return storagedriver.WalkFallback(ctx, d, path, f) +} + type writer struct { d *driver f *file diff --git a/registry/storage/driver/oss/oss.go b/registry/storage/driver/oss/oss.go index f79e35372..1dcf42b87 100644 --- a/registry/storage/driver/oss/oss.go +++ b/registry/storage/driver/oss/oss.go @@ -479,6 +479,12 @@ func (d *driver) URLFor(ctx context.Context, path string, options map[string]int return signedURL, nil } +// Walk traverses a filesystem defined within driver, starting +// from the given path, calling f on each file +func (d *driver) Walk(ctx context.Context, path string, f storagedriver.WalkFn) error { + return storagedriver.WalkFallback(ctx, d, path, f) +} + func (d *driver) ossPath(path string) string { return strings.TrimLeft(strings.TrimRight(d.RootDirectory, "/")+path, "/") } diff --git a/registry/storage/driver/s3-aws/s3.go b/registry/storage/driver/s3-aws/s3.go index 33312afcc..f9a606532 100644 --- a/registry/storage/driver/s3-aws/s3.go +++ b/registry/storage/driver/s3-aws/s3.go @@ -874,6 +874,12 @@ func (d *driver) URLFor(ctx context.Context, path string, options map[string]int return req.Presign(expiresIn) } +// Walk traverses a filesystem defined within driver, starting +// from the given path, calling f on each file +func (d *driver) Walk(ctx context.Context, path string, f storagedriver.WalkFn) error { + return storagedriver.WalkFallback(ctx, d, path, f) +} + func (d *driver) s3Path(path string) string { return strings.TrimLeft(strings.TrimRight(d.RootDirectory, "/")+path, "/") } diff --git a/registry/storage/driver/s3-goamz/s3.go b/registry/storage/driver/s3-goamz/s3.go index 4850f6d75..4bee38d71 100644 --- a/registry/storage/driver/s3-goamz/s3.go +++ b/registry/storage/driver/s3-goamz/s3.go @@ -546,6 +546,12 @@ func (d *Driver) S3BucketKey(path string) string { return d.StorageDriver.(*driver).s3Path(path) } +// Walk traverses a filesystem defined within driver, starting +// from the given path, calling f on each file +func (d *driver) Walk(ctx context.Context, path string, f storagedriver.WalkFn) error { + return storagedriver.WalkFallback(ctx, d, path, f) +} + func parseError(path string, err error) error { if s3Err, ok := err.(*s3.Error); ok && s3Err.Code == "NoSuchKey" { return storagedriver.PathNotFoundError{Path: path} diff --git a/registry/storage/driver/storagedriver.go b/registry/storage/driver/storagedriver.go index 4b570dd62..b220713f2 100644 --- a/registry/storage/driver/storagedriver.go +++ b/registry/storage/driver/storagedriver.go @@ -83,6 +83,13 @@ type StorageDriver interface { // May return an ErrUnsupportedMethod in certain StorageDriver // implementations. URLFor(ctx context.Context, path string, options map[string]interface{}) (string, error) + + // Walk traverses a filesystem defined within driver, starting + // from the given path, calling f on each file. + // If the returned error from the WalkFn is ErrSkipDir and fileInfo refers + // to a directory, the directory will not be entered and Walk + // will continue the traversal. If fileInfo refers to a normal file, processing stops + Walk(ctx context.Context, path string, f WalkFn) error } // FileWriter provides an abstraction for an opened writable file-like object in diff --git a/registry/storage/driver/swift/swift.go b/registry/storage/driver/swift/swift.go index 11a33264b..5dbb57412 100644 --- a/registry/storage/driver/swift/swift.go +++ b/registry/storage/driver/swift/swift.go @@ -644,6 +644,12 @@ func (d *driver) URLFor(ctx context.Context, path string, options map[string]int return tempURL, nil } +// Walk traverses a filesystem defined within driver, starting +// from the given path, calling f on each file +func (d *driver) Walk(ctx context.Context, path string, f storagedriver.WalkFn) error { + return storagedriver.WalkFallback(ctx, d, path, f) +} + func (d *driver) swiftPath(path string) string { return strings.TrimLeft(strings.TrimRight(d.Prefix+"/files"+path, "/"), "/") } diff --git a/registry/storage/driver/walk.go b/registry/storage/driver/walk.go new file mode 100644 index 000000000..2a5bc51ff --- /dev/null +++ b/registry/storage/driver/walk.go @@ -0,0 +1,52 @@ +package driver + +import ( + "context" + "errors" + "sort" +) + +// ErrSkipDir is used as a return value from onFileFunc to indicate that +// the directory named in the call is to be skipped. It is not returned +// as an error by any function. +var ErrSkipDir = errors.New("skip this directory") + +// WalkFn is called once per file by Walk +type WalkFn func(fileInfo FileInfo) error + +// WalkFallback traverses a filesystem defined within driver, starting +// from the given path, calling f on each file. It uses the List method and Stat to drive itself. +// If the returned error from the WalkFn is ErrSkipDir and fileInfo refers +// to a directory, the directory will not be entered and Walk +// will continue the traversal. If fileInfo refers to a normal file, processing stops +func WalkFallback(ctx context.Context, driver StorageDriver, from string, f WalkFn) error { + children, err := driver.List(ctx, from) + if err != nil { + return err + } + sort.Stable(sort.StringSlice(children)) + for _, child := range children { + // TODO(stevvooe): Calling driver.Stat for every entry is quite + // expensive when running against backends with a slow Stat + // implementation, such as s3. This is very likely a serious + // performance bottleneck. + fileInfo, err := driver.Stat(ctx, child) + if err != nil { + return err + } + err = f(fileInfo) + if err == nil && fileInfo.IsDir() { + if err := WalkFallback(ctx, driver, child, f); err != nil { + return err + } + } else if err == ErrSkipDir { + // Stop iteration if it's a file, otherwise noop if it's a directory + if !fileInfo.IsDir() { + return nil + } + } else if err != nil { + return err + } + } + return nil +} diff --git a/registry/storage/purgeuploads.go b/registry/storage/purgeuploads.go index 2396e8034..4292bf7a0 100644 --- a/registry/storage/purgeuploads.go +++ b/registry/storage/purgeuploads.go @@ -75,7 +75,7 @@ func getOutstandingUploads(ctx context.Context, driver storageDriver.StorageDriv inUploadDir = (file == "_uploads") if fileInfo.IsDir() && !inUploadDir { - return ErrSkipDir + return storageDriver.ErrSkipDir } } diff --git a/registry/storage/walk.go b/registry/storage/walk.go index 3322d16a6..add3ec463 100644 --- a/registry/storage/walk.go +++ b/registry/storage/walk.go @@ -2,27 +2,19 @@ package storage import ( "context" - "errors" "fmt" "sort" storageDriver "github.com/docker/distribution/registry/storage/driver" ) -// ErrSkipDir is used as a return value from onFileFunc to indicate that -// the directory named in the call is to be skipped. It is not returned -// as an error by any function. -var ErrSkipDir = errors.New("skip this directory") - -// WalkFn is called once per file by Walk -// If the returned error is ErrSkipDir and fileInfo refers -// to a directory, the directory will not be entered and Walk -// will continue the traversal. Otherwise Walk will return -type WalkFn func(fileInfo storageDriver.FileInfo) error - // Walk traverses a filesystem defined within driver, starting // from the given path, calling f on each file -func Walk(ctx context.Context, driver storageDriver.StorageDriver, from string, f WalkFn) error { +// If the returned error from the WalkFn is ErrSkipDir and fileInfo refers +// to a directory, the directory will not be entered and Walk +// will continue the traversal. Otherwise Walk will return +// the error +func Walk(ctx context.Context, driver storageDriver.StorageDriver, from string, f storageDriver.WalkFn) error { children, err := driver.List(ctx, from) if err != nil { return err @@ -38,7 +30,7 @@ func Walk(ctx context.Context, driver storageDriver.StorageDriver, from string, return err } err = f(fileInfo) - skipDir := (err == ErrSkipDir) + skipDir := (err == storageDriver.ErrSkipDir) if err != nil && !skipDir { return err } diff --git a/registry/storage/walk_test.go b/registry/storage/walk_test.go index ca9caae03..b0e7f4bc4 100644 --- a/registry/storage/walk_test.go +++ b/registry/storage/walk_test.go @@ -131,7 +131,7 @@ func TestWalkSkipDir(t *testing.T) { filePath := fileInfo.Path() if filePath == "/a/b" { // skip processing /a/b/c and /a/b/c/d - return ErrSkipDir + return driver.ErrSkipDir } delete(expected, filePath) return nil