Use start after hint in walk fallback

Signed-off-by: James Hewitt <james.hewitt@uk.ibm.com>
This commit is contained in:
James Hewitt 2023-08-18 15:58:26 +01:00
parent 2245836358
commit 3c8b280408
No known key found for this signature in database
GPG key ID: EA6C3C654B6193E4
2 changed files with 87 additions and 9 deletions

View file

@ -4,6 +4,7 @@ import (
"context"
"errors"
"sort"
"strings"
"github.com/sirupsen/logrus"
)
@ -27,22 +28,36 @@ type WalkFn func(fileInfo FileInfo) error
// to a directory, the directory will not be entered and Walk
// will continue the traversal. If fileInfo refers to a normal file, processing stops
func WalkFallback(ctx context.Context, driver StorageDriver, from string, f WalkFn, options ...func(*WalkOptions)) error {
// No options are supported by the fallback, can be dropped
_, err := doWalkFallback(ctx, driver, from, f)
walkOptions := &WalkOptions{}
for _, o := range options {
o(walkOptions)
}
_, err := doWalkFallback(ctx, driver, from, walkOptions.StartAfterHint, f)
return err
}
func doWalkFallback(ctx context.Context, driver StorageDriver, from string, f WalkFn) (bool, error) {
func doWalkFallback(ctx context.Context, driver StorageDriver, from string, startAfterHint string, f WalkFn) (bool, error) {
children, err := driver.List(ctx, from)
if err != nil {
return false, err
}
sort.Stable(sort.StringSlice(children))
for _, child := range children {
// (@jamstah) This will still walk objects that are before the startAfterHint,
// but to avoid that we would need to cache top level folders to check if there
// are any children then call the WalkFn, which is extra overhead. As the
// hint is just a hint for performance reasons, its fine to walk the extra
// objects instead.
if child < startAfterHint && !strings.HasPrefix(startAfterHint, child) {
continue
}
// TODO(stevvooe): Calling driver.Stat for every entry is quite
// expensive when running against backends with a slow Stat
// implementation, such as s3. This is very likely a serious
// implementation, such as GCS. This is very likely a serious
// performance bottleneck.
// Those backends should have custom walk functions. See S3.
fileInfo, err := driver.Stat(ctx, child)
if err != nil {
switch err.(type) {
@ -56,7 +71,7 @@ func doWalkFallback(ctx context.Context, driver StorageDriver, from string, f Wa
}
err = f(fileInfo)
if err == nil && fileInfo.IsDir() {
if ok, err := doWalkFallback(ctx, driver, child, f); err != nil || !ok {
if ok, err := doWalkFallback(ctx, driver, child, startAfterHint, f); err != nil || !ok {
return ok, err
}
} else if err == ErrSkipDir {

View file

@ -78,9 +78,15 @@ func TestWalkFileRemoved(t *testing.T) {
func TestWalkFallback(t *testing.T) {
d := &fileSystem{
fileset: map[string][]string{
"/": {"/file1", "/folder1", "/folder2"},
"/folder1": {"/folder1/file1"},
"/folder2": {"/folder2/file1"},
"/": {"/file1", "/folder1", "/folder2", "/folder3", "/folder4"},
"/folder1": {"/folder1/file1"},
"/folder2": {"/folder2/file1"},
"/folder3": {"/folder3/subfolder1", "/folder3/subfolder2"},
"/folder3/subfolder1": {"/folder3/subfolder1/subfolder1"},
"/folder3/subfolder1/subfolder1": {"/folder3/subfolder1/subfolder1/file1"},
"/folder3/subfolder2": {"/folder3/subfolder2/subfolder1"},
"/folder3/subfolder2/subfolder1": {"/folder3/subfolder2/subfolder1/file1"},
"/folder4": {"/folder4/file1"},
},
}
noopFn := func(fileInfo FileInfo) error { return nil }
@ -89,6 +95,7 @@ func TestWalkFallback(t *testing.T) {
name string
fn WalkFn
from string
options []func(*WalkOptions)
expected []string
err bool
}{
@ -101,6 +108,15 @@ func TestWalkFallback(t *testing.T) {
"/folder1/file1",
"/folder2",
"/folder2/file1",
"/folder3",
"/folder3/subfolder1",
"/folder3/subfolder1/subfolder1",
"/folder3/subfolder1/subfolder1/file1",
"/folder3/subfolder2",
"/folder3/subfolder2/subfolder1",
"/folder3/subfolder2/subfolder1/file1",
"/folder4",
"/folder4/file1",
},
},
{
@ -120,8 +136,55 @@ func TestWalkFallback(t *testing.T) {
// skip /folder1/file1
"/folder2",
"/folder2/file1",
"/folder3",
"/folder3/subfolder1",
"/folder3/subfolder1/subfolder1",
"/folder3/subfolder1/subfolder1/file1",
"/folder3/subfolder2",
"/folder3/subfolder2/subfolder1",
"/folder3/subfolder2/subfolder1/file1",
"/folder4",
"/folder4/file1",
},
},
{
name: "start late without from",
fn: noopFn,
options: []func(*WalkOptions){
WithStartAfterHint("/folder3/subfolder1/subfolder1/file1"),
},
expected: []string{
// start late
"/folder3",
"/folder3/subfolder1",
"/folder3/subfolder1/subfolder1",
"/folder3/subfolder1/subfolder1/file1",
"/folder3/subfolder2",
"/folder3/subfolder2/subfolder1",
"/folder3/subfolder2/subfolder1/file1",
"/folder4",
"/folder4/file1",
},
err: false,
},
{
name: "start late with from",
fn: noopFn,
from: "/folder3",
options: []func(*WalkOptions){
WithStartAfterHint("/folder3/subfolder1/subfolder1/file1"),
},
expected: []string{
// start late
"/folder3/subfolder1",
"/folder3/subfolder1/subfolder1",
"/folder3/subfolder1/subfolder1/file1",
"/folder3/subfolder2",
"/folder3/subfolder2/subfolder1",
"/folder3/subfolder2/subfolder1/file1",
},
err: false,
},
{
name: "stop early",
fn: func(fileInfo FileInfo) error {
@ -159,7 +222,7 @@ func TestWalkFallback(t *testing.T) {
t.Fatalf("fileInfo isDir not matching file system: expected %t actual %t", d.isDir(fileInfo.Path()), fileInfo.IsDir())
}
return tc.fn(fileInfo)
})
}, tc.options...)
if tc.err && err == nil {
t.Fatalf("expected err")
}