From 8e4a8517c5b3d7c2af7c1b418c5e353c21379017 Mon Sep 17 00:00:00 2001 From: James Hewitt Date: Fri, 18 Aug 2023 13:51:54 +0100 Subject: [PATCH 1/4] Revert "fix: resolve most comments" This reverts commit 6a5846b32eb9f9b750c492d4eca1112bdb6855a9. Signed-off-by: James Hewitt --- registry/storage/catalog.go | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/registry/storage/catalog.go b/registry/storage/catalog.go index 019d159e..09a687d8 100644 --- a/registry/storage/catalog.go +++ b/registry/storage/catalog.go @@ -14,21 +14,20 @@ import ( ) var ( - // ErrStopReposWalk is a sentinel error to indicate that the repository path walk was stopped. + // ErrStopReposWalk is used as a return value to indicate that the repository path walk + // should be stopped. It's not returned as an error by any function. ErrStopReposWalk = errors.New("stop repos walk") ) -// Repositories populates the passed passed repos slice with repositories in the -// registry up to the capacity of the slice. Returns the number of repos returned and -// `io.EOF` if no more repositories are available. +// Returns a list or a partial list of repositories in the registry. // Because it's a quite expensive operation, it should only be used when building up // an initial set of repositories. func (reg *registry) Repositories(ctx context.Context, repos []string, last string) (n int, err error) { var finishedWalk bool var foundRepos []string - if len(repos) == 0 && cap(repos) == 0 { - return 0, errors.New("no repos requested") + if len(repos) == 0 { + return -1, errors.New("no repos requested") } root, err := pathFor(repositoriesRootPathSpec{}) From ad11105052ce28c66aeb57d0a563be251309232d Mon Sep 17 00:00:00 2001 From: James Hewitt Date: Fri, 18 Aug 2023 13:52:19 +0100 Subject: [PATCH 2/4] Revert "removed redundant check" This reverts commit 0f846853feb903585d5733e7aa6b56d564b03e3d. Signed-off-by: James Hewitt --- registry/storage/catalog.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/registry/storage/catalog.go b/registry/storage/catalog.go index 09a687d8..754893f9 100644 --- a/registry/storage/catalog.go +++ b/registry/storage/catalog.go @@ -112,6 +112,14 @@ func (reg *registry) walkRepos(ctx context.Context, root, last string, fn func(r // walkReposPath walks through all folders in `lookPath`, // looking for repositories. See walkRepos for more detailed description. func (reg *registry) walkReposPath(ctx context.Context, root, lookPath, last string, fn func(repoPath string) error) error { + // ensure that the current path is a dir, otherwise we just return + if f, err := reg.blobStore.driver.Stat(ctx, lookPath); err != nil || !f.IsDir() { + if err != nil { + return err + } + return nil + } + // get children in the current path children, err := reg.blobStore.driver.List(ctx, lookPath) if err != nil { From 8fd504debe08833a9f58e7281b1f048598a4f1d5 Mon Sep 17 00:00:00 2001 From: James Hewitt Date: Fri, 18 Aug 2023 13:52:28 +0100 Subject: [PATCH 3/4] Revert "Rename catalog funcs and update their godocs." This reverts commit 230cc72a8b3a7d724365f449ab14da61b9112eae. Signed-off-by: James Hewitt --- registry/storage/catalog.go | 62 +++++++++++------------ registry/storage/catalog_test.go | 85 ++++++++++++++++++++++++++++++++ 2 files changed, 115 insertions(+), 32 deletions(-) diff --git a/registry/storage/catalog.go b/registry/storage/catalog.go index 754893f9..2afbdfc8 100644 --- a/registry/storage/catalog.go +++ b/registry/storage/catalog.go @@ -3,7 +3,6 @@ package storage import ( "context" "errors" - "fmt" "io" "path" "sort" @@ -14,12 +13,10 @@ import ( ) var ( - // ErrStopReposWalk is used as a return value to indicate that the repository path walk - // should be stopped. It's not returned as an error by any function. - ErrStopReposWalk = errors.New("stop repos walk") + ErrStopRec = errors.New("Stopped the recursion for getting repositories") ) -// Returns a list or a partial list of repositories in the registry. +// Returns a list, or partial list, of repositories in the registry. // Because it's a quite expensive operation, it should only be used when building up // an initial set of repositories. func (reg *registry) Repositories(ctx context.Context, repos []string, last string) (n int, err error) { @@ -27,7 +24,7 @@ func (reg *registry) Repositories(ctx context.Context, repos []string, last stri var foundRepos []string if len(repos) == 0 { - return -1, errors.New("no repos requested") + return 0, errors.New("no space in slice") } root, err := pathFor(repositoriesRootPathSpec{}) @@ -35,14 +32,14 @@ func (reg *registry) Repositories(ctx context.Context, repos []string, last stri return 0, err } - err = reg.walkRepos(ctx, root, last, func(repoPath string) error { + err = reg.getRepositories(ctx, root, last, func(repoPath string) error { // this is placed before the append, - // so that we will get an extra repo if - // any. This ensures that we do not return + // so that we will get a extra repo if + // any. This assures that we do not return // io.EOF without it being the last record. if len(foundRepos) == len(repos) { finishedWalk = true - return ErrStopReposWalk + return ErrStopRec } foundRepos = append(foundRepos, repoPath) @@ -69,7 +66,7 @@ func (reg *registry) Enumerate(ctx context.Context, ingester func(string) error) return err } - return reg.walkRepos(ctx, root, "", ingester) + return reg.getRepositories(ctx, root, "", ingester) } // Remove removes a repository from storage @@ -82,11 +79,12 @@ func (reg *registry) Remove(ctx context.Context, name reference.Named) error { return reg.driver.Delete(ctx, repoDir) } -// walkRepos walks paths rooted in root calling fn for each found repo path. -// Paths are walked in a lexical order which makes the output deterministic. -// If last is not an empty string it walks all repo paths. Otherwise -// it returns when last repoPath is found. -func (reg *registry) walkRepos(ctx context.Context, root, last string, fn func(repoPath string) error) error { +// getRepositories is a helper function for getRepositoriesRec calls +// the function fn with a repository path, if the current path looked +// at is a repository and is lexicographically after last. It is possible +// to return driver.ErrSkipDir, if there is no interest in any repositories +// under the given `repoPath`, or call ErrStopRec if the recursion should stop. +func (reg *registry) getRepositories(ctx context.Context, root, last string, fn func(repoPath string) error) error { midFn := fn // middleware func to exclude the `last` repo @@ -102,16 +100,16 @@ func (reg *registry) walkRepos(ctx context.Context, root, last string, fn func(r // call our recursive func, with the midFn and the start path // of where we want to find repositories. - err := reg.walkReposPath(ctx, root, root, last, midFn) - if err == ErrStopReposWalk { + err := reg.getRepositoriesRec(ctx, root, root, last, midFn) + if err == ErrStopRec { return nil } return err } -// walkReposPath walks through all folders in `lookPath`, -// looking for repositories. See walkRepos for more detailed description. -func (reg *registry) walkReposPath(ctx context.Context, root, lookPath, last string, fn func(repoPath string) error) error { +// getRepositoriesRec recurse through all folders it the `lookPath`, +// there it will try to find repositories. See getRepositories for more. +func (reg *registry) getRepositoriesRec(ctx context.Context, root, lookPath, last string, fn func(repoPath string) error) error { // ensure that the current path is a dir, otherwise we just return if f, err := reg.blobStore.driver.Stat(ctx, lookPath); err != nil || !f.IsDir() { if err != nil { @@ -130,25 +128,25 @@ func (reg *registry) walkReposPath(ctx context.Context, root, lookPath, last str sort.Strings(children) if last != "" { - splitLast := strings.Split(last, "/") + splitLasts := strings.Split(last, "/") - // call the next iteration of walkReposPath if any, but + // call the next iteration of getRepositoriesRec if any, but // exclude the current one. - if len(splitLast) > 1 { - if err := reg.walkReposPath(ctx, root, lookPath+"/"+splitLast[0], strings.Join(splitLast[1:], "/"), fn); err != nil { + if len(splitLasts) > 1 { + if err := reg.getRepositoriesRec(ctx, root, lookPath+"/"+splitLasts[0], strings.Join(splitLasts[1:], "/"), fn); err != nil { return err } } // find current last path in our children - n := sort.SearchStrings(children, lookPath+"/"+splitLast[0]) - if n == len(children) || children[n] != lookPath+"/"+splitLast[0] { - return fmt.Errorf("%q repository not found", last) + n := sort.SearchStrings(children, lookPath+"/"+splitLasts[0]) + if n == len(children) || children[n] != lookPath+"/"+splitLasts[0] { + return errors.New("the provided 'last' repositories does not exists") } // if this is not a final `last` (there are more `/` left) // then exclude the current index, else include it - if len(splitLast) > 1 { + if len(splitLasts) > 1 { children = children[n+1:] } else { children = children[n:] @@ -158,15 +156,15 @@ func (reg *registry) walkReposPath(ctx context.Context, root, lookPath, last str for _, child := range children { _, file := path.Split(child) - if file == "_manifests" { + if file == "_manifest" { if err := fn(strings.TrimPrefix(lookPath, root+"/")); err != nil { if err == driver.ErrSkipDir { break } return err } - } else if !strings.HasPrefix(file, "_") { - if err := reg.walkReposPath(ctx, root, child, "", fn); err != nil { + } else if file[0] != '_' { + if err := reg.getRepositoriesRec(ctx, root, child, "", fn); err != nil { return err } } diff --git a/registry/storage/catalog_test.go b/registry/storage/catalog_test.go index 554365d9..20764055 100644 --- a/registry/storage/catalog_test.go +++ b/registry/storage/catalog_test.go @@ -240,3 +240,88 @@ func TestCatalogWalkError(t *testing.T) { t.Errorf("Expected catalog driver list error") } } +<<<<<<< HEAD + +func BenchmarkPathCompareEqual(B *testing.B) { + B.StopTimer() + pp := randomPath(100) + // make a real copy + ppb := append([]byte{}, []byte(pp)...) + a, b := pp, string(ppb) + + B.StartTimer() + for i := 0; i < B.N; i++ { + lessPath(a, b) + } +} + +func BenchmarkPathCompareNotEqual(B *testing.B) { + B.StopTimer() + a, b := randomPath(100), randomPath(100) + B.StartTimer() + + for i := 0; i < B.N; i++ { + lessPath(a, b) + } +} + +func BenchmarkPathCompareNative(B *testing.B) { + B.StopTimer() + a, b := randomPath(100), randomPath(100) + B.StartTimer() + + for i := 0; i < B.N; i++ { + c := a < b + _ = c && false + } +} + +func BenchmarkPathCompareNativeEqual(B *testing.B) { + B.StopTimer() + pp := randomPath(100) + a, b := pp, pp + B.StartTimer() + + for i := 0; i < B.N; i++ { + c := a < b + _ = c && false + } +} + +var ( + filenameChars = []byte("abcdefghijklmnopqrstuvwxyz0123456789") + separatorChars = []byte("._-") +) + +func randomPath(length int64) string { + path := "/" + for int64(len(path)) < length { + chunkLength := rand.Int63n(length-int64(len(path))) + 1 + chunk := randomFilename(chunkLength) + path += chunk + remaining := length - int64(len(path)) + if remaining == 1 { + path += randomFilename(1) + } else if remaining > 1 { + path += "/" + } + } + return path +} + +func randomFilename(length int64) string { + b := make([]byte, length) + wasSeparator := true + for i := range b { + if !wasSeparator && i < len(b)-1 && rand.Intn(4) == 0 { + b[i] = separatorChars[rand.Intn(len(separatorChars))] + wasSeparator = true + } else { + b[i] = filenameChars[rand.Intn(len(filenameChars))] + wasSeparator = false + } + } + return string(b) +} +======= +>>>>>>> 27bd92bd (fix bug in catalog last param and optimized it) From 37a213dc4b2aaebc0c1a8077bf8f0cf82d67df8e Mon Sep 17 00:00:00 2001 From: James Hewitt Date: Fri, 18 Aug 2023 13:52:37 +0100 Subject: [PATCH 4/4] Revert "optimize catalog last param" This reverts commit 65f4ce4d9397fbec875e985d7169186ac94c8ac2. Signed-off-by: James Hewitt --- registry/storage/catalog.go | 163 ++++++++++++++----------------- registry/storage/catalog_test.go | 4 +- 2 files changed, 75 insertions(+), 92 deletions(-) diff --git a/registry/storage/catalog.go b/registry/storage/catalog.go index 2afbdfc8..55500694 100644 --- a/registry/storage/catalog.go +++ b/registry/storage/catalog.go @@ -5,17 +5,12 @@ import ( "errors" "io" "path" - "sort" "strings" "github.com/distribution/distribution/v3/reference" "github.com/distribution/distribution/v3/registry/storage/driver" ) -var ( - ErrStopRec = errors.New("Stopped the recursion for getting repositories") -) - // Returns a list, or partial list, of repositories in the registry. // Because it's a quite expensive operation, it should only be used when building up // an initial set of repositories. @@ -32,17 +27,20 @@ func (reg *registry) Repositories(ctx context.Context, repos []string, last stri return 0, err } - err = reg.getRepositories(ctx, root, last, func(repoPath string) error { - // this is placed before the append, - // so that we will get a extra repo if - // any. This assures that we do not return - // io.EOF without it being the last record. - if len(foundRepos) == len(repos) { - finishedWalk = true - return ErrStopRec + err = reg.blobStore.driver.Walk(ctx, root, func(fileInfo driver.FileInfo) error { + err := handleRepository(fileInfo, root, last, func(repoPath string) error { + foundRepos = append(foundRepos, repoPath) + return nil + }) + if err != nil { + return err } - foundRepos = append(foundRepos, repoPath) + // if we've filled our array, no need to walk any further + if len(foundRepos) == len(repos) { + finishedWalk = true + return driver.ErrSkipDir + } return nil }) @@ -66,7 +64,11 @@ func (reg *registry) Enumerate(ctx context.Context, ingester func(string) error) return err } - return reg.getRepositories(ctx, root, "", ingester) + err = reg.blobStore.driver.Walk(ctx, root, func(fileInfo driver.FileInfo) error { + return handleRepository(fileInfo, root, "", ingester) + }) + + return err } // Remove removes a repository from storage @@ -79,95 +81,78 @@ func (reg *registry) Remove(ctx context.Context, name reference.Named) error { return reg.driver.Delete(ctx, repoDir) } -// getRepositories is a helper function for getRepositoriesRec calls -// the function fn with a repository path, if the current path looked -// at is a repository and is lexicographically after last. It is possible -// to return driver.ErrSkipDir, if there is no interest in any repositories -// under the given `repoPath`, or call ErrStopRec if the recursion should stop. -func (reg *registry) getRepositories(ctx context.Context, root, last string, fn func(repoPath string) error) error { - midFn := fn - - // middleware func to exclude the `last` repo - // only use it, if there is set a last. - if last != "" { - midFn = func(repoPath string) error { - if repoPath != last { - return fn(repoPath) - } - return nil - } - } - - // call our recursive func, with the midFn and the start path - // of where we want to find repositories. - err := reg.getRepositoriesRec(ctx, root, root, last, midFn) - if err == ErrStopRec { - return nil - } - return err +// lessPath returns true if one path a is less than path b. +// +// A component-wise comparison is done, rather than the lexical comparison of +// strings. +func lessPath(a, b string) bool { + // we provide this behavior by making separator always sort first. + return compareReplaceInline(a, b, '/', '\x00') < 0 } -// getRepositoriesRec recurse through all folders it the `lookPath`, -// there it will try to find repositories. See getRepositories for more. -func (reg *registry) getRepositoriesRec(ctx context.Context, root, lookPath, last string, fn func(repoPath string) error) error { - // ensure that the current path is a dir, otherwise we just return - if f, err := reg.blobStore.driver.Stat(ctx, lookPath); err != nil || !f.IsDir() { - if err != nil { - return err - } - return nil +// compareReplaceInline modifies runtime.cmpstring to replace old with new +// during a byte-wise comparison. +func compareReplaceInline(s1, s2 string, old, new byte) int { + // TODO(stevvooe): We are missing an optimization when the s1 and s2 have + // the exact same slice header. It will make the code unsafe but can + // provide some extra performance. + + l := len(s1) + if len(s2) < l { + l = len(s2) } - // get children in the current path - children, err := reg.blobStore.driver.List(ctx, lookPath) - if err != nil { - return err - } - - // sort this, so that it will be added in the correct order - sort.Strings(children) - - if last != "" { - splitLasts := strings.Split(last, "/") - - // call the next iteration of getRepositoriesRec if any, but - // exclude the current one. - if len(splitLasts) > 1 { - if err := reg.getRepositoriesRec(ctx, root, lookPath+"/"+splitLasts[0], strings.Join(splitLasts[1:], "/"), fn); err != nil { - return err - } + for i := 0; i < l; i++ { + c1, c2 := s1[i], s2[i] + if c1 == old { + c1 = new } - // find current last path in our children - n := sort.SearchStrings(children, lookPath+"/"+splitLasts[0]) - if n == len(children) || children[n] != lookPath+"/"+splitLasts[0] { - return errors.New("the provided 'last' repositories does not exists") + if c2 == old { + c2 = new } - // if this is not a final `last` (there are more `/` left) - // then exclude the current index, else include it - if len(splitLasts) > 1 { - children = children[n+1:] - } else { - children = children[n:] + if c1 < c2 { + return -1 + } + + if c1 > c2 { + return +1 } } - for _, child := range children { - _, file := path.Split(child) + if len(s1) < len(s2) { + return -1 + } - if file == "_manifest" { - if err := fn(strings.TrimPrefix(lookPath, root+"/")); err != nil { - if err == driver.ErrSkipDir { - break - } - return err - } - } else if file[0] != '_' { - if err := reg.getRepositoriesRec(ctx, root, child, "", fn); err != nil { + if len(s1) > len(s2) { + return +1 + } + + return 0 +} + +// handleRepository calls function fn with a repository path if fileInfo +// has a path of a repository under root and that it is lexographically +// after last. Otherwise, it will return ErrSkipDir. This should be used +// with Walk to do handling with repositories in a storage. +func handleRepository(fileInfo driver.FileInfo, root, last string, fn func(repoPath string) error) error { + filePath := fileInfo.Path() + + // lop the base path off + repo := filePath[len(root)+1:] + + _, file := path.Split(repo) + if file == "_manifests" { + repo = strings.TrimSuffix(repo, "/_manifests") + if lessPath(last, repo) { + if err := fn(repo); err != nil { return err } } + return driver.ErrSkipDir + } else if strings.HasPrefix(file, "_") { + return driver.ErrSkipDir } return nil diff --git a/registry/storage/catalog_test.go b/registry/storage/catalog_test.go index 20764055..14ba4e83 100644 --- a/registry/storage/catalog_test.go +++ b/registry/storage/catalog_test.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "io" + "math/rand" "testing" "github.com/distribution/distribution/v3" @@ -240,7 +241,6 @@ func TestCatalogWalkError(t *testing.T) { t.Errorf("Expected catalog driver list error") } } -<<<<<<< HEAD func BenchmarkPathCompareEqual(B *testing.B) { B.StopTimer() @@ -323,5 +323,3 @@ func randomFilename(length int64) string { } return string(b) } -======= ->>>>>>> 27bd92bd (fix bug in catalog last param and optimized it)