From 65f4ce4d9397fbec875e985d7169186ac94c8ac2 Mon Sep 17 00:00:00 2001 From: "eyjhbb@gmail.com" Date: Tue, 27 Jun 2023 12:26:51 +0200 Subject: [PATCH] optimize catalog last param Signed-off-by: eyjhb Signed-off-by: David van der Spek --- registry/storage/catalog.go | 169 +++++++++++++++++-------------- registry/storage/catalog_test.go | 4 +- 2 files changed, 95 insertions(+), 78 deletions(-) diff --git a/registry/storage/catalog.go b/registry/storage/catalog.go index 55500694..2afbdfc8 100644 --- a/registry/storage/catalog.go +++ b/registry/storage/catalog.go @@ -5,12 +5,17 @@ import ( "errors" "io" "path" + "sort" "strings" "github.com/distribution/distribution/v3/reference" "github.com/distribution/distribution/v3/registry/storage/driver" ) +var ( + ErrStopRec = errors.New("Stopped the recursion for getting repositories") +) + // Returns a list, or partial list, of repositories in the registry. // Because it's a quite expensive operation, it should only be used when building up // an initial set of repositories. @@ -27,21 +32,18 @@ func (reg *registry) Repositories(ctx context.Context, repos []string, last stri return 0, err } - err = reg.blobStore.driver.Walk(ctx, root, func(fileInfo driver.FileInfo) error { - err := handleRepository(fileInfo, root, last, func(repoPath string) error { - foundRepos = append(foundRepos, repoPath) - return nil - }) - if err != nil { - return err - } - - // if we've filled our array, no need to walk any further + err = reg.getRepositories(ctx, root, last, func(repoPath string) error { + // this is placed before the append, + // so that we will get an extra repo if + // any. This ensures that we do not return + // io.EOF without it being the last record. 
+ if len(foundRepos) == len(repos) { finishedWalk = true - return driver.ErrSkipDir + return ErrStopRec } + foundRepos = append(foundRepos, repoPath) + return nil }) @@ -64,11 +66,7 @@ func (reg *registry) Enumerate(ctx context.Context, ingester func(string) error) return err } - err = reg.blobStore.driver.Walk(ctx, root, func(fileInfo driver.FileInfo) error { - return handleRepository(fileInfo, root, "", ingester) - }) - - return err + return reg.getRepositories(ctx, root, "", ingester) } // Remove removes a repository from storage @@ -81,78 +79,95 @@ func (reg *registry) Remove(ctx context.Context, name reference.Named) error { return reg.driver.Delete(ctx, repoDir) } -// lessPath returns true if one path a is less than path b. -// -// A component-wise comparison is done, rather than the lexical comparison of -// strings. -func lessPath(a, b string) bool { - // we provide this behavior by making separator always sort first. - return compareReplaceInline(a, b, '/', '\x00') < 0 +// getRepositories is a wrapper around getRepositoriesRec that calls +// the function fn with a repository path if the current path looked +// at is a repository and is lexicographically after last. fn may +// return driver.ErrSkipDir if there is no interest in any repositories +// under the given `repoPath`, or return ErrStopRec if the recursion should stop. +func (reg *registry) getRepositories(ctx context.Context, root, last string, fn func(repoPath string) error) error { + midFn := fn + + // middleware func to exclude the `last` repo; + // only used if a last is set. + if last != "" { + midFn = func(repoPath string) error { + if repoPath != last { + return fn(repoPath) + } + return nil + } + } + + // call our recursive func, with the midFn and the start path + // of where we want to find repositories. 
+ err := reg.getRepositoriesRec(ctx, root, root, last, midFn) + if err == ErrStopRec { + return nil + } + return err } -// compareReplaceInline modifies runtime.cmpstring to replace old with new -// during a byte-wise comparison. -func compareReplaceInline(s1, s2 string, old, new byte) int { - // TODO(stevvooe): We are missing an optimization when the s1 and s2 have - // the exact same slice header. It will make the code unsafe but can - // provide some extra performance. - - l := len(s1) - if len(s2) < l { - l = len(s2) +// getRepositoriesRec recurses through all folders in the `lookPath`, +// where it will try to find repositories. See getRepositories for more. +func (reg *registry) getRepositoriesRec(ctx context.Context, root, lookPath, last string, fn func(repoPath string) error) error { + // ensure that the current path is a dir, otherwise we just return + if f, err := reg.blobStore.driver.Stat(ctx, lookPath); err != nil || !f.IsDir() { + if err != nil { + return err + } + return nil } - for i := 0; i < l; i++ { - c1, c2 := s1[i], s2[i] - if c1 == old { - c1 = new - } - - if c2 == old { - c2 = new - } - - if c1 < c2 { - return -1 - } - - if c1 > c2 { - return +1 - } + // get children in the current path + children, err := reg.blobStore.driver.List(ctx, lookPath) + if err != nil { + return err } - if len(s1) < len(s2) { - return -1 - } + // sort this, so that it will be added in the correct order + sort.Strings(children) - if len(s1) > len(s2) { - return +1 - } + if last != "" { + splitLasts := strings.Split(last, "/") - return 0 -} - -// handleRepository calls function fn with a repository path if fileInfo -// has a path of a repository under root and that it is lexographically -// after last. Otherwise, it will return ErrSkipDir. This should be used -// with Walk to do handling with repositories in a storage. 
-func handleRepository(fileInfo driver.FileInfo, root, last string, fn func(repoPath string) error) error { - filePath := fileInfo.Path() - - // lop the base path off - repo := filePath[len(root)+1:] - - _, file := path.Split(repo) - if file == "_manifests" { - repo = strings.TrimSuffix(repo, "/_manifests") - if lessPath(last, repo) { - if err := fn(repo); err != nil { + // call the next iteration of getRepositoriesRec if any, but + // exclude the current one. + if len(splitLasts) > 1 { + if err := reg.getRepositoriesRec(ctx, root, lookPath+"/"+splitLasts[0], strings.Join(splitLasts[1:], "/"), fn); err != nil { + return err + } + } + + // find current last path in our children + n := sort.SearchStrings(children, lookPath+"/"+splitLasts[0]) + if n == len(children) || children[n] != lookPath+"/"+splitLasts[0] { + return errors.New("the provided 'last' repositories does not exists") + } + + // if this is not a final `last` (there are more `/` left) + // then exclude the current index, else include it + if len(splitLasts) > 1 { + children = children[n+1:] + } else { + children = children[n:] + } + } + + for _, child := range children { + _, file := path.Split(child) + + if file == "_manifests" { + if err := fn(strings.TrimPrefix(lookPath, root+"/")); err != nil { + if err == driver.ErrSkipDir { + break + } + return err + } + } else if file[0] != '_' { + if err := reg.getRepositoriesRec(ctx, root, child, "", fn); err != nil { return err } } - return driver.ErrSkipDir - } else if strings.HasPrefix(file, "_") { - return driver.ErrSkipDir } return nil diff --git a/registry/storage/catalog_test.go b/registry/storage/catalog_test.go index 14ba4e83..20764055 100644 --- a/registry/storage/catalog_test.go +++ b/registry/storage/catalog_test.go @@ -4,7 +4,6 @@ import ( "context" "fmt" "io" - "math/rand" "testing" "github.com/distribution/distribution/v3" @@ -241,6 +240,7 @@ func TestCatalogWalkError(t *testing.T) { t.Errorf("Expected catalog driver list error") } } +<<<<<<< HEAD 
func BenchmarkPathCompareEqual(B *testing.B) { B.StopTimer() @@ -323,3 +323,5 @@ func randomFilename(length int64) string { } return string(b) } +======= +>>>>>>> 27bd92bd (fix bug in catalog last param and optimized it)