From 65f4ce4d9397fbec875e985d7169186ac94c8ac2 Mon Sep 17 00:00:00 2001 From: "eyjhbb@gmail.com" Date: Tue, 27 Jun 2023 12:26:51 +0200 Subject: [PATCH 1/4] optimize catalog last param Signed-off-by: eyjhb Signed-off-by: David van der Spek --- registry/storage/catalog.go | 169 +++++++++++++++++-------------- registry/storage/catalog_test.go | 4 +- 2 files changed, 95 insertions(+), 78 deletions(-) diff --git a/registry/storage/catalog.go b/registry/storage/catalog.go index 55500694d..2afbdfc89 100644 --- a/registry/storage/catalog.go +++ b/registry/storage/catalog.go @@ -5,12 +5,17 @@ import ( "errors" "io" "path" + "sort" "strings" "github.com/distribution/distribution/v3/reference" "github.com/distribution/distribution/v3/registry/storage/driver" ) +var ( + ErrStopRec = errors.New("Stopped the recursion for getting repositories") +) + // Returns a list, or partial list, of repositories in the registry. // Because it's a quite expensive operation, it should only be used when building up // an initial set of repositories. @@ -27,21 +32,18 @@ func (reg *registry) Repositories(ctx context.Context, repos []string, last stri return 0, err } - err = reg.blobStore.driver.Walk(ctx, root, func(fileInfo driver.FileInfo) error { - err := handleRepository(fileInfo, root, last, func(repoPath string) error { - foundRepos = append(foundRepos, repoPath) - return nil - }) - if err != nil { - return err - } - - // if we've filled our array, no need to walk any further + err = reg.getRepositories(ctx, root, last, func(repoPath string) error { + // this is placed before the append, + // so that we will get a extra repo if + // any. This assures that we do not return + // io.EOF without it being the last record. if len(foundRepos) == len(repos) { finishedWalk = true - return driver.ErrSkipDir + return ErrStopRec } + foundRepos = append(foundRepos, repoPath) + return nil }) @@ -64,11 +66,7 @@ func (reg *registry) Enumerate(ctx context.Context, ingester func(string) error) return err } - err = reg.blobStore.driver.Walk(ctx, root, func(fileInfo driver.FileInfo) error { - return handleRepository(fileInfo, root, "", ingester) - }) - - return err + return reg.getRepositories(ctx, root, "", ingester) } // Remove removes a repository from storage @@ -81,78 +79,95 @@ func (reg *registry) Remove(ctx context.Context, name reference.Named) error { return reg.driver.Delete(ctx, repoDir) } -// lessPath returns true if one path a is less than path b. -// -// A component-wise comparison is done, rather than the lexical comparison of -// strings. -func lessPath(a, b string) bool { - // we provide this behavior by making separator always sort first. - return compareReplaceInline(a, b, '/', '\x00') < 0 +// getRepositories is a helper function for getRepositoriesRec calls +// the function fn with a repository path, if the current path looked +// at is a repository and is lexicographically after last. It is possible +// to return driver.ErrSkipDir, if there is no interest in any repositories +// under the given `repoPath`, or call ErrStopRec if the recursion should stop. +func (reg *registry) getRepositories(ctx context.Context, root, last string, fn func(repoPath string) error) error { + midFn := fn + + // middleware func to exclude the `last` repo + // only use it, if there is set a last. + if last != "" { + midFn = func(repoPath string) error { + if repoPath != last { + return fn(repoPath) + } + return nil + } + } + + // call our recursive func, with the midFn and the start path + // of where we want to find repositories. + err := reg.getRepositoriesRec(ctx, root, root, last, midFn) + if err == ErrStopRec { + return nil + } + return err } -// compareReplaceInline modifies runtime.cmpstring to replace old with new -// during a byte-wise comparison. -func compareReplaceInline(s1, s2 string, old, new byte) int { - // TODO(stevvooe): We are missing an optimization when the s1 and s2 have - // the exact same slice header. It will make the code unsafe but can - // provide some extra performance. - - l := len(s1) - if len(s2) < l { - l = len(s2) +// getRepositoriesRec recurse through all folders it the `lookPath`, +// there it will try to find repositories. See getRepositories for more. +func (reg *registry) getRepositoriesRec(ctx context.Context, root, lookPath, last string, fn func(repoPath string) error) error { + // ensure that the current path is a dir, otherwise we just return + if f, err := reg.blobStore.driver.Stat(ctx, lookPath); err != nil || !f.IsDir() { + if err != nil { + return err + } + return nil } - for i := 0; i < l; i++ { - c1, c2 := s1[i], s2[i] - if c1 == old { - c1 = new - } - - if c2 == old { - c2 = new - } - - if c1 < c2 { - return -1 - } - - if c1 > c2 { - return +1 - } + // get children in the current path + children, err := reg.blobStore.driver.List(ctx, lookPath) + if err != nil { + return err } - if len(s1) < len(s2) { - return -1 - } + // sort this, so that it will be added in the correct order + sort.Strings(children) - if len(s1) > len(s2) { - return +1 - } + if last != "" { + splitLasts := strings.Split(last, "/") - return 0 -} - -// handleRepository calls function fn with a repository path if fileInfo -// has a path of a repository under root and that it is lexographically -// after last. Otherwise, it will return ErrSkipDir. This should be used -// with Walk to do handling with repositories in a storage. -func handleRepository(fileInfo driver.FileInfo, root, last string, fn func(repoPath string) error) error { - filePath := fileInfo.Path() - - // lop the base path off - repo := filePath[len(root)+1:] - - _, file := path.Split(repo) - if file == "_manifests" { - repo = strings.TrimSuffix(repo, "/_manifests") - if lessPath(last, repo) { - if err := fn(repo); err != nil { + // call the next iteration of getRepositoriesRec if any, but + // exclude the current one. + if len(splitLasts) > 1 { + if err := reg.getRepositoriesRec(ctx, root, lookPath+"/"+splitLasts[0], strings.Join(splitLasts[1:], "/"), fn); err != nil { + return err + } + } + + // find current last path in our children + n := sort.SearchStrings(children, lookPath+"/"+splitLasts[0]) + if n == len(children) || children[n] != lookPath+"/"+splitLasts[0] { + return errors.New("the provided 'last' repositories does not exists") + } + + // if this is not a final `last` (there are more `/` left) + // then exclude the current index, else include it + if len(splitLasts) > 1 { + children = children[n+1:] + } else { + children = children[n:] + } + } + + for _, child := range children { + _, file := path.Split(child) + + if file == "_manifest" { + if err := fn(strings.TrimPrefix(lookPath, root+"/")); err != nil { + if err == driver.ErrSkipDir { + break + } + return err + } + } else if file[0] != '_' { + if err := reg.getRepositoriesRec(ctx, root, child, "", fn); err != nil { return err } } - return driver.ErrSkipDir - } else if strings.HasPrefix(file, "_") { - return driver.ErrSkipDir } return nil diff --git a/registry/storage/catalog_test.go b/registry/storage/catalog_test.go index 14ba4e833..207640558 100644 --- a/registry/storage/catalog_test.go +++ b/registry/storage/catalog_test.go @@ -4,7 +4,6 @@ import ( "context" "fmt" "io" - "math/rand" "testing" "github.com/distribution/distribution/v3" @@ -241,6 +240,7 @@ func TestCatalogWalkError(t *testing.T) { t.Errorf("Expected catalog driver list error") } } +<<<<<<< HEAD func BenchmarkPathCompareEqual(B *testing.B) { B.StopTimer() @@ -323,3 +323,5 @@ func randomFilename(length int64) string { } return string(b) } +======= +>>>>>>> 27bd92bd (fix bug in catalog last param and optimized it) From 230cc72a8b3a7d724365f449ab14da61b9112eae Mon Sep 17 00:00:00 2001 From: Milos Gajdos Date: Tue, 27 Jun 2023 12:26:56 +0200 Subject: [PATCH 2/4] Rename catalog funcs and update their godocs. Signed-off-by: Milos Gajdos Signed-off-by: David van der Spek --- registry/storage/catalog.go | 62 ++++++++++++----------- registry/storage/catalog_test.go | 85 -------------------------------- 2 files changed, 32 insertions(+), 115 deletions(-) diff --git a/registry/storage/catalog.go b/registry/storage/catalog.go index 2afbdfc89..754893f93 100644 --- a/registry/storage/catalog.go +++ b/registry/storage/catalog.go @@ -3,6 +3,7 @@ package storage import ( "context" "errors" + "fmt" "io" "path" "sort" @@ -13,10 +14,12 @@ import ( ) var ( - ErrStopRec = errors.New("Stopped the recursion for getting repositories") + // ErrStopReposWalk is used as a return value to indicate that the repository path walk + // should be stopped. It's not returned as an error by any function. + ErrStopReposWalk = errors.New("stop repos walk") ) -// Returns a list, or partial list, of repositories in the registry. +// Returns a list or a partial list of repositories in the registry. // Because it's a quite expensive operation, it should only be used when building up // an initial set of repositories. func (reg *registry) Repositories(ctx context.Context, repos []string, last string) (n int, err error) { @@ -24,7 +27,7 @@ func (reg *registry) Repositories(ctx context.Context, repos []string, last stri var foundRepos []string if len(repos) == 0 { - return 0, errors.New("no space in slice") + return -1, errors.New("no repos requested") } root, err := pathFor(repositoriesRootPathSpec{}) @@ -32,14 +35,14 @@ func (reg *registry) Repositories(ctx context.Context, repos []string, last stri return 0, err } - err = reg.getRepositories(ctx, root, last, func(repoPath string) error { + err = reg.walkRepos(ctx, root, last, func(repoPath string) error { // this is placed before the append, - // so that we will get a extra repo if - // any. This assures that we do not return + // so that we will get an extra repo if + // any. This ensures that we do not return // io.EOF without it being the last record. if len(foundRepos) == len(repos) { finishedWalk = true - return ErrStopRec + return ErrStopReposWalk } foundRepos = append(foundRepos, repoPath) @@ -66,7 +69,7 @@ func (reg *registry) Enumerate(ctx context.Context, ingester func(string) error) return err } - return reg.getRepositories(ctx, root, "", ingester) + return reg.walkRepos(ctx, root, "", ingester) } // Remove removes a repository from storage @@ -79,12 +82,11 @@ func (reg *registry) Remove(ctx context.Context, name reference.Named) error { return reg.driver.Delete(ctx, repoDir) } -// getRepositories is a helper function for getRepositoriesRec calls -// the function fn with a repository path, if the current path looked -// at is a repository and is lexicographically after last. It is possible -// to return driver.ErrSkipDir, if there is no interest in any repositories -// under the given `repoPath`, or call ErrStopRec if the recursion should stop. -func (reg *registry) getRepositories(ctx context.Context, root, last string, fn func(repoPath string) error) error { +// walkRepos walks paths rooted in root calling fn for each found repo path. +// Paths are walked in a lexical order which makes the output deterministic. +// If last is not an empty string it walks all repo paths. Otherwise +// it returns when last repoPath is found. +func (reg *registry) walkRepos(ctx context.Context, root, last string, fn func(repoPath string) error) error { midFn := fn // middleware func to exclude the `last` repo @@ -100,16 +102,16 @@ func (reg *registry) getRepositories(ctx context.Context, root, last string, fn // call our recursive func, with the midFn and the start path // of where we want to find repositories. - err := reg.getRepositoriesRec(ctx, root, root, last, midFn) - if err == ErrStopRec { + err := reg.walkReposPath(ctx, root, root, last, midFn) + if err == ErrStopReposWalk { return nil } return err } -// getRepositoriesRec recurse through all folders it the `lookPath`, -// there it will try to find repositories. See getRepositories for more. -func (reg *registry) getRepositoriesRec(ctx context.Context, root, lookPath, last string, fn func(repoPath string) error) error { +// walkReposPath walks through all folders in `lookPath`, +// looking for repositories. See walkRepos for more detailed description. +func (reg *registry) walkReposPath(ctx context.Context, root, lookPath, last string, fn func(repoPath string) error) error { // ensure that the current path is a dir, otherwise we just return if f, err := reg.blobStore.driver.Stat(ctx, lookPath); err != nil || !f.IsDir() { if err != nil { @@ -128,25 +130,25 @@ func (reg *registry) getRepositoriesRec(ctx context.Context, root, lookPath, las sort.Strings(children) if last != "" { - splitLasts := strings.Split(last, "/") + splitLast := strings.Split(last, "/") - // call the next iteration of getRepositoriesRec if any, but + // call the next iteration of walkReposPath if any, but // exclude the current one. - if len(splitLasts) > 1 { - if err := reg.getRepositoriesRec(ctx, root, lookPath+"/"+splitLasts[0], strings.Join(splitLasts[1:], "/"), fn); err != nil { + if len(splitLast) > 1 { + if err := reg.walkReposPath(ctx, root, lookPath+"/"+splitLast[0], strings.Join(splitLast[1:], "/"), fn); err != nil { return err } } // find current last path in our children - n := sort.SearchStrings(children, lookPath+"/"+splitLasts[0]) - if n == len(children) || children[n] != lookPath+"/"+splitLasts[0] { - return errors.New("the provided 'last' repositories does not exists") + n := sort.SearchStrings(children, lookPath+"/"+splitLast[0]) + if n == len(children) || children[n] != lookPath+"/"+splitLast[0] { + return fmt.Errorf("%q repository not found", last) } // if this is not a final `last` (there are more `/` left) // then exclude the current index, else include it - if len(splitLasts) > 1 { + if len(splitLast) > 1 { children = children[n+1:] } else { children = children[n:] @@ -156,15 +158,15 @@ func (reg *registry) getRepositoriesRec(ctx context.Context, root, lookPath, las for _, child := range children { _, file := path.Split(child) - if file == "_manifest" { + if file == "_manifests" { if err := fn(strings.TrimPrefix(lookPath, root+"/")); err != nil { if err == driver.ErrSkipDir { break } return err } - } else if file[0] != '_' { - if err := reg.getRepositoriesRec(ctx, root, child, "", fn); err != nil { + } else if !strings.HasPrefix(file, "_") { + if err := reg.walkReposPath(ctx, root, child, "", fn); err != nil { return err } } diff --git a/registry/storage/catalog_test.go b/registry/storage/catalog_test.go index 207640558..554365d97 100644 --- a/registry/storage/catalog_test.go +++ b/registry/storage/catalog_test.go @@ -240,88 +240,3 @@ func TestCatalogWalkError(t *testing.T) { t.Errorf("Expected catalog driver list error") } } -<<<<<<< HEAD - -func BenchmarkPathCompareEqual(B *testing.B) { - B.StopTimer() - pp := randomPath(100) - // make a real copy - ppb := append([]byte{}, []byte(pp)...) - a, b := pp, string(ppb) - - B.StartTimer() - for i := 0; i < B.N; i++ { - lessPath(a, b) - } -} - -func BenchmarkPathCompareNotEqual(B *testing.B) { - B.StopTimer() - a, b := randomPath(100), randomPath(100) - B.StartTimer() - - for i := 0; i < B.N; i++ { - lessPath(a, b) - } -} - -func BenchmarkPathCompareNative(B *testing.B) { - B.StopTimer() - a, b := randomPath(100), randomPath(100) - B.StartTimer() - - for i := 0; i < B.N; i++ { - c := a < b - _ = c && false - } -} - -func BenchmarkPathCompareNativeEqual(B *testing.B) { - B.StopTimer() - pp := randomPath(100) - a, b := pp, pp - B.StartTimer() - - for i := 0; i < B.N; i++ { - c := a < b - _ = c && false - } -} - -var ( - filenameChars = []byte("abcdefghijklmnopqrstuvwxyz0123456789") - separatorChars = []byte("._-") -) - -func randomPath(length int64) string { - path := "/" - for int64(len(path)) < length { - chunkLength := rand.Int63n(length-int64(len(path))) + 1 - chunk := randomFilename(chunkLength) - path += chunk - remaining := length - int64(len(path)) - if remaining == 1 { - path += randomFilename(1) - } else if remaining > 1 { - path += "/" - } - } - return path -} - -func randomFilename(length int64) string { - b := make([]byte, length) - wasSeparator := true - for i := range b { - if !wasSeparator && i < len(b)-1 && rand.Intn(4) == 0 { - b[i] = separatorChars[rand.Intn(len(separatorChars))] - wasSeparator = true - } else { - b[i] = filenameChars[rand.Intn(len(filenameChars))] - wasSeparator = false - } - } - return string(b) -} -======= ->>>>>>> 27bd92bd (fix bug in catalog last param and optimized it) From 0f846853feb903585d5733e7aa6b56d564b03e3d Mon Sep 17 00:00:00 2001 From: eyjhb Date: Tue, 27 Jun 2023 12:27:00 +0200 Subject: [PATCH 3/4] removed redundant check Signed-off-by: eyjhb --- registry/storage/catalog.go | 8 -------- 1 file changed, 8 deletions(-) diff --git a/registry/storage/catalog.go b/registry/storage/catalog.go index 754893f93..09a687d8c 100644 --- a/registry/storage/catalog.go +++ b/registry/storage/catalog.go @@ -112,14 +112,6 @@ func (reg *registry) walkRepos(ctx context.Context, root, last string, fn func(r // walkReposPath walks through all folders in `lookPath`, // looking for repositories. See walkRepos for more detailed description. func (reg *registry) walkReposPath(ctx context.Context, root, lookPath, last string, fn func(repoPath string) error) error { - // ensure that the current path is a dir, otherwise we just return - if f, err := reg.blobStore.driver.Stat(ctx, lookPath); err != nil || !f.IsDir() { - if err != nil { - return err - } - return nil - } - // get children in the current path children, err := reg.blobStore.driver.List(ctx, lookPath) if err != nil { From 6a5846b32eb9f9b750c492d4eca1112bdb6855a9 Mon Sep 17 00:00:00 2001 From: David van der Spek Date: Tue, 27 Jun 2023 12:27:05 +0200 Subject: [PATCH 4/4] fix: resolve most comments Signed-off-by: David van der Spek --- registry/storage/catalog.go | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/registry/storage/catalog.go b/registry/storage/catalog.go index 09a687d8c..019d159ed 100644 --- a/registry/storage/catalog.go +++ b/registry/storage/catalog.go @@ -14,20 +14,21 @@ import ( ) var ( - // ErrStopReposWalk is used as a return value to indicate that the repository path walk - // should be stopped. It's not returned as an error by any function. + // ErrStopReposWalk is a sentinel error to indicate that the repository path walk was stopped. ErrStopReposWalk = errors.New("stop repos walk") ) -// Returns a list or a partial list of repositories in the registry. +// Repositories populates the passed passed repos slice with repositories in the +// registry up to the capacity of the slice. Returns the number of repos returned and +// `io.EOF` if no more repositories are available. // Because it's a quite expensive operation, it should only be used when building up // an initial set of repositories. func (reg *registry) Repositories(ctx context.Context, repos []string, last string) (n int, err error) { var finishedWalk bool var foundRepos []string - if len(repos) == 0 { - return -1, errors.New("no repos requested") + if len(repos) == 0 && cap(repos) == 0 { + return 0, errors.New("no repos requested") } root, err := pathFor(repositoriesRootPathSpec{})