Revert "optimize catalog last param"

This reverts commit 65f4ce4d93.

Signed-off-by: James Hewitt <james.hewitt@uk.ibm.com>
This commit is contained in:
James Hewitt 2023-08-18 13:52:37 +01:00
parent 8fd504debe
commit 37a213dc4b
No known key found for this signature in database
GPG key ID: EA6C3C654B6193E4
2 changed files with 75 additions and 92 deletions

View file

@ -5,17 +5,12 @@ import (
"errors" "errors"
"io" "io"
"path" "path"
"sort"
"strings" "strings"
"github.com/distribution/distribution/v3/reference" "github.com/distribution/distribution/v3/reference"
"github.com/distribution/distribution/v3/registry/storage/driver" "github.com/distribution/distribution/v3/registry/storage/driver"
) )
var (
ErrStopRec = errors.New("Stopped the recursion for getting repositories")
)
// Returns a list, or partial list, of repositories in the registry. // Returns a list, or partial list, of repositories in the registry.
// Because it's a quite expensive operation, it should only be used when building up // Because it's a quite expensive operation, it should only be used when building up
// an initial set of repositories. // an initial set of repositories.
@ -32,17 +27,20 @@ func (reg *registry) Repositories(ctx context.Context, repos []string, last stri
return 0, err return 0, err
} }
err = reg.getRepositories(ctx, root, last, func(repoPath string) error { err = reg.blobStore.driver.Walk(ctx, root, func(fileInfo driver.FileInfo) error {
// this is placed before the append, err := handleRepository(fileInfo, root, last, func(repoPath string) error {
// so that we will get a extra repo if foundRepos = append(foundRepos, repoPath)
// any. This assures that we do not return return nil
// io.EOF without it being the last record. })
if len(foundRepos) == len(repos) { if err != nil {
finishedWalk = true return err
return ErrStopRec
} }
foundRepos = append(foundRepos, repoPath) // if we've filled our array, no need to walk any further
if len(foundRepos) == len(repos) {
finishedWalk = true
return driver.ErrSkipDir
}
return nil return nil
}) })
@ -66,7 +64,11 @@ func (reg *registry) Enumerate(ctx context.Context, ingester func(string) error)
return err return err
} }
return reg.getRepositories(ctx, root, "", ingester) err = reg.blobStore.driver.Walk(ctx, root, func(fileInfo driver.FileInfo) error {
return handleRepository(fileInfo, root, "", ingester)
})
return err
} }
// Remove removes a repository from storage // Remove removes a repository from storage
@ -79,95 +81,78 @@ func (reg *registry) Remove(ctx context.Context, name reference.Named) error {
return reg.driver.Delete(ctx, repoDir) return reg.driver.Delete(ctx, repoDir)
} }
// getRepositories is a helper function for getRepositoriesRec calls // lessPath returns true if one path a is less than path b.
// the function fn with a repository path, if the current path looked //
// at is a repository and is lexicographically after last. It is possible // A component-wise comparison is done, rather than the lexical comparison of
// to return driver.ErrSkipDir, if there is no interest in any repositories // strings.
// under the given `repoPath`, or call ErrStopRec if the recursion should stop. func lessPath(a, b string) bool {
func (reg *registry) getRepositories(ctx context.Context, root, last string, fn func(repoPath string) error) error { // we provide this behavior by making separator always sort first.
midFn := fn return compareReplaceInline(a, b, '/', '\x00') < 0
// middleware func to exclude the `last` repo
// only use it, if there is set a last.
if last != "" {
midFn = func(repoPath string) error {
if repoPath != last {
return fn(repoPath)
} }
return nil
// compareReplaceInline modifies runtime.cmpstring to replace old with new
// during a byte-wise comparison.
func compareReplaceInline(s1, s2 string, old, new byte) int {
// TODO(stevvooe): We are missing an optimization when the s1 and s2 have
// the exact same slice header. It will make the code unsafe but can
// provide some extra performance.
l := len(s1)
if len(s2) < l {
l = len(s2)
}
for i := 0; i < l; i++ {
c1, c2 := s1[i], s2[i]
if c1 == old {
c1 = new
}
if c2 == old {
c2 = new
}
if c1 < c2 {
return -1
}
if c1 > c2 {
return +1
} }
} }
// call our recursive func, with the midFn and the start path if len(s1) < len(s2) {
// of where we want to find repositories. return -1
err := reg.getRepositoriesRec(ctx, root, root, last, midFn)
if err == ErrStopRec {
return nil
}
return err
} }
// getRepositoriesRec recurse through all folders it the `lookPath`, if len(s1) > len(s2) {
// there it will try to find repositories. See getRepositories for more. return +1
func (reg *registry) getRepositoriesRec(ctx context.Context, root, lookPath, last string, fn func(repoPath string) error) error {
// ensure that the current path is a dir, otherwise we just return
if f, err := reg.blobStore.driver.Stat(ctx, lookPath); err != nil || !f.IsDir() {
if err != nil {
return err
}
return nil
} }
// get children in the current path return 0
children, err := reg.blobStore.driver.List(ctx, lookPath)
if err != nil {
return err
} }
// sort this, so that it will be added in the correct order // handleRepository calls function fn with a repository path if fileInfo
sort.Strings(children) // has a path of a repository under root and that it is lexographically
// after last. Otherwise, it will return ErrSkipDir. This should be used
// with Walk to do handling with repositories in a storage.
func handleRepository(fileInfo driver.FileInfo, root, last string, fn func(repoPath string) error) error {
filePath := fileInfo.Path()
if last != "" { // lop the base path off
splitLasts := strings.Split(last, "/") repo := filePath[len(root)+1:]
// call the next iteration of getRepositoriesRec if any, but _, file := path.Split(repo)
// exclude the current one. if file == "_manifests" {
if len(splitLasts) > 1 { repo = strings.TrimSuffix(repo, "/_manifests")
if err := reg.getRepositoriesRec(ctx, root, lookPath+"/"+splitLasts[0], strings.Join(splitLasts[1:], "/"), fn); err != nil { if lessPath(last, repo) {
return err if err := fn(repo); err != nil {
}
}
// find current last path in our children
n := sort.SearchStrings(children, lookPath+"/"+splitLasts[0])
if n == len(children) || children[n] != lookPath+"/"+splitLasts[0] {
return errors.New("the provided 'last' repositories does not exists")
}
// if this is not a final `last` (there are more `/` left)
// then exclude the current index, else include it
if len(splitLasts) > 1 {
children = children[n+1:]
} else {
children = children[n:]
}
}
for _, child := range children {
_, file := path.Split(child)
if file == "_manifest" {
if err := fn(strings.TrimPrefix(lookPath, root+"/")); err != nil {
if err == driver.ErrSkipDir {
break
}
return err
}
} else if file[0] != '_' {
if err := reg.getRepositoriesRec(ctx, root, child, "", fn); err != nil {
return err return err
} }
} }
return driver.ErrSkipDir
} else if strings.HasPrefix(file, "_") {
return driver.ErrSkipDir
} }
return nil return nil

View file

@ -4,6 +4,7 @@ import (
"context" "context"
"fmt" "fmt"
"io" "io"
"math/rand"
"testing" "testing"
"github.com/distribution/distribution/v3" "github.com/distribution/distribution/v3"
@ -240,7 +241,6 @@ func TestCatalogWalkError(t *testing.T) {
t.Errorf("Expected catalog driver list error") t.Errorf("Expected catalog driver list error")
} }
} }
<<<<<<< HEAD
func BenchmarkPathCompareEqual(B *testing.B) { func BenchmarkPathCompareEqual(B *testing.B) {
B.StopTimer() B.StopTimer()
@ -323,5 +323,3 @@ func randomFilename(length int64) string {
} }
return string(b) return string(b)
} }
=======
>>>>>>> 27bd92bd (fix bug in catalog last param and optimized it)