Merge pull request #4004 from Jamstah/revert-3902

Revert 3902
This commit is contained in:
Milos Gajdos 2023-08-18 16:26:39 +01:00 committed by GitHub
commit ed256e07f1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 160 additions and 87 deletions

View file

@ -3,32 +3,23 @@ package storage
import ( import (
"context" "context"
"errors" "errors"
"fmt"
"io" "io"
"path" "path"
"sort"
"strings" "strings"
"github.com/distribution/distribution/v3/reference" "github.com/distribution/distribution/v3/reference"
"github.com/distribution/distribution/v3/registry/storage/driver" "github.com/distribution/distribution/v3/registry/storage/driver"
) )
var ( // Returns a list, or partial list, of repositories in the registry.
// ErrStopReposWalk is a sentinel error to indicate that the repository path walk was stopped.
ErrStopReposWalk = errors.New("stop repos walk")
)
// Repositories populates the passed passed repos slice with repositories in the
// registry up to the capacity of the slice. Returns the number of repos returned and
// `io.EOF` if no more repositories are available.
// Because it's a quite expensive operation, it should only be used when building up // Because it's a quite expensive operation, it should only be used when building up
// an initial set of repositories. // an initial set of repositories.
func (reg *registry) Repositories(ctx context.Context, repos []string, last string) (n int, err error) { func (reg *registry) Repositories(ctx context.Context, repos []string, last string) (n int, err error) {
var finishedWalk bool var finishedWalk bool
var foundRepos []string var foundRepos []string
if len(repos) == 0 && cap(repos) == 0 { if len(repos) == 0 {
return 0, errors.New("no repos requested") return 0, errors.New("no space in slice")
} }
root, err := pathFor(repositoriesRootPathSpec{}) root, err := pathFor(repositoriesRootPathSpec{})
@ -36,17 +27,20 @@ func (reg *registry) Repositories(ctx context.Context, repos []string, last stri
return 0, err return 0, err
} }
err = reg.walkRepos(ctx, root, last, func(repoPath string) error { err = reg.blobStore.driver.Walk(ctx, root, func(fileInfo driver.FileInfo) error {
// this is placed before the append, err := handleRepository(fileInfo, root, last, func(repoPath string) error {
// so that we will get an extra repo if foundRepos = append(foundRepos, repoPath)
// any. This ensures that we do not return return nil
// io.EOF without it being the last record. })
if len(foundRepos) == len(repos) { if err != nil {
finishedWalk = true return err
return ErrStopReposWalk
} }
foundRepos = append(foundRepos, repoPath) // if we've filled our array, no need to walk any further
if len(foundRepos) == len(repos) {
finishedWalk = true
return driver.ErrSkipDir
}
return nil return nil
}) })
@ -70,7 +64,11 @@ func (reg *registry) Enumerate(ctx context.Context, ingester func(string) error)
return err return err
} }
return reg.walkRepos(ctx, root, "", ingester) err = reg.blobStore.driver.Walk(ctx, root, func(fileInfo driver.FileInfo) error {
return handleRepository(fileInfo, root, "", ingester)
})
return err
} }
// Remove removes a repository from storage // Remove removes a repository from storage
@ -83,86 +81,78 @@ func (reg *registry) Remove(ctx context.Context, name reference.Named) error {
return reg.driver.Delete(ctx, repoDir) return reg.driver.Delete(ctx, repoDir)
} }
// walkRepos walks paths rooted in root calling fn for each found repo path. // lessPath returns true if one path a is less than path b.
// Paths are walked in a lexical order which makes the output deterministic. //
// If last is not an empty string it walks all repo paths. Otherwise // A component-wise comparison is done, rather than the lexical comparison of
// it returns when last repoPath is found. // strings.
func (reg *registry) walkRepos(ctx context.Context, root, last string, fn func(repoPath string) error) error { func lessPath(a, b string) bool {
midFn := fn // we provide this behavior by making separator always sort first.
return compareReplaceInline(a, b, '/', '\x00') < 0
// middleware func to exclude the `last` repo
// only use it, if there is set a last.
if last != "" {
midFn = func(repoPath string) error {
if repoPath != last {
return fn(repoPath)
}
return nil
}
}
// call our recursive func, with the midFn and the start path
// of where we want to find repositories.
err := reg.walkReposPath(ctx, root, root, last, midFn)
if err == ErrStopReposWalk {
return nil
}
return err
} }
// walkReposPath walks through all folders in `lookPath`, // compareReplaceInline modifies runtime.cmpstring to replace old with new
// looking for repositories. See walkRepos for more detailed description. // during a byte-wise comparison.
func (reg *registry) walkReposPath(ctx context.Context, root, lookPath, last string, fn func(repoPath string) error) error { func compareReplaceInline(s1, s2 string, old, new byte) int {
// get children in the current path // TODO(stevvooe): We are missing an optimization when the s1 and s2 have
children, err := reg.blobStore.driver.List(ctx, lookPath) // the exact same slice header. It will make the code unsafe but can
if err != nil { // provide some extra performance.
return err
l := len(s1)
if len(s2) < l {
l = len(s2)
} }
// sort this, so that it will be added in the correct order for i := 0; i < l; i++ {
sort.Strings(children) c1, c2 := s1[i], s2[i]
if c1 == old {
if last != "" { c1 = new
splitLast := strings.Split(last, "/")
// call the next iteration of walkReposPath if any, but
// exclude the current one.
if len(splitLast) > 1 {
if err := reg.walkReposPath(ctx, root, lookPath+"/"+splitLast[0], strings.Join(splitLast[1:], "/"), fn); err != nil {
return err
}
} }
// find current last path in our children if c2 == old {
n := sort.SearchStrings(children, lookPath+"/"+splitLast[0]) c2 = new
if n == len(children) || children[n] != lookPath+"/"+splitLast[0] {
return fmt.Errorf("%q repository not found", last)
} }
// if this is not a final `last` (there are more `/` left) if c1 < c2 {
// then exclude the current index, else include it return -1
if len(splitLast) > 1 { }
children = children[n+1:]
} else { if c1 > c2 {
children = children[n:] return +1
} }
} }
for _, child := range children { if len(s1) < len(s2) {
_, file := path.Split(child) return -1
}
if file == "_manifests" { if len(s1) > len(s2) {
if err := fn(strings.TrimPrefix(lookPath, root+"/")); err != nil { return +1
if err == driver.ErrSkipDir { }
break
} return 0
return err }
}
} else if !strings.HasPrefix(file, "_") { // handleRepository calls function fn with a repository path if fileInfo
if err := reg.walkReposPath(ctx, root, child, "", fn); err != nil { // has a path of a repository under root and that it is lexographically
// after last. Otherwise, it will return ErrSkipDir. This should be used
// with Walk to do handling with repositories in a storage.
func handleRepository(fileInfo driver.FileInfo, root, last string, fn func(repoPath string) error) error {
filePath := fileInfo.Path()
// lop the base path off
repo := filePath[len(root)+1:]
_, file := path.Split(repo)
if file == "_manifests" {
repo = strings.TrimSuffix(repo, "/_manifests")
if lessPath(last, repo) {
if err := fn(repo); err != nil {
return err return err
} }
} }
return driver.ErrSkipDir
} else if strings.HasPrefix(file, "_") {
return driver.ErrSkipDir
} }
return nil return nil

View file

@ -4,6 +4,7 @@ import (
"context" "context"
"fmt" "fmt"
"io" "io"
"math/rand"
"testing" "testing"
"github.com/distribution/distribution/v3" "github.com/distribution/distribution/v3"
@ -240,3 +241,85 @@ func TestCatalogWalkError(t *testing.T) {
t.Errorf("Expected catalog driver list error") t.Errorf("Expected catalog driver list error")
} }
} }
func BenchmarkPathCompareEqual(B *testing.B) {
B.StopTimer()
pp := randomPath(100)
// make a real copy
ppb := append([]byte{}, []byte(pp)...)
a, b := pp, string(ppb)
B.StartTimer()
for i := 0; i < B.N; i++ {
lessPath(a, b)
}
}
func BenchmarkPathCompareNotEqual(B *testing.B) {
B.StopTimer()
a, b := randomPath(100), randomPath(100)
B.StartTimer()
for i := 0; i < B.N; i++ {
lessPath(a, b)
}
}
func BenchmarkPathCompareNative(B *testing.B) {
B.StopTimer()
a, b := randomPath(100), randomPath(100)
B.StartTimer()
for i := 0; i < B.N; i++ {
c := a < b
_ = c && false
}
}
func BenchmarkPathCompareNativeEqual(B *testing.B) {
B.StopTimer()
pp := randomPath(100)
a, b := pp, pp
B.StartTimer()
for i := 0; i < B.N; i++ {
c := a < b
_ = c && false
}
}
var (
filenameChars = []byte("abcdefghijklmnopqrstuvwxyz0123456789")
separatorChars = []byte("._-")
)
func randomPath(length int64) string {
path := "/"
for int64(len(path)) < length {
chunkLength := rand.Int63n(length-int64(len(path))) + 1
chunk := randomFilename(chunkLength)
path += chunk
remaining := length - int64(len(path))
if remaining == 1 {
path += randomFilename(1)
} else if remaining > 1 {
path += "/"
}
}
return path
}
func randomFilename(length int64) string {
b := make([]byte, length)
wasSeparator := true
for i := range b {
if !wasSeparator && i < len(b)-1 && rand.Intn(4) == 0 {
b[i] = separatorChars[rand.Intn(len(separatorChars))]
wasSeparator = true
} else {
b[i] = filenameChars[rand.Intn(len(filenameChars))]
wasSeparator = false
}
}
return string(b)
}