Merge pull request #4004 from Jamstah/revert-3902

Revert 3902
2023-08-18 16:26:39 +01:00 · 2023-08-18 16:26:39 +01:00 · ed256e07f1
commit ed256e07f1
parent a45c5785ab 37a213dc4b
2 changed files with 160 additions and 87 deletions
--- a/registry/storage/catalog.go
+++ b/registry/storage/catalog.go
@ -3,32 +3,23 @@ package storage
 import (
 	"context"
 	"errors"
-	"fmt"
 	"io"
 	"path"
-	"sort"
 	"strings"

 	"github.com/distribution/distribution/v3/reference"
 	"github.com/distribution/distribution/v3/registry/storage/driver"
 )

-var (
-	// ErrStopReposWalk is a sentinel error to indicate that the repository path walk was stopped.
-	ErrStopReposWalk = errors.New("stop repos walk")
-)
-
-// Repositories populates the passed passed repos slice with repositories in the
-// registry up to the capacity of the slice. Returns the number of repos returned and
-// `io.EOF` if no more repositories are available.
+// Returns a list, or partial list, of repositories in the registry.
 // Because it's a quite expensive operation, it should only be used when building up
 // an initial set of repositories.
 func (reg *registry) Repositories(ctx context.Context, repos []string, last string) (n int, err error) {
 	var finishedWalk bool
 	var foundRepos []string

-	if len(repos) == 0 && cap(repos) == 0 {
-		return 0, errors.New("no repos requested")
+	if len(repos) == 0 {
+		return 0, errors.New("no space in slice")
 	}

 	root, err := pathFor(repositoriesRootPathSpec{})
@ -36,17 +27,20 @@ func (reg *registry) Repositories(ctx context.Context, repos []string, last stri
 		return 0, err
 	}

-	err = reg.walkRepos(ctx, root, last, func(repoPath string) error {
-		// this is placed before the append,
-		// so that we will get an extra repo if
-		// any. This ensures that we do not return
-		// io.EOF without it being the last record.
-		if len(foundRepos) == len(repos) {
-			finishedWalk = true
-			return ErrStopReposWalk
+	err = reg.blobStore.driver.Walk(ctx, root, func(fileInfo driver.FileInfo) error {
+		err := handleRepository(fileInfo, root, last, func(repoPath string) error {
+			foundRepos = append(foundRepos, repoPath)
+			return nil
+		})
+		if err != nil {
+			return err
 		}

-		foundRepos = append(foundRepos, repoPath)
+		// if we've filled our array, no need to walk any further
+		if len(foundRepos) == len(repos) {
+			finishedWalk = true
+			return driver.ErrSkipDir
+		}

 		return nil
 	})
@ -70,7 +64,11 @@ func (reg *registry) Enumerate(ctx context.Context, ingester func(string) error)
 		return err
 	}

-	return reg.walkRepos(ctx, root, "", ingester)
+	err = reg.blobStore.driver.Walk(ctx, root, func(fileInfo driver.FileInfo) error {
+		return handleRepository(fileInfo, root, "", ingester)
+	})
+
+	return err
 }

 // Remove removes a repository from storage
@ -83,86 +81,78 @@ func (reg *registry) Remove(ctx context.Context, name reference.Named) error {
 	return reg.driver.Delete(ctx, repoDir)
 }

-// walkRepos walks paths rooted in root calling fn for each found repo path.
-// Paths are walked in a lexical order which makes the output deterministic.
-// If last is not an empty string it walks all repo paths. Otherwise
-// it returns when last repoPath is found.
-func (reg *registry) walkRepos(ctx context.Context, root, last string, fn func(repoPath string) error) error {
-	midFn := fn
-
-	// middleware func to exclude the `last` repo
-	// only use it, if there is set a last.
-	if last != "" {
-		midFn = func(repoPath string) error {
-			if repoPath != last {
-				return fn(repoPath)
-			}
-			return nil
-		}
-	}
-
-	// call our recursive func, with the midFn and the start path
-	// of where we want to find repositories.
-	err := reg.walkReposPath(ctx, root, root, last, midFn)
-	if err == ErrStopReposWalk {
-		return nil
-	}
-	return err
+// lessPath returns true if one path a is less than path b.
+//
+// A component-wise comparison is done, rather than the lexical comparison of
+// strings.
+func lessPath(a, b string) bool {
+	// we provide this behavior by making separator always sort first.
+	return compareReplaceInline(a, b, '/', '\x00') < 0
 }

-// walkReposPath walks through all folders in `lookPath`,
-// looking for repositories. See walkRepos for more detailed description.
-func (reg *registry) walkReposPath(ctx context.Context, root, lookPath, last string, fn func(repoPath string) error) error {
-	// get children in the current path
-	children, err := reg.blobStore.driver.List(ctx, lookPath)
-	if err != nil {
-		return err
+// compareReplaceInline modifies runtime.cmpstring to replace old with new
+// during a byte-wise comparison.
+func compareReplaceInline(s1, s2 string, old, new byte) int {
+	// TODO(stevvooe): We are missing an optimization when the s1 and s2 have
+	// the exact same slice header. It will make the code unsafe but can
+	// provide some extra performance.
+
+	l := len(s1)
+	if len(s2) < l {
+		l = len(s2)
 	}

-	// sort this, so that it will be added in the correct order
-	sort.Strings(children)
+	for i := 0; i < l; i++ {
+		c1, c2 := s1[i], s2[i]
+		if c1 == old {
+			c1 = new
+		}

-	if last != "" {
-		splitLast := strings.Split(last, "/")
+		if c2 == old {
+			c2 = new
+		}

-		// call the next iteration of walkReposPath if any, but
-		// exclude the current one.
-		if len(splitLast) > 1 {
-			if err := reg.walkReposPath(ctx, root, lookPath+"/"+splitLast[0], strings.Join(splitLast[1:], "/"), fn); err != nil {
-				return err
+		if c1 < c2 {
+			return -1
+		}
+
+		if c1 > c2 {
+			return +1
 		}
 	}

-		// find current last path in our children
-		n := sort.SearchStrings(children, lookPath+"/"+splitLast[0])
-		if n == len(children) || children[n] != lookPath+"/"+splitLast[0] {
-			return fmt.Errorf("%q repository not found", last)
+	if len(s1) < len(s2) {
+		return -1
 	}

-		// if this is not a final `last` (there are more `/` left)
-		// then exclude the current index, else include it
-		if len(splitLast) > 1 {
-			children = children[n+1:]
-		} else {
-			children = children[n:]
-		}
+	if len(s1) > len(s2) {
+		return +1
 	}

-	for _, child := range children {
-		_, file := path.Split(child)
+	return 0
+}

+// handleRepository calls function fn with a repository path if fileInfo
+// has a path of a repository under root and that it is lexographically
+// after last. Otherwise, it will return ErrSkipDir. This should be used
+// with Walk to do handling with repositories in a storage.
+func handleRepository(fileInfo driver.FileInfo, root, last string, fn func(repoPath string) error) error {
+	filePath := fileInfo.Path()
+
+	// lop the base path off
+	repo := filePath[len(root)+1:]
+
+	_, file := path.Split(repo)
 	if file == "_manifests" {
-			if err := fn(strings.TrimPrefix(lookPath, root+"/")); err != nil {
-				if err == driver.ErrSkipDir {
-					break
-				}
-				return err
-			}
-		} else if !strings.HasPrefix(file, "_") {
-			if err := reg.walkReposPath(ctx, root, child, "", fn); err != nil {
+		repo = strings.TrimSuffix(repo, "/_manifests")
+		if lessPath(last, repo) {
+			if err := fn(repo); err != nil {
 				return err
 			}
 		}
+		return driver.ErrSkipDir
+	} else if strings.HasPrefix(file, "_") {
+		return driver.ErrSkipDir
 	}

 	return nil
--- a/registry/storage/catalog_test.go
+++ b/registry/storage/catalog_test.go
@ -4,6 +4,7 @@ import (
 	"context"
 	"fmt"
 	"io"
+	"math/rand"
 	"testing"

 	"github.com/distribution/distribution/v3"
@ -240,3 +241,85 @@ func TestCatalogWalkError(t *testing.T) {
 		t.Errorf("Expected catalog driver list error")
 	}
 }
+
+func BenchmarkPathCompareEqual(B *testing.B) {
+	B.StopTimer()
+	pp := randomPath(100)
+	// make a real copy
+	ppb := append([]byte{}, []byte(pp)...)
+	a, b := pp, string(ppb)
+
+	B.StartTimer()
+	for i := 0; i < B.N; i++ {
+		lessPath(a, b)
+	}
+}
+
+func BenchmarkPathCompareNotEqual(B *testing.B) {
+	B.StopTimer()
+	a, b := randomPath(100), randomPath(100)
+	B.StartTimer()
+
+	for i := 0; i < B.N; i++ {
+		lessPath(a, b)
+	}
+}
+
+func BenchmarkPathCompareNative(B *testing.B) {
+	B.StopTimer()
+	a, b := randomPath(100), randomPath(100)
+	B.StartTimer()
+
+	for i := 0; i < B.N; i++ {
+		c := a < b
+		_ = c && false
+	}
+}
+
+func BenchmarkPathCompareNativeEqual(B *testing.B) {
+	B.StopTimer()
+	pp := randomPath(100)
+	a, b := pp, pp
+	B.StartTimer()
+
+	for i := 0; i < B.N; i++ {
+		c := a < b
+		_ = c && false
+	}
+}
+
+var (
+	filenameChars  = []byte("abcdefghijklmnopqrstuvwxyz0123456789")
+	separatorChars = []byte("._-")
+)
+
+func randomPath(length int64) string {
+	path := "/"
+	for int64(len(path)) < length {
+		chunkLength := rand.Int63n(length-int64(len(path))) + 1
+		chunk := randomFilename(chunkLength)
+		path += chunk
+		remaining := length - int64(len(path))
+		if remaining == 1 {
+			path += randomFilename(1)
+		} else if remaining > 1 {
+			path += "/"
+		}
+	}
+	return path
+}
+
+func randomFilename(length int64) string {
+	b := make([]byte, length)
+	wasSeparator := true
+	for i := range b {
+		if !wasSeparator && i < len(b)-1 && rand.Intn(4) == 0 {
+			b[i] = separatorChars[rand.Intn(len(separatorChars))]
+			wasSeparator = true
+		} else {
+			b[i] = filenameChars[rand.Intn(len(filenameChars))]
+			wasSeparator = false
+		}
+	}
+	return string(b)
+}