Merge pull request #3902 from pluralsh/catalog-opti-fix-rebase

Optimise catalog function rebase of #3145
2023-08-18 08:11:29 +01:00 · 2023-08-18 08:11:29 +01:00 · bf3c2df6b2
commit bf3c2df6b2
parent ac32466188 6a5846b32e
2 changed files with 92 additions and 165 deletions
--- a/registry/storage/catalog.go
+++ b/registry/storage/catalog.go
@ -3,23 +3,32 @@ package storage
 import (
 	"context"
 	"errors"
+	"fmt"
 	"io"
 	"path"
+	"sort"
 	"strings"

 	"github.com/distribution/distribution/v3/reference"
 	"github.com/distribution/distribution/v3/registry/storage/driver"
 )

-// Returns a list, or partial list, of repositories in the registry.
+var (
+	// ErrStopReposWalk is a sentinel error to indicate that the repository path walk was stopped.
+	ErrStopReposWalk = errors.New("stop repos walk")
+)
+
+// Repositories populates the passed passed repos slice with repositories in the
+// registry up to the capacity of the slice. Returns the number of repos returned and
+// `io.EOF` if no more repositories are available.
 // Because it's a quite expensive operation, it should only be used when building up
 // an initial set of repositories.
 func (reg *registry) Repositories(ctx context.Context, repos []string, last string) (n int, err error) {
 	var finishedWalk bool
 	var foundRepos []string

-	if len(repos) == 0 {
-		return 0, errors.New("no space in slice")
+	if len(repos) == 0 && cap(repos) == 0 {
+		return 0, errors.New("no repos requested")
 	}

 	root, err := pathFor(repositoriesRootPathSpec{})
@ -27,21 +36,18 @@ func (reg *registry) Repositories(ctx context.Context, repos []string, last stri
 		return 0, err
 	}

-	err = reg.blobStore.driver.Walk(ctx, root, func(fileInfo driver.FileInfo) error {
-		err := handleRepository(fileInfo, root, last, func(repoPath string) error {
-			foundRepos = append(foundRepos, repoPath)
-			return nil
-		})
-		if err != nil {
-			return err
-		}
-
-		// if we've filled our array, no need to walk any further
+	err = reg.walkRepos(ctx, root, last, func(repoPath string) error {
+		// this is placed before the append,
+		// so that we will get an extra repo if
+		// any. This ensures that we do not return
+		// io.EOF without it being the last record.
 		if len(foundRepos) == len(repos) {
 			finishedWalk = true
-			return driver.ErrSkipDir
+			return ErrStopReposWalk
 		}

+		foundRepos = append(foundRepos, repoPath)
+
 		return nil
 	})

@ -64,11 +70,7 @@ func (reg *registry) Enumerate(ctx context.Context, ingester func(string) error)
 		return err
 	}

-	err = reg.blobStore.driver.Walk(ctx, root, func(fileInfo driver.FileInfo) error {
-		return handleRepository(fileInfo, root, "", ingester)
-	})
-
-	return err
+	return reg.walkRepos(ctx, root, "", ingester)
 }

 // Remove removes a repository from storage
@ -81,78 +83,86 @@ func (reg *registry) Remove(ctx context.Context, name reference.Named) error {
 	return reg.driver.Delete(ctx, repoDir)
 }

-// lessPath returns true if one path a is less than path b.
-//
-// A component-wise comparison is done, rather than the lexical comparison of
-// strings.
-func lessPath(a, b string) bool {
-	// we provide this behavior by making separator always sort first.
-	return compareReplaceInline(a, b, '/', '\x00') < 0
+// walkRepos walks paths rooted in root calling fn for each found repo path.
+// Paths are walked in a lexical order which makes the output deterministic.
+// If last is not an empty string it walks all repo paths. Otherwise
+// it returns when last repoPath is found.
+func (reg *registry) walkRepos(ctx context.Context, root, last string, fn func(repoPath string) error) error {
+	midFn := fn
+
+	// middleware func to exclude the `last` repo
+	// only use it, if there is set a last.
+	if last != "" {
+		midFn = func(repoPath string) error {
+			if repoPath != last {
+				return fn(repoPath)
+			}
+			return nil
+		}
+	}
+
+	// call our recursive func, with the midFn and the start path
+	// of where we want to find repositories.
+	err := reg.walkReposPath(ctx, root, root, last, midFn)
+	if err == ErrStopReposWalk {
+		return nil
+	}
+	return err
 }

-// compareReplaceInline modifies runtime.cmpstring to replace old with new
-// during a byte-wise comparison.
-func compareReplaceInline(s1, s2 string, old, new byte) int {
-	// TODO(stevvooe): We are missing an optimization when the s1 and s2 have
-	// the exact same slice header. It will make the code unsafe but can
-	// provide some extra performance.
-
-	l := len(s1)
-	if len(s2) < l {
-		l = len(s2)
+// walkReposPath walks through all folders in `lookPath`,
+// looking for repositories. See walkRepos for more detailed description.
+func (reg *registry) walkReposPath(ctx context.Context, root, lookPath, last string, fn func(repoPath string) error) error {
+	// get children in the current path
+	children, err := reg.blobStore.driver.List(ctx, lookPath)
+	if err != nil {
+		return err
 	}

-	for i := 0; i < l; i++ {
-		c1, c2 := s1[i], s2[i]
-		if c1 == old {
-			c1 = new
-		}
+	// sort this, so that it will be added in the correct order
+	sort.Strings(children)

-		if c2 == old {
-			c2 = new
-		}
+	if last != "" {
+		splitLast := strings.Split(last, "/")

-		if c1 < c2 {
-			return -1
-		}
-
-		if c1 > c2 {
-			return +1
-		}
-	}
-
-	if len(s1) < len(s2) {
-		return -1
-	}
-
-	if len(s1) > len(s2) {
-		return +1
-	}
-
-	return 0
-}
-
-// handleRepository calls function fn with a repository path if fileInfo
-// has a path of a repository under root and that it is lexographically
-// after last. Otherwise, it will return ErrSkipDir. This should be used
-// with Walk to do handling with repositories in a storage.
-func handleRepository(fileInfo driver.FileInfo, root, last string, fn func(repoPath string) error) error {
-	filePath := fileInfo.Path()
-
-	// lop the base path off
-	repo := filePath[len(root)+1:]
-
-	_, file := path.Split(repo)
-	if file == "_manifests" {
-		repo = strings.TrimSuffix(repo, "/_manifests")
-		if lessPath(last, repo) {
-			if err := fn(repo); err != nil {
+		// call the next iteration of walkReposPath if any, but
+		// exclude the current one.
+		if len(splitLast) > 1 {
+			if err := reg.walkReposPath(ctx, root, lookPath+"/"+splitLast[0], strings.Join(splitLast[1:], "/"), fn); err != nil {
+				return err
+			}
+		}
+
+		// find current last path in our children
+		n := sort.SearchStrings(children, lookPath+"/"+splitLast[0])
+		if n == len(children) || children[n] != lookPath+"/"+splitLast[0] {
+			return fmt.Errorf("%q repository not found", last)
+		}
+
+		// if this is not a final `last` (there are more `/` left)
+		// then exclude the current index, else include it
+		if len(splitLast) > 1 {
+			children = children[n+1:]
+		} else {
+			children = children[n:]
+		}
+	}
+
+	for _, child := range children {
+		_, file := path.Split(child)
+
+		if file == "_manifests" {
+			if err := fn(strings.TrimPrefix(lookPath, root+"/")); err != nil {
+				if err == driver.ErrSkipDir {
+					break
+				}
+				return err
+			}
+		} else if !strings.HasPrefix(file, "_") {
+			if err := reg.walkReposPath(ctx, root, child, "", fn); err != nil {
 				return err
 			}
 		}
-		return driver.ErrSkipDir
-	} else if strings.HasPrefix(file, "_") {
-		return driver.ErrSkipDir
 	}

 	return nil
--- a/registry/storage/catalog_test.go
+++ b/registry/storage/catalog_test.go
@ -4,7 +4,6 @@ import (
 	"context"
 	"fmt"
 	"io"
-	"math/rand"
 	"testing"

 	"github.com/distribution/distribution/v3"
@ -241,85 +240,3 @@ func TestCatalogWalkError(t *testing.T) {
 		t.Errorf("Expected catalog driver list error")
 	}
 }
-
-func BenchmarkPathCompareEqual(B *testing.B) {
-	B.StopTimer()
-	pp := randomPath(100)
-	// make a real copy
-	ppb := append([]byte{}, []byte(pp)...)
-	a, b := pp, string(ppb)
-
-	B.StartTimer()
-	for i := 0; i < B.N; i++ {
-		lessPath(a, b)
-	}
-}
-
-func BenchmarkPathCompareNotEqual(B *testing.B) {
-	B.StopTimer()
-	a, b := randomPath(100), randomPath(100)
-	B.StartTimer()
-
-	for i := 0; i < B.N; i++ {
-		lessPath(a, b)
-	}
-}
-
-func BenchmarkPathCompareNative(B *testing.B) {
-	B.StopTimer()
-	a, b := randomPath(100), randomPath(100)
-	B.StartTimer()
-
-	for i := 0; i < B.N; i++ {
-		c := a < b
-		_ = c && false
-	}
-}
-
-func BenchmarkPathCompareNativeEqual(B *testing.B) {
-	B.StopTimer()
-	pp := randomPath(100)
-	a, b := pp, pp
-	B.StartTimer()
-
-	for i := 0; i < B.N; i++ {
-		c := a < b
-		_ = c && false
-	}
-}
-
-var (
-	filenameChars  = []byte("abcdefghijklmnopqrstuvwxyz0123456789")
-	separatorChars = []byte("._-")
-)
-
-func randomPath(length int64) string {
-	path := "/"
-	for int64(len(path)) < length {
-		chunkLength := rand.Int63n(length-int64(len(path))) + 1
-		chunk := randomFilename(chunkLength)
-		path += chunk
-		remaining := length - int64(len(path))
-		if remaining == 1 {
-			path += randomFilename(1)
-		} else if remaining > 1 {
-			path += "/"
-		}
-	}
-	return path
-}
-
-func randomFilename(length int64) string {
-	b := make([]byte, length)
-	wasSeparator := true
-	for i := range b {
-		if !wasSeparator && i < len(b)-1 && rand.Intn(4) == 0 {
-			b[i] = separatorChars[rand.Intn(len(separatorChars))]
-			wasSeparator = true
-		} else {
-			b[i] = filenameChars[rand.Intn(len(filenameChars))]
-			wasSeparator = false
-		}
-	}
-	return string(b)
-}