Improve catalog enumerate runtime by an order of magnitude

Signed-off-by: Edgar Lee <edgar.lee@docker.com>
This commit is contained in:
Edgar Lee 2016-08-09 17:42:26 -07:00
parent 9000745401
commit 2f81b3b058
2 changed files with 65 additions and 43 deletions

View file

@ -10,6 +10,10 @@ import (
"github.com/docker/distribution/registry/storage/driver" "github.com/docker/distribution/registry/storage/driver"
) )
// errFinishedWalk is a sentinel error that signals an early exit to the walk
// when the current query is satisfied.
var errFinishedWalk = errors.New("finished walk")
// Returns a list, or partial list, of repositories in the registry. // Returns a list, or partial list, of repositories in the registry.
// Because it's a quite expensive operation, it should only be used when building up // Because it's a quite expensive operation, it should only be used when building up
// an initial set of repositories. // an initial set of repositories.
@ -25,25 +29,13 @@ func (reg *registry) Repositories(ctx context.Context, repos []string, last stri
return 0, err return 0, err
} }
// errFinishedWalk signals an early exit to the walk when the current query
// is satisfied.
errFinishedWalk := errors.New("finished walk")
err = Walk(ctx, reg.blobStore.driver, root, func(fileInfo driver.FileInfo) error { err = Walk(ctx, reg.blobStore.driver, root, func(fileInfo driver.FileInfo) error {
filePath := fileInfo.Path() err := handleRepository(fileInfo, root, last, func(repoPath string) error {
foundRepos = append(foundRepos, repoPath)
// lop the base path off return nil
repoPath := filePath[len(root)+1:] })
if err != nil {
_, file := path.Split(repoPath) return err
if file == "_layers" {
repoPath = strings.TrimSuffix(repoPath, "/_layers")
if lessPath(last, repoPath) {
foundRepos = append(foundRepos, repoPath)
}
return ErrSkipDir
} else if strings.HasPrefix(file, "_") {
return ErrSkipDir
} }
// if we've filled our array, no need to walk any further // if we've filled our array, no need to walk any further
@ -71,33 +63,16 @@ func (reg *registry) Repositories(ctx context.Context, repos []string, last stri
// Enumerate applies ingester to each repository // Enumerate applies ingester to each repository
func (reg *registry) Enumerate(ctx context.Context, ingester func(string) error) error { func (reg *registry) Enumerate(ctx context.Context, ingester func(string) error) error {
repoNameBuffer := make([]string, 100) root, err := pathFor(repositoriesRootPathSpec{})
var last string if err != nil {
for { return err
n, err := reg.Repositories(ctx, repoNameBuffer, last)
if err != nil && err != io.EOF {
return err
}
if n == 0 {
break
}
last = repoNameBuffer[n-1]
for i := 0; i < n; i++ {
repoName := repoNameBuffer[i]
err = ingester(repoName)
if err != nil {
return err
}
}
if err == io.EOF {
break
}
} }
return nil
err = Walk(ctx, reg.blobStore.driver, root, func(fileInfo driver.FileInfo) error {
return handleRepository(fileInfo, root, "", ingester)
})
return err
} }
// lessPath returns true if one path a is less than path b. // lessPath returns true if one path a is less than path b.
@ -150,3 +125,29 @@ func compareReplaceInline(s1, s2 string, old, new byte) int {
return 0 return 0
} }
// handleRepository classifies a single entry visited by Walk and reports
// repositories found under root to fn.
//
// The entry's path is made relative to root and its final element decides
// the outcome:
//   - "_layers": the containing directory is treated as a repository. If
//     lessPath(last, repo) holds, fn is called with the repository path and
//     any error it returns is passed straight back. ErrSkipDir is returned
//     otherwise.
//   - any other name beginning with "_": ErrSkipDir is returned.
//   - anything else: nil is returned.
func handleRepository(fileInfo driver.FileInfo, root, last string, fn func(repoPath string) error) error {
	// Drop root and the separator that follows it.
	rel := fileInfo.Path()[len(root)+1:]

	_, name := path.Split(rel)
	switch {
	case name == "_layers":
		repoName := strings.TrimSuffix(rel, "/_layers")
		if !lessPath(last, repoName) {
			return ErrSkipDir
		}
		if err := fn(repoName); err != nil {
			return err
		}
		return ErrSkipDir
	case strings.HasPrefix(name, "_"):
		return ErrSkipDir
	default:
		return nil
	}
}

View file

@ -171,7 +171,28 @@ func TestCatalogInParts(t *testing.T) {
if numFilled != 0 { if numFilled != 0 {
t.Errorf("Expected catalog fourth chunk err") t.Errorf("Expected catalog fourth chunk err")
} }
}
func TestCatalogEnumerate(t *testing.T) {
env := setupFS(t)
var repos []string
repositoryEnumerator := env.registry.(distribution.RepositoryEnumerator)
err := repositoryEnumerator.Enumerate(env.ctx, func(repoName string) error {
repos = append(repos, repoName)
return nil
})
if err != nil {
t.Errorf("Expected catalog enumerate err")
}
if len(repos) != len(env.expected) {
t.Errorf("Expected catalog enumerate doesn't have correct number of values")
}
if !testEq(repos, env.expected, len(env.expected)) {
t.Errorf("Expected catalog enumerate not over all values")
}
} }
func testEq(a, b []string, size int) bool { func testEq(a, b []string, size int) bool {