From bba5a0d05c27328d7a75a16e8082f9b16b9721f0 Mon Sep 17 00:00:00 2001 From: Stephen J Day Date: Fri, 5 Aug 2016 17:18:43 -0700 Subject: [PATCH 1/2] registry/storage: more efficient path compare in catalog Previous component-wise path comparison is recursive and generates a large amount of garbage. This more efficient version simply replaces the path comparison with the zero-value to sort before everything. We do this by replacing the byte-wise comparison that swaps a single character inline for the separator comparison, such that separators sort first. The resulting implementation provides component-wise path comparison with no cost incurred for allocation or stack frame. Direction of the comparison is also reversed to match Go style. Signed-off-by: Stephen J Day --- registry/storage/catalog.go | 59 +++++++++++++++++++++++++++---------- 1 file changed, 43 insertions(+), 16 deletions(-) diff --git a/registry/storage/catalog.go b/registry/storage/catalog.go index 89616402..70e98441 100644 --- a/registry/storage/catalog.go +++ b/registry/storage/catalog.go @@ -39,7 +39,7 @@ func (reg *registry) Repositories(ctx context.Context, repos []string, last stri _, file := path.Split(repoPath) if file == "_layers" { repoPath = strings.TrimSuffix(repoPath, "/_layers") - if pathGreaterThan(repoPath, last) { + if lessPath(last, repoPath) { foundRepos = append(foundRepos, repoPath) } return ErrSkipDir @@ -96,22 +96,49 @@ func (reg *registry) Enumerate(ctx context.Context, ingester func(string) error) } -func pathGreaterThan(pathX, pathY string) (b bool) { - splitPathX := strings.SplitN(pathX, "/", 2) - splitPathY := strings.SplitN(pathY, "/", 2) - - if splitPathX[0] == splitPathY[0] { - if len(splitPathX) == 1 && len(splitPathY) == 1 { - return false - } else if len(splitPathX) == 1 && len(splitPathY) != 1 { - return false - } else if len(splitPathX) != 1 && len(splitPathY) == 1 { - return true - } - - return pathGreaterThan(splitPathX[1], splitPathY[1]) +// lessPath returns true if one path a is less than path b. +// +// A component-wise comparison is done, rather than the lexical comparison of +// strings. +func lessPath(a, b string) bool { + // we provide this behavior by making separator always sort first. + return compareReplaceInline(a, b, '/', '\x00') < 0 +} +// compareReplaceInline modifies runtime.cmpstring to replace old with new +// during a byte-wise comparison. +func compareReplaceInline(s1, s2 string, old, new byte) int { + l := len(s1) + if len(s2) < l { + l = len(s2) } - return splitPathX[0] > splitPathY[0] + for i := 0; i < l; i++ { + c1, c2 := s1[i], s2[i] + if c1 == old { + c1 = new + } + + if c2 == old { + c2 = new + } + + if c1 < c2 { + return -1 + } + + if c1 > c2 { + return +1 + } + } + + if len(s1) < len(s2) { + return -1 + } + + if len(s1) > len(s2) { + return +1 + } + + return 0 } From 308faf00f1665a25758268b7765868b2dd389f19 Mon Sep 17 00:00:00 2001 From: Stephen J Day Date: Mon, 8 Aug 2016 17:09:49 -0700 Subject: [PATCH 2/2] catalog: add benchmarks for overridden path comparison Signed-off-by: Stephen J Day --- registry/storage/catalog.go | 4 ++ registry/storage/catalog_test.go | 81 ++++++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+) diff --git a/registry/storage/catalog.go b/registry/storage/catalog.go index 70e98441..3bde7e35 100644 --- a/registry/storage/catalog.go +++ b/registry/storage/catalog.go @@ -108,6 +108,10 @@ func lessPath(a, b string) bool { // compareReplaceInline modifies runtime.cmpstring to replace old with new // during a byte-wise comparison. func compareReplaceInline(s1, s2 string, old, new byte) int { + // TODO(stevvooe): We are missing an optimization when the s1 and s2 have + // the exact same slice header. It will make the code unsafe but can + // provide some extra performance. + l := len(s1) if len(s2) < l { l = len(s2) diff --git a/registry/storage/catalog_test.go b/registry/storage/catalog_test.go index c125cb17..3385a2d5 100644 --- a/registry/storage/catalog_test.go +++ b/registry/storage/catalog_test.go @@ -3,6 +3,7 @@ package storage import ( "fmt" "io" + "math/rand" "testing" "github.com/docker/distribution" @@ -220,3 +221,83 @@ func TestCatalogWalkError(t *testing.T) { t.Errorf("Expected catalog driver list error") } } + +func BenchmarkPathCompareEqual(B *testing.B) { + B.StopTimer() + pp := randomPath(100) + // make a real copy + ppb := append([]byte{}, []byte(pp)...) + a, b := pp, string(ppb) + + B.StartTimer() + for i := 0; i < B.N; i++ { + lessPath(a, b) + } +} + +func BenchmarkPathCompareNotEqual(B *testing.B) { + B.StopTimer() + a, b := randomPath(100), randomPath(100) + B.StartTimer() + + for i := 0; i < B.N; i++ { + lessPath(a, b) + } +} + +func BenchmarkPathCompareNative(B *testing.B) { + B.StopTimer() + a, b := randomPath(100), randomPath(100) + B.StartTimer() + + for i := 0; i < B.N; i++ { + c := a < b + c = c && false + } +} + +func BenchmarkPathCompareNativeEqual(B *testing.B) { + B.StopTimer() + pp := randomPath(100) + a, b := pp, pp + B.StartTimer() + + for i := 0; i < B.N; i++ { + c := a < b + c = c && false + } +} + +var filenameChars = []byte("abcdefghijklmnopqrstuvwxyz0123456789") +var separatorChars = []byte("._-") + +func randomPath(length int64) string { + path := "/" + for int64(len(path)) < length { + chunkLength := rand.Int63n(length-int64(len(path))) + 1 + chunk := randomFilename(chunkLength) + path += chunk + remaining := length - int64(len(path)) + if remaining == 1 { + path += randomFilename(1) + } else if remaining > 1 { + path += "/" + } + } + return path +} + +func randomFilename(length int64) string { + b := make([]byte, length) + wasSeparator := true + for i := range b { + if !wasSeparator && i < len(b)-1 && rand.Intn(4) == 0 { + b[i] = separatorChars[rand.Intn(len(separatorChars))] + wasSeparator = true + } else { + b[i] = filenameChars[rand.Intn(len(filenameChars))] + wasSeparator = false + } + } + return string(b) +}