From a5882d66468fd477d3f7a909c2d93ee71b434467 Mon Sep 17 00:00:00 2001 From: Liang Zheng Date: Wed, 24 Apr 2024 10:54:40 +0800 Subject: [PATCH 1/2] vendor: update manifest dependencies Signed-off-by: Liang Zheng --- go.mod | 2 +- vendor/golang.org/x/sync/errgroup/errgroup.go | 132 ++++++++++++++++++ vendor/golang.org/x/sync/errgroup/go120.go | 14 ++ .../golang.org/x/sync/errgroup/pre_go120.go | 15 ++ vendor/modules.txt | 1 + 5 files changed, 163 insertions(+), 1 deletion(-) create mode 100644 vendor/golang.org/x/sync/errgroup/errgroup.go create mode 100644 vendor/golang.org/x/sync/errgroup/go120.go create mode 100644 vendor/golang.org/x/sync/errgroup/pre_go120.go diff --git a/go.mod b/go.mod index 62b5b5e47..4e41e1bef 100644 --- a/go.mod +++ b/go.mod @@ -88,7 +88,7 @@ require ( go.opentelemetry.io/otel/metric v1.21.0 // indirect go.opentelemetry.io/otel/sdk/metric v1.21.0 // indirect go.opentelemetry.io/proto/otlp v1.0.0 // indirect - golang.org/x/sync v0.3.0 // indirect + golang.org/x/sync v0.3.0 golang.org/x/sys v0.18.0 // indirect golang.org/x/text v0.14.0 // indirect golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect diff --git a/vendor/golang.org/x/sync/errgroup/errgroup.go b/vendor/golang.org/x/sync/errgroup/errgroup.go new file mode 100644 index 000000000..b18efb743 --- /dev/null +++ b/vendor/golang.org/x/sync/errgroup/errgroup.go @@ -0,0 +1,132 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package errgroup provides synchronization, error propagation, and Context +// cancelation for groups of goroutines working on subtasks of a common task. +package errgroup + +import ( + "context" + "fmt" + "sync" +) + +type token struct{} + +// A Group is a collection of goroutines working on subtasks that are part of +// the same overall task. +// +// A zero Group is valid, has no limit on the number of active goroutines, +// and does not cancel on error. +type Group struct { + cancel func(error) + + wg sync.WaitGroup + + sem chan token + + errOnce sync.Once + err error +} + +func (g *Group) done() { + if g.sem != nil { + <-g.sem + } + g.wg.Done() +} + +// WithContext returns a new Group and an associated Context derived from ctx. +// +// The derived Context is canceled the first time a function passed to Go +// returns a non-nil error or the first time Wait returns, whichever occurs +// first. +func WithContext(ctx context.Context) (*Group, context.Context) { + ctx, cancel := withCancelCause(ctx) + return &Group{cancel: cancel}, ctx +} + +// Wait blocks until all function calls from the Go method have returned, then +// returns the first non-nil error (if any) from them. +func (g *Group) Wait() error { + g.wg.Wait() + if g.cancel != nil { + g.cancel(g.err) + } + return g.err +} + +// Go calls the given function in a new goroutine. +// It blocks until the new goroutine can be added without the number of +// active goroutines in the group exceeding the configured limit. +// +// The first call to return a non-nil error cancels the group's context, if the +// group was created by calling WithContext. The error will be returned by Wait. +func (g *Group) Go(f func() error) { + if g.sem != nil { + g.sem <- token{} + } + + g.wg.Add(1) + go func() { + defer g.done() + + if err := f(); err != nil { + g.errOnce.Do(func() { + g.err = err + if g.cancel != nil { + g.cancel(g.err) + } + }) + } + }() +} + +// TryGo calls the given function in a new goroutine only if the number of +// active goroutines in the group is currently below the configured limit. +// +// The return value reports whether the goroutine was started. +func (g *Group) TryGo(f func() error) bool { + if g.sem != nil { + select { + case g.sem <- token{}: + // Note: this allows barging iff channels in general allow barging. + default: + return false + } + } + + g.wg.Add(1) + go func() { + defer g.done() + + if err := f(); err != nil { + g.errOnce.Do(func() { + g.err = err + if g.cancel != nil { + g.cancel(g.err) + } + }) + } + }() + return true +} + +// SetLimit limits the number of active goroutines in this group to at most n. +// A negative value indicates no limit. +// +// Any subsequent call to the Go method will block until it can add an active +// goroutine without exceeding the configured limit. +// +// The limit must not be modified while any goroutines in the group are active. +func (g *Group) SetLimit(n int) { + if n < 0 { + g.sem = nil + return + } + if len(g.sem) != 0 { + panic(fmt.Errorf("errgroup: modify limit while %v goroutines in the group are still active", len(g.sem))) + } + g.sem = make(chan token, n) +} diff --git a/vendor/golang.org/x/sync/errgroup/go120.go b/vendor/golang.org/x/sync/errgroup/go120.go new file mode 100644 index 000000000..7d419d376 --- /dev/null +++ b/vendor/golang.org/x/sync/errgroup/go120.go @@ -0,0 +1,14 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build go1.20 +// +build go1.20 + +package errgroup + +import "context" + +func withCancelCause(parent context.Context) (context.Context, func(error)) { + return context.WithCancelCause(parent) +} diff --git a/vendor/golang.org/x/sync/errgroup/pre_go120.go b/vendor/golang.org/x/sync/errgroup/pre_go120.go new file mode 100644 index 000000000..1795c18ac --- /dev/null +++ b/vendor/golang.org/x/sync/errgroup/pre_go120.go @@ -0,0 +1,15 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !go1.20 +// +build !go1.20 + +package errgroup + +import "context" + +func withCancelCause(parent context.Context) (context.Context, func(error)) { + ctx, cancel := context.WithCancel(parent) + return ctx, func(error) { cancel() } +} diff --git a/vendor/modules.txt b/vendor/modules.txt index e052ccf61..e83242dad 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -504,6 +504,7 @@ golang.org/x/oauth2/jws golang.org/x/oauth2/jwt # golang.org/x/sync v0.3.0 ## explicit; go 1.17 +golang.org/x/sync/errgroup golang.org/x/sync/semaphore # golang.org/x/sys v0.18.0 ## explicit; go 1.18 From a2afe23f386e827d1975530aab12010f0be2a774 Mon Sep 17 00:00:00 2001 From: Liang Zheng Date: Thu, 18 Apr 2024 15:56:26 +0800 Subject: [PATCH 2/2] add concurrency limits for tag lookup and untag Harbor is using the distribution for it's (harbor-registry) registry component. The harbor GC will call into the registry to delete the manifest, which in turn then does a lookup for all tags that reference the deleted manifest. To find the tag references, the registry will iterate every tag in the repository and read it's link file to check if it matches the deleted manifest (i.e. to see if uses the same sha256 digest). So, the more tags in repository, the worse the performance will be (as there will be more s3 API calls occurring for the tag directory lookups and tag file reads). Therefore, we can use concurrent lookup and untag to optimize performance as described in https://github.com/goharbor/harbor/issues/12948. P.S. This optimization was originally contributed by @Antiarchitect, now I would like to take it over. Thanks @Antiarchitect's efforts with PR https://github.com/distribution/distribution/pull/3890. Signed-off-by: Liang Zheng --- cmd/registry/config-cache.yml | 2 + cmd/registry/config-dev.yml | 2 + cmd/registry/config-example.yml | 2 + configuration/configuration.go | 17 ++++++++ configuration/configuration_test.go | 8 ++++ docs/content/about/configuration.md | 22 ++++++++++ registry/handlers/app.go | 15 +++++++ registry/handlers/manifests.go | 25 ++++++++++-- registry/storage/registry.go | 22 +++++++++- registry/storage/tagstore.go | 62 ++++++++++++++++++++--------- 10 files changed, 153 insertions(+), 24 deletions(-) diff --git a/cmd/registry/config-cache.yml b/cmd/registry/config-cache.yml index d648303d9..72da9d602 100644 --- a/cmd/registry/config-cache.yml +++ b/cmd/registry/config-cache.yml @@ -12,6 +12,8 @@ storage: maintenance: uploadpurging: enabled: false + tag: + concurrencylimit: 8 http: addr: :5000 secret: asecretforlocaldevelopment diff --git a/cmd/registry/config-dev.yml b/cmd/registry/config-dev.yml index 9bf36583e..68fd83c8f 100644 --- a/cmd/registry/config-dev.yml +++ b/cmd/registry/config-dev.yml @@ -14,6 +14,8 @@ storage: maintenance: uploadpurging: enabled: false + tag: + concurrencylimit: 8 http: addr: :5000 debug: diff --git a/cmd/registry/config-example.yml b/cmd/registry/config-example.yml index c760cd567..2cb402062 100644 --- a/cmd/registry/config-example.yml +++ b/cmd/registry/config-example.yml @@ -7,6 +7,8 @@ storage: blobdescriptor: inmemory filesystem: rootdirectory: /var/lib/registry + tag: + concurrencylimit: 8 http: addr: :5000 headers: diff --git a/configuration/configuration.go b/configuration/configuration.go index 427081977..aa74e3cb4 100644 --- a/configuration/configuration.go +++ b/configuration/configuration.go @@ -441,6 +441,8 @@ func (storage Storage) Type() string { // allow configuration of delete case "redirect": // allow configuration of redirect + case "tag": + // allow configuration of tag default: storageType = append(storageType, k) } @@ -454,6 +456,19 @@ func (storage Storage) Type() string { return "" } +// TagParameters returns the Parameters map for a Storage tag configuration +func (storage Storage) TagParameters() Parameters { + return storage["tag"] +} + +// setTagParameter changes the parameter at the provided key to the new value +func (storage Storage) setTagParameter(key string, value interface{}) { + if _, ok := storage["tag"]; !ok { + storage["tag"] = make(Parameters) + } + storage["tag"][key] = value +} + // Parameters returns the Parameters map for a Storage configuration func (storage Storage) Parameters() Parameters { return storage[storage.Type()] @@ -482,6 +497,8 @@ func (storage *Storage) UnmarshalYAML(unmarshal func(interface{}) error) error { // allow configuration of delete case "redirect": // allow configuration of redirect + case "tag": + // allow configuration of tag default: types = append(types, k) } diff --git a/configuration/configuration_test.go b/configuration/configuration_test.go index 2139f8f1a..e3bf1bf1c 100644 --- a/configuration/configuration_test.go +++ b/configuration/configuration_test.go @@ -39,6 +39,9 @@ var configStruct = Configuration{ "url1": "https://foo.example.com", "path1": "/some-path", }, + "tag": Parameters{ + "concurrencylimit": 10, + }, }, Auth: Auth{ "silly": Parameters{ @@ -167,6 +170,8 @@ storage: int1: 42 url1: "https://foo.example.com" path1: "/some-path" + tag: + concurrencylimit: 10 auth: silly: realm: silly @@ -542,6 +547,9 @@ func copyConfig(config Configuration) *Configuration { for k, v := range config.Storage.Parameters() { configCopy.Storage.setParameter(k, v) } + for k, v := range config.Storage.TagParameters() { + configCopy.Storage.setTagParameter(k, v) + } configCopy.Auth = Auth{config.Auth.Type(): Parameters{}} for k, v := range config.Auth.Parameters() { diff --git a/docs/content/about/configuration.md b/docs/content/about/configuration.md index 482a40ca7..18f0afaba 100644 --- a/docs/content/about/configuration.md +++ b/docs/content/about/configuration.md @@ -141,6 +141,8 @@ storage: usedualstack: false loglevel: debug inmemory: # This driver takes no parameters + tag: + concurrencylimit: 8 delete: enabled: false redirect: @@ -521,6 +523,26 @@ parameter sets a limit on the number of descriptors to store in the cache. The default value is 10000. If this parameter is set to 0, the cache is allowed to grow with no size limit. +### `tag` + +The `tag` subsection provides configuration to set concurrency limit for tag lookup. +When user calls into the registry to delete the manifest, which in turn then does a +lookup for all tags that reference the deleted manifest. To find the tag references, +the registry will iterate every tag in the repository and read it's link file to check +if it matches the deleted manifest (i.e. to see if uses the same sha256 digest). +So, the more tags in repository, the worse the performance will be (as there will +be more S3 API calls occurring for the tag directory lookups and tag file reads if +using S3 storage driver). + +Therefore, add a single flag `concurrencylimit` to set concurrency limit to optimize tag +lookup performance under the `tag` section. When a value is not provided or equal to 0, +`GOMAXPROCS` will be used. + +```yaml +tag: + concurrencylimit: 8 +``` + ### `redirect` The `redirect` subsection provides configuration for managing redirects from diff --git a/registry/handlers/app.go b/registry/handlers/app.go index 5a9620829..83e987d85 100644 --- a/registry/handlers/app.go +++ b/registry/handlers/app.go @@ -188,6 +188,21 @@ func NewApp(ctx context.Context, config *configuration.Configuration) *App { } } + // configure tag lookup concurrency limit + if p := config.Storage.TagParameters(); p != nil { + l, ok := p["concurrencylimit"] + if ok { + limit, ok := l.(int) + if !ok { + panic("tag lookup concurrency limit config key must have a integer value") + } + if limit < 0 { + panic("tag lookup concurrency limit should be a non-negative integer value") + } + options = append(options, storage.TagLookupConcurrencyLimit(limit)) + } + } + // configure redirects var redirectDisabled bool if redirectConfig, ok := config.Storage["redirect"]; ok { diff --git a/registry/handlers/manifests.go b/registry/handlers/manifests.go index 704a8ab7f..144c04482 100644 --- a/registry/handlers/manifests.go +++ b/registry/handlers/manifests.go @@ -6,6 +6,7 @@ import ( "mime" "net/http" "strings" + "sync" "github.com/distribution/distribution/v3" "github.com/distribution/distribution/v3/internal/dcontext" @@ -13,11 +14,13 @@ import ( "github.com/distribution/distribution/v3/manifest/ocischema" "github.com/distribution/distribution/v3/manifest/schema2" "github.com/distribution/distribution/v3/registry/api/errcode" + "github.com/distribution/distribution/v3/registry/storage" "github.com/distribution/distribution/v3/registry/storage/driver" "github.com/distribution/reference" "github.com/gorilla/handlers" "github.com/opencontainers/go-digest" v1 "github.com/opencontainers/image-spec/specs-go/v1" + "golang.org/x/sync/errgroup" ) const ( @@ -481,12 +484,26 @@ func (imh *manifestHandler) DeleteManifest(w http.ResponseWriter, r *http.Reques return } + var ( + errs []error + mu sync.Mutex + ) + g := errgroup.Group{} + g.SetLimit(storage.DefaultConcurrencyLimit) for _, tag := range referencedTags { - if err := tagService.Untag(imh, tag); err != nil { - imh.Errors = append(imh.Errors, err) - return - } + tag := tag + + g.Go(func() error { + if err := tagService.Untag(imh, tag); err != nil { + mu.Lock() + errs = append(errs, err) + mu.Unlock() + } + return nil + }) } + _ = g.Wait() // imh will record all errors, so ignore the error of Wait() + imh.Errors = errs w.WriteHeader(http.StatusAccepted) } diff --git a/registry/storage/registry.go b/registry/storage/registry.go index ecf483bf9..5bc5295c9 100644 --- a/registry/storage/registry.go +++ b/registry/storage/registry.go @@ -3,6 +3,7 @@ package storage import ( "context" "regexp" + "runtime" "github.com/distribution/distribution/v3" "github.com/distribution/distribution/v3/registry/storage/cache" @@ -10,6 +11,10 @@ import ( "github.com/distribution/reference" ) +var ( + DefaultConcurrencyLimit = runtime.GOMAXPROCS(0) +) + // registry is the top-level implementation of Registry for use in the storage // package. All instances should descend from this object. type registry struct { @@ -18,6 +23,7 @@ type registry struct { statter *blobStatter // global statter service. blobDescriptorCacheProvider cache.BlobDescriptorCacheProvider deleteEnabled bool + tagLookupConcurrencyLimit int resumableDigestEnabled bool blobDescriptorServiceFactory distribution.BlobDescriptorServiceFactory manifestURLs manifestURLs @@ -40,6 +46,13 @@ func EnableRedirect(registry *registry) error { return nil } +func TagLookupConcurrencyLimit(concurrencyLimit int) RegistryOption { + return func(registry *registry) error { + registry.tagLookupConcurrencyLimit = concurrencyLimit + return nil + } +} + // EnableDelete is a functional option for NewRegistry. It enables deletion on // the registry. func EnableDelete(registry *registry) error { @@ -184,9 +197,14 @@ func (repo *repository) Named() reference.Named { } func (repo *repository) Tags(ctx context.Context) distribution.TagService { + limit := DefaultConcurrencyLimit + if repo.tagLookupConcurrencyLimit > 0 { + limit = repo.tagLookupConcurrencyLimit + } tags := &tagStore{ - repository: repo, - blobStore: repo.registry.blobStore, + repository: repo, + blobStore: repo.registry.blobStore, + concurrencyLimit: limit, } return tags diff --git a/registry/storage/tagstore.go b/registry/storage/tagstore.go index 29dcf4e3f..a481df94d 100644 --- a/registry/storage/tagstore.go +++ b/registry/storage/tagstore.go @@ -4,10 +4,13 @@ import ( "context" "path" "sort" + "sync" + + "github.com/opencontainers/go-digest" + "golang.org/x/sync/errgroup" "github.com/distribution/distribution/v3" storagedriver "github.com/distribution/distribution/v3/registry/storage/driver" - "github.com/opencontainers/go-digest" ) var _ distribution.TagService = &tagStore{} @@ -18,8 +21,9 @@ var _ distribution.TagService = &tagStore{} // which only makes use of the Digest field of the returned distribution.Descriptor // but does not enable full roundtripping of Descriptor objects type tagStore struct { - repository *repository - blobStore *blobStore + repository *repository + blobStore *blobStore + concurrencyLimit int } // All returns all tags @@ -145,26 +149,48 @@ func (ts *tagStore) Lookup(ctx context.Context, desc distribution.Descriptor) ([ return nil, err } - var tags []string + g, ctx := errgroup.WithContext(ctx) + g.SetLimit(ts.concurrencyLimit) + + var ( + tags []string + mu sync.Mutex + ) for _, tag := range allTags { - tagLinkPathSpec := manifestTagCurrentPathSpec{ - name: ts.repository.Named().Name(), - tag: tag, + if ctx.Err() != nil { + break } + tag := tag - tagLinkPath, _ := pathFor(tagLinkPathSpec) - tagDigest, err := ts.blobStore.readlink(ctx, tagLinkPath) - if err != nil { - switch err.(type) { - case storagedriver.PathNotFoundError: - continue + g.Go(func() error { + tagLinkPathSpec := manifestTagCurrentPathSpec{ + name: ts.repository.Named().Name(), + tag: tag, } - return nil, err - } - if tagDigest == desc.Digest { - tags = append(tags, tag) - } + tagLinkPath, _ := pathFor(tagLinkPathSpec) + tagDigest, err := ts.blobStore.readlink(ctx, tagLinkPath) + if err != nil { + switch err.(type) { + case storagedriver.PathNotFoundError: + return nil + } + return err + } + + if tagDigest == desc.Digest { + mu.Lock() + tags = append(tags, tag) + mu.Unlock() + } + + return nil + }) + } + + err = g.Wait() + if err != nil { + return nil, err } return tags, nil