From 764c69ca59d6dc2487c47d581676ad6acf7be8c8 Mon Sep 17 00:00:00 2001 From: Bartosz Borkowski Date: Wed, 11 Jul 2018 10:44:41 +0200 Subject: [PATCH] Garbage collection speed optimizations - improved manifest mark computational complexity from quadratic to linear - optimized manifest removal by passing precomputed set of tags instead of all Signed-off-by: Bartosz Borkowski --- registry/storage/garbagecollect.go | 129 +++++++++++++++++++++++++---- 1 file changed, 115 insertions(+), 14 deletions(-) diff --git a/registry/storage/garbagecollect.go b/registry/storage/garbagecollect.go index 48d428a88..54adcaa58 100644 --- a/registry/storage/garbagecollect.go +++ b/registry/storage/garbagecollect.go @@ -3,6 +3,8 @@ package storage import ( "context" "fmt" + "path" + "time" "github.com/docker/distribution" "github.com/docker/distribution/reference" @@ -33,6 +35,7 @@ func MarkAndSweep(ctx context.Context, storageDriver driver.StorageDriver, regis if !ok { return fmt.Errorf("unable to convert Namespace to RepositoryEnumerator") } + emit("GC mark phase %v", time.Now().String()) // mark markSet := make(map[digest.Digest]struct{}) @@ -60,23 +63,60 @@ func MarkAndSweep(ctx context.Context, storageDriver driver.StorageDriver, regis return fmt.Errorf("unable to convert ManifestService into ManifestEnumerator") } + // 1. fetch all tag names + allTags, err := repository.Tags(ctx).All(ctx) + switch err.(type) { + case distribution.ErrRepositoryUnknown: + break + case nil: + break + default: + return fmt.Errorf("failed to retrieve tags %v", err) + } + + digestUsed := make(map[digest.Digest]int) + tagDigests := make(map[string][]digest.Digest) + + // 2. 
read each tag's _current_ digest and mark its usage; store all index links for later reference + for _, tag := range allTags { + description, err := repository.Tags(ctx).Get(ctx, tag) + switch err.(type) { + case distribution.ErrTagUnknown: + // corrupted storage; current link is missing + break + case nil: + digestUsed[description.Digest] = 1 + break + default: + return fmt.Errorf("failed to retrieve tag %v: %v", tag, err) + } + + // tag links (historical and current) + digests, err := getDigests(ctx, storageDriver, repoName, tag) + if err != nil { + return fmt.Errorf("failed to retrieve tag links %v: %v", tag, err) + } + + if digests != nil { + tagDigests[tag] = digests + } + } + + // 3. build the digest -> tags map by transposing tagDigests + digestTags := make(map[digest.Digest][]string) + for tag, digests := range tagDigests { + for _, digest := range digests { + digestTags[digest] = append(digestTags[digest], tag) + } + } + err = manifestEnumerator.Enumerate(ctx, func(dgst digest.Digest) error { if opts.RemoveUntagged { - // fetch all tags where this manifest is the latest one - tags, err := repository.Tags(ctx).Lookup(ctx, distribution.Descriptor{Digest: dgst}) - if err != nil { - return fmt.Errorf("failed to retrieve tags for digest %v: %v", dgst, err) - } - if len(tags) == 0 { + // check if this digest is the current digest of any tag + if _, exists := digestUsed[dgst]; !exists { emit("manifest eligible for deletion: %s", dgst) - // fetch all tags from repository - // all of these tags could contain manifest in history - // which means that we need check (and delete) those references when deleting manifest - allTags, err := repository.Tags(ctx).All(ctx) - if err != nil { - return fmt.Errorf("failed to retrieve tags %v", err) - } - manifestArr = append(manifestArr, ManifestDel{Name: repoName, Digest: dgst, Tags: allTags}) + // add only the tags whose index links to this digest + manifestArr = append(manifestArr, ManifestDel{Name: repoName, Digest: dgst, Tags: digestTags[dgst]}) return 
nil } } @@ -119,6 +159,7 @@ func MarkAndSweep(ctx context.Context, storageDriver driver.StorageDriver, regis // sweep vacuum := NewVacuum(ctx, storageDriver) if !opts.DryRun { + emit("GC manifest removal phase %v", time.Now().String()) for _, obj := range manifestArr { err = vacuum.RemoveManifest(obj.Name, obj.Digest, obj.Tags) if err != nil { @@ -126,6 +167,8 @@ func MarkAndSweep(ctx context.Context, storageDriver driver.StorageDriver, regis } } } + + emit("GC blob scan phase %v", time.Now().String()) blobService := registry.Blobs() deleteSet := make(map[digest.Digest]struct{}) err = blobService.Enumerate(ctx, func(dgst digest.Digest) error { @@ -139,6 +182,7 @@ func MarkAndSweep(ctx context.Context, storageDriver driver.StorageDriver, regis return fmt.Errorf("error enumerating blobs: %v", err) } emit("\n%d blobs marked, %d blobs and %d manifests eligible for deletion", len(markSet), len(deleteSet), len(manifestArr)) + emit("GC blob removal phase %v", time.Now().String()) for dgst := range deleteSet { emit("blob eligible for deletion: %s", dgst) if opts.DryRun { @@ -149,6 +193,63 @@ func MarkAndSweep(ctx context.Context, storageDriver driver.StorageDriver, regis return fmt.Errorf("failed to delete blob %s: %v", dgst, err) } } + emit("GC done %v", time.Now().String()) return err } + +// getDigests returns every digest that has a link file under the given tag's index path; a missing index path yields an empty result and no error +func getDigests(ctx context.Context, storageDriver driver.StorageDriver, repoName string, tag string) ([]digest.Digest, error) { + indexPath, err := pathFor(manifestTagIndexPathSpec{ + name: repoName, + tag: tag, + }) + + if err != nil { + return nil, err + } + + var descriptors []digest.Digest + + err = storageDriver.Walk(ctx, indexPath, func(fileInfo driver.FileInfo) error { + if fileInfo.IsDir() { + return nil + } + + filePath := fileInfo.Path() + + dir, fileName := path.Split(filePath) + if fileName != "link" { + return nil + } + + digest, err := digestFromLinkDir(dir) + if err != nil { + return err + } + + descriptors = append(descriptors, 
digest) + return nil + }) + + if err != nil { + if _, ok := err.(driver.PathNotFoundError); ok { + return descriptors, nil + } + + return nil, fmt.Errorf("failed to read tags %v digests: %v", tag, err) + } + + return descriptors, nil +} + +// digestFromLinkDir rebuilds a digest from a link directory of the form .../<algorithm>/<hex>/ (trailing slash, as returned by path.Split) and validates it +func digestFromLinkDir(dir string) (digest.Digest, error) { + dir = path.Dir(dir) + dir, hex := path.Split(dir) + dir = path.Dir(dir) + dir, algo := path.Split(dir) + + dgst := digest.NewDigestFromHex(algo, hex) + return dgst, dgst.Validate() +}