From 80b310ca447d52ce107775841645f382f775346d Mon Sep 17 00:00:00 2001 From: Richard Scothern Date: Wed, 23 Mar 2016 16:42:50 -0700 Subject: [PATCH 1/3] Add a --dry-run flag. If enabled this will print the mark and sweep process with removing any files. Signed-off-by: Richard Scothern --- registry/garbagecollect.go | 31 +++++++++++++++++++++++++------ registry/storage/manifeststore.go | 7 +++---- 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/registry/garbagecollect.go b/registry/garbagecollect.go index ecb64c98d..cfeee0789 100644 --- a/registry/garbagecollect.go +++ b/registry/garbagecollect.go @@ -13,12 +13,16 @@ import ( "github.com/docker/distribution/registry/storage" "github.com/docker/distribution/registry/storage/driver" "github.com/docker/distribution/registry/storage/driver/factory" - "github.com/spf13/cobra" ) +func emit(ctx context.Context, s string) { + if dryRun { + context.GetLogger(ctx).Infof("gc: %s", s) + } +} + func markAndSweep(ctx context.Context, storageDriver driver.StorageDriver) error { - // Construct a registry registry, err := storage.NewRegistry(ctx, storageDriver) if err != nil { return fmt.Errorf("failed to construct registry: %v", err) @@ -32,6 +36,8 @@ func markAndSweep(ctx context.Context, storageDriver driver.StorageDriver) error // mark markSet := make(map[digest.Digest]struct{}) err = repositoryEnumerator.Enumerate(ctx, func(repoName string) error { + emit(ctx, fmt.Sprint(repoName)) + var err error named, err := reference.ParseNamed(repoName) if err != nil { @@ -53,7 +59,8 @@ func markAndSweep(ctx context.Context, storageDriver driver.StorageDriver) error } err = manifestEnumerator.Enumerate(ctx, func(dgst digest.Digest) error { - // Mark the manifest's blob + // Mark the manifest's blo + emit(ctx, fmt.Sprintf("%s: adding manifest %s ", repoName, dgst)) markSet[dgst] = struct{}{} manifest, err := manifestService.Get(ctx, dgst) @@ -64,6 +71,7 @@ func markAndSweep(ctx context.Context, storageDriver driver.StorageDriver) error descriptors := manifest.References() for _, descriptor := range descriptors { markSet[descriptor.Digest] = struct{}{} + emit(ctx, fmt.Sprintf("%s: marking blob %v", repoName, descriptor)) } switch manifest.(type) { @@ -77,11 +85,13 @@ func markAndSweep(ctx context.Context, storageDriver driver.StorageDriver) error return fmt.Errorf("failed to get signatures for signed manifest: %v", err) } for _, signatureDigest := range signatures { + emit(ctx, fmt.Sprintf("%s: marking signature %s", repoName, signatureDigest)) markSet[signatureDigest] = struct{}{} } break case *schema2.DeserializedManifest: config := manifest.(*schema2.DeserializedManifest).Config + emit(ctx, fmt.Sprintf("%s: marking configuration %s", repoName, config.Digest)) markSet[config.Digest] = struct{}{} break } @@ -113,6 +123,10 @@ func markAndSweep(ctx context.Context, storageDriver driver.StorageDriver) error // Construct vacuum vacuum := storage.NewVacuum(ctx, storageDriver) for dgst := range deleteSet { + if dryRun { + emit(ctx, fmt.Sprintf("deleting %s", dgst)) + continue + } err = vacuum.RemoveBlob(string(dgst)) if err != nil { return fmt.Errorf("failed to delete blob %s: %v\n", dgst, err) @@ -122,13 +136,18 @@ func markAndSweep(ctx context.Context, storageDriver driver.StorageDriver) error return err } +func init() { + GCCmd.Flags().BoolVarP(&dryRun, "dry-run", "d", false, "do everything expect remove the blobs") +} + +var dryRun bool + // GCCmd is the cobra command that corresponds to the garbage-collect subcommand var GCCmd = &cobra.Command{ Use: "garbage-collect ", - Short: "`garbage-collects` deletes layers not referenced by any manifests", - Long: "`garbage-collects` deletes layers not referenced by any manifests", + Short: "`garbage-collect` deletes layers not referenced by any manifests", + Long: "`garbage-collect` deletes layers not referenced by any manifests", Run: func(cmd *cobra.Command, args []string) { - config, err := resolveConfiguration(args) if err != nil { fmt.Fprintf(os.Stderr, "configuration error: %v\n", err) diff --git a/registry/storage/manifeststore.go b/registry/storage/manifeststore.go index f3660c98d..e0b823092 100644 --- a/registry/storage/manifeststore.go +++ b/registry/storage/manifeststore.go @@ -161,16 +161,15 @@ func (ms *manifestStore) GetSignatures(ctx context.Context, manifestDigest diges return nil, err } - signaturesPath = path.Join(signaturesPath, "sha256") - - signaturePaths, err := ms.blobStore.driver.List(ctx, signaturesPath) + alg := string(digest.SHA256) + signaturePaths, err := ms.blobStore.driver.List(ctx, path.Join(signaturesPath, alg)) if err != nil { return nil, err } var digests []digest.Digest for _, sigPath := range signaturePaths { - sigdigest, err := digest.ParseDigest("sha256:" + path.Base(sigPath)) + sigdigest, err := digest.ParseDigest(alg + ":" + path.Base(sigPath)) if err != nil { // merely found not a digest continue From 4324b70c50b3c7f60b9a121470e4c6db0ba86388 Mon Sep 17 00:00:00 2001 From: Richard Scothern Date: Thu, 24 Mar 2016 16:03:25 -0700 Subject: [PATCH 2/3] Fix signature handling with GC. If a schema 1 manifest is uploaded with the `disablesignaturestore` option set to true, then no signatures will exist. Handle this case. If a schema 1 manifest is pushed, deleted, garbage collected and pushed again, the repository will contain signature links from the first version, but the blobs will not exist. Disable the signature store in the garbage-collect command so signatures are not fetched. Signed-off-by: Richard Scothern --- registry/garbagecollect.go | 43 +++++++++++++++++++------------ registry/garbagecollect_test.go | 6 ++--- registry/storage/manifeststore.go | 12 +++++++-- 3 files changed, 40 insertions(+), 21 deletions(-) diff --git a/registry/garbagecollect.go b/registry/garbagecollect.go index cfeee0789..8df956b9f 100644 --- a/registry/garbagecollect.go +++ b/registry/garbagecollect.go @@ -13,20 +13,18 @@ import ( "github.com/docker/distribution/registry/storage" "github.com/docker/distribution/registry/storage/driver" "github.com/docker/distribution/registry/storage/driver/factory" + "github.com/docker/libtrust" "github.com/spf13/cobra" ) -func emit(ctx context.Context, s string) { +func emit(format string, a ...interface{}) { if dryRun { - context.GetLogger(ctx).Infof("gc: %s", s) + fmt.Printf(format, a...) + fmt.Println("") } } -func markAndSweep(ctx context.Context, storageDriver driver.StorageDriver) error { - registry, err := storage.NewRegistry(ctx, storageDriver) - if err != nil { - return fmt.Errorf("failed to construct registry: %v", err) - } +func markAndSweep(ctx context.Context, storageDriver driver.StorageDriver, registry distribution.Namespace) error { repositoryEnumerator, ok := registry.(distribution.RepositoryEnumerator) if !ok { @@ -35,8 +33,8 @@ func markAndSweep(ctx context.Context, storageDriver driver.StorageDriver) error // mark markSet := make(map[digest.Digest]struct{}) - err = repositoryEnumerator.Enumerate(ctx, func(repoName string) error { - emit(ctx, fmt.Sprint(repoName)) + err := repositoryEnumerator.Enumerate(ctx, func(repoName string) error { + emit(repoName) var err error named, err := reference.ParseNamed(repoName) @@ -59,8 +57,8 @@ func markAndSweep(ctx context.Context, storageDriver driver.StorageDriver) error } err = manifestEnumerator.Enumerate(ctx, func(dgst digest.Digest) error { - // Mark the manifest's blo - emit(ctx, fmt.Sprintf("%s: adding manifest %s ", repoName, dgst)) + // Mark the manifest's blob + emit("%s: marking manifest %s ", repoName, dgst) markSet[dgst] = struct{}{} manifest, err := manifestService.Get(ctx, dgst) @@ -71,7 +69,7 @@ func markAndSweep(ctx context.Context, storageDriver driver.StorageDriver) error descriptors := manifest.References() for _, descriptor := range descriptors { markSet[descriptor.Digest] = struct{}{} - emit(ctx, fmt.Sprintf("%s: marking blob %v", repoName, descriptor)) + emit("%s: marking blob %s", repoName, descriptor.Digest) } switch manifest.(type) { @@ -85,13 +83,13 @@ func markAndSweep(ctx context.Context, storageDriver driver.StorageDriver) error return fmt.Errorf("failed to get signatures for signed manifest: %v", err) } for _, signatureDigest := range signatures { - emit(ctx, fmt.Sprintf("%s: marking signature %s", repoName, signatureDigest)) + emit("%s: marking signature %s", repoName, signatureDigest) markSet[signatureDigest] = struct{}{} } break case *schema2.DeserializedManifest: config := manifest.(*schema2.DeserializedManifest).Config - emit(ctx, fmt.Sprintf("%s: marking configuration %s", repoName, config.Digest)) + emit("%s: marking configuration %s", repoName, config.Digest) markSet[config.Digest] = struct{}{} break } @@ -120,11 +118,12 @@ func markAndSweep(ctx context.Context, storageDriver driver.StorageDriver) error return fmt.Errorf("error enumerating blobs: %v", err) } + emit("\n%d blobs marked, %d blobs eligible for deletion", len(markSet), len(deleteSet)) // Construct vacuum vacuum := storage.NewVacuum(ctx, storageDriver) for dgst := range deleteSet { if dryRun { - emit(ctx, fmt.Sprintf("deleting %s", dgst)) + emit("deleting %s", dgst) continue } err = vacuum.RemoveBlob(string(dgst)) @@ -168,7 +167,19 @@ var GCCmd = &cobra.Command{ os.Exit(1) } - err = markAndSweep(ctx, driver) + k, err := libtrust.GenerateECP256PrivateKey() + if err != nil { + fmt.Fprintf(os.Stderr, "%s", err) + os.Exit(1) + } + + registry, err := storage.NewRegistry(ctx, driver, storage.DisableSchema1Signatures, storage.Schema1SigningKey(k)) + if err != nil { + fmt.Fprintf(os.Stderr, "failed to construct registry: %v", err) + os.Exit(1) + } + + err = markAndSweep(ctx, driver, registry) if err != nil { fmt.Fprintf(os.Stderr, "failed to garbage collect: %v", err) os.Exit(1) diff --git a/registry/garbagecollect_test.go b/registry/garbagecollect_test.go index 6096e758e..dd5fadd53 100644 --- a/registry/garbagecollect_test.go +++ b/registry/garbagecollect_test.go @@ -161,7 +161,7 @@ func TestNoDeletionNoEffect(t *testing.T) { } // Run GC - err = markAndSweep(context.Background(), inmemoryDriver) + err = markAndSweep(context.Background(), inmemoryDriver, registry) if err != nil { t.Fatalf("Failed mark and sweep: %v", err) } @@ -193,7 +193,7 @@ func TestDeletionHasEffect(t *testing.T) { manifests.Delete(ctx, image3.manifestDigest) // Run GC - err = markAndSweep(context.Background(), inmemoryDriver) + err = markAndSweep(context.Background(), inmemoryDriver, registry) if err != nil { t.Fatalf("Failed mark and sweep: %v", err) } @@ -327,7 +327,7 @@ func TestOrphanBlobDeleted(t *testing.T) { uploadRandomSchema2Image(t, repo) // Run GC - err = markAndSweep(context.Background(), inmemoryDriver) + err = markAndSweep(context.Background(), inmemoryDriver, registry) if err != nil { t.Fatalf("Failed mark and sweep: %v", err) } diff --git a/registry/storage/manifeststore.go b/registry/storage/manifeststore.go index e0b823092..5a9165f90 100644 --- a/registry/storage/manifeststore.go +++ b/registry/storage/manifeststore.go @@ -12,6 +12,7 @@ import ( "github.com/docker/distribution/manifest/manifestlist" "github.com/docker/distribution/manifest/schema1" "github.com/docker/distribution/manifest/schema2" + "github.com/docker/distribution/registry/storage/driver" ) // A ManifestHandler gets and puts manifests of a particular type. @@ -161,13 +162,20 @@ func (ms *manifestStore) GetSignatures(ctx context.Context, manifestDigest diges return nil, err } + var digests []digest.Digest alg := string(digest.SHA256) signaturePaths, err := ms.blobStore.driver.List(ctx, path.Join(signaturesPath, alg)) - if err != nil { + + switch err.(type) { + case nil: + break + case driver.PathNotFoundError: + // Manifest may have been pushed with signature store disabled + return digests, nil + default: return nil, err } - var digests []digest.Digest for _, sigPath := range signaturePaths { sigdigest, err := digest.ParseDigest(alg + ":" + path.Base(sigPath)) if err != nil { From eb0b7f0173d0c54d1db7c76ef4b7d55b5fc7710a Mon Sep 17 00:00:00 2001 From: Richard Scothern Date: Tue, 29 Mar 2016 10:47:22 -0700 Subject: [PATCH 3/3] Update the gc documentation. Signed-off-by: Richard Scothern --- docs/gc.md | 35 ++++++++++++++++++++++++----------- registry/garbagecollect.go | 7 +++---- 2 files changed, 27 insertions(+), 15 deletions(-) diff --git a/docs/gc.md b/docs/gc.md index 345653693..fef0304e6 100644 --- a/docs/gc.md +++ b/docs/gc.md @@ -8,21 +8,34 @@ keywords = ["registry, garbage, images, tags, repository, distribution"] # What Garbage Collection Does -Garbage collection is a process that delete blobs to which no manifests refer. -It runs in two phases. First, in the 'mark' phase, the process scans all the -manifests in the registry. From these manifests, it constructs a set of content -address digests. This set is the 'mark set' and denotes the set of blobs to *not* -delete. Secondly, in the 'sweep' phase, the process scans all the blobs and if -a blob's content address digest is not in the mark set, the process will delete -it. +"Garbage collection deletes blobs which no manifests reference. Manifests and +blobs which are deleted by their digest through the Registry API will become +eligible for garbage collection, but the actual blobs will not be removed from +storage until garbage collection is run. +# How Garbage Collection Works + +Garbage collection runs in two phases. First, in the 'mark' phase, the process +scans all the manifests in the registry. From these manifests, it constructs a +set of content address digests. This set is the 'mark set' and denotes the set +of blobs to *not* delete. Secondly, in the 'sweep' phase, the process scans all +the blobs and if a blob's content address digest is not in the mark set, the +process will delete it. + +> **NOTE** You should ensure that the registry is in read-only mode or not running at +> all. If you were to upload an image while garbage collection is running, there is the +> risk that the image's layers will be mistakenly deleted, leading to a corrupted image. + +This type of garbage collection is known as stop-the-world garbage collection. In +future registry versions the intention is that garbage collection will be an +automated background action and this manual process will no longer apply. # How to Run You can run garbage collection by running - docker run --rm registry-image-name garbage-collect /etc/docker/registry/config.yml +`docker run --rm registry-image-name garbage-collect /etc/docker/registry/config.yml` + +Additionally, garbage collection can be run in `dry-run` mode, which will print +the progress of the mark and sweep phases without removing any data. -NOTE: You should ensure that the registry itself is in read-only mode or not running at -all. If you were to upload an image while garbage collection is running, there is the -risk that the image's layers will be mistakenly deleted, leading to a corrupted image. diff --git a/registry/garbagecollect.go b/registry/garbagecollect.go index 8df956b9f..1be4546d7 100644 --- a/registry/garbagecollect.go +++ b/registry/garbagecollect.go @@ -19,8 +19,7 @@ import ( func emit(format string, a ...interface{}) { if dryRun { - fmt.Printf(format, a...) - fmt.Println("") + fmt.Printf(format+"\n", a...) } } @@ -122,8 +121,8 @@ func markAndSweep(ctx context.Context, storageDriver driver.StorageDriver, regis // Construct vacuum vacuum := storage.NewVacuum(ctx, storageDriver) for dgst := range deleteSet { + emit("blob eligible for deletion: %s", dgst) if dryRun { - emit("deleting %s", dgst) continue } err = vacuum.RemoveBlob(string(dgst)) @@ -169,7 +168,7 @@ var GCCmd = &cobra.Command{ k, err := libtrust.GenerateECP256PrivateKey() if err != nil { - fmt.Fprintf(os.Stderr, "%s", err) + fmt.Fprint(os.Stderr, err) os.Exit(1) }