Merge pull request #1560 from RichardScothern/gc

Fix signature handling with GC.
This commit is contained in:
Richard Scothern 2016-03-30 17:46:04 -07:00
commit 13f72ef7dc
4 changed files with 79 additions and 30 deletions

View file

@ -8,21 +8,34 @@ keywords = ["registry, garbage, images, tags, repository, distribution"]
# What Garbage Collection Does # What Garbage Collection Does
Garbage collection is a process that delete blobs to which no manifests refer. "Garbage collection deletes blobs which no manifests reference. Manifests and
It runs in two phases. First, in the 'mark' phase, the process scans all the blobs which are deleted by their digest through the Registry API will become
manifests in the registry. From these manifests, it constructs a set of content eligible for garbage collection, but the actual blobs will not be removed from
address digests. This set is the 'mark set' and denotes the set of blobs to *not* storage until garbage collection is run.
delete. Secondly, in the 'sweep' phase, the process scans all the blobs and if
a blob's content address digest is not in the mark set, the process will delete
it.
# How Garbage Collection Works
Garbage collection runs in two phases. First, in the 'mark' phase, the process
scans all the manifests in the registry. From these manifests, it constructs a
set of content address digests. This set is the 'mark set' and denotes the set
of blobs to *not* delete. Secondly, in the 'sweep' phase, the process scans all
the blobs and if a blob's content address digest is not in the mark set, the
process will delete it.
> **NOTE** You should ensure that the registry is in read-only mode or not running at
> all. If you were to upload an image while garbage collection is running, there is the
> risk that the image's layers will be mistakenly deleted, leading to a corrupted image.
This type of garbage collection is known as stop-the-world garbage collection. In
future registry versions the intention is that garbage collection will be an
automated background action and this manual process will no longer apply.
# How to Run # How to Run
You can run garbage collection by running You can run garbage collection by running
docker run --rm registry-image-name garbage-collect /etc/docker/registry/config.yml `docker run --rm registry-image-name garbage-collect /etc/docker/registry/config.yml`
Additionally, garbage collection can be run in `dry-run` mode, which will print
the progress of the mark and sweep phases without removing any data.
NOTE: You should ensure that the registry itself is in read-only mode or not running at
all. If you were to upload an image while garbage collection is running, there is the
risk that the image's layers will be mistakenly deleted, leading to a corrupted image.

View file

@ -13,16 +13,17 @@ import (
"github.com/docker/distribution/registry/storage" "github.com/docker/distribution/registry/storage"
"github.com/docker/distribution/registry/storage/driver" "github.com/docker/distribution/registry/storage/driver"
"github.com/docker/distribution/registry/storage/driver/factory" "github.com/docker/distribution/registry/storage/driver/factory"
"github.com/docker/libtrust"
"github.com/spf13/cobra" "github.com/spf13/cobra"
) )
func markAndSweep(ctx context.Context, storageDriver driver.StorageDriver) error { func emit(format string, a ...interface{}) {
// Construct a registry if dryRun {
registry, err := storage.NewRegistry(ctx, storageDriver) fmt.Printf(format+"\n", a...)
if err != nil {
return fmt.Errorf("failed to construct registry: %v", err)
} }
}
func markAndSweep(ctx context.Context, storageDriver driver.StorageDriver, registry distribution.Namespace) error {
repositoryEnumerator, ok := registry.(distribution.RepositoryEnumerator) repositoryEnumerator, ok := registry.(distribution.RepositoryEnumerator)
if !ok { if !ok {
@ -31,7 +32,9 @@ func markAndSweep(ctx context.Context, storageDriver driver.StorageDriver) error
// mark // mark
markSet := make(map[digest.Digest]struct{}) markSet := make(map[digest.Digest]struct{})
err = repositoryEnumerator.Enumerate(ctx, func(repoName string) error { err := repositoryEnumerator.Enumerate(ctx, func(repoName string) error {
emit(repoName)
var err error var err error
named, err := reference.ParseNamed(repoName) named, err := reference.ParseNamed(repoName)
if err != nil { if err != nil {
@ -54,6 +57,7 @@ func markAndSweep(ctx context.Context, storageDriver driver.StorageDriver) error
err = manifestEnumerator.Enumerate(ctx, func(dgst digest.Digest) error { err = manifestEnumerator.Enumerate(ctx, func(dgst digest.Digest) error {
// Mark the manifest's blob // Mark the manifest's blob
emit("%s: marking manifest %s ", repoName, dgst)
markSet[dgst] = struct{}{} markSet[dgst] = struct{}{}
manifest, err := manifestService.Get(ctx, dgst) manifest, err := manifestService.Get(ctx, dgst)
@ -64,6 +68,7 @@ func markAndSweep(ctx context.Context, storageDriver driver.StorageDriver) error
descriptors := manifest.References() descriptors := manifest.References()
for _, descriptor := range descriptors { for _, descriptor := range descriptors {
markSet[descriptor.Digest] = struct{}{} markSet[descriptor.Digest] = struct{}{}
emit("%s: marking blob %s", repoName, descriptor.Digest)
} }
switch manifest.(type) { switch manifest.(type) {
@ -77,11 +82,13 @@ func markAndSweep(ctx context.Context, storageDriver driver.StorageDriver) error
return fmt.Errorf("failed to get signatures for signed manifest: %v", err) return fmt.Errorf("failed to get signatures for signed manifest: %v", err)
} }
for _, signatureDigest := range signatures { for _, signatureDigest := range signatures {
emit("%s: marking signature %s", repoName, signatureDigest)
markSet[signatureDigest] = struct{}{} markSet[signatureDigest] = struct{}{}
} }
break break
case *schema2.DeserializedManifest: case *schema2.DeserializedManifest:
config := manifest.(*schema2.DeserializedManifest).Config config := manifest.(*schema2.DeserializedManifest).Config
emit("%s: marking configuration %s", repoName, config.Digest)
markSet[config.Digest] = struct{}{} markSet[config.Digest] = struct{}{}
break break
} }
@ -110,9 +117,14 @@ func markAndSweep(ctx context.Context, storageDriver driver.StorageDriver) error
return fmt.Errorf("error enumerating blobs: %v", err) return fmt.Errorf("error enumerating blobs: %v", err)
} }
emit("\n%d blobs marked, %d blobs eligible for deletion", len(markSet), len(deleteSet))
// Construct vacuum // Construct vacuum
vacuum := storage.NewVacuum(ctx, storageDriver) vacuum := storage.NewVacuum(ctx, storageDriver)
for dgst := range deleteSet { for dgst := range deleteSet {
emit("blob eligible for deletion: %s", dgst)
if dryRun {
continue
}
err = vacuum.RemoveBlob(string(dgst)) err = vacuum.RemoveBlob(string(dgst))
if err != nil { if err != nil {
return fmt.Errorf("failed to delete blob %s: %v\n", dgst, err) return fmt.Errorf("failed to delete blob %s: %v\n", dgst, err)
@ -122,13 +134,18 @@ func markAndSweep(ctx context.Context, storageDriver driver.StorageDriver) error
return err return err
} }
func init() {
GCCmd.Flags().BoolVarP(&dryRun, "dry-run", "d", false, "do everything expect remove the blobs")
}
var dryRun bool
// GCCmd is the cobra command that corresponds to the garbage-collect subcommand // GCCmd is the cobra command that corresponds to the garbage-collect subcommand
var GCCmd = &cobra.Command{ var GCCmd = &cobra.Command{
Use: "garbage-collect <config>", Use: "garbage-collect <config>",
Short: "`garbage-collects` deletes layers not referenced by any manifests", Short: "`garbage-collect` deletes layers not referenced by any manifests",
Long: "`garbage-collects` deletes layers not referenced by any manifests", Long: "`garbage-collect` deletes layers not referenced by any manifests",
Run: func(cmd *cobra.Command, args []string) { Run: func(cmd *cobra.Command, args []string) {
config, err := resolveConfiguration(args) config, err := resolveConfiguration(args)
if err != nil { if err != nil {
fmt.Fprintf(os.Stderr, "configuration error: %v\n", err) fmt.Fprintf(os.Stderr, "configuration error: %v\n", err)
@ -149,7 +166,19 @@ var GCCmd = &cobra.Command{
os.Exit(1) os.Exit(1)
} }
err = markAndSweep(ctx, driver) k, err := libtrust.GenerateECP256PrivateKey()
if err != nil {
fmt.Fprint(os.Stderr, err)
os.Exit(1)
}
registry, err := storage.NewRegistry(ctx, driver, storage.DisableSchema1Signatures, storage.Schema1SigningKey(k))
if err != nil {
fmt.Fprintf(os.Stderr, "failed to construct registry: %v", err)
os.Exit(1)
}
err = markAndSweep(ctx, driver, registry)
if err != nil { if err != nil {
fmt.Fprintf(os.Stderr, "failed to garbage collect: %v", err) fmt.Fprintf(os.Stderr, "failed to garbage collect: %v", err)
os.Exit(1) os.Exit(1)

View file

@ -161,7 +161,7 @@ func TestNoDeletionNoEffect(t *testing.T) {
} }
// Run GC // Run GC
err = markAndSweep(context.Background(), inmemoryDriver) err = markAndSweep(context.Background(), inmemoryDriver, registry)
if err != nil { if err != nil {
t.Fatalf("Failed mark and sweep: %v", err) t.Fatalf("Failed mark and sweep: %v", err)
} }
@ -193,7 +193,7 @@ func TestDeletionHasEffect(t *testing.T) {
manifests.Delete(ctx, image3.manifestDigest) manifests.Delete(ctx, image3.manifestDigest)
// Run GC // Run GC
err = markAndSweep(context.Background(), inmemoryDriver) err = markAndSweep(context.Background(), inmemoryDriver, registry)
if err != nil { if err != nil {
t.Fatalf("Failed mark and sweep: %v", err) t.Fatalf("Failed mark and sweep: %v", err)
} }
@ -327,7 +327,7 @@ func TestOrphanBlobDeleted(t *testing.T) {
uploadRandomSchema2Image(t, repo) uploadRandomSchema2Image(t, repo)
// Run GC // Run GC
err = markAndSweep(context.Background(), inmemoryDriver) err = markAndSweep(context.Background(), inmemoryDriver, registry)
if err != nil { if err != nil {
t.Fatalf("Failed mark and sweep: %v", err) t.Fatalf("Failed mark and sweep: %v", err)
} }

View file

@ -12,6 +12,7 @@ import (
"github.com/docker/distribution/manifest/manifestlist" "github.com/docker/distribution/manifest/manifestlist"
"github.com/docker/distribution/manifest/schema1" "github.com/docker/distribution/manifest/schema1"
"github.com/docker/distribution/manifest/schema2" "github.com/docker/distribution/manifest/schema2"
"github.com/docker/distribution/registry/storage/driver"
) )
// A ManifestHandler gets and puts manifests of a particular type. // A ManifestHandler gets and puts manifests of a particular type.
@ -161,16 +162,22 @@ func (ms *manifestStore) GetSignatures(ctx context.Context, manifestDigest diges
return nil, err return nil, err
} }
signaturesPath = path.Join(signaturesPath, "sha256") var digests []digest.Digest
alg := string(digest.SHA256)
signaturePaths, err := ms.blobStore.driver.List(ctx, path.Join(signaturesPath, alg))
signaturePaths, err := ms.blobStore.driver.List(ctx, signaturesPath) switch err.(type) {
if err != nil { case nil:
break
case driver.PathNotFoundError:
// Manifest may have been pushed with signature store disabled
return digests, nil
default:
return nil, err return nil, err
} }
var digests []digest.Digest
for _, sigPath := range signaturePaths { for _, sigPath := range signaturePaths {
sigdigest, err := digest.ParseDigest("sha256:" + path.Base(sigPath)) sigdigest, err := digest.ParseDigest(alg + ":" + path.Base(sigPath))
if err != nil { if err != nil {
// merely found not a digest // merely found not a digest
continue continue