Merge pull request #1560 from RichardScothern/gc

Fix signature handling with GC.
This commit is contained in:
Richard Scothern 2016-03-30 17:46:04 -07:00
commit 13f72ef7dc
4 changed files with 79 additions and 30 deletions

View file

@ -8,21 +8,34 @@ keywords = ["registry, garbage, images, tags, repository, distribution"]
# What Garbage Collection Does
Garbage collection is a process that delete blobs to which no manifests refer.
It runs in two phases. First, in the 'mark' phase, the process scans all the
manifests in the registry. From these manifests, it constructs a set of content
address digests. This set is the 'mark set' and denotes the set of blobs to *not*
delete. Secondly, in the 'sweep' phase, the process scans all the blobs and if
a blob's content address digest is not in the mark set, the process will delete
it.
"Garbage collection deletes blobs which no manifests reference. Manifests and
blobs which are deleted by their digest through the Registry API will become
eligible for garbage collection, but the actual blobs will not be removed from
storage until garbage collection is run.
# How Garbage Collection Works
Garbage collection runs in two phases. First, in the 'mark' phase, the process
scans all the manifests in the registry. From these manifests, it constructs a
set of content address digests. This set is the 'mark set' and denotes the set
of blobs to *not* delete. Secondly, in the 'sweep' phase, the process scans all
the blobs and if a blob's content address digest is not in the mark set, the
process will delete it.
> **NOTE** You should ensure that the registry is in read-only mode or not running at
> all. If you were to upload an image while garbage collection is running, there is the
> risk that the image's layers will be mistakenly deleted, leading to a corrupted image.
This type of garbage collection is known as stop-the-world garbage collection. In
future registry versions the intention is that garbage collection will be an
automated background action and this manual process will no longer apply.
# How to Run
You can run garbage collection by running
docker run --rm registry-image-name garbage-collect /etc/docker/registry/config.yml
`docker run --rm registry-image-name garbage-collect /etc/docker/registry/config.yml`
Additionally, garbage collection can be run in `dry-run` mode, which will print
the progress of the mark and sweep phases without removing any data.
NOTE: You should ensure that the registry itself is in read-only mode or not running at
all. If you were to upload an image while garbage collection is running, there is the
risk that the image's layers will be mistakenly deleted, leading to a corrupted image.

View file

@ -13,16 +13,17 @@ import (
"github.com/docker/distribution/registry/storage"
"github.com/docker/distribution/registry/storage/driver"
"github.com/docker/distribution/registry/storage/driver/factory"
"github.com/docker/libtrust"
"github.com/spf13/cobra"
)
func markAndSweep(ctx context.Context, storageDriver driver.StorageDriver) error {
// Construct a registry
registry, err := storage.NewRegistry(ctx, storageDriver)
if err != nil {
return fmt.Errorf("failed to construct registry: %v", err)
func emit(format string, a ...interface{}) {
if dryRun {
fmt.Printf(format+"\n", a...)
}
}
func markAndSweep(ctx context.Context, storageDriver driver.StorageDriver, registry distribution.Namespace) error {
repositoryEnumerator, ok := registry.(distribution.RepositoryEnumerator)
if !ok {
@ -31,7 +32,9 @@ func markAndSweep(ctx context.Context, storageDriver driver.StorageDriver) error
// mark
markSet := make(map[digest.Digest]struct{})
err = repositoryEnumerator.Enumerate(ctx, func(repoName string) error {
err := repositoryEnumerator.Enumerate(ctx, func(repoName string) error {
emit(repoName)
var err error
named, err := reference.ParseNamed(repoName)
if err != nil {
@ -54,6 +57,7 @@ func markAndSweep(ctx context.Context, storageDriver driver.StorageDriver) error
err = manifestEnumerator.Enumerate(ctx, func(dgst digest.Digest) error {
// Mark the manifest's blob
emit("%s: marking manifest %s ", repoName, dgst)
markSet[dgst] = struct{}{}
manifest, err := manifestService.Get(ctx, dgst)
@ -64,6 +68,7 @@ func markAndSweep(ctx context.Context, storageDriver driver.StorageDriver) error
descriptors := manifest.References()
for _, descriptor := range descriptors {
markSet[descriptor.Digest] = struct{}{}
emit("%s: marking blob %s", repoName, descriptor.Digest)
}
switch manifest.(type) {
@ -77,11 +82,13 @@ func markAndSweep(ctx context.Context, storageDriver driver.StorageDriver) error
return fmt.Errorf("failed to get signatures for signed manifest: %v", err)
}
for _, signatureDigest := range signatures {
emit("%s: marking signature %s", repoName, signatureDigest)
markSet[signatureDigest] = struct{}{}
}
break
case *schema2.DeserializedManifest:
config := manifest.(*schema2.DeserializedManifest).Config
emit("%s: marking configuration %s", repoName, config.Digest)
markSet[config.Digest] = struct{}{}
break
}
@ -110,9 +117,14 @@ func markAndSweep(ctx context.Context, storageDriver driver.StorageDriver) error
return fmt.Errorf("error enumerating blobs: %v", err)
}
emit("\n%d blobs marked, %d blobs eligible for deletion", len(markSet), len(deleteSet))
// Construct vacuum
vacuum := storage.NewVacuum(ctx, storageDriver)
for dgst := range deleteSet {
emit("blob eligible for deletion: %s", dgst)
if dryRun {
continue
}
err = vacuum.RemoveBlob(string(dgst))
if err != nil {
return fmt.Errorf("failed to delete blob %s: %v\n", dgst, err)
@ -122,13 +134,18 @@ func markAndSweep(ctx context.Context, storageDriver driver.StorageDriver) error
return err
}
func init() {
GCCmd.Flags().BoolVarP(&dryRun, "dry-run", "d", false, "do everything expect remove the blobs")
}
var dryRun bool
// GCCmd is the cobra command that corresponds to the garbage-collect subcommand
var GCCmd = &cobra.Command{
Use: "garbage-collect <config>",
Short: "`garbage-collects` deletes layers not referenced by any manifests",
Long: "`garbage-collects` deletes layers not referenced by any manifests",
Short: "`garbage-collect` deletes layers not referenced by any manifests",
Long: "`garbage-collect` deletes layers not referenced by any manifests",
Run: func(cmd *cobra.Command, args []string) {
config, err := resolveConfiguration(args)
if err != nil {
fmt.Fprintf(os.Stderr, "configuration error: %v\n", err)
@ -149,7 +166,19 @@ var GCCmd = &cobra.Command{
os.Exit(1)
}
err = markAndSweep(ctx, driver)
k, err := libtrust.GenerateECP256PrivateKey()
if err != nil {
fmt.Fprint(os.Stderr, err)
os.Exit(1)
}
registry, err := storage.NewRegistry(ctx, driver, storage.DisableSchema1Signatures, storage.Schema1SigningKey(k))
if err != nil {
fmt.Fprintf(os.Stderr, "failed to construct registry: %v", err)
os.Exit(1)
}
err = markAndSweep(ctx, driver, registry)
if err != nil {
fmt.Fprintf(os.Stderr, "failed to garbage collect: %v", err)
os.Exit(1)

View file

@ -161,7 +161,7 @@ func TestNoDeletionNoEffect(t *testing.T) {
}
// Run GC
err = markAndSweep(context.Background(), inmemoryDriver)
err = markAndSweep(context.Background(), inmemoryDriver, registry)
if err != nil {
t.Fatalf("Failed mark and sweep: %v", err)
}
@ -193,7 +193,7 @@ func TestDeletionHasEffect(t *testing.T) {
manifests.Delete(ctx, image3.manifestDigest)
// Run GC
err = markAndSweep(context.Background(), inmemoryDriver)
err = markAndSweep(context.Background(), inmemoryDriver, registry)
if err != nil {
t.Fatalf("Failed mark and sweep: %v", err)
}
@ -327,7 +327,7 @@ func TestOrphanBlobDeleted(t *testing.T) {
uploadRandomSchema2Image(t, repo)
// Run GC
err = markAndSweep(context.Background(), inmemoryDriver)
err = markAndSweep(context.Background(), inmemoryDriver, registry)
if err != nil {
t.Fatalf("Failed mark and sweep: %v", err)
}

View file

@ -12,6 +12,7 @@ import (
"github.com/docker/distribution/manifest/manifestlist"
"github.com/docker/distribution/manifest/schema1"
"github.com/docker/distribution/manifest/schema2"
"github.com/docker/distribution/registry/storage/driver"
)
// A ManifestHandler gets and puts manifests of a particular type.
@ -161,16 +162,22 @@ func (ms *manifestStore) GetSignatures(ctx context.Context, manifestDigest diges
return nil, err
}
signaturesPath = path.Join(signaturesPath, "sha256")
var digests []digest.Digest
alg := string(digest.SHA256)
signaturePaths, err := ms.blobStore.driver.List(ctx, path.Join(signaturesPath, alg))
signaturePaths, err := ms.blobStore.driver.List(ctx, signaturesPath)
if err != nil {
switch err.(type) {
case nil:
break
case driver.PathNotFoundError:
// Manifest may have been pushed with signature store disabled
return digests, nil
default:
return nil, err
}
var digests []digest.Digest
for _, sigPath := range signaturePaths {
sigdigest, err := digest.ParseDigest("sha256:" + path.Base(sigPath))
sigdigest, err := digest.ParseDigest(alg + ":" + path.Base(sigPath))
if err != nil {
// merely found not a digest
continue