forked from TrueCloudLab/distribution
Merge pull request #1560 from RichardScothern/gc
Fix signature handling with GC.
This commit is contained in:
commit
13f72ef7dc
4 changed files with 79 additions and 30 deletions
35
docs/gc.md
35
docs/gc.md
|
@ -8,21 +8,34 @@ keywords = ["registry, garbage, images, tags, repository, distribution"]
|
|||
|
||||
# What Garbage Collection Does
|
||||
|
||||
Garbage collection is a process that deletes blobs to which no manifests refer.
|
||||
It runs in two phases. First, in the 'mark' phase, the process scans all the
|
||||
manifests in the registry. From these manifests, it constructs a set of content
|
||||
address digests. This set is the 'mark set' and denotes the set of blobs to *not*
|
||||
delete. Secondly, in the 'sweep' phase, the process scans all the blobs and if
|
||||
a blob's content address digest is not in the mark set, the process will delete
|
||||
it.
|
||||
Garbage collection deletes blobs which no manifests reference. Manifests and
|
||||
blobs which are deleted by their digest through the Registry API will become
|
||||
eligible for garbage collection, but the actual blobs will not be removed from
|
||||
storage until garbage collection is run.
|
||||
|
||||
# How Garbage Collection Works
|
||||
|
||||
Garbage collection runs in two phases. First, in the 'mark' phase, the process
|
||||
scans all the manifests in the registry. From these manifests, it constructs a
|
||||
set of content address digests. This set is the 'mark set' and denotes the set
|
||||
of blobs to *not* delete. Secondly, in the 'sweep' phase, the process scans all
|
||||
the blobs and if a blob's content address digest is not in the mark set, the
|
||||
process will delete it.
|
||||
|
||||
> **NOTE** You should ensure that the registry is in read-only mode or not running at
|
||||
> all. If you were to upload an image while garbage collection is running, there is the
|
||||
> risk that the image's layers will be mistakenly deleted, leading to a corrupted image.
|
||||
|
||||
This type of garbage collection is known as stop-the-world garbage collection. In
|
||||
future registry versions the intention is that garbage collection will be an
|
||||
automated background action and this manual process will no longer apply.
|
||||
|
||||
# How to Run
|
||||
|
||||
You can run garbage collection by running
|
||||
|
||||
docker run --rm registry-image-name garbage-collect /etc/docker/registry/config.yml
|
||||
`docker run --rm registry-image-name garbage-collect /etc/docker/registry/config.yml`
|
||||
|
||||
Additionally, garbage collection can be run in `dry-run` mode, which will print
|
||||
the progress of the mark and sweep phases without removing any data.
|
||||
|
||||
NOTE: You should ensure that the registry itself is in read-only mode or not running at
|
||||
all. If you were to upload an image while garbage collection is running, there is the
|
||||
risk that the image's layers will be mistakenly deleted, leading to a corrupted image.
|
||||
|
|
|
@ -13,16 +13,17 @@ import (
|
|||
"github.com/docker/distribution/registry/storage"
|
||||
"github.com/docker/distribution/registry/storage/driver"
|
||||
"github.com/docker/distribution/registry/storage/driver/factory"
|
||||
|
||||
"github.com/docker/libtrust"
|
||||
"github.com/spf13/cobra"
|
||||
)
|
||||
|
||||
func markAndSweep(ctx context.Context, storageDriver driver.StorageDriver) error {
|
||||
// Construct a registry
|
||||
registry, err := storage.NewRegistry(ctx, storageDriver)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to construct registry: %v", err)
|
||||
func emit(format string, a ...interface{}) {
|
||||
if dryRun {
|
||||
fmt.Printf(format+"\n", a...)
|
||||
}
|
||||
}
|
||||
|
||||
func markAndSweep(ctx context.Context, storageDriver driver.StorageDriver, registry distribution.Namespace) error {
|
||||
|
||||
repositoryEnumerator, ok := registry.(distribution.RepositoryEnumerator)
|
||||
if !ok {
|
||||
|
@ -31,7 +32,9 @@ func markAndSweep(ctx context.Context, storageDriver driver.StorageDriver) error
|
|||
|
||||
// mark
|
||||
markSet := make(map[digest.Digest]struct{})
|
||||
err = repositoryEnumerator.Enumerate(ctx, func(repoName string) error {
|
||||
err := repositoryEnumerator.Enumerate(ctx, func(repoName string) error {
|
||||
emit(repoName)
|
||||
|
||||
var err error
|
||||
named, err := reference.ParseNamed(repoName)
|
||||
if err != nil {
|
||||
|
@ -54,6 +57,7 @@ func markAndSweep(ctx context.Context, storageDriver driver.StorageDriver) error
|
|||
|
||||
err = manifestEnumerator.Enumerate(ctx, func(dgst digest.Digest) error {
|
||||
// Mark the manifest's blob
|
||||
emit("%s: marking manifest %s ", repoName, dgst)
|
||||
markSet[dgst] = struct{}{}
|
||||
|
||||
manifest, err := manifestService.Get(ctx, dgst)
|
||||
|
@ -64,6 +68,7 @@ func markAndSweep(ctx context.Context, storageDriver driver.StorageDriver) error
|
|||
descriptors := manifest.References()
|
||||
for _, descriptor := range descriptors {
|
||||
markSet[descriptor.Digest] = struct{}{}
|
||||
emit("%s: marking blob %s", repoName, descriptor.Digest)
|
||||
}
|
||||
|
||||
switch manifest.(type) {
|
||||
|
@ -77,11 +82,13 @@ func markAndSweep(ctx context.Context, storageDriver driver.StorageDriver) error
|
|||
return fmt.Errorf("failed to get signatures for signed manifest: %v", err)
|
||||
}
|
||||
for _, signatureDigest := range signatures {
|
||||
emit("%s: marking signature %s", repoName, signatureDigest)
|
||||
markSet[signatureDigest] = struct{}{}
|
||||
}
|
||||
break
|
||||
case *schema2.DeserializedManifest:
|
||||
config := manifest.(*schema2.DeserializedManifest).Config
|
||||
emit("%s: marking configuration %s", repoName, config.Digest)
|
||||
markSet[config.Digest] = struct{}{}
|
||||
break
|
||||
}
|
||||
|
@ -110,9 +117,14 @@ func markAndSweep(ctx context.Context, storageDriver driver.StorageDriver) error
|
|||
return fmt.Errorf("error enumerating blobs: %v", err)
|
||||
}
|
||||
|
||||
emit("\n%d blobs marked, %d blobs eligible for deletion", len(markSet), len(deleteSet))
|
||||
// Construct vacuum
|
||||
vacuum := storage.NewVacuum(ctx, storageDriver)
|
||||
for dgst := range deleteSet {
|
||||
emit("blob eligible for deletion: %s", dgst)
|
||||
if dryRun {
|
||||
continue
|
||||
}
|
||||
err = vacuum.RemoveBlob(string(dgst))
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to delete blob %s: %v\n", dgst, err)
|
||||
|
@ -122,13 +134,18 @@ func markAndSweep(ctx context.Context, storageDriver driver.StorageDriver) error
|
|||
return err
|
||||
}
|
||||
|
||||
func init() {
|
||||
GCCmd.Flags().BoolVarP(&dryRun, "dry-run", "d", false, "do everything expect remove the blobs")
|
||||
}
|
||||
|
||||
// dryRun is bound to the --dry-run flag. When true, emit prints progress
// lines and the sweep loop skips the actual blob removal.
var dryRun bool
|
||||
|
||||
// GCCmd is the cobra command that corresponds to the garbage-collect subcommand
|
||||
var GCCmd = &cobra.Command{
|
||||
Use: "garbage-collect <config>",
|
||||
Short: "`garbage-collects` deletes layers not referenced by any manifests",
|
||||
Long: "`garbage-collects` deletes layers not referenced by any manifests",
|
||||
Short: "`garbage-collect` deletes layers not referenced by any manifests",
|
||||
Long: "`garbage-collect` deletes layers not referenced by any manifests",
|
||||
Run: func(cmd *cobra.Command, args []string) {
|
||||
|
||||
config, err := resolveConfiguration(args)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "configuration error: %v\n", err)
|
||||
|
@ -149,7 +166,19 @@ var GCCmd = &cobra.Command{
|
|||
os.Exit(1)
|
||||
}
|
||||
|
||||
err = markAndSweep(ctx, driver)
|
||||
k, err := libtrust.GenerateECP256PrivateKey()
|
||||
if err != nil {
|
||||
fmt.Fprint(os.Stderr, err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
registry, err := storage.NewRegistry(ctx, driver, storage.DisableSchema1Signatures, storage.Schema1SigningKey(k))
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "failed to construct registry: %v", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
err = markAndSweep(ctx, driver, registry)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "failed to garbage collect: %v", err)
|
||||
os.Exit(1)
|
||||
|
|
|
@ -161,7 +161,7 @@ func TestNoDeletionNoEffect(t *testing.T) {
|
|||
}
|
||||
|
||||
// Run GC
|
||||
err = markAndSweep(context.Background(), inmemoryDriver)
|
||||
err = markAndSweep(context.Background(), inmemoryDriver, registry)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed mark and sweep: %v", err)
|
||||
}
|
||||
|
@ -193,7 +193,7 @@ func TestDeletionHasEffect(t *testing.T) {
|
|||
manifests.Delete(ctx, image3.manifestDigest)
|
||||
|
||||
// Run GC
|
||||
err = markAndSweep(context.Background(), inmemoryDriver)
|
||||
err = markAndSweep(context.Background(), inmemoryDriver, registry)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed mark and sweep: %v", err)
|
||||
}
|
||||
|
@ -327,7 +327,7 @@ func TestOrphanBlobDeleted(t *testing.T) {
|
|||
uploadRandomSchema2Image(t, repo)
|
||||
|
||||
// Run GC
|
||||
err = markAndSweep(context.Background(), inmemoryDriver)
|
||||
err = markAndSweep(context.Background(), inmemoryDriver, registry)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed mark and sweep: %v", err)
|
||||
}
|
||||
|
|
|
@ -12,6 +12,7 @@ import (
|
|||
"github.com/docker/distribution/manifest/manifestlist"
|
||||
"github.com/docker/distribution/manifest/schema1"
|
||||
"github.com/docker/distribution/manifest/schema2"
|
||||
"github.com/docker/distribution/registry/storage/driver"
|
||||
)
|
||||
|
||||
// A ManifestHandler gets and puts manifests of a particular type.
|
||||
|
@ -161,16 +162,22 @@ func (ms *manifestStore) GetSignatures(ctx context.Context, manifestDigest diges
|
|||
return nil, err
|
||||
}
|
||||
|
||||
signaturesPath = path.Join(signaturesPath, "sha256")
|
||||
var digests []digest.Digest
|
||||
alg := string(digest.SHA256)
|
||||
signaturePaths, err := ms.blobStore.driver.List(ctx, path.Join(signaturesPath, alg))
|
||||
|
||||
signaturePaths, err := ms.blobStore.driver.List(ctx, signaturesPath)
|
||||
if err != nil {
|
||||
switch err.(type) {
|
||||
case nil:
|
||||
break
|
||||
case driver.PathNotFoundError:
|
||||
// Manifest may have been pushed with signature store disabled
|
||||
return digests, nil
|
||||
default:
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var digests []digest.Digest
|
||||
for _, sigPath := range signaturePaths {
|
||||
sigdigest, err := digest.ParseDigest("sha256:" + path.Base(sigPath))
|
||||
sigdigest, err := digest.ParseDigest(alg + ":" + path.Base(sigPath))
|
||||
if err != nil {
|
||||
// merely found not a digest
|
||||
continue
|
||||
|
|
Loading…
Reference in a new issue