garbage-collect --delete-untagged removes multi-arch manifests #3178

Signed-off-by: readerx <gnolld@foxmail.com>
This commit is contained in:
readerx 2023-01-09 15:33:33 +08:00
parent 362910506b
commit b14ff4c5f6
2 changed files with 269 additions and 20 deletions

View file

@ -5,6 +5,7 @@ import (
"fmt"
"github.com/distribution/distribution/v3"
"github.com/distribution/distribution/v3/manifest/manifestlist"
"github.com/distribution/distribution/v3/reference"
"github.com/distribution/distribution/v3/registry/storage/driver"
"github.com/opencontainers/go-digest"
@ -60,33 +61,62 @@ func MarkAndSweep(ctx context.Context, storageDriver driver.StorageDriver, regis
return fmt.Errorf("unable to convert ManifestService into ManifestEnumerator")
}
manifests := make(map[digest.Digest]digest.Digest)
untaggedManifists := make(map[digest.Digest]struct{})
err = manifestEnumerator.Enumerate(ctx, func(dgst digest.Digest) error {
if opts.RemoveUntagged {
// fetch all tags where this manifest is the latest one
tags, err := repository.Tags(ctx).Lookup(ctx, distribution.Descriptor{Digest: dgst})
if err != nil {
return fmt.Errorf("failed to retrieve tags for digest %v: %v", dgst, err)
}
if len(tags) == 0 {
emit("manifest eligible for deletion: %s", dgst)
// fetch all tags from repository
// all of these tags could contain manifest in history
// which means that we need check (and delete) those references when deleting manifest
allTags, err := repository.Tags(ctx).All(ctx)
if err != nil {
return fmt.Errorf("failed to retrieve tags %v", err)
}
manifestArr = append(manifestArr, ManifestDel{Name: repoName, Digest: dgst, Tags: allTags})
return nil
// make manifestlist map
references := make([]digest.Digest, 0)
manifest, err := manifestService.Get(ctx, dgst)
if err != nil {
return fmt.Errorf("failed to retrieve manifest for digest %v: %v", dgst, err)
}
if mfl, ok := manifest.(*manifestlist.DeserializedManifestList); ok {
for _, mf := range mfl.ManifestList.Manifests {
manifests[mf.Digest] = dgst
references = append(references, mf.Digest)
}
}
if _, exist := manifests[dgst]; !exist {
manifests[dgst] = dgst
}
if _, exist := untaggedManifists[dgst]; !exist {
references = append(references, dgst)
}
if opts.RemoveUntagged {
for _, ref := range references {
// fetch all tags where this manifest is the latest one
tags, err := repository.Tags(ctx).Lookup(ctx, distribution.Descriptor{Digest: ref})
if err != nil {
return fmt.Errorf("failed to retrieve tags for digest %v: %v", ref, err)
}
if len(tags) == 0 {
untaggedManifists[ref] = struct{}{}
}
}
}
return nil
})
for dgst, mfl := range manifests {
_, manifestUntaged := untaggedManifists[dgst]
_, manifestListUntaged := untaggedManifists[mfl]
if manifestUntaged && manifestListUntaged {
emit("manifest eligible for deletion: %s", dgst)
manifestArr = append(manifestArr, ManifestDel{Name: repoName, Digest: dgst, Tags: nil})
continue
}
// Mark the manifest's blob
emit("%s: marking manifest %s ", repoName, dgst)
markSet[dgst] = struct{}{}
manifest, err := manifestService.Get(ctx, dgst)
if err != nil {
return fmt.Errorf("failed to retrieve manifest for digest %v: %v", dgst, err)
if _, ok := err.(distribution.ErrManifestUnknownRevision); ok {
continue
}
return fmt.Errorf("mark failed to retrieve manifest for digest %v: %v", dgst, err)
}
descriptors := manifest.References()
@ -94,9 +124,23 @@ func MarkAndSweep(ctx context.Context, storageDriver driver.StorageDriver, regis
markSet[descriptor.Digest] = struct{}{}
emit("%s: marking blob %s", repoName, descriptor.Digest)
}
}
return nil
})
if !opts.DryRun && len(manifestArr) > 0 {
// fetch all tags from repository
// all of these tags could contain manifest in history
// which means that we need check (and delete) those references when deleting manifest
allTags, err := repository.Tags(ctx).All(ctx)
if err != nil {
if _, ok := err.(distribution.ErrRepositoryUnknown); !ok {
return fmt.Errorf("failed to retrieve tags %v", err)
}
}
for _, m := range manifestArr {
m.Tags = allTags
}
}
// In certain situations such as unfinished uploads, deleting all
// tags in S3 or removing the _manifests folder manually, this

View file

@ -501,3 +501,208 @@ func TestOrphanBlobDeleted(t *testing.T) {
}
}
}
func TestTaggedManifestlistWithUntaggedManifest(t *testing.T) {
ctx := context.Background()
inmemoryDriver := inmemory.New()
registry := createRegistry(t, inmemoryDriver)
repo := makeRepository(t, registry, "foo/taggedlist/untaggedmanifest")
manifestService, err := repo.Manifests(ctx)
if err != nil {
t.Fatalf("%v", err)
}
image1 := uploadRandomSchema2Image(t, repo)
image2 := uploadRandomSchema2Image(t, repo)
// construct a manifestlist to reference manifests that is not tagged.
blobstatter := registry.BlobStatter()
manifestList, err := testutil.MakeManifestList(blobstatter, []digest.Digest{
image1.manifestDigest, image2.manifestDigest,
})
if err != nil {
t.Fatalf("Failed to make manifest list: %v", err)
}
dgst, err := manifestService.Put(ctx, manifestList)
if err != nil {
t.Fatalf("Failed to add manifest list: %v", err)
}
repo.Tags(ctx).Tag(ctx, "test", distribution.Descriptor{Digest: dgst})
before := allBlobs(t, registry)
// Run GC
err = MarkAndSweep(context.Background(), inmemoryDriver, registry, GCOpts{
DryRun: false,
RemoveUntagged: true,
})
if err != nil {
t.Fatalf("Failed mark and sweep: %v", err)
}
after := allBlobs(t, registry)
if len(before) != len(after) {
t.Fatalf("Garbage collection affected storage: %d != %d", len(before), len(after))
}
if _, ok := after[image1.manifestDigest]; !ok {
t.Fatalf("First manifest is missing")
}
if _, ok := after[image2.manifestDigest]; !ok {
t.Fatalf("Second manifest is missing")
}
if _, ok := after[dgst]; !ok {
t.Fatalf("Manifest list is missing")
}
}
func TestUnTaggedManifestlistWithUntaggedManifest(t *testing.T) {
ctx := context.Background()
inmemoryDriver := inmemory.New()
registry := createRegistry(t, inmemoryDriver)
repo := makeRepository(t, registry, "foo/untaggedlist/untaggedmanifest")
manifestService, err := repo.Manifests(ctx)
if err != nil {
t.Fatalf("%v", err)
}
image1 := uploadRandomSchema2Image(t, repo)
image2 := uploadRandomSchema2Image(t, repo)
// construct a manifestlist to reference manifests that is not tagged.
blobstatter := registry.BlobStatter()
manifestList, err := testutil.MakeManifestList(blobstatter, []digest.Digest{
image1.manifestDigest, image2.manifestDigest,
})
if err != nil {
t.Fatalf("Failed to make manifest list: %v", err)
}
_, err = manifestService.Put(ctx, manifestList)
if err != nil {
t.Fatalf("Failed to add manifest list: %v", err)
}
// Run GC
err = MarkAndSweep(context.Background(), inmemoryDriver, registry, GCOpts{
DryRun: false,
RemoveUntagged: true,
})
if err != nil {
t.Fatalf("Failed mark and sweep: %v", err)
}
after := allBlobs(t, registry)
if len(after) != 0 {
t.Fatalf("Garbage collection affected storage: %d != %d", len(after), 0)
}
}
func TestUnTaggedManifestlistWithTaggedManifest(t *testing.T) {
ctx := context.Background()
inmemoryDriver := inmemory.New()
registry := createRegistry(t, inmemoryDriver)
repo := makeRepository(t, registry, "foo/untaggedlist/taggedmanifest")
manifestService, err := repo.Manifests(ctx)
if err != nil {
t.Fatalf("%v", err)
}
image1 := uploadRandomSchema2Image(t, repo)
image2 := uploadRandomSchema2Image(t, repo)
repo.Tags(ctx).Tag(ctx, "image1", distribution.Descriptor{Digest: image1.manifestDigest})
repo.Tags(ctx).Tag(ctx, "image2", distribution.Descriptor{Digest: image2.manifestDigest})
// construct a manifestlist to reference manifests that is tagged.
blobstatter := registry.BlobStatter()
manifestList, err := testutil.MakeManifestList(blobstatter, []digest.Digest{
image1.manifestDigest, image2.manifestDigest,
})
if err != nil {
t.Fatalf("Failed to make manifest list: %v", err)
}
dgst, err := manifestService.Put(ctx, manifestList)
if err != nil {
t.Fatalf("Failed to add manifest list: %v", err)
}
// Run GC
err = MarkAndSweep(context.Background(), inmemoryDriver, registry, GCOpts{
DryRun: false,
RemoveUntagged: true,
})
if err != nil {
t.Fatalf("Failed mark and sweep: %v", err)
}
after := allBlobs(t, registry)
afterManifests := allManifests(t, manifestService)
if _, ok := after[dgst]; ok {
t.Fatalf("Untaged manifestlist still exists")
}
if _, ok := afterManifests[image1.manifestDigest]; !ok {
t.Fatalf("First manifest is missing")
}
if _, ok := afterManifests[image2.manifestDigest]; !ok {
t.Fatalf("Second manifest is missing")
}
}
func TestTaggedManifestlistWithDeletedreference(t *testing.T) {
ctx := context.Background()
inmemoryDriver := inmemory.New()
registry := createRegistry(t, inmemoryDriver)
repo := makeRepository(t, registry, "foo/untaggedlist/deleteref")
manifestService, err := repo.Manifests(ctx)
if err != nil {
t.Fatalf("%v", err)
}
image1 := uploadRandomSchema2Image(t, repo)
image2 := uploadRandomSchema2Image(t, repo)
// construct a manifestlist to reference manifests that is deleted.
blobstatter := registry.BlobStatter()
manifestList, err := testutil.MakeManifestList(blobstatter, []digest.Digest{
image1.manifestDigest, image2.manifestDigest,
})
if err != nil {
t.Fatalf("Failed to make manifest list: %v", err)
}
_, err = manifestService.Put(ctx, manifestList)
if err != nil {
t.Fatalf("Failed to add manifest list: %v", err)
}
manifestService.Delete(ctx, image1.manifestDigest)
manifestService.Delete(ctx, image2.manifestDigest)
// Run GC
err = MarkAndSweep(context.Background(), inmemoryDriver, registry, GCOpts{
DryRun: false,
RemoveUntagged: true,
})
if err != nil {
t.Fatalf("Failed mark and sweep: %v", err)
}
after := allBlobs(t, registry)
if len(after) != 0 {
t.Fatalf("Garbage collection affected storage: %d != %d", len(after), 0)
}
}