From b14ff4c5f6525f90d0c9ba56e6bb1e31727dfb05 Mon Sep 17 00:00:00 2001 From: readerx Date: Mon, 9 Jan 2023 15:33:33 +0800 Subject: [PATCH] garbage-collect --delete-untagged removes multi-arch manifests #3178 Signed-off-by: readerx --- registry/storage/garbagecollect.go | 84 +++++++--- registry/storage/garbagecollect_test.go | 205 ++++++++++++++++++++++++ 2 files changed, 269 insertions(+), 20 deletions(-) diff --git a/registry/storage/garbagecollect.go b/registry/storage/garbagecollect.go index 661af53ff..2107d24ef 100644 --- a/registry/storage/garbagecollect.go +++ b/registry/storage/garbagecollect.go @@ -5,6 +5,7 @@ import ( "fmt" "github.com/distribution/distribution/v3" + "github.com/distribution/distribution/v3/manifest/manifestlist" "github.com/distribution/distribution/v3/reference" "github.com/distribution/distribution/v3/registry/storage/driver" "github.com/opencontainers/go-digest" @@ -60,33 +61,62 @@ func MarkAndSweep(ctx context.Context, storageDriver driver.StorageDriver, regis return fmt.Errorf("unable to convert ManifestService into ManifestEnumerator") } + manifests := make(map[digest.Digest]digest.Digest) + untaggedManifists := make(map[digest.Digest]struct{}) err = manifestEnumerator.Enumerate(ctx, func(dgst digest.Digest) error { - if opts.RemoveUntagged { - // fetch all tags where this manifest is the latest one - tags, err := repository.Tags(ctx).Lookup(ctx, distribution.Descriptor{Digest: dgst}) - if err != nil { - return fmt.Errorf("failed to retrieve tags for digest %v: %v", dgst, err) - } - if len(tags) == 0 { - emit("manifest eligible for deletion: %s", dgst) - // fetch all tags from repository - // all of these tags could contain manifest in history - // which means that we need check (and delete) those references when deleting manifest - allTags, err := repository.Tags(ctx).All(ctx) - if err != nil { - return fmt.Errorf("failed to retrieve tags %v", err) - } - manifestArr = append(manifestArr, ManifestDel{Name: repoName, Digest: dgst, Tags: allTags}) - return nil + // make manifestlist map + references := make([]digest.Digest, 0) + manifest, err := manifestService.Get(ctx, dgst) + if err != nil { + return fmt.Errorf("failed to retrieve manifest for digest %v: %v", dgst, err) + } + if mfl, ok := manifest.(*manifestlist.DeserializedManifestList); ok { + for _, mf := range mfl.ManifestList.Manifests { + manifests[mf.Digest] = dgst + references = append(references, mf.Digest) } } + if _, exist := manifests[dgst]; !exist { + manifests[dgst] = dgst + } + if _, exist := untaggedManifists[dgst]; !exist { + references = append(references, dgst) + } + + if opts.RemoveUntagged { + for _, ref := range references { + // fetch all tags where this manifest is the latest one + tags, err := repository.Tags(ctx).Lookup(ctx, distribution.Descriptor{Digest: ref}) + if err != nil { + return fmt.Errorf("failed to retrieve tags for digest %v: %v", ref, err) + } + if len(tags) == 0 { + untaggedManifists[ref] = struct{}{} + } + } + } + return nil + }) + + for dgst, mfl := range manifests { + _, manifestUntaged := untaggedManifists[dgst] + _, manifestListUntaged := untaggedManifists[mfl] + if manifestUntaged && manifestListUntaged { + emit("manifest eligible for deletion: %s", dgst) + manifestArr = append(manifestArr, ManifestDel{Name: repoName, Digest: dgst, Tags: nil}) + continue + } + // Mark the manifest's blob emit("%s: marking manifest %s ", repoName, dgst) markSet[dgst] = struct{}{} manifest, err := manifestService.Get(ctx, dgst) if err != nil { - return fmt.Errorf("failed to retrieve manifest for digest %v: %v", dgst, err) + if _, ok := err.(distribution.ErrManifestUnknownRevision); ok { + continue + } + return fmt.Errorf("mark failed to retrieve manifest for digest %v: %v", dgst, err) } descriptors := manifest.References() @@ -94,9 +124,23 @@ func MarkAndSweep(ctx context.Context, storageDriver driver.StorageDriver, regis markSet[descriptor.Digest] = struct{}{} emit("%s: marking blob %s", repoName, descriptor.Digest) } + } - return nil - }) + if !opts.DryRun && len(manifestArr) > 0 { + // fetch all tags from repository + // all of these tags could contain manifest in history + // which means that we need check (and delete) those references when deleting manifest + allTags, err := repository.Tags(ctx).All(ctx) + if err != nil { + if _, ok := err.(distribution.ErrRepositoryUnknown); !ok { + return fmt.Errorf("failed to retrieve tags %v", err) + } + } + + for _, m := range manifestArr { + m.Tags = allTags + } + } // In certain situations such as unfinished uploads, deleting all // tags in S3 or removing the _manifests folder manually, this diff --git a/registry/storage/garbagecollect_test.go b/registry/storage/garbagecollect_test.go index 110880833..98a8b7900 100644 --- a/registry/storage/garbagecollect_test.go +++ b/registry/storage/garbagecollect_test.go @@ -501,3 +501,208 @@ func TestOrphanBlobDeleted(t *testing.T) { } } } + +func TestTaggedManifestlistWithUntaggedManifest(t *testing.T) { + ctx := context.Background() + inmemoryDriver := inmemory.New() + + registry := createRegistry(t, inmemoryDriver) + repo := makeRepository(t, registry, "foo/taggedlist/untaggedmanifest") + manifestService, err := repo.Manifests(ctx) + if err != nil { + t.Fatalf("%v", err) + } + + image1 := uploadRandomSchema2Image(t, repo) + image2 := uploadRandomSchema2Image(t, repo) + + // construct a manifestlist to reference manifests that is not tagged. + blobstatter := registry.BlobStatter() + manifestList, err := testutil.MakeManifestList(blobstatter, []digest.Digest{ + image1.manifestDigest, image2.manifestDigest, + }) + if err != nil { + t.Fatalf("Failed to make manifest list: %v", err) + } + + dgst, err := manifestService.Put(ctx, manifestList) + if err != nil { + t.Fatalf("Failed to add manifest list: %v", err) + } + + repo.Tags(ctx).Tag(ctx, "test", distribution.Descriptor{Digest: dgst}) + + before := allBlobs(t, registry) + + // Run GC + err = MarkAndSweep(context.Background(), inmemoryDriver, registry, GCOpts{ + DryRun: false, + RemoveUntagged: true, + }) + if err != nil { + t.Fatalf("Failed mark and sweep: %v", err) + } + + after := allBlobs(t, registry) + if len(before) != len(after) { + t.Fatalf("Garbage collection affected storage: %d != %d", len(before), len(after)) + } + + if _, ok := after[image1.manifestDigest]; !ok { + t.Fatalf("First manifest is missing") + } + + if _, ok := after[image2.manifestDigest]; !ok { + t.Fatalf("Second manifest is missing") + } + + if _, ok := after[dgst]; !ok { + t.Fatalf("Manifest list is missing") + } +} + +func TestUnTaggedManifestlistWithUntaggedManifest(t *testing.T) { + ctx := context.Background() + inmemoryDriver := inmemory.New() + + registry := createRegistry(t, inmemoryDriver) + repo := makeRepository(t, registry, "foo/untaggedlist/untaggedmanifest") + manifestService, err := repo.Manifests(ctx) + if err != nil { + t.Fatalf("%v", err) + } + + image1 := uploadRandomSchema2Image(t, repo) + image2 := uploadRandomSchema2Image(t, repo) + + // construct a manifestlist to reference manifests that is not tagged. + blobstatter := registry.BlobStatter() + manifestList, err := testutil.MakeManifestList(blobstatter, []digest.Digest{ + image1.manifestDigest, image2.manifestDigest, + }) + if err != nil { + t.Fatalf("Failed to make manifest list: %v", err) + } + + _, err = manifestService.Put(ctx, manifestList) + if err != nil { + t.Fatalf("Failed to add manifest list: %v", err) + } + + // Run GC + err = MarkAndSweep(context.Background(), inmemoryDriver, registry, GCOpts{ + DryRun: false, + RemoveUntagged: true, + }) + if err != nil { + t.Fatalf("Failed mark and sweep: %v", err) + } + + after := allBlobs(t, registry) + if len(after) != 0 { + t.Fatalf("Garbage collection affected storage: %d != %d", len(after), 0) + } + +} + +func TestUnTaggedManifestlistWithTaggedManifest(t *testing.T) { + ctx := context.Background() + inmemoryDriver := inmemory.New() + + registry := createRegistry(t, inmemoryDriver) + repo := makeRepository(t, registry, "foo/untaggedlist/taggedmanifest") + manifestService, err := repo.Manifests(ctx) + if err != nil { + t.Fatalf("%v", err) + } + + image1 := uploadRandomSchema2Image(t, repo) + image2 := uploadRandomSchema2Image(t, repo) + + repo.Tags(ctx).Tag(ctx, "image1", distribution.Descriptor{Digest: image1.manifestDigest}) + repo.Tags(ctx).Tag(ctx, "image2", distribution.Descriptor{Digest: image2.manifestDigest}) + + // construct a manifestlist to reference manifests that is tagged. + blobstatter := registry.BlobStatter() + manifestList, err := testutil.MakeManifestList(blobstatter, []digest.Digest{ + image1.manifestDigest, image2.manifestDigest, + }) + if err != nil { + t.Fatalf("Failed to make manifest list: %v", err) + } + + dgst, err := manifestService.Put(ctx, manifestList) + if err != nil { + t.Fatalf("Failed to add manifest list: %v", err) + } + + // Run GC + err = MarkAndSweep(context.Background(), inmemoryDriver, registry, GCOpts{ + DryRun: false, + RemoveUntagged: true, + }) + if err != nil { + t.Fatalf("Failed mark and sweep: %v", err) + } + + after := allBlobs(t, registry) + afterManifests := allManifests(t, manifestService) + + if _, ok := after[dgst]; ok { + t.Fatalf("Untaged manifestlist still exists") + } + + if _, ok := afterManifests[image1.manifestDigest]; !ok { + t.Fatalf("First manifest is missing") + } + + if _, ok := afterManifests[image2.manifestDigest]; !ok { + t.Fatalf("Second manifest is missing") + } +} + +func TestTaggedManifestlistWithDeletedreference(t *testing.T) { + ctx := context.Background() + inmemoryDriver := inmemory.New() + + registry := createRegistry(t, inmemoryDriver) + repo := makeRepository(t, registry, "foo/untaggedlist/deleteref") + manifestService, err := repo.Manifests(ctx) + if err != nil { + t.Fatalf("%v", err) + } + + image1 := uploadRandomSchema2Image(t, repo) + image2 := uploadRandomSchema2Image(t, repo) + + // construct a manifestlist to reference manifests that is deleted. + blobstatter := registry.BlobStatter() + manifestList, err := testutil.MakeManifestList(blobstatter, []digest.Digest{ + image1.manifestDigest, image2.manifestDigest, + }) + if err != nil { + t.Fatalf("Failed to make manifest list: %v", err) + } + + _, err = manifestService.Put(ctx, manifestList) + if err != nil { + t.Fatalf("Failed to add manifest list: %v", err) + } + + manifestService.Delete(ctx, image1.manifestDigest) + manifestService.Delete(ctx, image2.manifestDigest) + + // Run GC + err = MarkAndSweep(context.Background(), inmemoryDriver, registry, GCOpts{ + DryRun: false, + RemoveUntagged: true, + }) + if err != nil { + t.Fatalf("Failed mark and sweep: %v", err) + } + + after := allBlobs(t, registry) + if len(after) != 0 { + t.Fatalf("Garbage collection affected storage: %d != %d", len(after), 0) + } +}