distribution/registry/storage/garbagecollect.go
Liang Zheng 112156321f fix: ignore error of manifest tag path not found in gc
it is reasonable to ignore the error that the manifest tag path does not exist when querying
all tags of the specified repository when executing gc.

Signed-off-by: Liang Zheng <zhengliang0901@gmail.com>
2024-04-25 17:13:06 +08:00

189 lines
5.6 KiB
Go

package storage
import (
"context"
"fmt"
"github.com/distribution/distribution/v3"
"github.com/distribution/distribution/v3/registry/storage/driver"
"github.com/distribution/reference"
"github.com/opencontainers/go-digest"
)
func emit(format string, a ...interface{}) {
fmt.Printf(format+"\n", a...)
}
// GCOpts contains options for garbage collector
type GCOpts struct {
DryRun bool
RemoveUntagged bool
}
// ManifestDel contains manifest structure which will be deleted
type ManifestDel struct {
Name string
Digest digest.Digest
Tags []string
}
// MarkAndSweep performs a mark and sweep of registry data
func MarkAndSweep(ctx context.Context, storageDriver driver.StorageDriver, registry distribution.Namespace, opts GCOpts) error {
repositoryEnumerator, ok := registry.(distribution.RepositoryEnumerator)
if !ok {
return fmt.Errorf("unable to convert Namespace to RepositoryEnumerator")
}
// mark
markSet := make(map[digest.Digest]struct{})
manifestArr := make([]ManifestDel, 0)
err := repositoryEnumerator.Enumerate(ctx, func(repoName string) error {
emit(repoName)
var err error
named, err := reference.WithName(repoName)
if err != nil {
return fmt.Errorf("failed to parse repo name %s: %v", repoName, err)
}
repository, err := registry.Repository(ctx, named)
if err != nil {
return fmt.Errorf("failed to construct repository: %v", err)
}
manifestService, err := repository.Manifests(ctx)
if err != nil {
return fmt.Errorf("failed to construct manifest service: %v", err)
}
manifestEnumerator, ok := manifestService.(distribution.ManifestEnumerator)
if !ok {
return fmt.Errorf("unable to convert ManifestService into ManifestEnumerator")
}
err = manifestEnumerator.Enumerate(ctx, func(dgst digest.Digest) error {
if opts.RemoveUntagged {
// fetch all tags where this manifest is the latest one
tags, err := repository.Tags(ctx).Lookup(ctx, distribution.Descriptor{Digest: dgst})
if err != nil {
return fmt.Errorf("failed to retrieve tags for digest %v: %v", dgst, err)
}
if len(tags) == 0 {
// fetch all tags from repository
// all of these tags could contain manifest in history
// which means that we need check (and delete) those references when deleting manifest
allTags, err := repository.Tags(ctx).All(ctx)
if err != nil {
if _, ok := err.(distribution.ErrRepositoryUnknown); ok {
emit("manifest tags path of repository %s does not exist", repoName)
return nil
}
return fmt.Errorf("failed to retrieve tags %v", err)
}
manifestArr = append(manifestArr, ManifestDel{Name: repoName, Digest: dgst, Tags: allTags})
return nil
}
}
// Mark the manifest's blob
emit("%s: marking manifest %s ", repoName, dgst)
markSet[dgst] = struct{}{}
return markManifestReferences(dgst, manifestService, ctx, func(d digest.Digest) bool {
_, marked := markSet[d]
if !marked {
markSet[d] = struct{}{}
emit("%s: marking blob %s", repoName, d)
}
return marked
})
})
// In certain situations such as unfinished uploads, deleting all
// tags in S3 or removing the _manifests folder manually, this
// error may be of type PathNotFound.
//
// In these cases we can continue marking other manifests safely.
if _, ok := err.(driver.PathNotFoundError); ok {
return nil
}
return err
})
if err != nil {
return fmt.Errorf("failed to mark: %v", err)
}
manifestArr = unmarkReferencedManifest(manifestArr, markSet)
// sweep
vacuum := NewVacuum(ctx, storageDriver)
if !opts.DryRun {
for _, obj := range manifestArr {
err = vacuum.RemoveManifest(obj.Name, obj.Digest, obj.Tags)
if err != nil {
return fmt.Errorf("failed to delete manifest %s: %v", obj.Digest, err)
}
}
}
blobService := registry.Blobs()
deleteSet := make(map[digest.Digest]struct{})
err = blobService.Enumerate(ctx, func(dgst digest.Digest) error {
// check if digest is in markSet. If not, delete it!
if _, ok := markSet[dgst]; !ok {
deleteSet[dgst] = struct{}{}
}
return nil
})
if err != nil {
return fmt.Errorf("error enumerating blobs: %v", err)
}
emit("\n%d blobs marked, %d blobs and %d manifests eligible for deletion", len(markSet), len(deleteSet), len(manifestArr))
for dgst := range deleteSet {
emit("blob eligible for deletion: %s", dgst)
if opts.DryRun {
continue
}
err = vacuum.RemoveBlob(string(dgst))
if err != nil {
return fmt.Errorf("failed to delete blob %s: %v", dgst, err)
}
}
return err
}
// unmarkReferencedManifest filters out manifest present in markSet
func unmarkReferencedManifest(manifestArr []ManifestDel, markSet map[digest.Digest]struct{}) []ManifestDel {
filtered := make([]ManifestDel, 0)
for _, obj := range manifestArr {
if _, ok := markSet[obj.Digest]; !ok {
emit("manifest eligible for deletion: %s", obj)
filtered = append(filtered, obj)
}
}
return filtered
}
// markManifestReferences marks the manifest references
func markManifestReferences(dgst digest.Digest, manifestService distribution.ManifestService, ctx context.Context, ingester func(digest.Digest) bool) error {
manifest, err := manifestService.Get(ctx, dgst)
if err != nil {
return fmt.Errorf("failed to retrieve manifest for digest %v: %v", dgst, err)
}
descriptors := manifest.References()
for _, descriptor := range descriptors {
// do not visit references if already marked
if ingester(descriptor.Digest) {
continue
}
if ok, _ := manifestService.Exists(ctx, descriptor.Digest); ok {
err := markManifestReferences(descriptor.Digest, manifestService, ctx, ingester)
if err != nil {
return err
}
}
}
return nil
}