distribution/registry/storage/garbagecollect.go
Liang Zheng d9050bb917 remove layer's link file by gc
The garbage-collect should remove unsed layer link file

P.S. This was originally contributed by @m-masataka, now I would like to take over it.
Thanks @m-masataka efforts with PR https://github.com/distribution/distribution/pull/2288

Signed-off-by: Liang Zheng <zhengliang0901@gmail.com>
2024-07-03 00:16:11 +08:00

217 lines
6.4 KiB
Go

package storage
import (
"context"
"errors"
"fmt"
"github.com/distribution/distribution/v3"
"github.com/distribution/distribution/v3/registry/storage/driver"
"github.com/distribution/reference"
"github.com/opencontainers/go-digest"
)
func emit(format string, a ...interface{}) {
fmt.Printf(format+"\n", a...)
}
// GCOpts contains options for garbage collector
type GCOpts struct {
DryRun bool
RemoveUntagged bool
}
// ManifestDel contains manifest structure which will be deleted
type ManifestDel struct {
Name string
Digest digest.Digest
Tags []string
}
// MarkAndSweep performs a mark and sweep of registry data
func MarkAndSweep(ctx context.Context, storageDriver driver.StorageDriver, registry distribution.Namespace, opts GCOpts) error {
repositoryEnumerator, ok := registry.(distribution.RepositoryEnumerator)
if !ok {
return fmt.Errorf("unable to convert Namespace to RepositoryEnumerator")
}
// mark
markSet := make(map[digest.Digest]struct{})
deleteLayerSet := make(map[string][]digest.Digest)
manifestArr := make([]ManifestDel, 0)
err := repositoryEnumerator.Enumerate(ctx, func(repoName string) error {
emit(repoName)
var err error
named, err := reference.WithName(repoName)
if err != nil {
return fmt.Errorf("failed to parse repo name %s: %v", repoName, err)
}
repository, err := registry.Repository(ctx, named)
if err != nil {
return fmt.Errorf("failed to construct repository: %v", err)
}
manifestService, err := repository.Manifests(ctx)
if err != nil {
return fmt.Errorf("failed to construct manifest service: %v", err)
}
manifestEnumerator, ok := manifestService.(distribution.ManifestEnumerator)
if !ok {
return fmt.Errorf("unable to convert ManifestService into ManifestEnumerator")
}
err = manifestEnumerator.Enumerate(ctx, func(dgst digest.Digest) error {
if opts.RemoveUntagged {
// fetch all tags where this manifest is the latest one
tags, err := repository.Tags(ctx).Lookup(ctx, distribution.Descriptor{Digest: dgst})
if err != nil {
return fmt.Errorf("failed to retrieve tags for digest %v: %v", dgst, err)
}
if len(tags) == 0 {
// fetch all tags from repository
// all of these tags could contain manifest in history
// which means that we need check (and delete) those references when deleting manifest
allTags, err := repository.Tags(ctx).All(ctx)
if err != nil {
if _, ok := err.(distribution.ErrRepositoryUnknown); ok {
emit("manifest tags path of repository %s does not exist", repoName)
return nil
}
return fmt.Errorf("failed to retrieve tags %v", err)
}
manifestArr = append(manifestArr, ManifestDel{Name: repoName, Digest: dgst, Tags: allTags})
return nil
}
}
// Mark the manifest's blob
emit("%s: marking manifest %s ", repoName, dgst)
markSet[dgst] = struct{}{}
return markManifestReferences(dgst, manifestService, ctx, func(d digest.Digest) bool {
_, marked := markSet[d]
if !marked {
markSet[d] = struct{}{}
emit("%s: marking blob %s", repoName, d)
}
return marked
})
})
if err != nil {
// In certain situations such as unfinished uploads, deleting all
// tags in S3 or removing the _manifests folder manually, this
// error may be of type PathNotFound.
//
// In these cases we can continue marking other manifests safely.
if _, ok := err.(driver.PathNotFoundError); !ok {
return err
}
}
blobService := repository.Blobs(ctx)
layerEnumerator, ok := blobService.(distribution.ManifestEnumerator)
if !ok {
return errors.New("unable to convert BlobService into ManifestEnumerator")
}
var deleteLayers []digest.Digest
err = layerEnumerator.Enumerate(ctx, func(dgst digest.Digest) error {
if _, ok := markSet[dgst]; !ok {
deleteLayers = append(deleteLayers, dgst)
}
return nil
})
if len(deleteLayers) > 0 {
deleteLayerSet[repoName] = deleteLayers
}
return err
})
if err != nil {
return fmt.Errorf("failed to mark: %v", err)
}
manifestArr = unmarkReferencedManifest(manifestArr, markSet)
// sweep
vacuum := NewVacuum(ctx, storageDriver)
if !opts.DryRun {
for _, obj := range manifestArr {
err = vacuum.RemoveManifest(obj.Name, obj.Digest, obj.Tags)
if err != nil {
return fmt.Errorf("failed to delete manifest %s: %v", obj.Digest, err)
}
}
}
blobService := registry.Blobs()
deleteSet := make(map[digest.Digest]struct{})
err = blobService.Enumerate(ctx, func(dgst digest.Digest) error {
// check if digest is in markSet. If not, delete it!
if _, ok := markSet[dgst]; !ok {
deleteSet[dgst] = struct{}{}
}
return nil
})
if err != nil {
return fmt.Errorf("error enumerating blobs: %v", err)
}
emit("\n%d blobs marked, %d blobs and %d manifests eligible for deletion", len(markSet), len(deleteSet), len(manifestArr))
for dgst := range deleteSet {
emit("blob eligible for deletion: %s", dgst)
if opts.DryRun {
continue
}
err = vacuum.RemoveBlob(string(dgst))
if err != nil {
return fmt.Errorf("failed to delete blob %s: %v", dgst, err)
}
}
for repo, dgsts := range deleteLayerSet {
for _, dgst := range dgsts {
err = vacuum.RemoveLayer(repo, dgst)
if err != nil {
return fmt.Errorf("failed to delete layer link %s of repo %s: %v", dgst, repo, err)
}
}
}
return err
}
// unmarkReferencedManifest filters out manifest present in markSet
func unmarkReferencedManifest(manifestArr []ManifestDel, markSet map[digest.Digest]struct{}) []ManifestDel {
filtered := make([]ManifestDel, 0)
for _, obj := range manifestArr {
if _, ok := markSet[obj.Digest]; !ok {
emit("manifest eligible for deletion: %s", obj)
filtered = append(filtered, obj)
}
}
return filtered
}
// markManifestReferences marks the manifest references
func markManifestReferences(dgst digest.Digest, manifestService distribution.ManifestService, ctx context.Context, ingester func(digest.Digest) bool) error {
manifest, err := manifestService.Get(ctx, dgst)
if err != nil {
return fmt.Errorf("failed to retrieve manifest for digest %v: %v", dgst, err)
}
descriptors := manifest.References()
for _, descriptor := range descriptors {
// do not visit references if already marked
if ingester(descriptor.Digest) {
continue
}
if ok, _ := manifestService.Exists(ctx, descriptor.Digest); ok {
err := markManifestReferences(descriptor.Digest, manifestService, ctx, ingester)
if err != nil {
return err
}
}
}
return nil
}