remove layer's link file by gc

The garbage collector should remove unused layer link files.

P.S. This was originally contributed by @m-masataka; I would now like to take it over.
Thanks to @m-masataka for the effort in PR https://github.com/distribution/distribution/pull/2288

Signed-off-by: Liang Zheng <zhengliang0901@gmail.com>
This commit is contained in:
Liang Zheng 2024-05-08 17:22:24 +08:00
parent 5f804a9df7
commit d9050bb917
3 changed files with 92 additions and 7 deletions

View file

@ -2,6 +2,7 @@ package storage
import (
"context"
"errors"
"fmt"
"github.com/distribution/distribution/v3"
@ -36,6 +37,7 @@ func MarkAndSweep(ctx context.Context, storageDriver driver.StorageDriver, regis
// mark
markSet := make(map[digest.Digest]struct{})
deleteLayerSet := make(map[string][]digest.Digest)
manifestArr := make([]ManifestDel, 0)
err := repositoryEnumerator.Enumerate(ctx, func(repoName string) error {
emit(repoName)
@ -97,15 +99,32 @@ func MarkAndSweep(ctx context.Context, storageDriver driver.StorageDriver, regis
})
})
if err != nil {
// In certain situations such as unfinished uploads, deleting all
// tags in S3 or removing the _manifests folder manually, this
// error may be of type PathNotFound.
//
// In these cases we can continue marking other manifests safely.
if _, ok := err.(driver.PathNotFoundError); ok {
return nil
if _, ok := err.(driver.PathNotFoundError); !ok {
return err
}
}
blobService := repository.Blobs(ctx)
layerEnumerator, ok := blobService.(distribution.ManifestEnumerator)
if !ok {
return errors.New("unable to convert BlobService into ManifestEnumerator")
}
var deleteLayers []digest.Digest
err = layerEnumerator.Enumerate(ctx, func(dgst digest.Digest) error {
if _, ok := markSet[dgst]; !ok {
deleteLayers = append(deleteLayers, dgst)
}
return nil
})
if len(deleteLayers) > 0 {
deleteLayerSet[repoName] = deleteLayers
}
return err
})
if err != nil {
@ -148,6 +167,15 @@ func MarkAndSweep(ctx context.Context, storageDriver driver.StorageDriver, regis
}
}
for repo, dgsts := range deleteLayerSet {
for _, dgst := range dgsts {
err = vacuum.RemoveLayer(repo, dgst)
if err != nil {
return fmt.Errorf("failed to delete layer link %s of repo %s: %v", dgst, repo, err)
}
}
}
return err
}

View file

@ -9,6 +9,7 @@ import (
"github.com/distribution/distribution/v3/internal/dcontext"
"github.com/distribution/distribution/v3/manifest/ocischema"
"github.com/distribution/distribution/v3/registry/storage/driver"
storagedriver "github.com/distribution/distribution/v3/registry/storage/driver"
"github.com/distribution/distribution/v3/registry/storage/driver/inmemory"
"github.com/distribution/distribution/v3/testutil"
"github.com/distribution/reference"
@ -416,6 +417,47 @@ func TestDeleteManifestIndexIfTagNotFound(t *testing.T) {
}
}
// TestGCWithUnusedLayerLinkPath checks that garbage collection removes layer
// link files. It uploads an image into a fresh in-memory registry, verifies
// that a link path can be stat'ed for every layer of that image, runs
// MarkAndSweep with RemoveUntagged enabled, and then requires that stat'ing
// each of those link paths fails with a PathNotFoundError.
func TestGCWithUnusedLayerLinkPath(t *testing.T) {
	const repoName = "unusedlayerlink"

	ctx := dcontext.Background()
	d := inmemory.New()
	registry := createRegistry(t, d)
	repo := makeRepository(t, registry, repoName)
	image := uploadRandomSchema2Image(t, repo)

	// Precondition: every layer of the uploaded image has a link file.
	for layerDigest := range image.layers {
		linkPath, err := pathFor(layerLinkPathSpec{name: repoName, digest: layerDigest})
		if err != nil {
			t.Fatal(err)
		}

		info, err := d.Stat(ctx, linkPath)
		if err != nil {
			t.Fatal(err)
		}
		if info == nil {
			t.Fatalf("layer link path %s not found", linkPath)
		}
	}

	gcOpts := GCOpts{
		DryRun:         false,
		RemoveUntagged: true,
	}
	if err := MarkAndSweep(dcontext.Background(), d, registry, gcOpts); err != nil {
		t.Fatalf("got error: %v, expected nil", err)
	}

	// Postcondition: each link path must now report PathNotFoundError.
	for layerDigest := range image.layers {
		linkPath, err := pathFor(layerLinkPathSpec{name: repoName, digest: layerDigest})
		if err != nil {
			t.Fatal(err)
		}

		_, statErr := d.Stat(ctx, linkPath)
		switch statErr.(type) {
		case storagedriver.PathNotFoundError:
			// Expected: the link file is gone.
		default:
			t.Fatalf("layer link path %s should be not found", linkPath)
		}
	}
}
func TestGCWithUnknownRepository(t *testing.T) {
ctx := dcontext.Background()
d := inmemory.New()

View file

@ -100,3 +100,18 @@ func (v Vacuum) RemoveRepository(repoName string) error {
return nil
}
// RemoveLayer removes the layer link path for digest dgst in repository
// repoName from the storage. It returns the error from resolving the link
// path, or otherwise whatever the storage driver's Delete call returns.
func (v Vacuum) RemoveLayer(repoName string, dgst digest.Digest) error {
	spec := layerLinkPathSpec{name: repoName, digest: dgst}
	linkPath, err := pathFor(spec)
	if err != nil {
		return err
	}

	dcontext.GetLogger(v.ctx).Infof("Deleting layer link path: %s", linkPath)
	return v.driver.Delete(v.ctx, linkPath)
}