forked from TrueCloudLab/distribution
Merge pull request #3635 from milosgajdos/make-s3-driver-delete-faster
Delete S3 keys incrementally in batches
This commit is contained in:
commit
e3509fc1de
3 changed files with 71 additions and 43 deletions
|
@ -15,6 +15,7 @@ import (
|
|||
"bytes"
|
||||
"context"
|
||||
"crypto/tls"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
|
@ -922,54 +923,71 @@ func (d *driver) copy(ctx context.Context, sourcePath string, destPath string) e
|
|||
return err
|
||||
}
|
||||
|
||||
func min(a, b int) int {
|
||||
if a < b {
|
||||
return a
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
// Delete recursively deletes all objects stored at "path" and its subpaths.
|
||||
// We must be careful since S3 does not guarantee read after delete consistency
|
||||
func (d *driver) Delete(ctx context.Context, path string) error {
|
||||
s3Objects := make([]*s3.ObjectIdentifier, 0, listMax)
|
||||
|
||||
// manually add the given path if it's a file
|
||||
stat, err := d.Stat(ctx, path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if stat != nil && !stat.IsDir() {
|
||||
path := d.s3Path(path)
|
||||
s3Objects = append(s3Objects, &s3.ObjectIdentifier{
|
||||
Key: &path,
|
||||
})
|
||||
}
|
||||
|
||||
// list objects under the given path as a subpath (suffix with slash "/")
|
||||
s3Path := d.s3Path(path) + "/"
|
||||
s3Path := d.s3Path(path)
|
||||
listObjectsInput := &s3.ListObjectsV2Input{
|
||||
Bucket: aws.String(d.Bucket),
|
||||
Prefix: aws.String(s3Path),
|
||||
}
|
||||
ListLoop:
|
||||
|
||||
for {
|
||||
// list all the objects
|
||||
resp, err := d.S3.ListObjectsV2(listObjectsInput)
|
||||
|
||||
// resp.Contents can only be empty on the first call
|
||||
// if there were no more results to return after the first call, resp.IsTruncated would have been false
|
||||
// and the loop would be exited without recalling ListObjects
|
||||
// and the loop would exit without recalling ListObjects
|
||||
if err != nil || len(resp.Contents) == 0 {
|
||||
break ListLoop
|
||||
return storagedriver.PathNotFoundError{Path: path}
|
||||
}
|
||||
|
||||
for _, key := range resp.Contents {
|
||||
// Skip if we encounter a key that is not a subpath (so that deleting "/a" does not delete "/ab").
|
||||
if len(*key.Key) > len(s3Path) && (*key.Key)[len(s3Path)] != '/' {
|
||||
continue
|
||||
}
|
||||
s3Objects = append(s3Objects, &s3.ObjectIdentifier{
|
||||
Key: key.Key,
|
||||
})
|
||||
}
|
||||
|
||||
// Delete objects only if the list is not empty, otherwise S3 API returns a cryptic error
|
||||
if len(s3Objects) > 0 {
|
||||
// NOTE: according to AWS docs https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListObjectsV2.html
|
||||
// by default the response returns up to 1,000 key names. The response _might_ contain fewer keys but it will never contain more.
|
||||
// 10000 keys is coincidentally (?) also the max number of keys that can be deleted in a single Delete operation, so we'll just smack
|
||||
// Delete here straight away and reset the object slice when successful.
|
||||
resp, err := d.S3.DeleteObjects(&s3.DeleteObjectsInput{
|
||||
Bucket: aws.String(d.Bucket),
|
||||
Delete: &s3.Delete{
|
||||
Objects: s3Objects,
|
||||
Quiet: aws.Bool(false),
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if len(resp.Errors) > 0 {
|
||||
// NOTE: AWS SDK s3.Error does not implement error interface which
|
||||
// is pretty intensely sad, so we have to do away with this for now.
|
||||
errs := make([]error, 0, len(resp.Errors))
|
||||
for _, err := range resp.Errors {
|
||||
errs = append(errs, errors.New(err.String()))
|
||||
}
|
||||
return storagedriver.Errors{
|
||||
DriverName: driverName,
|
||||
Errs: errs,
|
||||
}
|
||||
}
|
||||
}
|
||||
// NOTE: we don't want to reallocate
|
||||
// the slice so we simply "reset" it
|
||||
s3Objects = s3Objects[:0]
|
||||
|
||||
// resp.Contents must have at least one element or we would have returned not found
|
||||
listObjectsInput.StartAfter = resp.Contents[len(resp.Contents)-1].Key
|
||||
|
||||
|
@ -980,24 +998,6 @@ ListLoop:
|
|||
}
|
||||
}
|
||||
|
||||
total := len(s3Objects)
|
||||
if total == 0 {
|
||||
return storagedriver.PathNotFoundError{Path: path}
|
||||
}
|
||||
|
||||
// need to chunk objects into groups of 1000 per s3 restrictions
|
||||
for i := 0; i < total; i += 1000 {
|
||||
_, err := d.S3.DeleteObjects(&s3.DeleteObjectsInput{
|
||||
Bucket: aws.String(d.Bucket),
|
||||
Delete: &s3.Delete{
|
||||
Objects: s3Objects[i:min(i+1000, total)],
|
||||
Quiet: aws.Bool(false),
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
|
@ -772,6 +772,10 @@ func TestMoveWithMultipartCopy(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestListObjectsV2(t *testing.T) {
|
||||
if skipS3() != "" {
|
||||
t.Skip(skipS3())
|
||||
}
|
||||
|
||||
rootDir, err := ioutil.TempDir("", "driver-")
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error creating temporary directory: %v", err)
|
||||
|
|
|
@ -169,3 +169,27 @@ type Error struct {
|
|||
func (err Error) Error() string {
|
||||
return fmt.Sprintf("%s: %s", err.DriverName, err.Enclosed)
|
||||
}
|
||||
|
||||
// Errors provides the envelope for multiple errors
|
||||
// for use within the storagedriver implementations.
|
||||
type Errors struct {
|
||||
DriverName string
|
||||
Errs []error
|
||||
}
|
||||
|
||||
var _ error = Errors{}
|
||||
|
||||
func (e Errors) Error() string {
|
||||
switch len(e.Errs) {
|
||||
case 0:
|
||||
return "<nil>"
|
||||
case 1:
|
||||
return e.Errs[0].Error()
|
||||
default:
|
||||
msg := "errors:\n"
|
||||
for _, err := range e.Errs {
|
||||
msg += err.Error() + "\n"
|
||||
}
|
||||
return msg
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue