Use multipart upload API in S3 Move method

This change to the S3 Move method uses S3's multipart upload API to copy
objects whose size exceeds a threshold.  Parts are copied concurrently.
The level of concurrency, part size, and threshold are all configurable
with reasonable defaults.

Using the multipart upload API has two benefits.

* The S3 Move method can now handle objects over 5 GB, fixing #886.

* Moving most objects, and espectially large ones, is faster.  For
  example, moving a 1 GB object averaged 30 seconds but now averages 10.

Signed-off-by: Noah Treuhaft <noah.treuhaft@docker.com>
This commit is contained in:
Noah Treuhaft 2016-08-15 17:12:24 -07:00
parent c810308d1b
commit 63468ef4a8
4 changed files with 305 additions and 61 deletions

View file

@ -1,19 +1,21 @@
package s3
import (
"bytes"
"io/ioutil"
"math/rand"
"os"
"strconv"
"testing"
"gopkg.in/check.v1"
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/service/s3"
"github.com/docker/distribution/context"
storagedriver "github.com/docker/distribution/registry/storage/driver"
"github.com/docker/distribution/registry/storage/driver/testsuites"
"gopkg.in/check.v1"
)
// Hook up gocheck into the "go test" runner.
@ -65,6 +67,9 @@ func init() {
keyID,
secureBool,
minChunkSize,
defaultMultipartCopyChunkSize,
defaultMultipartCopyMaxConcurrency,
defaultMultipartCopyThresholdSize,
rootDirectory,
storageClass,
driverName + "-test",
@ -238,3 +243,57 @@ func TestOverThousandBlobs(t *testing.T) {
t.Fatalf("unexpected error deleting thousand files: %v", err)
}
}
func TestMoveWithMultipartCopy(t *testing.T) {
if skipS3() != "" {
t.Skip(skipS3())
}
rootDir, err := ioutil.TempDir("", "driver-")
if err != nil {
t.Fatalf("unexpected error creating temporary directory: %v", err)
}
defer os.Remove(rootDir)
d, err := s3DriverConstructor(rootDir, s3.StorageClassStandard)
if err != nil {
t.Fatalf("unexpected error creating driver: %v", err)
}
ctx := context.Background()
sourcePath := "/source"
destPath := "/dest"
defer d.Delete(ctx, sourcePath)
defer d.Delete(ctx, destPath)
// An object larger than d's MultipartCopyThresholdSize will cause d.Move() to perform a multipart copy.
multipartCopyThresholdSize := d.baseEmbed.Base.StorageDriver.(*driver).MultipartCopyThresholdSize
contents := make([]byte, 2*multipartCopyThresholdSize)
rand.Read(contents)
err = d.PutContent(ctx, sourcePath, contents)
if err != nil {
t.Fatalf("unexpected error creating content: %v", err)
}
err = d.Move(ctx, sourcePath, destPath)
if err != nil {
t.Fatalf("unexpected error moving file: %v", err)
}
received, err := d.GetContent(ctx, destPath)
if err != nil {
t.Fatalf("unexpected error getting content: %v", err)
}
if !bytes.Equal(contents, received) {
t.Fatal("content differs")
}
_, err = d.GetContent(ctx, sourcePath)
switch err.(type) {
case storagedriver.PathNotFoundError:
default:
t.Fatalf("unexpected error getting content: %v", err)
}
}