distribution/storagedriver/s3/s3.go
Andrey Kostov 8ca960a0b5 S3 driver refactor
This requires some discussion of how we will handle errors due to network problems
and after further changes in that direction some more stress testing. There is also an
upcomming commit implementing zero fill on WriteStream when offset is greater than
the current size of the file.
2014-12-19 19:16:51 +02:00

458 lines
12 KiB
Go

package s3
import (
"bytes"
"fmt"
"io"
"io/ioutil"
"net/http"
"strconv"
"strings"
"time"
"github.com/crowdmob/goamz/aws"
"github.com/crowdmob/goamz/s3"
"github.com/docker/docker-registry/storagedriver"
"github.com/docker/docker-registry/storagedriver/factory"
)
const driverName = "s3"
// minChunkSize defines the minimum multipart upload chunk size
// S3 API requires multipart upload chunks to be at least 5MB
const chunkSize = 5 * 1024 * 1024
// listMax is the largest amount of objects you can request from S3 in a list call
const listMax = 1000
func init() {
factory.Register(driverName, &s3DriverFactory{})
}
// s3DriverFactory implements the factory.StorageDriverFactory interface
type s3DriverFactory struct{}
func (factory *s3DriverFactory) Create(parameters map[string]interface{}) (storagedriver.StorageDriver, error) {
return FromParameters(parameters)
}
// Driver is a storagedriver.StorageDriver implementation backed by Amazon S3
// Objects are stored at absolute keys in the provided bucket
type Driver struct {
S3 *s3.S3
Bucket *s3.Bucket
Encrypt bool
rootDirectory string
}
// FromParameters constructs a new Driver with a given parameters map
// Required parameters:
// - accesskey
// - secretkey
// - region
// - bucket
// - encrypt
func FromParameters(parameters map[string]interface{}) (*Driver, error) {
accessKey, ok := parameters["accesskey"]
if !ok {
accessKey = ""
}
secretKey, ok := parameters["secretkey"]
if !ok {
secretKey = ""
}
regionName, ok := parameters["region"]
if !ok || fmt.Sprint(regionName) == "" {
return nil, fmt.Errorf("No region parameter provided")
}
region := aws.GetRegion(fmt.Sprint(regionName))
if region.Name == "" {
return nil, fmt.Errorf("Invalid region provided: %v", region)
}
bucket, ok := parameters["bucket"]
if !ok || fmt.Sprint(bucket) == "" {
return nil, fmt.Errorf("No bucket parameter provided")
}
encrypt, ok := parameters["encrypt"]
if !ok {
return nil, fmt.Errorf("No encrypt parameter provided")
}
encryptBool, err := strconv.ParseBool(fmt.Sprint(encrypt))
if err != nil {
return nil, fmt.Errorf("Unable to parse the encrypt parameter: %v", err)
}
rootDirectory, ok := parameters["rootdirectory"]
if !ok {
return nil, fmt.Errorf("No rootDirectory parameter provided")
}
return New(fmt.Sprint(accessKey), fmt.Sprint(secretKey), fmt.Sprint(bucket), fmt.Sprint(rootDirectory), region, encryptBool)
}
// New constructs a new Driver with the given AWS credentials, region, encryption flag, and
// bucketName
func New(accessKey, secretKey, bucketName, rootDirectory string, region aws.Region, encrypt bool) (*Driver, error) {
auth, err := aws.GetAuth(accessKey, secretKey, "", time.Time{})
if err != nil {
return nil, err
}
s3obj := s3.New(auth, region)
bucket := s3obj.Bucket(bucketName)
if err := bucket.PutBucket(getPermissions()); err != nil {
s3Err, ok := err.(*s3.Error)
if !(ok && s3Err.Code == "BucketAlreadyOwnedByYou") {
return nil, err
}
}
// TODO What if they use this bucket for other things? I can't just clean out the multis
// TODO Add timestamp checking
multis, _, err := bucket.ListMulti("", "")
if err != nil {
return nil, err
}
for _, multi := range multis {
err := multi.Abort()
//TODO appropriate to do this error checking?
if err != nil {
return nil, err
}
}
return &Driver{s3obj, bucket, encrypt, rootDirectory}, nil
}
// Implement the storagedriver.StorageDriver interface
// GetContent retrieves the content stored at "path" as a []byte.
func (d *Driver) GetContent(path string) ([]byte, error) {
if !storagedriver.PathRegexp.MatchString(path) {
return nil, storagedriver.InvalidPathError{Path: path}
}
content, err := d.Bucket.Get(d.s3Path(path))
if err != nil {
return nil, parseError(path, err)
}
return content, nil
}
// PutContent stores the []byte content at a location designated by "path".
func (d *Driver) PutContent(path string, contents []byte) error {
if !storagedriver.PathRegexp.MatchString(path) {
return storagedriver.InvalidPathError{Path: path}
}
return parseError(path, d.Bucket.Put(d.s3Path(path), contents, d.getContentType(), getPermissions(), d.getOptions()))
}
// ReadStream retrieves an io.ReadCloser for the content stored at "path" with a
// given byte offset.
func (d *Driver) ReadStream(path string, offset int64) (io.ReadCloser, error) {
if !storagedriver.PathRegexp.MatchString(path) {
return nil, storagedriver.InvalidPathError{Path: path}
}
if offset < 0 {
return nil, storagedriver.InvalidOffsetError{Path: path, Offset: offset}
}
headers := make(http.Header)
headers.Add("Range", "bytes="+strconv.FormatInt(offset, 10)+"-")
resp, err := d.Bucket.GetResponseWithHeaders(d.s3Path(path), headers)
if err != nil {
if s3Err, ok := err.(*s3.Error); ok && s3Err.Code == "InvalidRange" {
return ioutil.NopCloser(bytes.NewReader(nil)), nil
}
return nil, parseError(path, err)
}
return resp.Body, nil
}
// WriteStream stores the contents of the provided io.ReadCloser at a location
// designated by the given path.
func (d *Driver) WriteStream(path string, offset int64, reader io.Reader) (totalRead int64, err error) {
if !storagedriver.PathRegexp.MatchString(path) {
return 0, storagedriver.InvalidPathError{Path: path}
}
if offset < 0 {
return 0, storagedriver.InvalidOffsetError{Path: path, Offset: offset}
}
partNumber := 1
parts := []s3.Part{}
var part s3.Part
multi, err := d.Bucket.InitMulti(d.s3Path(path), d.getContentType(), getPermissions(), d.getOptions())
if err != nil {
return 0, err
}
buf := make([]byte, chunkSize)
// We never want to leave a dangling multipart upload, our only consistent state is
// when there is a whole object at path. This is in order to remain consistent with
// the stat call.
//
// Note that if the machine dies before executing the defer, we will be left with a dangling
// multipart upload, which will eventually be cleaned up, but we will lose all of the progress
// made prior to the machine crashing.
defer func() {
if len(parts) > 0 {
err = multi.Complete(parts)
if err != nil {
multi.Abort()
}
}
}()
if offset > 0 {
resp, err := d.Bucket.Head(d.s3Path(path), nil)
if err != nil {
return 0, err
}
if resp.ContentLength < offset {
return 0, storagedriver.InvalidOffsetError{Path: path, Offset: offset}
}
if resp.ContentLength < chunkSize {
// If everything written so far is less than the minimum part size of 5MB, we need
// to fill out the first part up to that minimum.
current, err := d.ReadStream(path, 0)
if err != nil {
return 0, err
}
bytesRead, err := io.ReadFull(current, buf[0:offset])
if err != nil && err != io.ErrUnexpectedEOF && err != io.EOF {
return 0, err
} else if int64(bytesRead) != offset {
//TODO Maybe a different error? I don't even think this case is reachable...
return 0, storagedriver.InvalidOffsetError{Path: path, Offset: offset}
}
bytesRead, err = io.ReadFull(reader, buf[offset:])
totalRead += int64(bytesRead)
if err != nil && err != io.ErrUnexpectedEOF && err != io.EOF {
return totalRead, err
}
part, err = multi.PutPart(int(partNumber), bytes.NewReader(buf[0:int64(bytesRead)+offset]))
if err != nil {
return totalRead, err
}
} else {
fmt.Println("About to PutPartCopy")
// If the file that we already have is larger than 5MB, then we make it the first part
// of the new multipart upload.
_, part, err = multi.PutPartCopy(partNumber, s3.CopyOptions{}, d.Bucket.Name+"/"+d.s3Path(path))
if err != nil {
return 0, err
}
}
parts = append(parts, part)
partNumber++
if totalRead+offset < chunkSize {
return totalRead, nil
}
}
for {
bytesRead, err := io.ReadFull(reader, buf)
totalRead += int64(bytesRead)
if err != nil && err != io.ErrUnexpectedEOF && err != io.EOF {
return totalRead, err
}
part, err := multi.PutPart(int(partNumber), bytes.NewReader(buf[0:bytesRead]))
if err != nil {
return totalRead, err
}
parts = append(parts, part)
partNumber++
if int64(bytesRead) < chunkSize {
break
}
}
return totalRead, nil
}
// Stat retrieves the FileInfo for the given path, including the current size
// in bytes and the creation time.
func (d *Driver) Stat(path string) (storagedriver.FileInfo, error) {
if !storagedriver.PathRegexp.MatchString(path) {
return nil, storagedriver.InvalidPathError{Path: path}
}
listResponse, err := d.Bucket.List(d.s3Path(path), "", "", 1)
if err != nil {
return nil, err
}
fi := storagedriver.FileInfoFields{
Path: path,
}
if len(listResponse.Contents) == 1 {
if listResponse.Contents[0].Key != d.s3Path(path) {
fi.IsDir = true
} else {
fi.IsDir = false
fi.Size = listResponse.Contents[0].Size
timestamp, err := time.Parse(time.RFC3339Nano, listResponse.Contents[0].LastModified)
if err != nil {
return nil, err
}
fi.ModTime = timestamp
}
} else if len(listResponse.CommonPrefixes) == 1 {
fi.IsDir = true
} else {
return nil, storagedriver.PathNotFoundError{Path: path}
}
return storagedriver.FileInfoInternal{FileInfoFields: fi}, nil
}
// List returns a list of the objects that are direct descendants of the given path.
func (d *Driver) List(path string) ([]string, error) {
if !storagedriver.PathRegexp.MatchString(path) && path != "/" {
return nil, storagedriver.InvalidPathError{Path: path}
}
if path != "/" && path[len(path)-1] != '/' {
path = path + "/"
}
listResponse, err := d.Bucket.List(d.s3Path(path), "/", "", listMax)
if err != nil {
return nil, err
}
files := []string{}
directories := []string{}
for {
for _, key := range listResponse.Contents {
files = append(files, strings.Replace(key.Key, d.s3Path(""), "", 1))
}
for _, commonPrefix := range listResponse.CommonPrefixes {
directories = append(directories, strings.Replace(commonPrefix[0:len(commonPrefix)-1], d.s3Path(""), "", 1))
}
if listResponse.IsTruncated {
listResponse, err = d.Bucket.List(d.s3Path(path), "/", listResponse.NextMarker, listMax)
if err != nil {
return nil, err
}
} else {
break
}
}
return append(files, directories...), nil
}
// Move moves an object stored at sourcePath to destPath, removing the original
// object.
func (d *Driver) Move(sourcePath string, destPath string) error {
if !storagedriver.PathRegexp.MatchString(sourcePath) {
return storagedriver.InvalidPathError{Path: sourcePath}
} else if !storagedriver.PathRegexp.MatchString(destPath) {
return storagedriver.InvalidPathError{Path: destPath}
}
/* This is terrible, but aws doesn't have an actual move. */
_, err := d.Bucket.PutCopy(d.s3Path(destPath), getPermissions(),
s3.CopyOptions{Options: d.getOptions(), ContentType: d.getContentType()}, d.Bucket.Name+"/"+d.s3Path(sourcePath))
if err != nil {
return parseError(sourcePath, err)
}
return d.Delete(sourcePath)
}
// Delete recursively deletes all objects stored at "path" and its subpaths.
func (d *Driver) Delete(path string) error {
if !storagedriver.PathRegexp.MatchString(path) {
return storagedriver.InvalidPathError{Path: path}
}
listResponse, err := d.Bucket.List(d.s3Path(path), "", "", listMax)
if err != nil || len(listResponse.Contents) == 0 {
return storagedriver.PathNotFoundError{Path: path}
}
s3Objects := make([]s3.Object, listMax)
for len(listResponse.Contents) > 0 {
for index, key := range listResponse.Contents {
s3Objects[index].Key = key.Key
}
err := d.Bucket.DelMulti(s3.Delete{Quiet: false, Objects: s3Objects[0:len(listResponse.Contents)]})
if err != nil {
return nil
}
listResponse, err = d.Bucket.List(d.s3Path(path), "", "", listMax)
if err != nil {
return err
}
}
return nil
}
func (d *Driver) s3Path(path string) string {
return strings.TrimLeft(d.rootDirectory+path, "/")
}
func (d *Driver) fullPath(path string) string {
return d.rootDirectory + path
}
func parseError(path string, err error) error {
if s3Err, ok := err.(*s3.Error); ok && s3Err.Code == "NoSuchKey" {
return storagedriver.PathNotFoundError{Path: path}
}
return err
}
func hasCode(err error, code string) bool {
s3err, ok := err.(*aws.Error)
return ok && s3err.Code == code
}
func (d *Driver) getOptions() s3.Options {
return s3.Options{SSE: d.Encrypt}
}
func getPermissions() s3.ACL {
return s3.Private
}
func (d *Driver) getContentType() string {
return "application/octet-stream"
}