s3: set/get the hash for multipart files - #523

Author: Chris Redekop
Date: 2018-01-06 09:30:10 -05:00
Committed by: Nick Craig-Wood
Parent: 65618afd8c
Commit: 4dd1e507f4

2 changed files with 35 additions and 8 deletions

docs/content/s3.md

@@ -224,8 +224,8 @@ The modified time is stored as metadata on the object as
 ### Multipart uploads ###
 
 rclone supports multipart uploads with S3 which means that it can
-upload files bigger than 5GB. Note that files uploaded with multipart
-upload don't have an MD5SUM.
+upload files bigger than 5GB. Note that files uploaded *both* with
+multipart upload *and* through crypt remotes do not have MD5 sums.
 
 ### Buckets and Regions ###

s3/s3.go

@@ -14,6 +14,8 @@ What happens if you CTRL-C a multipart upload
 */
 
 import (
+	"encoding/base64"
+	"encoding/hex"
 	"fmt"
 	"io"
 	"net/http"
@@ -221,6 +223,7 @@ func init() {
 // Constants
 const (
 	metaMtime      = "Mtime"     // the meta key to store mtime in - eg X-Amz-Meta-Mtime
+	metaMD5Hash    = "Md5chksum" // the meta key to store md5hash in
 	listChunkSize  = 1000        // number of items to read at once
 	maxRetries     = 10          // number of retries to make of operations
 	maxSizeForCopy = 5 * 1024 * 1024 * 1024 // The maximum size of object we can COPY
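The new meta key surfaces on the object as the X-Amz-Meta-Md5chksum header. Below is a minimal sketch of reading it back with aws-sdk-go (the SDK this backend uses); the region, bucket, and key are hypothetical, and it assumes the v1 SDK returns user metadata keys in canonical header casing ("Md5chksum"), which is the same casing the backend itself looks up:

package main

import (
	"fmt"

	"github.com/aws/aws-sdk-go/aws"
	"github.com/aws/aws-sdk-go/aws/session"
	"github.com/aws/aws-sdk-go/service/s3"
)

func main() {
	sess := session.Must(session.NewSession(&aws.Config{
		Region: aws.String("us-east-1"), // hypothetical region
	}))
	svc := s3.New(sess)

	// HEAD the object and look for the hash stored by rclone.
	resp, err := svc.HeadObject(&s3.HeadObjectInput{
		Bucket: aws.String("my-bucket"),    // hypothetical bucket
		Key:    aws.String("big-file.bin"), // hypothetical key
	})
	if err != nil {
		panic(err)
	}
	if v, ok := resp.Metadata["Md5chksum"]; ok {
		fmt.Println("stored md5 (base64):", *v)
	} else {
		fmt.Println("no md5 metadata (single-part upload?)")
	}
}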
@@ -832,13 +835,25 @@ func (o *Object) Hash(t fs.HashType) (string, error) {
 	if t != fs.HashMD5 {
 		return "", fs.ErrHashUnsupported
 	}
-	etag := strings.Trim(strings.ToLower(o.etag), `"`)
+	hash := strings.Trim(strings.ToLower(o.etag), `"`)
 	// Check the etag is a valid md5sum
-	if !matchMd5.MatchString(etag) {
-		// fs.Debugf(o, "Invalid md5sum (probably multipart uploaded) - ignoring: %q", etag)
-		return "", nil
+	if !matchMd5.MatchString(hash) {
+		err := o.readMetaData()
+		if err != nil {
+			return "", err
+		}
+		if md5sum, ok := o.meta[metaMD5Hash]; ok {
+			md5sumBytes, err := base64.StdEncoding.DecodeString(*md5sum)
+			if err != nil {
+				return "", err
+			}
+			hash = hex.EncodeToString(md5sumBytes)
+		} else {
+			hash = ""
+		}
 	}
-	return etag, nil
+	return hash, nil
 }
 
 // Size returns the size of an object in bytes
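The decode step exists because S3 metadata carries the base64 encoding of the raw 16 MD5 bytes, while rclone reports hashes as 32-character lower-case hex. A self-contained sketch of that round trip (the sample input is arbitrary):

package main

import (
	"crypto/md5"
	"encoding/base64"
	"encoding/hex"
	"fmt"
)

func main() {
	// What the uploader stores: base64 of the raw digest.
	sum := md5.Sum([]byte("hello\n"))
	stored := base64.StdEncoding.EncodeToString(sum[:])
	fmt.Println("metadata value:", stored)

	// What Hash does on the way back out: base64 -> raw bytes -> hex.
	raw, err := base64.StdEncoding.DecodeString(stored)
	if err != nil {
		panic(err)
	}
	fmt.Println("reported hash: ", hex.EncodeToString(raw)) // b1946ac92492d2347c6235b4d2611184
}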
@@ -985,13 +1000,13 @@ func (o *Object) Update(in io.Reader, src fs.ObjectInfo, options ...fs.OpenOptio
 		return err
 	}
 	modTime := src.ModTime()
+	size := src.Size()
 
 	uploader := s3manager.NewUploader(o.fs.ses, func(u *s3manager.Uploader) {
 		u.Concurrency = 2
 		u.LeavePartsOnError = false
 		u.S3 = o.fs.c
 		u.PartSize = s3manager.MinUploadPartSize
-		size := src.Size()
 
 		if size == -1 {
 			// Make parts as small as possible while still being able to upload to the
@@ -1011,6 +1026,18 @@ func (o *Object) Update(in io.Reader, src fs.ObjectInfo, options ...fs.OpenOptio
 		metaMtime: aws.String(swift.TimeToFloatString(modTime)),
 	}
 
+	if size > uploader.PartSize {
+		hash, err := src.Hash(fs.HashMD5)
+
+		if err == nil && matchMd5.MatchString(hash) {
+			hashBytes, err := hex.DecodeString(hash)
+
+			if err == nil {
+				metadata[metaMD5Hash] = aws.String(base64.StdEncoding.EncodeToString(hashBytes))
+			}
+		}
+	}
+
 	// Guess the content type
 	mimeType := fs.MimeType(src)
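Note the guard on the write path: the metadata is only attached when size > uploader.PartSize, i.e. when the upload will actually be multipart (single-part uploads already get a plain MD5 as their ETag), and only when the source can supply a valid md5. A minimal sketch of that decision, assuming partSize stands in for uploader.PartSize and that the matcher has the same shape as rclone's matchMd5:

package main

import (
	"encoding/base64"
	"encoding/hex"
	"fmt"
	"regexp"
)

// Assumed to match rclone's matchMd5: 32 lower-case hex digits.
var matchMd5 = regexp.MustCompile(`^[0-9a-f]{32}$`)

// md5MetaValue returns the base64 value to store under the Md5chksum
// meta key, or ok == false when no metadata should be written.
func md5MetaValue(size, partSize int64, hexMD5 string) (value string, ok bool) {
	if size <= partSize || !matchMd5.MatchString(hexMD5) {
		return "", false
	}
	raw, err := hex.DecodeString(hexMD5)
	if err != nil {
		return "", false
	}
	return base64.StdEncoding.EncodeToString(raw), true
}

func main() {
	// A 6 GiB upload with 5 MiB parts qualifies; the hash converts cleanly.
	v, ok := md5MetaValue(6<<30, 5<<20, "b1946ac92492d2347c6235b4d2611184")
	fmt.Println(v, ok) // sZRqySSS0jR8YjW00mERhA== true
}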