s3: add --s3-copy-cutoff for size to switch to multipart copy

Before this change we used the same (relatively low limits) for server
side copy as we did for multipart uploads.  It doesn't make sense to
use the same limits since no data is being downloaded or uploaded for
a server side copy.

This change introduces a new parameter --s3-copy-cutoff to control
when the switch from single to multipart server size copy happens and
defaults it to the maximum 5GB.

This makes server side copies much more efficient.

It also fixes the erroneous error when trying to set the modification
time of a file bigger than 5GB.

See #3778
This commit is contained in:
Nick Craig-Wood 2019-12-02 17:14:57 +00:00
parent f4746f5064
commit 0d10640aaa

View file

@ -715,6 +715,16 @@ file you can stream upload is 48GB. If you wish to stream upload
larger files then you will need to increase chunk_size.`,
Default: minChunkSize,
Advanced: true,
}, {
Name: "copy_cutoff",
Help: `Cutoff for switching to multipart copy
Any files larger than this that need to be server side copied will be
copied in chunks of this size.
The minimum is 0 and the maximum is 5GB.`,
Default: fs.SizeSuffix(maxSizeForCopy),
Advanced: true,
}, {
Name: "disable_checksum",
Help: "Don't store MD5 checksum with object metadata",
@ -809,6 +819,7 @@ type Options struct {
SSEKMSKeyID string `config:"sse_kms_key_id"`
StorageClass string `config:"storage_class"`
UploadCutoff fs.SizeSuffix `config:"upload_cutoff"`
CopyCutoff fs.SizeSuffix `config:"copy_cutoff"`
ChunkSize fs.SizeSuffix `config:"chunk_size"`
DisableChecksum bool `config:"disable_checksum"`
SessionToken string `config:"session_token"`
@ -1653,7 +1664,7 @@ func (f *Fs) copy(ctx context.Context, req *s3.CopyObjectInput, dstBucket, dstPa
req.StorageClass = &f.opt.StorageClass
}
if srcSize >= int64(f.opt.UploadCutoff) {
if srcSize >= int64(f.opt.CopyCutoff) {
return f.copyMultipart(ctx, req, dstBucket, dstPath, srcBucket, srcPath, srcSize)
}
return f.pacer.Call(func() (bool, error) {
@ -1704,7 +1715,7 @@ func (f *Fs) copyMultipart(ctx context.Context, req *s3.CopyObjectInput, dstBuck
}
}()
partSize := int64(f.opt.ChunkSize)
partSize := int64(f.opt.CopyCutoff)
numParts := (srcSize-1)/partSize + 1
var parts []*s3.CompletedPart
@ -1932,11 +1943,6 @@ func (o *Object) SetModTime(ctx context.Context, modTime time.Time) error {
}
o.meta[metaMtime] = aws.String(swift.TimeToFloatString(modTime))
if o.bytes >= maxSizeForCopy {
fs.Debugf(o, "SetModTime is unsupported for objects bigger than %v bytes", fs.SizeSuffix(maxSizeForCopy))
return nil
}
// Can't update metadata here, so return this error to force a recopy
if o.storageClass == "GLACIER" || o.storageClass == "DEEP_ARCHIVE" {
return fs.ErrorCantSetModTime