diff --git a/backend/s3/s3.go b/backend/s3/s3.go
index f7c3f46f3..c49d9c9b7 100644
--- a/backend/s3/s3.go
+++ b/backend/s3/s3.go
@@ -26,6 +26,7 @@ import (
 	"regexp"
 	"strconv"
 	"strings"
+	"sync"
 	"time"
 
 	"github.com/aws/aws-sdk-go/aws"
@@ -693,14 +694,25 @@ The minimum is 0 and the maximum is 5GB.`,
 			Name: "chunk_size",
 			Help: `Chunk size to use for uploading.
 
-When uploading files larger than upload_cutoff they will be uploaded
-as multipart uploads using this chunk size.
+When uploading files larger than upload_cutoff or files with unknown
+size (eg from "rclone rcat" or uploaded with "rclone mount" or google
+photos or google docs) they will be uploaded as multipart uploads
+using this chunk size.
 
 Note that "--s3-upload-concurrency" chunks of this size are buffered
 in memory per transfer.
 
 If you are transferring large files over high speed links and you have
-enough memory, then increasing this will speed up the transfers.`,
+enough memory, then increasing this will speed up the transfers.
+
+Rclone will automatically increase the chunk size when uploading a
+large file of known size to stay below the 10,000 chunks limit.
+
+Files of unknown size are uploaded with the configured
+chunk_size. Since the default chunk size is 5MB and there can be at
+most 10,000 chunks, this means that by default the maximum size of
+file you can stream upload is 48GB. If you wish to stream upload
+larger files then you will need to increase chunk_size.`,
 			Default:  minChunkSize,
 			Advanced: true,
 		}, {
@@ -771,12 +783,11 @@ WARNING: Storing parts of an incomplete multipart upload counts towards space us
 
 // Constants
 const (
-	metaMtime           = "Mtime"                       // the meta key to store mtime in - eg X-Amz-Meta-Mtime
-	metaMD5Hash         = "Md5chksum"                   // the meta key to store md5hash in
-	listChunkSize       = 1000                          // number of items to read at once
-	maxRetries          = 10                            // number of retries to make of operations
-	maxSizeForCopy      = 5 * 1024 * 1024 * 1024        // The maximum size of object we can COPY
-	maxFileSize         = 5 * 1024 * 1024 * 1024 * 1024 // largest possible upload file size
+	metaMtime      = "Mtime"                // the meta key to store mtime in - eg X-Amz-Meta-Mtime
+	metaMD5Hash    = "Md5chksum"            // the meta key to store md5hash in
+	listChunkSize  = 1000                   // number of items to read at once
+	maxRetries     = 10                     // number of retries to make of operations
+	maxSizeForCopy = 5 * 1024 * 1024 * 1024 // The maximum size of object we can COPY
 	minChunkSize        = fs.SizeSuffix(s3manager.MinUploadPartSize)
 	defaultUploadCutoff = fs.SizeSuffix(200 * 1024 * 1024)
 	maxUploadCutoff     = fs.SizeSuffix(5 * 1024 * 1024 * 1024)
@@ -1982,6 +1993,8 @@ func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (in io.Read
 	return resp.Body, nil
 }
 
+var warnStreamUpload sync.Once
+
 // Update the Object from in with modTime and size
 func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) error {
 	bucket, bucketPath := o.split()
@@ -2001,10 +2014,14 @@ func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, op
 		u.S3 = o.fs.c
 		u.PartSize = int64(o.fs.opt.ChunkSize)
 
+		// size can be -1 here meaning we don't know the size of the incoming file. We use ChunkSize
+		// buffers here (default 5MB). With a maximum number of parts (10,000) this will be a file of
+		// 48GB which seems like a not too unreasonable limit.
 		if size == -1 {
-			// Make parts as small as possible while still being able to upload to the
-			// S3 file size limit. Rounded up to nearest MB.
-			u.PartSize = (((maxFileSize / s3manager.MaxUploadParts) >> 20) + 1) << 20
+			warnStreamUpload.Do(func() {
+				fs.Logf(o.fs, "Streaming uploads using chunk size %v will have maximum file size of %v",
+					o.fs.opt.ChunkSize, fs.SizeSuffix(u.PartSize*s3manager.MaxUploadParts))
+			})
 			return
 		}
 		// Adjust PartSize until the number of parts is small enough.
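Below is a minimal standalone sketch, not part of the patch, of the arithmetic behind the 48GB figure quoted in the new help text and comment: a streamed upload keeps PartSize at chunk_size, so the hard ceiling is chunk_size multiplied by S3's 10,000-part cap on multipart uploads. The constants here are hard-coded stand-ins for s3manager.MinUploadPartSize and s3manager.MaxUploadParts so the example runs on its own.

```go
package main

import "fmt"

func main() {
	// Stand-ins for the aws-sdk-go s3manager values referenced in the patch:
	// MinUploadPartSize is 5 MiB and MaxUploadParts is 10,000.
	const (
		chunkSize      = 5 * 1024 * 1024 // default chunk_size (minChunkSize)
		maxUploadParts = 10000           // S3 limit on parts per multipart upload
	)

	// Streamed uploads (size == -1) use chunk_size-sized parts, so the
	// largest file that can be streamed is chunkSize * maxUploadParts.
	maxStreamSize := int64(chunkSize) * maxUploadParts
	fmt.Printf("max streamed upload with 5 MiB chunks: %d bytes (~%.1f GiB)\n",
		maxStreamSize, float64(maxStreamSize)/(1<<30))
	// Prints ~48.8 GiB, which the help text rounds to 48GB.
}
```

Doubling chunk_size doubles this ceiling, which is why the new help text points users at increasing chunk_size when they need to stream files larger than 48GB.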