s3: Reduce memory usage streaming files by reducing max stream upload size

Before this change rclone would allow the user to stream (eg with
rclone mount, rclone rcat or uploading google photos or docs) 5TB
files.  This meant that rclone allocated 4 * 525 MB buffers per
transfer which is way too much memory by default.

This change makes rclone use the configured chunk size for streamed
uploads.  This is 5MB by default which means that rclone can stream
upload files up to 48GB by default staying below the 10,000 chunks
limit.

This can be increased with --s3-chunk-size if necessary.

If rclone detects that a file is being streamed to s3 it will make a
single NOTICE level log stating the limitation.

This fixes the enormous memory usage.

Fixes #3568
See: https://forum.rclone.org/t/how-much-memory-does-rclone-need/12743
This commit is contained in:
Nick Craig-Wood 2019-11-06 10:41:03 +00:00
parent 4b20afa94a
commit 9b5308144f

View file

@ -26,6 +26,7 @@ import (
"regexp"
"strconv"
"strings"
"sync"
"time"
"github.com/aws/aws-sdk-go/aws"
@ -693,14 +694,25 @@ The minimum is 0 and the maximum is 5GB.`,
Name: "chunk_size",
Help: `Chunk size to use for uploading.
When uploading files larger than upload_cutoff they will be uploaded
as multipart uploads using this chunk size.
When uploading files larger than upload_cutoff or files with unknown
size (eg from "rclone rcat" or uploaded with "rclone mount" or google
photos or google docs) they will be uploaded as multipart uploads
using this chunk size.
Note that "--s3-upload-concurrency" chunks of this size are buffered
in memory per transfer.
If you are transferring large files over high speed links and you have
enough memory, then increasing this will speed up the transfers.`,
enough memory, then increasing this will speed up the transfers.
Rclone will automatically increase the chunk size when uploading a
large file of known size to stay below the 10,000 chunks limit.
Files of unknown size are uploaded with the configured
chunk_size. Since the default chunk size is 5MB and there can be at
most 10,000 chunks, this means that by default the maximum size of
file you can stream upload is 48GB. If you wish to stream upload
larger files then you will need to increase chunk_size.`,
Default: minChunkSize,
Advanced: true,
}, {
@ -771,12 +783,11 @@ WARNING: Storing parts of an incomplete multipart upload counts towards space us
// Constants
const (
metaMtime = "Mtime" // the meta key to store mtime in - eg X-Amz-Meta-Mtime
metaMD5Hash = "Md5chksum" // the meta key to store md5hash in
listChunkSize = 1000 // number of items to read at once
maxRetries = 10 // number of retries to make of operations
maxSizeForCopy = 5 * 1024 * 1024 * 1024 // The maximum size of object we can COPY
maxFileSize = 5 * 1024 * 1024 * 1024 * 1024 // largest possible upload file size
metaMtime = "Mtime" // the meta key to store mtime in - eg X-Amz-Meta-Mtime
metaMD5Hash = "Md5chksum" // the meta key to store md5hash in
listChunkSize = 1000 // number of items to read at once
maxRetries = 10 // number of retries to make of operations
maxSizeForCopy = 5 * 1024 * 1024 * 1024 // The maximum size of object we can COPY
minChunkSize = fs.SizeSuffix(s3manager.MinUploadPartSize)
defaultUploadCutoff = fs.SizeSuffix(200 * 1024 * 1024)
maxUploadCutoff = fs.SizeSuffix(5 * 1024 * 1024 * 1024)
@ -1982,6 +1993,8 @@ func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (in io.Read
return resp.Body, nil
}
var warnStreamUpload sync.Once
// Update the Object from in with modTime and size
func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) error {
bucket, bucketPath := o.split()
@ -2001,10 +2014,14 @@ func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, op
u.S3 = o.fs.c
u.PartSize = int64(o.fs.opt.ChunkSize)
// size can be -1 here meaning we don't know the size of the incoming file. We use ChunkSize
// buffers here (default 5MB). With a maximum number of parts (10,000) this will be a file of
// 48GB which seems like a not too unreasonable limit.
if size == -1 {
// Make parts as small as possible while still being able to upload to the
// S3 file size limit. Rounded up to nearest MB.
u.PartSize = (((maxFileSize / s3manager.MaxUploadParts) >> 20) + 1) << 20
warnStreamUpload.Do(func() {
fs.Logf(o.fs, "Streaming uploads using chunk size %v will have maximum file size of %v",
o.fs.opt.ChunkSize, fs.SizeSuffix(u.PartSize*s3manager.MaxUploadParts))
})
return
}
// Adjust PartSize until the number of parts is small enough.