azureblob,b2,s3: fix chunksize calculations producing too many parts

Before this fix, the chunksize calculator was using the previous size
of the object, not the new size of the object to calculate the chunk
sizes.

This meant that uploading a replacement object which needed a new
chunk size would fail, using too many parts.

This fix fixes the calculator to take the size explicitly.
This commit is contained in:
Nick Craig-Wood 2022-08-09 10:44:54 +01:00
parent cb8842941b
commit 0501773db1
5 changed files with 111 additions and 37 deletions

View file

@ -1678,14 +1678,14 @@ func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, op
} }
} }
uploadParts := int64(maxUploadParts) uploadParts := maxUploadParts
if uploadParts < 1 { if uploadParts < 1 {
uploadParts = 1 uploadParts = 1
} else if uploadParts > maxUploadParts { } else if uploadParts > maxUploadParts {
uploadParts = maxUploadParts uploadParts = maxUploadParts
} }
// calculate size of parts/blocks // calculate size of parts/blocks
partSize := chunksize.Calculator(o, int(uploadParts), o.fs.opt.ChunkSize) partSize := chunksize.Calculator(o, src.Size(), uploadParts, o.fs.opt.ChunkSize)
putBlobOptions := azblob.UploadStreamToBlockBlobOptions{ putBlobOptions := azblob.UploadStreamToBlockBlobOptions{
BufferSize: int(partSize), BufferSize: int(partSize),

View file

@ -97,7 +97,7 @@ func (f *Fs) newLargeUpload(ctx context.Context, o *Object, in io.Reader, src fs
if size == -1 { if size == -1 {
fs.Debugf(o, "Streaming upload with --b2-chunk-size %s allows uploads of up to %s and will fail only when that limit is reached.", f.opt.ChunkSize, maxParts*f.opt.ChunkSize) fs.Debugf(o, "Streaming upload with --b2-chunk-size %s allows uploads of up to %s and will fail only when that limit is reached.", f.opt.ChunkSize, maxParts*f.opt.ChunkSize)
} else { } else {
chunkSize = chunksize.Calculator(src, maxParts, defaultChunkSize) chunkSize = chunksize.Calculator(o, size, maxParts, defaultChunkSize)
parts = size / int64(chunkSize) parts = size / int64(chunkSize)
if size%int64(chunkSize) != 0 { if size%int64(chunkSize) != 0 {
parts++ parts++

View file

@ -2116,7 +2116,7 @@ type Options struct {
UploadCutoff fs.SizeSuffix `config:"upload_cutoff"` UploadCutoff fs.SizeSuffix `config:"upload_cutoff"`
CopyCutoff fs.SizeSuffix `config:"copy_cutoff"` CopyCutoff fs.SizeSuffix `config:"copy_cutoff"`
ChunkSize fs.SizeSuffix `config:"chunk_size"` ChunkSize fs.SizeSuffix `config:"chunk_size"`
MaxUploadParts int64 `config:"max_upload_parts"` MaxUploadParts int `config:"max_upload_parts"`
DisableChecksum bool `config:"disable_checksum"` DisableChecksum bool `config:"disable_checksum"`
SharedCredentialsFile string `config:"shared_credentials_file"` SharedCredentialsFile string `config:"shared_credentials_file"`
Profile string `config:"profile"` Profile string `config:"profile"`
@ -4718,10 +4718,10 @@ func (o *Object) uploadMultipart(ctx context.Context, req *s3.PutObjectInput, si
if size == -1 { if size == -1 {
warnStreamUpload.Do(func() { warnStreamUpload.Do(func() {
fs.Logf(f, "Streaming uploads using chunk size %v will have maximum file size of %v", fs.Logf(f, "Streaming uploads using chunk size %v will have maximum file size of %v",
f.opt.ChunkSize, fs.SizeSuffix(int64(partSize)*uploadParts)) f.opt.ChunkSize, fs.SizeSuffix(int64(partSize)*int64(uploadParts)))
}) })
} else { } else {
partSize = chunksize.Calculator(o, int(uploadParts), f.opt.ChunkSize) partSize = chunksize.Calculator(o, size, uploadParts, f.opt.ChunkSize)
} }
memPool := f.getMemoryPool(int64(partSize)) memPool := f.getMemoryPool(int64(partSize))

View file

@ -5,18 +5,26 @@ import (
"github.com/rclone/rclone/fs" "github.com/rclone/rclone/fs"
) )
/* // Calculator calculates the minimum chunk size needed to fit within
Calculator calculates the minimum chunk size needed to fit within the maximum number of parts, rounded up to the nearest fs.Mebi // the maximum number of parts, rounded up to the nearest fs.Mebi.
//
// For most backends, (chunk_size) * (concurrent_upload_routines)
// memory will be required so we want to use the smallest possible
// chunk size that's going to allow the upload to proceed. Rounding up
// to the nearest fs.Mebi on the assumption that some backends may
// only allow integer type parameters when specifying the chunk size.
//
// Returns the default chunk size if it is sufficiently large enough
// to support the given file size otherwise returns the smallest chunk
// size necessary to allow the upload to proceed.
func Calculator(o interface{}, size int64, maxParts int, defaultChunkSize fs.SizeSuffix) fs.SizeSuffix {
// If streaming then use default chunk size
if size < 0 {
fs.Debugf(o, "Streaming upload with chunk_size %s allows uploads of up to %s and will fail only when that limit is reached.", defaultChunkSize, fs.SizeSuffix(maxParts)*defaultChunkSize)
For most backends, (chunk_size) * (concurrent_upload_routines) memory will be required so we want to use the smallest return defaultChunkSize
possible chunk size that's going to allow the upload to proceed. Rounding up to the nearest fs.Mebi on the assumption }
that some backends may only allow integer type parameters when specifying the chunk size. fileSize := fs.SizeSuffix(size)
Returns the default chunk size if it is sufficiently large enough to support the given file size otherwise returns the
smallest chunk size necessary to allow the upload to proceed.
*/
func Calculator(objInfo fs.ObjectInfo, maxParts int, defaultChunkSize fs.SizeSuffix) fs.SizeSuffix {
fileSize := fs.SizeSuffix(objInfo.Size())
requiredChunks := fileSize / defaultChunkSize requiredChunks := fileSize / defaultChunkSize
if requiredChunks < fs.SizeSuffix(maxParts) || (requiredChunks == fs.SizeSuffix(maxParts) && fileSize%defaultChunkSize == 0) { if requiredChunks < fs.SizeSuffix(maxParts) || (requiredChunks == fs.SizeSuffix(maxParts) && fileSize%defaultChunkSize == 0) {
return defaultChunkSize return defaultChunkSize
@ -31,6 +39,6 @@ func Calculator(objInfo fs.ObjectInfo, maxParts int, defaultChunkSize fs.SizeSuf
minChunk += fs.Mebi minChunk += fs.Mebi
} }
fs.Debugf(objInfo, "size: %v, parts: %v, default: %v, new: %v; default chunk size insufficient, returned new chunk size", fileSize, maxParts, defaultChunkSize, minChunk) fs.Debugf(o, "size: %v, parts: %v, default: %v, new: %v; default chunk size insufficient, returned new chunk size", fileSize, maxParts, defaultChunkSize, minChunk)
return minChunk return minChunk
} }

View file

@ -2,34 +2,100 @@ package chunksize
import ( import (
"testing" "testing"
"time"
"github.com/rclone/rclone/fs" "github.com/rclone/rclone/fs"
"github.com/rclone/rclone/fs/object"
) )
func TestComputeChunkSize(t *testing.T) { func TestComputeChunkSize(t *testing.T) {
tests := map[string]struct { for _, test := range []struct {
fileSize fs.SizeSuffix name string
size fs.SizeSuffix
maxParts int maxParts int
defaultChunkSize fs.SizeSuffix defaultChunkSize fs.SizeSuffix
expected fs.SizeSuffix want fs.SizeSuffix
}{ }{
"default size returned when file size is small enough": {fileSize: 1000, maxParts: 10000, defaultChunkSize: toSizeSuffixMiB(10), expected: toSizeSuffixMiB(10)}, {
"default size returned when file size is just 1 byte small enough": {fileSize: toSizeSuffixMiB(100000) - 1, maxParts: 10000, defaultChunkSize: toSizeSuffixMiB(10), expected: toSizeSuffixMiB(10)}, name: "streaming file",
"no rounding up when everything divides evenly": {fileSize: toSizeSuffixMiB(1000000), maxParts: 10000, defaultChunkSize: toSizeSuffixMiB(100), expected: toSizeSuffixMiB(100)}, size: -1,
"rounding up to nearest MiB when not quite enough parts": {fileSize: toSizeSuffixMiB(1000000), maxParts: 9999, defaultChunkSize: toSizeSuffixMiB(100), expected: toSizeSuffixMiB(101)}, maxParts: 10000,
"rounding up to nearest MiB when one extra byte": {fileSize: toSizeSuffixMiB(1000000) + 1, maxParts: 10000, defaultChunkSize: toSizeSuffixMiB(100), expected: toSizeSuffixMiB(101)}, defaultChunkSize: toSizeSuffixMiB(10),
"expected MiB value when rounding sets to absolute minimum": {fileSize: toSizeSuffixMiB(1) - 1, maxParts: 1, defaultChunkSize: toSizeSuffixMiB(1), expected: toSizeSuffixMiB(1)}, want: toSizeSuffixMiB(10),
"expected MiB value when rounding to absolute min with extra": {fileSize: toSizeSuffixMiB(1) + 1, maxParts: 1, defaultChunkSize: toSizeSuffixMiB(1), expected: toSizeSuffixMiB(2)}, }, {
} name: "default size returned when file size is small enough",
size: 1000,
maxParts: 10000,
defaultChunkSize: toSizeSuffixMiB(10),
want: toSizeSuffixMiB(10),
}, {
name: "default size returned when file size is just 1 byte small enough",
size: toSizeSuffixMiB(100000) - 1,
maxParts: 10000,
defaultChunkSize: toSizeSuffixMiB(10),
want: toSizeSuffixMiB(10),
}, {
name: "no rounding up when everything divides evenly",
size: toSizeSuffixMiB(1000000),
maxParts: 10000,
defaultChunkSize: toSizeSuffixMiB(100),
want: toSizeSuffixMiB(100),
}, {
name: "rounding up to nearest MiB when not quite enough parts",
size: toSizeSuffixMiB(1000000),
maxParts: 9999,
defaultChunkSize: toSizeSuffixMiB(100),
want: toSizeSuffixMiB(101),
}, {
name: "rounding up to nearest MiB when one extra byte",
size: toSizeSuffixMiB(1000000) + 1,
maxParts: 10000,
defaultChunkSize: toSizeSuffixMiB(100),
want: toSizeSuffixMiB(101),
}, {
name: "expected MiB value when rounding sets to absolute minimum",
size: toSizeSuffixMiB(1) - 1,
maxParts: 1,
defaultChunkSize: toSizeSuffixMiB(1),
want: toSizeSuffixMiB(1),
}, {
name: "expected MiB value when rounding to absolute min with extra",
size: toSizeSuffixMiB(1) + 1,
maxParts: 1,
defaultChunkSize: toSizeSuffixMiB(1),
want: toSizeSuffixMiB(2),
}, {
name: "issue from forum #1",
size: 120864818840,
maxParts: 10000,
defaultChunkSize: 5 * 1024 * 1024,
want: toSizeSuffixMiB(12),
},
} {
t.Run(test.name, func(t *testing.T) {
got := Calculator(test.name, int64(test.size), test.maxParts, test.defaultChunkSize)
if got != test.want {
t.Fatalf("expected: %v, got: %v", test.want, got)
}
if test.size < 0 {
return
}
parts := func(result fs.SizeSuffix) int {
n := test.size / result
r := test.size % result
if r != 0 {
n++
}
return int(n)
}
// Check this gives the parts in range
if parts(got) > test.maxParts {
t.Fatalf("too many parts %d", parts(got))
}
// Check that setting chunk size smaller gave too many parts
if got > test.defaultChunkSize {
if parts(got-toSizeSuffixMiB(1)) <= test.maxParts {
t.Fatalf("chunk size %v too big as %v only gives %d parts", got, got-toSizeSuffixMiB(1), parts(got-toSizeSuffixMiB(1)))
}
for name, tc := range tests {
t.Run(name, func(t *testing.T) {
src := object.NewStaticObjectInfo("mock", time.Now(), int64(tc.fileSize), true, nil, nil)
result := Calculator(src, tc.maxParts, tc.defaultChunkSize)
if result != tc.expected {
t.Fatalf("expected: %v, got: %v", tc.expected, result)
} }
}) })
} }