forked from TrueCloudLab/rclone
operations: enable multi threaded downloads - Fixes #2252
This implements the --multi-thread-cutoff and --multi-thread-streams flags to control multi thread downloading to the local backend.
This commit is contained in:
parent
687cbf3ded
commit
7d70e92664
8 changed files with 340 additions and 30 deletions
|
@ -73,6 +73,7 @@ Please see [the full list of all storage providers and their features](https://r
|
|||
* Optional encryption ([Crypt](https://rclone.org/crypt/))
|
||||
* Optional cache ([Cache](https://rclone.org/cache/))
|
||||
* Optional FUSE mount ([rclone mount](https://rclone.org/commands/rclone_mount/))
|
||||
* Multi-threaded downloads to local disk
|
||||
|
||||
## Installation & documentation
|
||||
|
||||
|
|
|
@ -65,6 +65,7 @@ Features
|
|||
* ([Cache](/cache/)) backend
|
||||
* ([Union](/union/)) backend
|
||||
* Optional FUSE mount ([rclone mount](/commands/rclone_mount/))
|
||||
* Multi-threaded downloads to local disk
|
||||
|
||||
Links
|
||||
|
||||
|
|
|
@ -674,6 +674,49 @@ if you are reading and writing to an OS X filing system this will be
|
|||
|
||||
This command line flag allows you to override that computed default.
|
||||
|
||||
### --multi-thread-cutoff=SIZE ###
|
||||
|
||||
When downloading files to the local backend above this size, rclone
|
||||
will use multiple threads to download the file. (default 250M)
|
||||
|
||||
Rclone preallocates the file (using `fallocate(FALLOC_FL_KEEP_SIZE)`
|
||||
on unix or `NTSetInformationFile` on Windows both of which takes no
|
||||
time) then each thread writes directly into the file at the correct
|
||||
place. This means that rclone won't create fragmented or sparse files
|
||||
and there won't be any assembly time at the end of the transfer.
|
||||
|
||||
The number of threads used to dowload is controlled by
|
||||
`--multi-thread-streams`.
|
||||
|
||||
Use `-vv` if you wish to see info about the threads.
|
||||
|
||||
This will work with the `sync`/`copy`/`move` commands and friends
|
||||
`copyto`/`moveto`. Multi thread downloads will be used with `rclone
|
||||
mount` and `rclone serve` if `--vfs-cache-mode` is set to `writes` or
|
||||
above.
|
||||
|
||||
**NB** that this **only** works for a local destination but will work
|
||||
with any source.
|
||||
|
||||
### --multi-thread-streams=N ###
|
||||
|
||||
When using multi thread downloads (see above `--multi-thread-cutoff`)
|
||||
this sets the maximum number of streams to use. Set to `0` to disable
|
||||
multi thread downloads. (Default 4)
|
||||
|
||||
Exactly how many streams rclone uses for the download depends on the
|
||||
size of the file. To calculate the number of download streams Rclone
|
||||
divides the size of the file by the `--multi-thread-cutoff` and rounds
|
||||
up, up to the maximum set with `--multi-thread-streams`.
|
||||
|
||||
So if `--multi-thread-cutoff 250MB` and `--multi-thread-streams 4` are
|
||||
in effect (the defaults):
|
||||
|
||||
- 0MB.250MB files will be downloaded with 1 stream
|
||||
- 250MB..500MB files will be downloaded with 2 streams
|
||||
- 500MB..750MB files will be downloaded with 3 streams
|
||||
- 750MB+ files will be downloaded with 4 streams
|
||||
|
||||
### --no-gzip-encoding ###
|
||||
|
||||
Don't set `Accept-Encoding: gzip`. This means that rclone won't ask
|
||||
|
|
|
@ -93,6 +93,8 @@ type ConfigInfo struct {
|
|||
CaCert string // Client Side CA
|
||||
ClientCert string // Client Side Cert
|
||||
ClientKey string // Client Side Key
|
||||
MultiThreadCutoff SizeSuffix
|
||||
MultiThreadStreams int
|
||||
}
|
||||
|
||||
// NewConfig creates a new config with everything set to the default
|
||||
|
@ -124,6 +126,8 @@ func NewConfig() *ConfigInfo {
|
|||
c.MaxBacklog = 10000
|
||||
// We do not want to set the default here. We use this variable being empty as part of the fall-through of options.
|
||||
// c.StatsOneLineDateFormat = "2006/01/02 15:04:05 - "
|
||||
c.MultiThreadCutoff = SizeSuffix(250 * 1024 * 1024)
|
||||
c.MultiThreadStreams = 4
|
||||
|
||||
return c
|
||||
}
|
||||
|
|
|
@ -95,6 +95,8 @@ func AddFlags(flagSet *pflag.FlagSet) {
|
|||
flags.StringVarP(flagSet, &fs.Config.CaCert, "ca-cert", "", fs.Config.CaCert, "CA certificate used to verify servers")
|
||||
flags.StringVarP(flagSet, &fs.Config.ClientCert, "client-cert", "", fs.Config.ClientCert, "Client SSL certificate (PEM) for mutual TLS auth")
|
||||
flags.StringVarP(flagSet, &fs.Config.ClientKey, "client-key", "", fs.Config.ClientKey, "Client SSL private key (PEM) for mutual TLS auth")
|
||||
flags.FVarP(flagSet, &fs.Config.MultiThreadCutoff, "multi-thread-cutoff", "", "Use multi-thread downloads for files above this size.")
|
||||
flags.IntVarP(flagSet, &fs.Config.MultiThreadStreams, "multi-thread-streams", "", fs.Config.MultiThreadStreams, "Max number of streams to use for multi-thread downloads.")
|
||||
}
|
||||
|
||||
// SetFlags converts any flags into config which weren't straight forward
|
||||
|
|
171
fs/operations/multithread.go
Normal file
171
fs/operations/multithread.go
Normal file
|
@ -0,0 +1,171 @@
|
|||
package operations
|
||||
|
||||
import (
|
||||
"context"
|
||||
"io"
|
||||
|
||||
"github.com/ncw/rclone/fs"
|
||||
"github.com/ncw/rclone/fs/accounting"
|
||||
"github.com/pkg/errors"
|
||||
"golang.org/x/sync/errgroup"
|
||||
)
|
||||
|
||||
const (
|
||||
multithreadChunkSize = 64 << 10
|
||||
multithreadChunkSizeMask = multithreadChunkSize - 1
|
||||
multithreadBufferSize = 32 * 1024
|
||||
)
|
||||
|
||||
// state for a multi-thread copy
|
||||
type multiThreadCopyState struct {
|
||||
ctx context.Context
|
||||
partSize int64
|
||||
size int64
|
||||
wc fs.WriterAtCloser
|
||||
src fs.Object
|
||||
acc *accounting.Account
|
||||
streams int
|
||||
}
|
||||
|
||||
// Copy a single stream into place
|
||||
func (mc *multiThreadCopyState) copyStream(stream int) (err error) {
|
||||
defer func() {
|
||||
if err != nil {
|
||||
fs.Debugf(mc.src, "multi-thread copy: stream %d/%d failed: %v", stream+1, mc.streams, err)
|
||||
}
|
||||
}()
|
||||
start := int64(stream) * mc.partSize
|
||||
if start >= mc.size {
|
||||
return nil
|
||||
}
|
||||
end := start + mc.partSize
|
||||
if end > mc.size {
|
||||
end = mc.size
|
||||
}
|
||||
|
||||
fs.Debugf(mc.src, "multi-thread copy: stream %d/%d (%d-%d) size %v starting", stream+1, mc.streams, start, end, fs.SizeSuffix(end-start))
|
||||
|
||||
rc, err := newReOpen(mc.src, nil, &fs.RangeOption{Start: start, End: end - 1}, fs.Config.LowLevelRetries)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "multpart copy: failed to open source")
|
||||
}
|
||||
defer fs.CheckClose(rc, &err)
|
||||
|
||||
// Copy the data
|
||||
buf := make([]byte, multithreadBufferSize)
|
||||
offset := start
|
||||
for {
|
||||
// Check if context cancelled and exit if so
|
||||
if mc.ctx.Err() != nil {
|
||||
return mc.ctx.Err()
|
||||
}
|
||||
nr, er := rc.Read(buf)
|
||||
if nr > 0 {
|
||||
err = mc.acc.AccountRead(nr)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "multpart copy: accounting failed")
|
||||
}
|
||||
nw, ew := mc.wc.WriteAt(buf[0:nr], offset)
|
||||
if nw > 0 {
|
||||
offset += int64(nw)
|
||||
}
|
||||
if ew != nil {
|
||||
return errors.Wrap(ew, "multpart copy: write failed")
|
||||
}
|
||||
if nr != nw {
|
||||
return errors.Wrap(io.ErrShortWrite, "multpart copy")
|
||||
}
|
||||
}
|
||||
if er != nil {
|
||||
if er != io.EOF {
|
||||
return errors.Wrap(er, "multpart copy: read failed")
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if offset != end {
|
||||
return errors.Errorf("multpart copy: wrote %d bytes but expected to write %d", offset-start, end-start)
|
||||
}
|
||||
|
||||
fs.Debugf(mc.src, "multi-thread copy: stream %d/%d (%d-%d) size %v finished", stream+1, mc.streams, start, end, fs.SizeSuffix(end-start))
|
||||
return nil
|
||||
}
|
||||
|
||||
// Calculate the chunk sizes and updated number of streams
|
||||
func (mc *multiThreadCopyState) calculateChunks() {
|
||||
partSize := mc.size / int64(mc.streams)
|
||||
// Round partition size up so partSize * streams >= size
|
||||
if (mc.size % int64(mc.streams)) != 0 {
|
||||
partSize++
|
||||
}
|
||||
// round partSize up to nearest multithreadChunkSize boundary
|
||||
mc.partSize = (partSize + multithreadChunkSizeMask) &^ multithreadChunkSizeMask
|
||||
// recalculate number of streams
|
||||
mc.streams = int(mc.size / mc.partSize)
|
||||
// round streams up so partSize * streams >= size
|
||||
if (mc.size % mc.partSize) != 0 {
|
||||
mc.streams++
|
||||
}
|
||||
}
|
||||
|
||||
// Copy src to (f, remote) using streams download threads and the OpenWriterAt feature
|
||||
func multiThreadCopy(f fs.Fs, remote string, src fs.Object, streams int) (newDst fs.Object, err error) {
|
||||
openWriterAt := f.Features().OpenWriterAt
|
||||
if openWriterAt == nil {
|
||||
return nil, errors.New("multi-thread copy: OpenWriterAt not supported")
|
||||
}
|
||||
if src.Size() < 0 {
|
||||
return nil, errors.New("multi-thread copy: can't copy unknown sized file")
|
||||
}
|
||||
if src.Size() == 0 {
|
||||
return nil, errors.New("multi-thread copy: can't copy zero sized file")
|
||||
}
|
||||
|
||||
g, ctx := errgroup.WithContext(context.Background())
|
||||
mc := &multiThreadCopyState{
|
||||
ctx: ctx,
|
||||
size: src.Size(),
|
||||
src: src,
|
||||
streams: streams,
|
||||
}
|
||||
mc.calculateChunks()
|
||||
|
||||
// Make accounting
|
||||
mc.acc = accounting.NewAccount(nil, src)
|
||||
defer fs.CheckClose(mc.acc, &err)
|
||||
|
||||
// create write file handle
|
||||
mc.wc, err = openWriterAt(remote, mc.size)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "multpart copy: failed to open destination")
|
||||
}
|
||||
defer fs.CheckClose(mc.wc, &err)
|
||||
|
||||
fs.Debugf(src, "Starting multi-thread copy with %d parts of size %v", mc.streams, fs.SizeSuffix(mc.partSize))
|
||||
for stream := 0; stream < mc.streams; stream++ {
|
||||
stream := stream
|
||||
g.Go(func() (err error) {
|
||||
return mc.copyStream(stream)
|
||||
})
|
||||
}
|
||||
err = g.Wait()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
obj, err := f.NewObject(remote)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "multi-thread copy: failed to find object after copy")
|
||||
}
|
||||
|
||||
err = obj.SetModTime(src.ModTime())
|
||||
switch err {
|
||||
case nil, fs.ErrorCantSetModTime, fs.ErrorCantSetModTimeWithoutDelete:
|
||||
default:
|
||||
return nil, errors.Wrap(err, "multi-thread copy: failed to set modification time")
|
||||
}
|
||||
|
||||
fs.Debugf(src, "Finished multi-thread copy with %d parts of size %v", mc.streams, fs.SizeSuffix(mc.partSize))
|
||||
return obj, nil
|
||||
}
|
70
fs/operations/multithread_test.go
Normal file
70
fs/operations/multithread_test.go
Normal file
|
@ -0,0 +1,70 @@
|
|||
package operations
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/ncw/rclone/fs"
|
||||
"github.com/ncw/rclone/fstest"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestMultithreadCalculateChunks(t *testing.T) {
|
||||
for _, test := range []struct {
|
||||
size int64
|
||||
streams int
|
||||
wantPartSize int64
|
||||
wantStreams int
|
||||
}{
|
||||
{size: 1, streams: 10, wantPartSize: multithreadChunkSize, wantStreams: 1},
|
||||
{size: 1 << 20, streams: 1, wantPartSize: 1 << 20, wantStreams: 1},
|
||||
{size: 1 << 20, streams: 2, wantPartSize: 1 << 19, wantStreams: 2},
|
||||
{size: (1 << 20) + 1, streams: 2, wantPartSize: (1 << 19) + multithreadChunkSize, wantStreams: 2},
|
||||
{size: (1 << 20) - 1, streams: 2, wantPartSize: (1 << 19), wantStreams: 2},
|
||||
} {
|
||||
t.Run(fmt.Sprintf("%+v", test), func(t *testing.T) {
|
||||
mc := &multiThreadCopyState{
|
||||
size: test.size,
|
||||
streams: test.streams,
|
||||
}
|
||||
mc.calculateChunks()
|
||||
assert.Equal(t, test.wantPartSize, mc.partSize)
|
||||
assert.Equal(t, test.wantStreams, mc.streams)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestMultithreadCopy(t *testing.T) {
|
||||
r := fstest.NewRun(t)
|
||||
defer r.Finalise()
|
||||
|
||||
for _, test := range []struct {
|
||||
size int
|
||||
streams int
|
||||
}{
|
||||
{size: multithreadChunkSize*2 - 1, streams: 2},
|
||||
{size: multithreadChunkSize * 2, streams: 2},
|
||||
{size: multithreadChunkSize*2 + 1, streams: 2},
|
||||
} {
|
||||
t.Run(fmt.Sprintf("%+v", test), func(t *testing.T) {
|
||||
contents := fstest.RandomString(test.size)
|
||||
t1 := fstest.Time("2001-02-03T04:05:06.499999999Z")
|
||||
file1 := r.WriteObject("file1", contents, t1)
|
||||
fstest.CheckItems(t, r.Fremote, file1)
|
||||
fstest.CheckItems(t, r.Flocal)
|
||||
|
||||
src, err := r.Fremote.NewObject("file1")
|
||||
require.NoError(t, err)
|
||||
|
||||
dst, err := multiThreadCopy(r.Flocal, "file1", src, 2)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, src.Size(), dst.Size())
|
||||
assert.Equal(t, "file1", dst.Remote())
|
||||
|
||||
fstest.CheckListingWithPrecision(t, r.Fremote, []fstest.Item{file1}, nil, fs.ModTimeNotSupported)
|
||||
require.NoError(t, dst.Remove())
|
||||
})
|
||||
}
|
||||
|
||||
}
|
|
@ -292,38 +292,56 @@ func Copy(f fs.Fs, dst fs.Object, remote string, src fs.Object) (newDst fs.Objec
|
|||
}
|
||||
// If can't server side copy, do it manually
|
||||
if err == fs.ErrorCantCopy {
|
||||
var in0 io.ReadCloser
|
||||
in0, err = newReOpen(src, hashOption, nil, fs.Config.LowLevelRetries)
|
||||
if err != nil {
|
||||
err = errors.Wrap(err, "failed to open source object")
|
||||
} else {
|
||||
if src.Size() == -1 {
|
||||
// -1 indicates unknown size. Use Rcat to handle both remotes supporting and not supporting PutStream.
|
||||
if doUpdate {
|
||||
actionTaken = "Copied (Rcat, replaced existing)"
|
||||
} else {
|
||||
actionTaken = "Copied (Rcat, new)"
|
||||
}
|
||||
dst, err = Rcat(f, remote, in0, src.ModTime())
|
||||
newDst = dst
|
||||
if doOpenWriterAt := f.Features().OpenWriterAt; doOpenWriterAt != nil && src.Size() >= int64(fs.Config.MultiThreadCutoff) && fs.Config.MultiThreadStreams > 1 {
|
||||
// Number of streams proportional to size
|
||||
streams := src.Size() / int64(fs.Config.MultiThreadCutoff)
|
||||
// With maximum
|
||||
if streams > int64(fs.Config.MultiThreadStreams) {
|
||||
streams = int64(fs.Config.MultiThreadStreams)
|
||||
}
|
||||
if streams < 2 {
|
||||
streams = 2
|
||||
}
|
||||
dst, err = multiThreadCopy(f, remote, src, int(streams))
|
||||
if doUpdate {
|
||||
actionTaken = "Multi-thread Copied (replaced existing)"
|
||||
} else {
|
||||
in := accounting.NewAccount(in0, src).WithBuffer() // account and buffer the transfer
|
||||
var wrappedSrc fs.ObjectInfo = src
|
||||
// We try to pass the original object if possible
|
||||
if src.Remote() != remote {
|
||||
wrappedSrc = &overrideRemoteObject{Object: src, remote: remote}
|
||||
}
|
||||
if doUpdate {
|
||||
actionTaken = "Copied (replaced existing)"
|
||||
err = dst.Update(in, wrappedSrc, hashOption)
|
||||
} else {
|
||||
actionTaken = "Copied (new)"
|
||||
dst, err = f.Put(in, wrappedSrc, hashOption)
|
||||
}
|
||||
closeErr := in.Close()
|
||||
if err == nil {
|
||||
actionTaken = "Multi-thread Copied (new)"
|
||||
}
|
||||
} else {
|
||||
var in0 io.ReadCloser
|
||||
in0, err = newReOpen(src, hashOption, nil, fs.Config.LowLevelRetries)
|
||||
if err != nil {
|
||||
err = errors.Wrap(err, "failed to open source object")
|
||||
} else {
|
||||
if src.Size() == -1 {
|
||||
// -1 indicates unknown size. Use Rcat to handle both remotes supporting and not supporting PutStream.
|
||||
if doUpdate {
|
||||
actionTaken = "Copied (Rcat, replaced existing)"
|
||||
} else {
|
||||
actionTaken = "Copied (Rcat, new)"
|
||||
}
|
||||
dst, err = Rcat(f, remote, in0, src.ModTime())
|
||||
newDst = dst
|
||||
err = closeErr
|
||||
} else {
|
||||
in := accounting.NewAccount(in0, src).WithBuffer() // account and buffer the transfer
|
||||
var wrappedSrc fs.ObjectInfo = src
|
||||
// We try to pass the original object if possible
|
||||
if src.Remote() != remote {
|
||||
wrappedSrc = &overrideRemoteObject{Object: src, remote: remote}
|
||||
}
|
||||
if doUpdate {
|
||||
actionTaken = "Copied (replaced existing)"
|
||||
err = dst.Update(in, wrappedSrc, hashOption)
|
||||
} else {
|
||||
actionTaken = "Copied (new)"
|
||||
dst, err = f.Put(in, wrappedSrc, hashOption)
|
||||
}
|
||||
closeErr := in.Close()
|
||||
if err == nil {
|
||||
newDst = dst
|
||||
err = closeErr
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue