From 3dfcfc2caa5179642337268b983d91acb349be60 Mon Sep 17 00:00:00 2001 From: Nick Craig-Wood Date: Mon, 21 Aug 2023 17:14:13 +0100 Subject: [PATCH] operations: document multi-thread copy and tweak defaults --- docs/content/docs.md | 60 ++++++++--------- docs/content/overview.md | 96 +++++++++++++++------------- fs/config.go | 16 ++--- fs/config/configflags/configflags.go | 2 +- fs/operations/operations.go | 11 +--- 5 files changed, 92 insertions(+), 93 deletions(-) diff --git a/docs/content/docs.md b/docs/content/docs.md index 4f764adca..c229bef19 100644 --- a/docs/content/docs.md +++ b/docs/content/docs.md @@ -1531,36 +1531,51 @@ As a final hint, size is not the only factor: block size (or similar concept) can have an impact. In one case, we observed that exact multiples of 16k performed much better than other values. +### --multi-thread-chunk-size=SizeSuffix ### + +Normally the chunk size for multi thread copies is set by the backend. +However some backends such as `local` and `smb` (which implement +`OpenWriterAt` but not `OpenChunkWriter`) don't have a natural chunk +size. + +In this case the value of this option is used (default 64Mi). + ### --multi-thread-cutoff=SIZE ### -When downloading files to the local backend above this size, rclone -will use multiple threads to download the file (default 250M). +When transferring files to capable backends above this size, rclone +will use multiple threads to transfer the file (default 256Mi). -Rclone preallocates the file (using `fallocate(FALLOC_FL_KEEP_SIZE)` -on unix or `NTSetInformationFile` on Windows both of which takes no -time) then each thread writes directly into the file at the correct -place. This means that rclone won't create fragmented or sparse files -and there won't be any assembly time at the end of the transfer. +Capable backends are marked in the +[overview](/overview/#optional-features) as `MultithreadUpload`. (They +need to implement either `OpenWriterAt` or `OpenChunkWriter`). 
These +include `local`, `s3`, `azureblob`, `b2` and `smb`. -The number of threads used to download is controlled by +On the local disk, rclone preallocates the file (using +`fallocate(FALLOC_FL_KEEP_SIZE)` on unix or `NTSetInformationFile` on +Windows both of which take no time) then each thread writes directly +into the file at the correct place. This means that rclone won't +create fragmented or sparse files and there won't be any assembly time +at the end of the transfer. + +The number of threads used to transfer is controlled by `--multi-thread-streams`. Use `-vv` if you wish to see info about the threads. This will work with the `sync`/`copy`/`move` commands and friends -`copyto`/`moveto`. Multi thread downloads will be used with `rclone +`copyto`/`moveto`. Multi thread transfers will be used with `rclone mount` and `rclone serve` if `--vfs-cache-mode` is set to `writes` or above. -**NB** that this **only** works for a local destination but will work -with any source. +**NB** that this **only** works with supported backends as the destination +but will work with any backend as the source. -**NB** that multi thread copies are disabled for local to local copies +**NB** that multi-thread copies are disabled for local to local copies as they are faster without unless `--multi-thread-streams` is set explicitly. -**NB** on Windows using multi-thread downloads will cause the -resulting files to be [sparse](https://en.wikipedia.org/wiki/Sparse_file). +**NB** on Windows using multi-thread transfers to the local disk will +cause the resulting files to be [sparse](https://en.wikipedia.org/wiki/Sparse_file). 
Use `--local-no-sparse` to disable sparse files (which may cause long delays at the start of downloads) or disable multi-thread downloads with `--multi-thread-streams 0` @@ -1568,21 +1583,8 @@ with `--multi-thread-streams 0` ### --multi-thread-streams=N ### When using multi thread downloads (see above `--multi-thread-cutoff`) -this sets the maximum number of streams to use. Set to `0` to disable -multi thread downloads (Default 4). - -Exactly how many streams rclone uses for the download depends on the -size of the file. To calculate the number of download streams Rclone -divides the size of the file by the `--multi-thread-cutoff` and rounds -up, up to the maximum set with `--multi-thread-streams`. - -So if `--multi-thread-cutoff 250M` and `--multi-thread-streams 4` are -in effect (the defaults): - -- 0..250 MiB files will be downloaded with 1 stream -- 250..500 MiB files will be downloaded with 2 streams -- 500..750 MiB files will be downloaded with 3 streams -- 750+ MiB files will be downloaded with 4 streams +this sets the number of streams to use. Set to `0` to disable multi +thread downloads (Default 4). ### --no-check-dest ### diff --git a/docs/content/overview.md b/docs/content/overview.md index 0af2fb45e..1ed89ba1c 100644 --- a/docs/content/overview.md +++ b/docs/content/overview.md @@ -465,51 +465,51 @@ See [the metadata docs](/docs/#metadata) for more info. All rclone remotes support a base command set. Other features depend upon backend-specific capabilities. 
-| Name | Purge | Copy | Move | DirMove | CleanUp | ListR | StreamUpload | LinkSharing | About | EmptyDir | -| ---------------------------- |:-----:|:----:|:----:|:-------:|:-------:|:-----:|:------------:|:------------:|:-----:|:--------:| -| 1Fichier | No | Yes | Yes | No | No | No | No | Yes | No | Yes | -| Akamai Netstorage | Yes | No | No | No | No | Yes | Yes | No | No | Yes | -| Amazon Drive | Yes | No | Yes | Yes | No | No | No | No | No | Yes | -| Amazon S3 (or S3 compatible) | No | Yes | No | No | Yes | Yes | Yes | Yes | No | No | -| Backblaze B2 | No | Yes | No | No | Yes | Yes | Yes | Yes | No | No | -| Box | Yes | Yes | Yes | Yes | Yes ‡‡ | No | Yes | Yes | Yes | Yes | -| Citrix ShareFile | Yes | Yes | Yes | Yes | No | No | No | No | No | Yes | -| Dropbox | Yes | Yes | Yes | Yes | No | No | Yes | Yes | Yes | Yes | -| Enterprise File Fabric | Yes | Yes | Yes | Yes | Yes | No | No | No | No | Yes | -| FTP | No | No | Yes | Yes | No | No | Yes | No | No | Yes | -| Google Cloud Storage | Yes | Yes | No | No | No | Yes | Yes | No | No | No | -| Google Drive | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | -| Google Photos | No | No | No | No | No | No | No | No | No | No | -| HDFS | Yes | No | Yes | Yes | No | No | Yes | No | Yes | Yes | -| HiDrive | Yes | Yes | Yes | Yes | No | No | Yes | No | No | Yes | -| HTTP | No | No | No | No | No | No | No | No | No | Yes | -| Internet Archive | No | Yes | No | No | Yes | Yes | No | Yes | Yes | No | -| Jottacloud | Yes | Yes | Yes | Yes | Yes | Yes | No | Yes | Yes | Yes | -| Koofr | Yes | Yes | Yes | Yes | No | No | Yes | Yes | Yes | Yes | -| Mail.ru Cloud | Yes | Yes | Yes | Yes | Yes | No | No | Yes | Yes | Yes | -| Mega | Yes | No | Yes | Yes | Yes | No | No | Yes | Yes | Yes | -| Memory | No | Yes | No | No | No | Yes | Yes | No | No | No | -| Microsoft Azure Blob Storage | Yes | Yes | No | No | No | Yes | Yes | No | No | No | -| Microsoft OneDrive | Yes | Yes | Yes | Yes | Yes | No | No | Yes | 
Yes | Yes | -| OpenDrive | Yes | Yes | Yes | Yes | No | No | No | No | No | Yes | -| OpenStack Swift | Yes † | Yes | No | No | No | Yes | Yes | No | Yes | No | -| Oracle Object Storage | No | Yes | No | No | Yes | Yes | Yes | No | No | No | -| pCloud | Yes | Yes | Yes | Yes | Yes | No | No | Yes | Yes | Yes | -| PikPak | Yes | Yes | Yes | Yes | Yes | No | No | Yes | Yes | Yes | -| premiumize.me | Yes | No | Yes | Yes | No | No | No | Yes | Yes | Yes | -| put.io | Yes | No | Yes | Yes | Yes | No | Yes | No | Yes | Yes | -| QingStor | No | Yes | No | No | Yes | Yes | No | No | No | No | -| Seafile | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | -| SFTP | No | No | Yes | Yes | No | No | Yes | No | Yes | Yes | -| Sia | No | No | No | No | No | No | Yes | No | No | Yes | -| SMB | No | No | Yes | Yes | No | No | Yes | No | No | Yes | -| SugarSync | Yes | Yes | Yes | Yes | No | No | Yes | Yes | No | Yes | -| Storj | Yes ☨ | Yes | Yes | No | No | Yes | Yes | Yes | No | No | -| Uptobox | No | Yes | Yes | Yes | No | No | No | No | No | No | -| WebDAV | Yes | Yes | Yes | Yes | No | No | Yes ‡ | No | Yes | Yes | -| Yandex Disk | Yes | Yes | Yes | Yes | Yes | No | Yes | Yes | Yes | Yes | -| Zoho WorkDrive | Yes | Yes | Yes | Yes | No | No | No | No | Yes | Yes | -| The local filesystem | Yes | No | Yes | Yes | No | No | Yes | No | Yes | Yes | +| Name | Purge | Copy | Move | DirMove | CleanUp | ListR | StreamUpload | MultithreadUpload | LinkSharing | About | EmptyDir | +| ---------------------------- |:-----:|:----:|:----:|:-------:|:-------:|:-----:|:------------:|:------------------|:------------:|:-----:|:--------:| +| 1Fichier | No | Yes | Yes | No | No | No | No | No | Yes | No | Yes | +| Akamai Netstorage | Yes | No | No | No | No | Yes | Yes | No | No | No | Yes | +| Amazon Drive | Yes | No | Yes | Yes | No | No | No | No | No | No | Yes | +| Amazon S3 (or S3 compatible) | No | Yes | No | No | Yes | Yes | Yes | Yes | Yes | No | No | +| Backblaze B2 | No | 
Yes | No | No | Yes | Yes | Yes | Yes | Yes | No | No | +| Box | Yes | Yes | Yes | Yes | Yes ‡‡ | No | Yes | No | Yes | Yes | Yes | +| Citrix ShareFile | Yes | Yes | Yes | Yes | No | No | No | No | No | No | Yes | +| Dropbox | Yes | Yes | Yes | Yes | No | No | Yes | No | Yes | Yes | Yes | +| Enterprise File Fabric | Yes | Yes | Yes | Yes | Yes | No | No | No | No | No | Yes | +| FTP | No | No | Yes | Yes | No | No | Yes | No | No | No | Yes | +| Google Cloud Storage | Yes | Yes | No | No | No | Yes | Yes | No | No | No | No | +| Google Drive | Yes | Yes | Yes | Yes | Yes | Yes | Yes | No | Yes | Yes | Yes | +| Google Photos | No | No | No | No | No | No | No | No | No | No | No | +| HDFS | Yes | No | Yes | Yes | No | No | Yes | No | No | Yes | Yes | +| HiDrive | Yes | Yes | Yes | Yes | No | No | Yes | No | No | No | Yes | +| HTTP | No | No | No | No | No | No | No | No | No | No | Yes | +| Internet Archive | No | Yes | No | No | Yes | Yes | No | No | Yes | Yes | No | +| Jottacloud | Yes | Yes | Yes | Yes | Yes | Yes | No | No | Yes | Yes | Yes | +| Koofr | Yes | Yes | Yes | Yes | No | No | Yes | No | Yes | Yes | Yes | +| Mail.ru Cloud | Yes | Yes | Yes | Yes | Yes | No | No | No | Yes | Yes | Yes | +| Mega | Yes | No | Yes | Yes | Yes | No | No | No | Yes | Yes | Yes | +| Memory | No | Yes | No | No | No | Yes | Yes | No | No | No | No | +| Microsoft Azure Blob Storage | Yes | Yes | No | No | No | Yes | Yes | Yes | No | No | No | +| Microsoft OneDrive | Yes | Yes | Yes | Yes | Yes | No | No | No | Yes | Yes | Yes | +| OpenDrive | Yes | Yes | Yes | Yes | No | No | No | No | No | No | Yes | +| OpenStack Swift | Yes † | Yes | No | No | No | Yes | Yes | No | No | Yes | No | +| Oracle Object Storage | No | Yes | No | No | Yes | Yes | Yes | No | No | No | No | +| pCloud | Yes | Yes | Yes | Yes | Yes | No | No | No | Yes | Yes | Yes | +| PikPak | Yes | Yes | Yes | Yes | Yes | No | No | No | Yes | Yes | Yes | +| premiumize.me | Yes | No | Yes | Yes | No | No | No | No | 
Yes | Yes | Yes | +| put.io | Yes | No | Yes | Yes | Yes | No | Yes | No | No | Yes | Yes | +| QingStor | No | Yes | No | No | Yes | Yes | No | No | No | No | No | +| Seafile | Yes | Yes | Yes | Yes | Yes | Yes | Yes | No | Yes | Yes | Yes | +| SFTP | No | No | Yes | Yes | No | No | Yes | No | No | Yes | Yes | +| Sia | No | No | No | No | No | No | Yes | No | No | No | Yes | +| SMB | No | No | Yes | Yes | No | No | Yes | Yes | No | No | Yes | +| SugarSync | Yes | Yes | Yes | Yes | No | No | Yes | No | Yes | No | Yes | +| Storj | Yes ☨ | Yes | Yes | No | No | Yes | Yes | No | Yes | No | No | +| Uptobox | No | Yes | Yes | Yes | No | No | No | No | No | No | No | +| WebDAV | Yes | Yes | Yes | Yes | No | No | Yes ‡ | No | No | Yes | Yes | +| Yandex Disk | Yes | Yes | Yes | Yes | Yes | No | Yes | No | Yes | Yes | Yes | +| Zoho WorkDrive | Yes | Yes | Yes | Yes | No | No | No | No | No | Yes | Yes | +| The local filesystem | Yes | No | Yes | Yes | No | No | Yes | Yes | No | Yes | Yes | ### Purge ### @@ -573,6 +573,12 @@ Some remotes allow files to be uploaded without knowing the file size in advance. This allows certain operations to work without spooling the file to local disk first, e.g. `rclone rcat`. +### MultithreadUpload ### + +Some remotes allow transfers to the remote to be sent as chunks in +parallel. If this is supported then rclone will use multi-thread +copying to transfer files much faster. 
+ ### LinkSharing ### Sets the necessary permissions on a file or folder and prints a link diff --git a/fs/config.go b/fs/config.go index c0bd306f8..9c8449913 100644 --- a/fs/config.go +++ b/fs/config.go @@ -94,7 +94,6 @@ type ConfigInfo struct { SuffixKeepExtension bool UseListR bool BufferSize SizeSuffix - MultiThreadWriteBufferSize SizeSuffix BwLimit BwTimetable BwLimitFile BwTimetable TPSLimit float64 @@ -127,7 +126,9 @@ type ConfigInfo struct { ClientKey string // Client Side Key MultiThreadCutoff SizeSuffix MultiThreadStreams int - MultiThreadSet bool // whether MultiThreadStreams was set (set in fs/config/configflags) + MultiThreadSet bool // whether MultiThreadStreams was set (set in fs/config/configflags) + MultiThreadChunkSize SizeSuffix // Chunk size for multi-thread downloads / uploads, if not set by filesystem + MultiThreadWriteBufferSize SizeSuffix OrderBy string // instructions on how to order the transfer UploadHeaders []*HTTPOption DownloadHeaders []*HTTPOption @@ -145,9 +146,8 @@ type ConfigInfo struct { Metadata bool ServerSideAcrossConfigs bool TerminalColorMode TerminalColorMode - DefaultTime Time // time that directories with no time should display - Inplace bool // Download directly to destination file instead of atomic download to temp/rename - MultiThreadChunkSize SizeSuffix // Chunk size for multi-thread downloads / uploads, if not set by filesystem + DefaultTime Time // time that directories with no time should display + Inplace bool // Download directly to destination file instead of atomic download to temp/rename } // NewConfig creates a new config with everything set to the default @@ -172,7 +172,6 @@ func NewConfig() *ConfigInfo { c.MaxDepth = -1 c.DataRateUnit = "bytes" c.BufferSize = SizeSuffix(16 << 20) - c.MultiThreadWriteBufferSize = SizeSuffix(128 * 1024) c.UserAgent = "rclone/" + Version c.StreamingUploadCutoff = SizeSuffix(100 * 1024) c.MaxStatsGroups = 1000 @@ -183,9 +182,10 @@ func NewConfig() *ConfigInfo { c.MaxBacklog = 
10000 // We do not want to set the default here. We use this variable being empty as part of the fall-through of options. // c.StatsOneLineDateFormat = "2006/01/02 15:04:05 - " - c.MultiThreadCutoff = SizeSuffix(250 * 1024 * 1024) + c.MultiThreadCutoff = SizeSuffix(256 * 1024 * 1024) c.MultiThreadStreams = 4 - c.MultiThreadChunkSize = SizeSuffix(50 * 1024 * 1024) + c.MultiThreadChunkSize = SizeSuffix(64 * 1024 * 1024) + c.MultiThreadWriteBufferSize = SizeSuffix(128 * 1024) c.TrackRenamesStrategy = "hash" c.FsCacheExpireDuration = 300 * time.Second diff --git a/fs/config/configflags/configflags.go b/fs/config/configflags/configflags.go index 36ff2cb6f..37ed76e2c 100644 --- a/fs/config/configflags/configflags.go +++ b/fs/config/configflags/configflags.go @@ -125,7 +125,7 @@ func AddFlags(ci *fs.ConfigInfo, flagSet *pflag.FlagSet) { flags.StringVarP(flagSet, &ci.ClientCert, "client-cert", "", ci.ClientCert, "Client SSL certificate (PEM) for mutual TLS auth", "Networking") flags.StringVarP(flagSet, &ci.ClientKey, "client-key", "", ci.ClientKey, "Client SSL private key (PEM) for mutual TLS auth", "Networking") flags.FVarP(flagSet, &ci.MultiThreadCutoff, "multi-thread-cutoff", "", "Use multi-thread downloads for files above this size", "Copy") - flags.IntVarP(flagSet, &ci.MultiThreadStreams, "multi-thread-streams", "", ci.MultiThreadStreams, "Max number of streams to use for multi-thread downloads", "Copy") + flags.IntVarP(flagSet, &ci.MultiThreadStreams, "multi-thread-streams", "", ci.MultiThreadStreams, "Number of streams to use for multi-thread downloads", "Copy") flags.FVarP(flagSet, &ci.MultiThreadWriteBufferSize, "multi-thread-write-buffer-size", "", "In memory buffer size for writing when in multi-thread mode", "Copy") flags.FVarP(flagSet, &ci.MultiThreadChunkSize, "multi-thread-chunk-size", "", "Chunk size for multi-thread downloads / uploads, if not set by filesystem", "Copy") flags.BoolVarP(flagSet, &ci.UseJSONLog, "use-json-log", "", ci.UseJSONLog, "Use json 
log format", "Logging") diff --git a/fs/operations/operations.go b/fs/operations/operations.go index 0d032b5b4..c48ca4169 100644 --- a/fs/operations/operations.go +++ b/fs/operations/operations.go @@ -419,16 +419,7 @@ func Copy(ctx context.Context, f fs.Fs, dst fs.Object, remote string, src fs.Obj }) } if doMultiThreadCopy(ctx, f, src) { - // Number of streams proportional to size - streams := src.Size() / int64(ci.MultiThreadCutoff) - // With maximum - if streams > int64(ci.MultiThreadStreams) { - streams = int64(ci.MultiThreadStreams) - } - if streams < 2 { - streams = 2 - } - dst, err = multiThreadCopy(ctx, f, remotePartial, src, int(streams), tr) + dst, err = multiThreadCopy(ctx, f, remotePartial, src, ci.MultiThreadStreams, tr) if err == nil { newDst = dst }