sftp: Fix performance regression by re-enabling concurrent writes #5197

Betweeen rclone v1.54 and v1.55 there was an approx 3x performance
regression when transferring to distant SFTP servers (in particular
rsync.net).

This turned out to be due to the library github.com/pkg/sftp rclone
uses. Concurrent writes used to be enabled in this library by default
(for v1.12.0 as used in rclone v1.54) but they are no longer enabled
(for v1.13.0 as used in rclone v1.55) for safety reasons and it is
necessary to enable them specifically.

The safety concerns are due to the uncertainty as to whether writes
come in order and whether a half completed file might have holes in
it. This isn't a problem for rclone since a) it doesn't restart
uploads and b) it has a post-transfer checksum test.

This change introduces a new flag `--sftp-disable-concurrent-writes`
to control the feature which defaults to false, meaning that
concurrent writes are enabled as in v1.54.

However this isn't quite enough to fix the problem as the sftp library
needs to be able to sniff the size of the stream from the reader
passed in, so this also adds a `Size` interface to the reader to
enable this. This involved a patch to the library.

The library was reverted to v1.12.0 for v1.55.1 - this patch installs
v1.13.0+master to fix the Size interface problem.

See: https://github.com/pkg/sftp/issues/426
This commit is contained in:
Nick Craig-Wood 2021-04-05 10:32:20 +01:00
parent 4b1d28550a
commit 0537791d14
3 changed files with 60 additions and 29 deletions

View file

@ -224,6 +224,17 @@ have a server which returns
Then you may need to enable this flag.
If concurrent reads are disabled, the use_fstat option is ignored.
`,
Advanced: true,
}, {
Name: "disable_concurrent_writes",
Default: false,
Help: `If set don't use concurrent writes
Normally rclone uses concurrent writes to upload files. This improves
the performance greatly, especially for distant servers.
This option disables concurrent writes should that be necessary.
`,
Advanced: true,
}, {
@ -244,29 +255,30 @@ Set to 0 to keep connections indefinitely.
// Options defines the configuration for this backend
type Options struct {
Host string `config:"host"`
User string `config:"user"`
Port string `config:"port"`
Pass string `config:"pass"`
KeyPem string `config:"key_pem"`
KeyFile string `config:"key_file"`
KeyFilePass string `config:"key_file_pass"`
PubKeyFile string `config:"pubkey_file"`
KnownHostsFile string `config:"known_hosts_file"`
KeyUseAgent bool `config:"key_use_agent"`
UseInsecureCipher bool `config:"use_insecure_cipher"`
DisableHashCheck bool `config:"disable_hashcheck"`
AskPassword bool `config:"ask_password"`
PathOverride string `config:"path_override"`
SetModTime bool `config:"set_modtime"`
Md5sumCommand string `config:"md5sum_command"`
Sha1sumCommand string `config:"sha1sum_command"`
SkipLinks bool `config:"skip_links"`
Subsystem string `config:"subsystem"`
ServerCommand string `config:"server_command"`
UseFstat bool `config:"use_fstat"`
DisableConcurrentReads bool `config:"disable_concurrent_reads"`
IdleTimeout fs.Duration `config:"idle_timeout"`
Host string `config:"host"`
User string `config:"user"`
Port string `config:"port"`
Pass string `config:"pass"`
KeyPem string `config:"key_pem"`
KeyFile string `config:"key_file"`
KeyFilePass string `config:"key_file_pass"`
PubKeyFile string `config:"pubkey_file"`
KnownHostsFile string `config:"known_hosts_file"`
KeyUseAgent bool `config:"key_use_agent"`
UseInsecureCipher bool `config:"use_insecure_cipher"`
DisableHashCheck bool `config:"disable_hashcheck"`
AskPassword bool `config:"ask_password"`
PathOverride string `config:"path_override"`
SetModTime bool `config:"set_modtime"`
Md5sumCommand string `config:"md5sum_command"`
Sha1sumCommand string `config:"sha1sum_command"`
SkipLinks bool `config:"skip_links"`
Subsystem string `config:"subsystem"`
ServerCommand string `config:"server_command"`
UseFstat bool `config:"use_fstat"`
DisableConcurrentReads bool `config:"disable_concurrent_reads"`
DisableConcurrentWrites bool `config:"disable_concurrent_writes"`
IdleTimeout fs.Duration `config:"idle_timeout"`
}
// Fs stores the interface to the remote SFTP files
@ -414,8 +426,8 @@ func (f *Fs) newSftpClient(conn *ssh.Client, opts ...sftp.ClientOption) (*sftp.C
opts = opts[:len(opts):len(opts)] // make sure we don't overwrite the callers opts
opts = append(opts,
sftp.UseFstat(f.opt.UseFstat),
// FIXME disabled after library reversion
// sftp.UseConcurrentReads(!f.opt.DisableConcurrentReads),
sftp.UseConcurrentReads(!f.opt.DisableConcurrentReads),
sftp.UseConcurrentWrites(!f.opt.DisableConcurrentWrites),
)
if f.opt.DisableConcurrentReads { // FIXME
fs.Errorf(f, "Ignoring disable_concurrent_reads after library reversion - see #5197")
@ -1494,6 +1506,19 @@ func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (in io.Read
return in, nil
}
type sizeReader struct {
io.Reader
size int64
}
// Size returns the expected size of the stream
//
// It is used in sftpFile.ReadFrom as a hint to work out the
// concurrency needed
func (sr *sizeReader) Size() int64 {
return sr.size
}
// Update a remote sftp file using the data <in> and ModTime from <src>
func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) error {
o.fs.addTransfer() // Show transfer in progress
@ -1525,7 +1550,7 @@ func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, op
fs.Debugf(src, "Removed after failed upload: %v", err)
}
}
_, err = file.ReadFrom(in)
_, err = file.ReadFrom(&sizeReader{Reader: in, size: src.Size()})
if err != nil {
remove()
return errors.Wrap(err, "Update ReadFrom failed")

6
go.mod
View file

@ -46,7 +46,7 @@ require (
github.com/nsf/termbox-go v1.1.1-0.20210421210813-2ff630277754
github.com/patrickmn/go-cache v2.1.0+incompatible
github.com/pkg/errors v0.9.1
github.com/pkg/sftp v1.12.0
github.com/pkg/sftp v1.13.1-0.20210424083437-2b80967078b8
github.com/prometheus/client_golang v1.10.0
github.com/prometheus/common v0.20.0 // indirect
github.com/putdotio/go-putio/putio v0.0.0-20200123120452-16d982cac2b8
@ -69,11 +69,11 @@ require (
go.etcd.io/bbolt v1.3.5
go.uber.org/zap v1.16.0 // indirect
goftp.io/server v0.4.1
golang.org/x/crypto v0.0.0-20210415154028-4f45737414dc
golang.org/x/crypto v0.0.0-20210421170649-83a5a9bb288b
golang.org/x/net v0.0.0-20210415231046-e915ea6b2b7d
golang.org/x/oauth2 v0.0.0-20210413134643-5e61552d6c78
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c
golang.org/x/sys v0.0.0-20210419170143-37df388d1f33
golang.org/x/sys v0.0.0-20210423185535-09eb48e85fd7
golang.org/x/term v0.0.0-20210406210042-72f3dc4e9b72 // indirect
golang.org/x/text v0.3.6
golang.org/x/time v0.0.0-20210220033141-f8bda1e9f3ba

6
go.sum
View file

@ -522,6 +522,8 @@ github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINE
github.com/pkg/profile v1.2.1/go.mod h1:hJw3o1OdXxsrSjjVksARp5W95eeEaEfptyVZyv6JUPA=
github.com/pkg/sftp v1.12.0 h1:/f3b24xrDhkhddlaobPe2JgBqfdt+gC/NYl0QY9IOuI=
github.com/pkg/sftp v1.12.0/go.mod h1:fUqqXB5vEgVCZ131L+9say31RAri6aF6KDViawhxKK8=
github.com/pkg/sftp v1.13.1-0.20210424083437-2b80967078b8 h1:0sHotAvxm+h6PnYq2sz+xbbmPyzCMsubDzTMqT1Gbkw=
github.com/pkg/sftp v1.13.1-0.20210424083437-2b80967078b8/go.mod h1:3HaPG6Dq1ILlpPZRO0HVMrsydcdLt6HRDccSgb87qRg=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/posener/complete v1.1.1/go.mod h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndrE9hABlRI=
@ -722,6 +724,8 @@ golang.org/x/crypto v0.0.0-20201112155050-0c6587e931a9/go.mod h1:LzIPMQfyMNhhGPh
golang.org/x/crypto v0.0.0-20201221181555-eec23a3978ad/go.mod h1:jdWPYTVW3xRLrWPugEBEK3UY2ZEsg3UU495nc5E+M+I=
golang.org/x/crypto v0.0.0-20210415154028-4f45737414dc h1:+q90ECDSAQirdykUN6sPEiBXBsp8Csjcca8Oy7bgLTA=
golang.org/x/crypto v0.0.0-20210415154028-4f45737414dc/go.mod h1:T9bdIzuCu7OtxOm1hfPfRQxPLYneinmdGuTeoZ9dtd4=
golang.org/x/crypto v0.0.0-20210421170649-83a5a9bb288b h1:7mWr3k41Qtv8XlltBkDkl8LoP3mpSgBW8BUoxtEdbXg=
golang.org/x/crypto v0.0.0-20210421170649-83a5a9bb288b/go.mod h1:T9bdIzuCu7OtxOm1hfPfRQxPLYneinmdGuTeoZ9dtd4=
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8=
@ -896,6 +900,8 @@ golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20210403161142-5e06dd20ab57/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210419170143-37df388d1f33 h1:zah5VTTvBlVRELjcDwGLLaWRHZJQsBtplweVYCii0KM=
golang.org/x/sys v0.0.0-20210419170143-37df388d1f33/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423185535-09eb48e85fd7 h1:iGu644GcxtEcrInvDsQRCwJjtCIOlT2V7IRt6ah2Whw=
golang.org/x/sys v0.0.0-20210423185535-09eb48e85fd7/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210406210042-72f3dc4e9b72 h1:VqE9gduFZ4dbR7XoL77lHFp0/DyDUBKSXK7CMFkVcV0=