From eb91356e28775c65616ab3fa655b876582987b82 Mon Sep 17 00:00:00 2001 From: Nick Craig-Wood Date: Tue, 22 May 2018 14:48:23 +0100 Subject: [PATCH] fs/asyncreader: optionally use mmap for memory allocation with `--use-mmap` #2200 This replaces the `sync.Pool` allocator with lib/pool. This implements a pool of buffers of up to 64MB which can be re-used but is flushed every 5 seconds. If `--use-mmap` is set then rclone will use mmap for memory allocations which is much better at returning memory to the OS. --- docs/content/docs.md | 18 ++++++++++++ fs/asyncreader/asyncreader.go | 44 +++++++++++++--------------- fs/config.go | 1 + fs/config/configflags/configflags.go | 1 + 4 files changed, 40 insertions(+), 24 deletions(-) diff --git a/docs/content/docs.md b/docs/content/docs.md index 199f16f43..8947cbe11 100644 --- a/docs/content/docs.md +++ b/docs/content/docs.md @@ -399,6 +399,9 @@ See the [mount](/commands/rclone_mount/#file-buffering) documentation for more d Set to 0 to disable the buffering for the minimum memory usage. +Note that the memory allocation of the buffers is influenced by the +[--use-mmap](#use-mmap) flag. + ### --checkers=N ### The number of checkers to run in parallel. Checkers do the equality @@ -947,6 +950,21 @@ This can be useful when transferring to a remote which doesn't support mod times directly as it is more accurate than a `--size-only` check and faster than using `--checksum`. +### --use-mmap ### + +If this flag is set then rclone will use anonymous memory allocated by +mmap on Unix based platforms and VirtualAlloc on Windows for its +transfer buffers (size controlled by `--buffer-size`). Memory +allocated like this does not go on the Go heap and can be returned to +the OS immediately when it is finished with. + +If this flag is not set then rclone will allocate and free the buffers +using the Go memory allocator which may use more memory as memory +pages are returned less aggressively to the OS.
+ +It is possible this does not work well on all platforms so it is +disabled by default; in the future it may be enabled by default. + ### --use-server-modtime ### Some object-store backends (e.g, Swift, S3) do not preserve file modification diff --git a/fs/asyncreader/asyncreader.go b/fs/asyncreader/asyncreader.go index 9f7437d79..bff3ae29e 100644 --- a/fs/asyncreader/asyncreader.go +++ b/fs/asyncreader/asyncreader.go @@ -5,21 +5,22 @@ package asyncreader import ( "io" "sync" + "time" + "github.com/ncw/rclone/fs" + "github.com/ncw/rclone/lib/pool" "github.com/ncw/rclone/lib/readers" "github.com/pkg/errors" ) const ( // BufferSize is the default size of the async buffer - BufferSize = 1024 * 1024 - softStartInitial = 4 * 1024 + BufferSize = 1024 * 1024 + softStartInitial = 4 * 1024 + bufferCacheSize = 64 // max number of buffers to keep in cache + bufferCacheFlushTime = 5 * time.Second // flush the cached buffers after this long ) -var asyncBufferPool = sync.Pool{ - New: func() interface{} { return newBuffer() }, -} - var errorStreamAbandoned = errors.New("stream abandoned") // AsyncReader will do async read-ahead from the input reader @@ -98,16 +99,25 @@ func (a *AsyncReader) init(rd io.ReadCloser, buffers int) { }() } +// bufferPool is a global pool of buffers +var bufferPool *pool.Pool +var bufferPoolOnce sync.Once + // return the buffer to the pool (clearing it) func (a *AsyncReader) putBuffer(b *buffer) { - b.clear() - asyncBufferPool.Put(b) + bufferPool.Put(b.buf) + b.buf = nil } // get a buffer from the pool func (a *AsyncReader) getBuffer() *buffer { - b := asyncBufferPool.Get().(*buffer) - return b + bufferPoolOnce.Do(func() { + // Initialise the buffer pool when used + bufferPool = pool.New(bufferCacheFlushTime, BufferSize, bufferCacheSize, fs.Config.UseMmap) + }) + return &buffer{ + buf: bufferPool.Get(), + } } // Read will return the next available data. 
@@ -295,20 +305,6 @@ type buffer struct { offset int } -func newBuffer() *buffer { - return &buffer{ - buf: make([]byte, BufferSize), - err: nil, - } -} - -// clear returns the buffer to its full size and clears the members -func (b *buffer) clear() { - b.buf = b.buf[:cap(b.buf)] - b.err = nil - b.offset = 0 -} - // isEmpty returns true is offset is at end of // buffer, or func (b *buffer) isEmpty() bool { diff --git a/fs/config.go b/fs/config.go index 53e6835d1..b45c4e18b 100644 --- a/fs/config.go +++ b/fs/config.go @@ -86,6 +86,7 @@ type ConfigInfo struct { StatsOneLine bool Progress bool Cookie bool + UseMmap bool } // NewConfig creates a new config with everything set to the default diff --git a/fs/config/configflags/configflags.go b/fs/config/configflags/configflags.go index f9201bcda..2e22785c4 100644 --- a/fs/config/configflags/configflags.go +++ b/fs/config/configflags/configflags.go @@ -88,6 +88,7 @@ func AddFlags(flagSet *pflag.FlagSet) { flags.BoolVarP(flagSet, &fs.Config.StatsOneLine, "stats-one-line", "", fs.Config.StatsOneLine, "Make the stats fit on one line.") flags.BoolVarP(flagSet, &fs.Config.Progress, "progress", "P", fs.Config.Progress, "Show progress during transfer.") flags.BoolVarP(flagSet, &fs.Config.Cookie, "use-cookies", "", fs.Config.Cookie, "Enable session cookiejar.") + flags.BoolVarP(flagSet, &fs.Config.UseMmap, "use-mmap", "", fs.Config.UseMmap, "Use mmap allocator (see docs).") } // SetFlags converts any flags into config which weren't straight foward