diff --git a/backend/local/aaaa b/backend/local/aaaa
new file mode 100644
index 000000000..e69de29bb
diff --git a/backend/local/fadvise_other.go b/backend/local/fadvise_other.go
new file mode 100644
index 000000000..18ac0d750
--- /dev/null
+++ b/backend/local/fadvise_other.go
@@ -0,0 +1,14 @@
+//+build !linux
+
+package local
+
+import (
+	"io"
+	"os"
+)
+
+// newFadviseReadCloser returns f unchanged: page cache advice is only
+// implemented for Linux, so on other platforms the file is read directly.
+func newFadviseReadCloser(o *Object, f *os.File, offset, limit int64) io.ReadCloser {
+	return f
+}
diff --git a/backend/local/fadvise_unix.go b/backend/local/fadvise_unix.go
new file mode 100644
index 000000000..7a6f4bad5
--- /dev/null
+++ b/backend/local/fadvise_unix.go
@@ -0,0 +1,159 @@
+//+build linux
+
+package local
+
+import (
+	"io"
+	"os"
+
+	"github.com/rclone/rclone/fs"
+	"golang.org/x/sys/unix"
+)
+
+// fadvise provides means to automate freeing pages in kernel page cache for
+// a given file descriptor as the file is sequentially processed (read or
+// written).
+//
+// When copying a file to a remote backend all the file content is read by
+// kernel and put to page cache to make future reads faster.
+// This causes memory pressure visible in both memory usage and CPU consumption
+// and can even cause OOM errors in applications consuming large amounts of memory.
+//
+// In case of an upload to a remote backend, there are no benefits from caching.
+//
+// fadvise would orchestrate calling POSIX_FADV_DONTNEED
+//
+// POSIX_FADV_DONTNEED attempts to free cached pages associated
+// with the specified region. This is useful, for example, while
+// streaming large files. A program may periodically request the
+// kernel to free cached data that has already been used, so that
+// more useful cached pages are not discarded instead.
+//
+// Requests to discard partial pages are ignored. It is
+// preferable to preserve needed data than discard unneeded data.
+// If the application requires that data be considered for
+// discarding, then offset and len must be page-aligned.
+//
+// The implementation may attempt to write back dirty pages in
+// the specified region, but this is not guaranteed. Any
+// unwritten dirty pages will not be freed. If the application
+// wishes to ensure that dirty pages will be released, it should
+// call fsync(2) or fdatasync(2) first.
+type fadvise struct {
+	o          *Object // object passed to fs.Debugf when a syscall fails
+	fd         int     // file descriptor the advice is issued for
+	lastPos    int64   // start of the region that has not been released yet
+	curPos     int64   // position just past the last byte processed
+	windowSize int64   // bytes to process before the region is released
+}
+
+// sequential configures readahead strategy in Linux kernel for the region
+// starting at the current position. A limit that is not positive is passed as
+// a zero length, which posix_fadvise(2) interprets as "until the end of the
+// file". It reports whether the syscall succeeded.
+//
+// Under Linux, POSIX_FADV_NORMAL sets the readahead window to the
+// default size for the backing device; POSIX_FADV_SEQUENTIAL doubles
+// this size, and POSIX_FADV_RANDOM disables file readahead entirely.
+func (f *fadvise) sequential(limit int64) bool {
+	l := int64(0)
+	if limit > 0 {
+		l = limit
+	}
+	if err := unix.Fadvise(f.fd, f.curPos, l, unix.FADV_SEQUENTIAL); err != nil {
+		fs.Debugf(f.o, "fadvise sequential failed on file descriptor %d: %s", f.fd, err)
+		return false
+	}
+
+	return true
+}
+
+// next records that n more bytes have been processed and releases the
+// accumulated region once a full window has been consumed.
+func (f *fadvise) next(n int) {
+	f.curPos += int64(n)
+	f.freePagesIfNeeded()
+}
+
+// freePagesIfNeeded releases the processed region when it has grown to at
+// least windowSize bytes.
+func (f *fadvise) freePagesIfNeeded() {
+	if f.curPos >= f.lastPos+f.windowSize {
+		f.freePages()
+	}
+}
+
+// freePages asks the kernel to drop the cached pages of the region processed
+// since the previous call and marks that region as released.
+//
+// Nothing is requested when no bytes were processed: posix_fadvise(2) treats
+// a zero length as "until the end of the file", so an empty region would
+// otherwise evict cached pages this reader never consumed.
+func (f *fadvise) freePages() {
+	length := f.curPos - f.lastPos
+	if length <= 0 {
+		return
+	}
+	if err := unix.Fadvise(f.fd, f.lastPos, length, unix.FADV_DONTNEED); err != nil {
+		fs.Debugf(f.o, "fadvise dontneed failed on file descriptor %d: %s", f.fd, err)
+	}
+	f.lastPos = f.curPos
+}
+
+// fadviseReadCloser is an io.ReadCloser that reports every read to the
+// embedded fadvise so that consumed pages are released as reading progresses.
+type fadviseReadCloser struct {
+	*fadvise
+	inner io.ReadCloser
+}
+
+// defaultWindowSize is the number of bytes (32 pages) that are processed
+// before the consumed pages are released.
+var defaultWindowSize = int64(32 * os.Getpagesize())
+
+// newFadviseReadCloser wraps os.File so that reading from that file would
+// remove already consumed pages from kernel page cache.
+// In addition to that it instructs kernel to double the readahead window to
+// make sequential reads faster.
+//
+// offset is the position in the file the accounting starts from; the file
+// itself is not repositioned here. limit is the length of the region advised
+// as sequential; a value that is not positive covers the rest of the file.
+//
+// See also fadvise.
+func newFadviseReadCloser(o *Object, f *os.File, offset, limit int64) io.ReadCloser {
+	r := fadviseReadCloser{
+		fadvise: &fadvise{
+			o:          o,
+			fd:         int(f.Fd()),
+			lastPos:    offset,
+			curPos:     offset,
+			windowSize: defaultWindowSize,
+		},
+		inner: f,
+	}
+
+	// If syscall failed it's likely that the subsequent syscalls to that
+	// file descriptor would also fail. In that case return the provided os.File
+	// pointer.
+	if !r.sequential(limit) {
+		return f
+	}
+
+	return r
+}
+
+// Read reads from the wrapped file and advances the fadvise position by the
+// number of bytes read.
+func (f fadviseReadCloser) Read(p []byte) (n int, err error) {
+	n, err = f.inner.Read(p)
+	f.next(n)
+	return
+}
+
+// Close releases the pages consumed since the last release and closes the
+// wrapped file.
+func (f fadviseReadCloser) Close() error {
+	f.freePages()
+	return f.inner.Close()
+}
diff --git a/backend/local/local.go b/backend/local/local.go
index 12c76985f..f9e861977 100644
--- a/backend/local/local.go
+++ b/backend/local/local.go
@@ -777,7 +777,11 @@ func (o *Object) Hash(ctx context.Context, r hash.Type) (string, error) {
 		var in io.ReadCloser
 
 		if !o.translatedLink {
-			in, err = file.Open(o.path)
+			var fd *os.File
+			fd, err = file.Open(o.path)
+			if fd != nil {
+				in = newFadviseReadCloser(o, fd, 0, 0)
+			}
 		} else {
 			in, err = o.openTranslatedLink(0, -1)
 		}
@@ -938,7 +942,7 @@ func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (in io.Read
 	if err != nil {
 		return
 	}
-	wrappedFd := readers.NewLimitedReadCloser(fd, limit)
+	wrappedFd := readers.NewLimitedReadCloser(newFadviseReadCloser(o, fd, offset, limit), limit)
 	if offset != 0 {
 		// seek the object
 		_, err = fd.Seek(offset, io.SeekStart)