vfs: fix hang in read wait code - Fixes #4039

Before this fix, rclone would sometimes hang in vfs.readAt().

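This was due to a race condition causing rclone to miss the timeout
signal: the timeout goroutine could call fh.cond.Broadcast() before
the reader had called fh.cond.Wait(), so the wakeup was lost.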

This was fixed by a small amount of extra locking: the timeout
goroutine now holds fh.mu while it sets the abort flag and calls
fh.cond.Broadcast().

This very likely also fixes a number of "failed to wait for
in-sequence read" errors.
This commit is contained in:
Nick Craig-Wood 2020-05-18 15:43:29 +01:00
parent 57ee25d75a
commit 3de9bd9d04

View file

@ -5,7 +5,6 @@ import (
"io" "io"
"os" "os"
"sync" "sync"
"sync/atomic"
"time" "time"
"github.com/pkg/errors" "github.com/pkg/errors"
@ -238,18 +237,22 @@ func (fh *ReadFileHandle) readAt(p []byte, off int64) (n int, err error) {
maxWait := fh.file.VFS().Opt.ReadWait maxWait := fh.file.VFS().Opt.ReadWait
timeout := time.NewTimer(maxWait) timeout := time.NewTimer(maxWait)
done := make(chan struct{}) done := make(chan struct{})
abort := int32(0) abort := false
go func() { go func() {
select { select {
case <-timeout.C: case <-timeout.C:
// set abort flag an give all the waiting goroutines a kick on timeout // take the lock to make sure that fh.cond.Wait() is called before
atomic.StoreInt32(&abort, 1) // fh.cond.Broadcast. NB fh.cond.L == fh.mu
fh.mu.Lock()
// set abort flag and give all the waiting goroutines a kick on timeout
abort = true
fs.Debugf(fh.remote, "aborting in-sequence read wait, off=%d", off) fs.Debugf(fh.remote, "aborting in-sequence read wait, off=%d", off)
fh.cond.Broadcast() fh.cond.Broadcast()
fh.mu.Unlock()
case <-done: case <-done:
} }
}() }()
for fh.offset != off && atomic.LoadInt32(&abort) == 0 { for fh.offset != off && !abort {
fs.Debugf(fh.remote, "waiting for in-sequence read to %d for %v", off, maxWait) fs.Debugf(fh.remote, "waiting for in-sequence read to %d for %v", off, maxWait)
fh.cond.Wait() fh.cond.Wait()
} }