vfs: fix writeback deadlocks and other bugs

- fix deadlock when cancelling upload
- fix double upload and panic after cancelled upload
- fix cancelation strategy of uploading files
    - don't cancel uploads if we don't modify the file
    - cancel uploads if we do modify the file
- fix deadlock between Item and writeback
- fix confusion about whether writeback item was being uploaded
- fix cornercases in cancelling uploads and removing files
This commit is contained in:
Nick Craig-Wood 2020-06-15 15:09:33 +01:00
parent 279a516c53
commit fa472a340e
4 changed files with 93 additions and 31 deletions

View file

@ -558,6 +558,7 @@ func (f *File) Remove() (err error) {
if wasWriting { if wasWriting {
// Ignore error deleting file if was writing it as it may not be uploaded yet // Ignore error deleting file if was writing it as it may not be uploaded yet
err = nil err = nil
fs.Debugf(f._path(), "Ignoring File.Remove file error as uploading: %v", err)
} else { } else {
fs.Debugf(f._path(), "File.Remove file error: %v", err) fs.Debugf(f._path(), "File.Remove file error: %v", err)
} }

View file

@ -315,9 +315,14 @@ func (c *Cache) Rename(name string, newName string, newObj fs.Object) (err error
func (c *Cache) Remove(name string) (wasWriting bool) { func (c *Cache) Remove(name string) (wasWriting bool) {
name = clean(name) name = clean(name)
c.mu.Lock() c.mu.Lock()
item, _ := c._get(name) item := c.item[name]
delete(c.item, name) if item != nil {
delete(c.item, name)
}
c.mu.Unlock() c.mu.Unlock()
if item == nil {
return false
}
return item.remove("file deleted") return item.remove("file deleted")
} }

View file

@ -35,6 +35,11 @@ import (
// be taken before Item.mu. downloader may call into Item but Item may // be taken before Item.mu. downloader may call into Item but Item may
// **not** call downloader methods with Item.mu held // **not** call downloader methods with Item.mu held
// NB Item and writeback are tightly linked so it is necessary to
// have a total lock ordering between them. writeback.mu must always
// be taken before Item.mu. writeback may call into Item but Item may
// **not** call writeback methods with Item.mu held
// Item is stored in the item map // Item is stored in the item map
// //
// The Info field is written to the backing store to store status // The Info field is written to the backing store to store status
@ -49,6 +54,7 @@ type Item struct {
o fs.Object // object we are caching - may be nil o fs.Object // object we are caching - may be nil
fd *os.File // handle we are using to read and write to the file fd *os.File // handle we are using to read and write to the file
metaDirty bool // set if the info needs writeback metaDirty bool // set if the info needs writeback
modified bool // set if the file has been modified since the last Open
info Info // info about the file to persist to backing store info Info // info about the file to persist to backing store
} }
@ -60,7 +66,7 @@ type Info struct {
Size int64 // size of the file Size int64 // size of the file
Rs ranges.Ranges // which parts of the file are present Rs ranges.Ranges // which parts of the file are present
Fingerprint string // fingerprint of remote object Fingerprint string // fingerprint of remote object
Dirty bool // set if the backing file has been modifed Dirty bool // set if the backing file has been modified
} }
// Items are a slice of *Item ordered by ATime // Items are a slice of *Item ordered by ATime
@ -111,7 +117,7 @@ func newItem(c *Cache, name string) (item *Item) {
if os.IsNotExist(statErr) { if os.IsNotExist(statErr) {
item._removeMeta("cache file doesn't exist") item._removeMeta("cache file doesn't exist")
} else { } else {
item._remove(fmt.Sprintf("failed to stat cache file: %v", statErr)) item.remove(fmt.Sprintf("failed to stat cache file: %v", statErr))
} }
} }
@ -120,7 +126,7 @@ func newItem(c *Cache, name string) (item *Item) {
if !exists { if !exists {
item._removeFile("metadata doesn't exist") item._removeFile("metadata doesn't exist")
} else if err != nil { } else if err != nil {
item._remove(fmt.Sprintf("failed to load metadata: %v", err)) item.remove(fmt.Sprintf("failed to load metadata: %v", err))
} }
// Get size estimate (which is best we can do until Open() called) // Get size estimate (which is best we can do until Open() called)
@ -361,6 +367,10 @@ func (item *Item) _dirty() {
item.info.ModTime = time.Now() item.info.ModTime = time.Now()
item.info.ATime = item.info.ModTime item.info.ATime = item.info.ModTime
item.metaDirty = true item.metaDirty = true
if !item.modified {
item.modified = true
go item.c.writeback.cancelUpload(item)
}
if !item.info.Dirty { if !item.info.Dirty {
item.info.Dirty = true item.info.Dirty = true
err := item._save() err := item._save()
@ -410,6 +420,7 @@ func (item *Item) Open(o fs.Object) (err error) {
if item.fd != nil { if item.fd != nil {
return errors.New("vfs cache item: internal error: didn't Close file") return errors.New("vfs cache item: internal error: didn't Close file")
} }
item.modified = false
fd, err := file.OpenFile(osPath, os.O_RDWR, 0600) fd, err := file.OpenFile(osPath, os.O_RDWR, 0600)
if err != nil { if err != nil {
@ -488,6 +499,7 @@ func (item *Item) _store(ctx context.Context, storeFn StoreFn) (err error) {
fs.Errorf(item.name, "Failed to write metadata file: %v", err) fs.Errorf(item.name, "Failed to write metadata file: %v", err)
} }
if storeFn != nil && item.o != nil { if storeFn != nil && item.o != nil {
fs.Debugf(item.name, "writeback object to VFS layer")
// Write the object back to the VFS layer as last // Write the object back to the VFS layer as last
// thing we do with mutex unlocked // thing we do with mutex unlocked
item.mu.Unlock() item.mu.Unlock()
@ -595,10 +607,15 @@ func (item *Item) Close(storeFn StoreFn) (err error) {
checkErr(item._store(context.Background(), storeFn)) checkErr(item._store(context.Background(), storeFn))
} else { } else {
// asynchronous writeback // asynchronous writeback
item.c.writeback.add(item, item.name, storeFn) item.mu.Unlock()
item.c.writeback.add(item, item.name, item.modified, storeFn)
item.mu.Lock()
} }
} }
// mark as not modified now we have uploaded or queued for upload
item.modified = false
return err return err
} }
@ -711,7 +728,9 @@ func (item *Item) _removeMeta(reason string) {
// call with lock held // call with lock held
func (item *Item) _remove(reason string) (wasWriting bool) { func (item *Item) _remove(reason string) (wasWriting bool) {
// Cancel writeback, if any // Cancel writeback, if any
wasWriting = item.c.writeback.cancel(item) item.mu.Unlock()
wasWriting = item.c.writeback.remove(item)
item.mu.Lock()
item.info.clean() item.info.clean()
item.metaDirty = false item.metaDirty = false
item._removeFile(reason) item._removeFile(reason)

View file

@ -9,6 +9,7 @@ import (
"time" "time"
"github.com/rclone/rclone/fs" "github.com/rclone/rclone/fs"
"github.com/rclone/rclone/fs/fserrors"
"github.com/rclone/rclone/vfs/vfscommon" "github.com/rclone/rclone/vfs/vfscommon"
) )
@ -56,7 +57,8 @@ type writeBackItem struct {
index int // index into the priority queue for update index int // index into the priority queue for update
item *Item // Item that needs writeback item *Item // Item that needs writeback
expiry time.Time // When this expires we will write it back expiry time.Time // When this expires we will write it back
uploading bool // If we are uploading the item uploading bool // True if item is being processed by upload() method
onHeap bool // true if this item is on the items heap
cancel context.CancelFunc // To cancel the upload with cancel context.CancelFunc // To cancel the upload with
done chan struct{} // closed when the cancellation completes done chan struct{} // closed when the cancellation completes
storeFn StoreFn // To write the object back with storeFn StoreFn // To write the object back with
@ -149,21 +151,29 @@ func (wb *writeBack) _delItem(wbItem *writeBackItem) {
// //
// call with the lock held // call with the lock held
func (wb *writeBack) _popItem() (wbItem *writeBackItem) { func (wb *writeBack) _popItem() (wbItem *writeBackItem) {
return heap.Pop(&wb.items).(*writeBackItem) wbItem = heap.Pop(&wb.items).(*writeBackItem)
wbItem.onHeap = false
return wbItem
} }
// push a writeBackItem onto the items heap // push a writeBackItem onto the items heap
// //
// call with the lock held // call with the lock held
func (wb *writeBack) _pushItem(wbItem *writeBackItem) { func (wb *writeBack) _pushItem(wbItem *writeBackItem) {
heap.Push(&wb.items, wbItem) if !wbItem.onHeap {
heap.Push(&wb.items, wbItem)
wbItem.onHeap = true
}
} }
// remove a writeBackItem from the items heap // remove a writeBackItem from the items heap
// //
// call with the lock held // call with the lock held
func (wb *writeBack) _removeItem(wbItem *writeBackItem) { func (wb *writeBack) _removeItem(wbItem *writeBackItem) {
heap.Remove(&wb.items, wbItem.index) if wbItem.onHeap {
heap.Remove(&wb.items, wbItem.index)
wbItem.onHeap = false
}
} }
// peek the oldest writeBackItem - may be nil // peek the oldest writeBackItem - may be nil
@ -191,8 +201,10 @@ func (wb *writeBack) _resetTimer() {
} }
// add adds an item to the writeback queue or resets its timer if it // add adds an item to the writeback queue or resets its timer if it
// is already there // is already there.
func (wb *writeBack) add(item *Item, name string, storeFn StoreFn) { //
// if modified is false then it it doesn't a pending upload
func (wb *writeBack) add(item *Item, name string, modified bool, storeFn StoreFn) {
wb.mu.Lock() wb.mu.Lock()
defer wb.mu.Unlock() defer wb.mu.Unlock()
@ -200,7 +212,7 @@ func (wb *writeBack) add(item *Item, name string, storeFn StoreFn) {
if !ok { if !ok {
wbItem = wb._newItem(item, name) wbItem = wb._newItem(item, name)
} else { } else {
if wbItem.uploading { if wbItem.uploading && modified {
// We are uploading already so cancel the upload // We are uploading already so cancel the upload
wb._cancelUpload(wbItem) wb._cancelUpload(wbItem)
} }
@ -211,21 +223,21 @@ func (wb *writeBack) add(item *Item, name string, storeFn StoreFn) {
wb._resetTimer() wb._resetTimer()
} }
// cancel a writeback if there is one // Call when a file is removed. This cancels a writeback if there is
func (wb *writeBack) cancel(item *Item) (found bool) { // one and doesn't return the item to the queue.
func (wb *writeBack) remove(item *Item) (found bool) {
wb.mu.Lock() wb.mu.Lock()
defer wb.mu.Unlock() defer wb.mu.Unlock()
wbItem, found := wb.lookup[item] wbItem, found := wb.lookup[item]
if found { if found {
fs.Debugf(wbItem.name, "vfs cache: cancelling writeback") fs.Debugf(wbItem.name, "vfs cache: cancelling writeback (uploading %v) %p item %p", wbItem.uploading, wbItem, wbItem.item)
if wbItem.uploading { if wbItem.uploading {
// We are uploading already so cancel the upload // We are uploading already so cancel the upload
wb._cancelUpload(wbItem) wb._cancelUpload(wbItem)
} else {
// Remove the item from the heap
wb._removeItem(wbItem)
} }
// Remove the item from the heap
wb._removeItem(wbItem)
// Remove the item from the lookup map // Remove the item from the lookup map
wb._delItem(wbItem) wb._delItem(wbItem)
} }
@ -247,6 +259,8 @@ func (wb *writeBack) _kickUploader() {
} }
// upload the item - called as a goroutine // upload the item - called as a goroutine
//
// uploading will have been incremented here already
func (wb *writeBack) upload(ctx context.Context, wbItem *writeBackItem) { func (wb *writeBack) upload(ctx context.Context, wbItem *writeBackItem) {
wb.mu.Lock() wb.mu.Lock()
defer wb.mu.Unlock() defer wb.mu.Unlock()
@ -258,10 +272,10 @@ func (wb *writeBack) upload(ctx context.Context, wbItem *writeBackItem) {
wb.mu.Lock() wb.mu.Lock()
wbItem.cancel() // cancel context to release resources since store done wbItem.cancel() // cancel context to release resources since store done
if wbItem.uploading {
wbItem.uploading = false fs.Debugf(wbItem.name, "uploading = false %p item %p", wbItem, wbItem.item)
wb.uploads-- wbItem.uploading = false
} wb.uploads--
if err != nil { if err != nil {
// FIXME should this have a max number of transfer attempts? // FIXME should this have a max number of transfer attempts?
@ -269,7 +283,13 @@ func (wb *writeBack) upload(ctx context.Context, wbItem *writeBackItem) {
if wbItem.delay > maxUploadDelay { if wbItem.delay > maxUploadDelay {
wbItem.delay = maxUploadDelay wbItem.delay = maxUploadDelay
} }
fs.Errorf(wbItem.name, "vfs cache: failed to upload try #%d, will retry in %v: %v", wbItem.tries, wbItem.delay, err) if _, uerr := fserrors.Cause(err); uerr == context.Canceled {
fs.Infof(wbItem.name, "vfs cache: upload canceled sucessfully")
// Upload was cancelled so reset timer
wbItem.delay = wb.opt.WriteBack
} else {
fs.Errorf(wbItem.name, "vfs cache: failed to upload try #%d, will retry in %v: %v", wbItem.tries, wbItem.delay, err)
}
// push the item back on the queue for retry // push the item back on the queue for retry
wb._pushItem(wbItem) wb._pushItem(wbItem)
wb.items._update(wbItem, time.Now().Add(wbItem.delay)) wb.items._update(wbItem, time.Now().Add(wbItem.delay))
@ -282,26 +302,42 @@ func (wb *writeBack) upload(ctx context.Context, wbItem *writeBackItem) {
close(wbItem.done) close(wbItem.done)
} }
// cancel the upload // cancel the upload - the item should be on the heap after this returns
// //
// call with lock held // call with lock held
func (wb *writeBack) _cancelUpload(wbItem *writeBackItem) { func (wb *writeBack) _cancelUpload(wbItem *writeBackItem) {
if !wbItem.uploading { if !wbItem.uploading {
return return
} }
fs.Debugf(wbItem.name, "vfs cache: cancelling upload") fs.Infof(wbItem.name, "vfs cache: cancelling upload")
if wbItem.cancel != nil { if wbItem.cancel != nil {
// Cancel the upload - this may or may not be effective // Cancel the upload - this may or may not be effective
wbItem.cancel() wbItem.cancel()
// wait for the uploader to finish // wait for the uploader to finish
//
// we need to wait without the lock otherwise the
// background part will never run.
wb.mu.Unlock()
<-wbItem.done <-wbItem.done
} wb.mu.Lock()
if wbItem.uploading {
wbItem.uploading = false
wb.uploads--
} }
// uploading items are not on the heap so add them back // uploading items are not on the heap so add them back
wb._pushItem(wbItem) wb._pushItem(wbItem)
fs.Infof(wbItem.name, "vfs cache: cancelled upload")
}
// cancelUpload cancels the upload of the item if there is one in progress
//
// it returns true if there was an upload in progress
func (wb *writeBack) cancelUpload(item *Item) bool {
wb.mu.Lock()
defer wb.mu.Unlock()
wbItem, ok := wb.lookup[item]
if !ok || !wbItem.uploading {
return false
}
wb._cancelUpload(wbItem)
return true
} }
// this uploads as many items as possible // this uploads as many items as possible
@ -320,6 +356,7 @@ func (wb *writeBack) processItems(ctx context.Context) {
resetTimer = true resetTimer = true
// Pop the item, mark as uploading and start the uploader // Pop the item, mark as uploading and start the uploader
wbItem = wb._popItem() wbItem = wb._popItem()
fs.Debugf(wbItem.name, "uploading = true %p item %p", wbItem, wbItem.item)
wbItem.uploading = true wbItem.uploading = true
wb.uploads++ wb.uploads++
newCtx, cancel := context.WithCancel(ctx) newCtx, cancel := context.WithCancel(ctx)