From e6f2d84736ba9ac9d151ba409f981b599cb8e157 Mon Sep 17 00:00:00 2001 From: Alex Vanin Date: Mon, 14 Dec 2020 12:39:22 +0300 Subject: [PATCH] [#246] blobovnivza: Fix deadlock on concurrent evict and open new blobovnicza Deadlock occurs when `getActivate` function opens new blobovnicza and that invokes evict in LRU cache of open blobovniczas. `getActivate` makes `activeMtx.Lock()` and then cache evict makes `activeMtx.RLock()` and deadlock happens. Fix contains two steps: - add separate mutex to open blobovniczas (1), - split single Lock outside of `updateAndGet` (2). As for the (1) `bbolt.Open()` locks when it tries to open the same file from two threads. So separate mutex will prevent that. As for the (2) `updateAndGet` function contains from two parts. At first it checks if required blobovnicza is ready and it returns it. In this case we can use the simple RLock. But then there is an option when we should open new blobovnicza and update map of active blobovniczas. In this case we call `openBlobovnicza` without activeMtx lock. Cache evict happens there and it won't cause deadlock. Then we lock activeMtx to update the map of active blobovniczas. Concurrency can happen there. However `openBlobovnicza` will not open the same blobovnicza twice, so we can make one more check if opened blobovnicza was activated while thread was locked in activeMtx. If so, then return active blobovnicza, else finish activation. Signed-off-by: Alex Vanin --- .../blobstor/blobovnicza.go | 24 ++++++++++++++----- 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/pkg/local_object_storage/blobstor/blobovnicza.go b/pkg/local_object_storage/blobstor/blobovnicza.go index 781ee3f4..514813da 100644 --- a/pkg/local_object_storage/blobstor/blobovnicza.go +++ b/pkg/local_object_storage/blobstor/blobovnicza.go @@ -61,6 +61,10 @@ type blobovniczas struct { // cache of opened filled blobovniczas opened *lru.Cache + // mutex to exclude parallel bbolt.Open() calls + // bbolt.Open() deadlocks if it tries to open already opened file + openMtx sync.Mutex + // list of active (opened, non-filled) blobovniczas activeMtx sync.RWMutex active map[string]blobovniczaWithIndex @@ -676,10 +680,6 @@ func (b *blobovniczas) iterateSorted(addr *objectSDK.Address, curPath []string, // // returns error if blobvnicza could not be activated. func (b *blobovniczas) getActivated(p string) (blobovniczaWithIndex, error) { - b.activeMtx.Lock() - defer b.activeMtx.Unlock() - - // try again return b.updateAndGet(p, nil) } @@ -691,9 +691,7 @@ func (b *blobovniczas) updateActive(p string, old *uint64) error { log.Debug("updating active blobovnicza...") - b.activeMtx.Lock() _, err := b.updateAndGet(p, old) - b.activeMtx.Unlock() log.Debug("active blobovnicza successfully updated") @@ -704,7 +702,10 @@ func (b *blobovniczas) updateActive(p string, old *uint64) error { // // if current active blobovnicza's index is not old, it is returned unchanged. func (b *blobovniczas) updateAndGet(p string, old *uint64) (blobovniczaWithIndex, error) { + b.activeMtx.RLock() active, ok := b.active[p] + b.activeMtx.RUnlock() + if ok { if old != nil { if active.ind == b.blzShallowWidth-1 { @@ -726,6 +727,14 @@ func (b *blobovniczas) updateAndGet(p string, old *uint64) (blobovniczaWithIndex return active, err } + b.activeMtx.Lock() + defer b.activeMtx.Unlock() + + // check 2nd time to find out if it blobovnicza was activated while thread was locked + if tryActive, ok := b.active[p]; ok && tryActive.blz == active.blz { + return tryActive, nil + } + // remove from opened cache (active blobovnicza should always be opened) b.opened.Remove(p) b.active[p] = active @@ -785,6 +794,9 @@ func (b *blobovniczas) close() error { // // If blobovnicza is already opened and cached, instance from cache is returned w/o changes. func (b *blobovniczas) openBlobovnicza(p string) (*blobovnicza.Blobovnicza, error) { + b.openMtx.Lock() + defer b.openMtx.Unlock() + v, ok := b.opened.Get(p) if ok { // blobovnicza should be opened in cache