[#246] blobovnivza: Fix deadlock on concurrent evict and open new blobovnicza

Deadlock occurs when `getActivate` function opens new blobovnicza and that
invokes evict in LRU cache of open blobovniczas. `getActivate` makes
`activeMtx.Lock()` and then cache evict makes `activeMtx.RLock()` and deadlock
happens.

Fix contains two steps:
- add separate mutex to open blobovniczas (1),
- split single Lock outside of `updateAndGet` (2).

As for the (1) `bbolt.Open()` locks when it tries to open the same file from
two threads. So separate mutex will prevent that.

As for the (2) `updateAndGet` function contains from two parts. At first it
checks if required blobovnicza is ready and it returns it. In this case we can
use the simple RLock. But then there is an option when we should open new
blobovnicza and update map of active blobovniczas.

In this case we call `openBlobovnicza` without activeMtx lock. Cache evict
happens there and it won't cause deadlock.

Then we lock activeMtx to update the map of active blobovniczas. Concurrency can
happen there. However `openBlobovnicza` will not open the same blobovnicza twice,
so we can make one more check if opened blobovnicza was activated while thread was
locked in activeMtx. If so, then return active blobovnicza, else finish activation.

Signed-off-by: Alex Vanin <alexey@nspcc.ru>
This commit is contained in:
Alex Vanin 2020-12-14 12:39:22 +03:00 committed by Alex Vanin
parent 91bea44a1a
commit e6f2d84736

View file

@ -61,6 +61,10 @@ type blobovniczas struct {
// cache of opened filled blobovniczas
opened *lru.Cache
// mutex to exclude parallel bbolt.Open() calls
// bbolt.Open() deadlocks if it tries to open already opened file
openMtx sync.Mutex
// list of active (opened, non-filled) blobovniczas
activeMtx sync.RWMutex
active map[string]blobovniczaWithIndex
@ -676,10 +680,6 @@ func (b *blobovniczas) iterateSorted(addr *objectSDK.Address, curPath []string,
//
// returns error if blobvnicza could not be activated.
func (b *blobovniczas) getActivated(p string) (blobovniczaWithIndex, error) {
b.activeMtx.Lock()
defer b.activeMtx.Unlock()
// try again
return b.updateAndGet(p, nil)
}
@ -691,9 +691,7 @@ func (b *blobovniczas) updateActive(p string, old *uint64) error {
log.Debug("updating active blobovnicza...")
b.activeMtx.Lock()
_, err := b.updateAndGet(p, old)
b.activeMtx.Unlock()
log.Debug("active blobovnicza successfully updated")
@ -704,7 +702,10 @@ func (b *blobovniczas) updateActive(p string, old *uint64) error {
//
// if current active blobovnicza's index is not old, it is returned unchanged.
func (b *blobovniczas) updateAndGet(p string, old *uint64) (blobovniczaWithIndex, error) {
b.activeMtx.RLock()
active, ok := b.active[p]
b.activeMtx.RUnlock()
if ok {
if old != nil {
if active.ind == b.blzShallowWidth-1 {
@ -726,6 +727,14 @@ func (b *blobovniczas) updateAndGet(p string, old *uint64) (blobovniczaWithIndex
return active, err
}
b.activeMtx.Lock()
defer b.activeMtx.Unlock()
// check 2nd time to find out if it blobovnicza was activated while thread was locked
if tryActive, ok := b.active[p]; ok && tryActive.blz == active.blz {
return tryActive, nil
}
// remove from opened cache (active blobovnicza should always be opened)
b.opened.Remove(p)
b.active[p] = active
@ -785,6 +794,9 @@ func (b *blobovniczas) close() error {
//
// If blobovnicza is already opened and cached, instance from cache is returned w/o changes.
func (b *blobovniczas) openBlobovnicza(p string) (*blobovnicza.Blobovnicza, error) {
b.openMtx.Lock()
defer b.openMtx.Unlock()
v, ok := b.opened.Get(p)
if ok {
// blobovnicza should be opened in cache