forked from TrueCloudLab/frostfs-node
[#246] blobovnivza: Fix deadlock on concurrent evict and open new blobovnicza
Deadlock occurs when `getActivate` function opens new blobovnicza and that invokes evict in LRU cache of open blobovniczas. `getActivate` makes `activeMtx.Lock()` and then cache evict makes `activeMtx.RLock()` and deadlock happens. Fix contains two steps: - add separate mutex to open blobovniczas (1), - split single Lock outside of `updateAndGet` (2). As for the (1) `bbolt.Open()` locks when it tries to open the same file from two threads. So separate mutex will prevent that. As for the (2) `updateAndGet` function contains from two parts. At first it checks if required blobovnicza is ready and it returns it. In this case we can use the simple RLock. But then there is an option when we should open new blobovnicza and update map of active blobovniczas. In this case we call `openBlobovnicza` without activeMtx lock. Cache evict happens there and it won't cause deadlock. Then we lock activeMtx to update the map of active blobovniczas. Concurrency can happen there. However `openBlobovnicza` will not open the same blobovnicza twice, so we can make one more check if opened blobovnicza was activated while thread was locked in activeMtx. If so, then return active blobovnicza, else finish activation. Signed-off-by: Alex Vanin <alexey@nspcc.ru>
This commit is contained in:
parent
91bea44a1a
commit
e6f2d84736
1 changed files with 18 additions and 6 deletions
|
@ -61,6 +61,10 @@ type blobovniczas struct {
|
||||||
// cache of opened filled blobovniczas
|
// cache of opened filled blobovniczas
|
||||||
opened *lru.Cache
|
opened *lru.Cache
|
||||||
|
|
||||||
|
// mutex to exclude parallel bbolt.Open() calls
|
||||||
|
// bbolt.Open() deadlocks if it tries to open already opened file
|
||||||
|
openMtx sync.Mutex
|
||||||
|
|
||||||
// list of active (opened, non-filled) blobovniczas
|
// list of active (opened, non-filled) blobovniczas
|
||||||
activeMtx sync.RWMutex
|
activeMtx sync.RWMutex
|
||||||
active map[string]blobovniczaWithIndex
|
active map[string]blobovniczaWithIndex
|
||||||
|
@ -676,10 +680,6 @@ func (b *blobovniczas) iterateSorted(addr *objectSDK.Address, curPath []string,
|
||||||
//
|
//
|
||||||
// returns error if blobvnicza could not be activated.
|
// returns error if blobvnicza could not be activated.
|
||||||
func (b *blobovniczas) getActivated(p string) (blobovniczaWithIndex, error) {
|
func (b *blobovniczas) getActivated(p string) (blobovniczaWithIndex, error) {
|
||||||
b.activeMtx.Lock()
|
|
||||||
defer b.activeMtx.Unlock()
|
|
||||||
|
|
||||||
// try again
|
|
||||||
return b.updateAndGet(p, nil)
|
return b.updateAndGet(p, nil)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -691,9 +691,7 @@ func (b *blobovniczas) updateActive(p string, old *uint64) error {
|
||||||
|
|
||||||
log.Debug("updating active blobovnicza...")
|
log.Debug("updating active blobovnicza...")
|
||||||
|
|
||||||
b.activeMtx.Lock()
|
|
||||||
_, err := b.updateAndGet(p, old)
|
_, err := b.updateAndGet(p, old)
|
||||||
b.activeMtx.Unlock()
|
|
||||||
|
|
||||||
log.Debug("active blobovnicza successfully updated")
|
log.Debug("active blobovnicza successfully updated")
|
||||||
|
|
||||||
|
@ -704,7 +702,10 @@ func (b *blobovniczas) updateActive(p string, old *uint64) error {
|
||||||
//
|
//
|
||||||
// if current active blobovnicza's index is not old, it is returned unchanged.
|
// if current active blobovnicza's index is not old, it is returned unchanged.
|
||||||
func (b *blobovniczas) updateAndGet(p string, old *uint64) (blobovniczaWithIndex, error) {
|
func (b *blobovniczas) updateAndGet(p string, old *uint64) (blobovniczaWithIndex, error) {
|
||||||
|
b.activeMtx.RLock()
|
||||||
active, ok := b.active[p]
|
active, ok := b.active[p]
|
||||||
|
b.activeMtx.RUnlock()
|
||||||
|
|
||||||
if ok {
|
if ok {
|
||||||
if old != nil {
|
if old != nil {
|
||||||
if active.ind == b.blzShallowWidth-1 {
|
if active.ind == b.blzShallowWidth-1 {
|
||||||
|
@ -726,6 +727,14 @@ func (b *blobovniczas) updateAndGet(p string, old *uint64) (blobovniczaWithIndex
|
||||||
return active, err
|
return active, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
b.activeMtx.Lock()
|
||||||
|
defer b.activeMtx.Unlock()
|
||||||
|
|
||||||
|
// check 2nd time to find out if it blobovnicza was activated while thread was locked
|
||||||
|
if tryActive, ok := b.active[p]; ok && tryActive.blz == active.blz {
|
||||||
|
return tryActive, nil
|
||||||
|
}
|
||||||
|
|
||||||
// remove from opened cache (active blobovnicza should always be opened)
|
// remove from opened cache (active blobovnicza should always be opened)
|
||||||
b.opened.Remove(p)
|
b.opened.Remove(p)
|
||||||
b.active[p] = active
|
b.active[p] = active
|
||||||
|
@ -785,6 +794,9 @@ func (b *blobovniczas) close() error {
|
||||||
//
|
//
|
||||||
// If blobovnicza is already opened and cached, instance from cache is returned w/o changes.
|
// If blobovnicza is already opened and cached, instance from cache is returned w/o changes.
|
||||||
func (b *blobovniczas) openBlobovnicza(p string) (*blobovnicza.Blobovnicza, error) {
|
func (b *blobovniczas) openBlobovnicza(p string) (*blobovnicza.Blobovnicza, error) {
|
||||||
|
b.openMtx.Lock()
|
||||||
|
defer b.openMtx.Unlock()
|
||||||
|
|
||||||
v, ok := b.opened.Get(p)
|
v, ok := b.opened.Get(p)
|
||||||
if ok {
|
if ok {
|
||||||
// blobovnicza should be opened in cache
|
// blobovnicza should be opened in cache
|
||||||
|
|
Loading…
Reference in a new issue