frostfs-node/pkg/local_object_storage/engine/control.go
Leonard Lyubich 596d877a44 [#1549] engine: Disable shard on blobovnicza init failure
There is a need to support working w/o shard if it has problems with
blobovnicza tree.

Make `BlobStor.Init` to return new `ErrInitBlobovniczas` error. Remove
shard from storage engine's shard set if it returned this error from
`Init` call. So if some of the shards (but not all) return this error,
the node will be able to continue working without them.

Signed-off-by: Leonard Lyubich <leonard@nspcc.ru>
2022-07-08 13:45:57 +03:00

178 lines
4.6 KiB
Go

package engine
import (
"errors"
"fmt"
"sync"
"github.com/nspcc-dev/neofs-node/pkg/local_object_storage/blobstor"
"github.com/nspcc-dev/neofs-node/pkg/local_object_storage/shard"
"go.uber.org/zap"
)
// Open opens all StorageEngine's components.
func (e *StorageEngine) Open() error {
return e.open()
}
func (e *StorageEngine) open() error {
e.mtx.RLock()
defer e.mtx.RUnlock()
var wg sync.WaitGroup
var errCh = make(chan error, len(e.shards))
for id, sh := range e.shards {
wg.Add(1)
go func(id string, sh *shard.Shard) {
defer wg.Done()
if err := sh.Open(); err != nil {
errCh <- fmt.Errorf("could not open shard %s: %w", id, err)
}
}(id, sh.Shard)
}
wg.Wait()
close(errCh)
for err := range errCh {
if err != nil {
return err
}
}
return nil
}
// Init initializes all StorageEngine's components.
func (e *StorageEngine) Init() error {
e.mtx.RLock()
defer e.mtx.RUnlock()
for id, sh := range e.shards {
if err := sh.Init(); err != nil {
if errors.Is(err, blobstor.ErrInitBlobovniczas) {
delete(e.shards, id)
e.log.Error("shard initialization failure, skipping",
zap.String("id", id),
zap.Error(err),
)
continue
}
return fmt.Errorf("could not initialize shard %s: %w", id, err)
}
}
if len(e.shards) == 0 {
return errors.New("failed initialization on all shards")
}
return nil
}
var errClosed = errors.New("storage engine is closed")
// Close releases all StorageEngine's components. Waits for all data-related operations to complete.
// After the call, all the next ones will fail.
//
// The method is supposed to be called when the application exits.
func (e *StorageEngine) Close() error {
return e.setBlockExecErr(errClosed)
}
// closes all shards. Never returns an error, shard errors are logged.
func (e *StorageEngine) close(releasePools bool) error {
e.mtx.RLock()
defer e.mtx.RUnlock()
if releasePools {
for _, p := range e.shardPools {
p.Release()
}
}
for id, sh := range e.shards {
if err := sh.Close(); err != nil {
e.log.Debug("could not close shard",
zap.String("id", id),
zap.String("error", err.Error()),
)
}
}
return nil
}
// executes op if execution is not blocked, otherwise returns blocking error.
//
// Can be called concurrently with setBlockExecErr.
func (e *StorageEngine) execIfNotBlocked(op func() error) error {
e.blockExec.mtx.RLock()
defer e.blockExec.mtx.RUnlock()
if e.blockExec.err != nil {
return e.blockExec.err
}
return op()
}
// sets the flag of blocking execution of all data operations according to err:
// * err != nil, then blocks the execution. If exec wasn't blocked, calls close method
// (if err == errClosed => additionally releases pools and does not allow to resume executions).
// * otherwise, resumes execution. If exec was blocked, calls open method.
//
// Can be called concurrently with exec. In this case it waits for all executions to complete.
func (e *StorageEngine) setBlockExecErr(err error) error {
e.blockExec.mtx.Lock()
defer e.blockExec.mtx.Unlock()
prevErr := e.blockExec.err
wasClosed := errors.Is(prevErr, errClosed)
if wasClosed {
return errClosed
}
e.blockExec.err = err
if err == nil {
if prevErr != nil { // block -> ok
return e.open()
}
} else if prevErr == nil { // ok -> block
return e.close(errors.Is(err, errClosed))
}
// otherwise do nothing
return nil
}
// BlockExecution blocks the execution of any data-related operation. All blocked ops will return err.
// To resume the execution, use ResumeExecution method.
//
// Сan be called regardless of the fact of the previous blocking. If execution wasn't blocked, releases all resources
// similar to Close. Can be called concurrently with Close and any data related method (waits for all executions
// to complete). Returns error if any Close has been called before.
//
// Must not be called concurrently with either Open or Init.
//
// Note: technically passing nil error will resume the execution, otherwise, it is recommended to call ResumeExecution
// for this.
func (e *StorageEngine) BlockExecution(err error) error {
return e.setBlockExecErr(err)
}
// ResumeExecution resumes the execution of any data-related operation.
// To block the execution, use BlockExecution method.
//
// Сan be called regardless of the fact of the previous blocking. If execution was blocked, prepares all resources
// similar to Open. Can be called concurrently with Close and any data related method (waits for all executions
// to complete). Returns error if any Close has been called before.
//
// Must not be called concurrently with either Open or Init.
func (e *StorageEngine) ResumeExecution() error {
return e.setBlockExecErr(nil)
}