From 6472a170eb77392ea56b3c3f2e9f49d7ea07aec0 Mon Sep 17 00:00:00 2001 From: Evgenii Stratonikov Date: Thu, 17 Mar 2022 14:55:25 +0300 Subject: [PATCH] [#1143] shard: Introduce explicit `Degraded` mode `Degraded` mode is set automatically after error counter is over the threshold. `ReadOnly` mode can still be set by an administrator. Signed-off-by: Evgenii Stratonikov --- cmd/neofs-cli/modules/control.go | 8 +++++++- cmd/neofs-node/config/engine/shard/config.go | 2 ++ pkg/local_object_storage/engine/engine.go | 6 +++--- pkg/local_object_storage/engine/error_test.go | 4 ++-- pkg/local_object_storage/shard/delete.go | 2 +- pkg/local_object_storage/shard/gc.go | 2 +- pkg/local_object_storage/shard/inhume.go | 2 +- pkg/local_object_storage/shard/lock.go | 2 +- pkg/local_object_storage/shard/mode.go | 10 ++++++++++ pkg/local_object_storage/shard/move.go | 2 +- pkg/local_object_storage/shard/put.go | 2 +- pkg/local_object_storage/writecache/delete.go | 2 +- pkg/local_object_storage/writecache/flush.go | 4 ++-- pkg/local_object_storage/writecache/iterate.go | 2 +- pkg/local_object_storage/writecache/mode.go | 9 +++++++++ pkg/local_object_storage/writecache/persist.go | 2 +- pkg/local_object_storage/writecache/put.go | 2 +- pkg/services/control/server/list_shards.go | 2 ++ pkg/services/control/server/set_shard_mode.go | 2 ++ pkg/services/control/types.pb.go | 16 ++++++++++------ pkg/services/control/types.proto | 3 +++ 21 files changed, 62 insertions(+), 24 deletions(-) diff --git a/cmd/neofs-cli/modules/control.go b/cmd/neofs-cli/modules/control.go index 9a0e0aa25..bf0266884 100644 --- a/cmd/neofs-cli/modules/control.go +++ b/cmd/neofs-cli/modules/control.go @@ -78,6 +78,7 @@ const ( shardModeReadOnly = "read-only" shardModeReadWrite = "read-write" + shardModeDegraded = "degraded" ) const ( @@ -124,9 +125,10 @@ func initControlSetShardModeCmd() { flags.String(controlRPC, controlRPCDefault, controlRPCUsage) flags.StringVarP(&shardID, shardIDFlag, "", "", "ID of the shard in base58 encoding") flags.StringVarP(&shardMode, shardModeFlag, "", "", - fmt.Sprintf("new shard mode keyword ('%s', '%s')", + fmt.Sprintf("new shard mode keyword ('%s', '%s', '%s')", shardModeReadWrite, shardModeReadOnly, + shardModeDegraded, ), ) flags.Bool(shardClearErrorsFlag, false, "Set shard error count to 0") @@ -490,6 +492,8 @@ func prettyPrintShards(cmd *cobra.Command, ii []*control.ShardInfo) { mode = "read-write" case control.ShardMode_READ_ONLY: mode = "read-only" + case control.ShardMode_DEGRADED: + mode = "degraded" default: mode = "unknown" } @@ -526,6 +530,8 @@ func setShardMode(cmd *cobra.Command, _ []string) { mode = control.ShardMode_READ_WRITE case shardModeReadOnly: mode = control.ShardMode_READ_ONLY + case shardModeDegraded: + mode = control.ShardMode_DEGRADED } req := new(control.SetShardModeRequest) diff --git a/cmd/neofs-node/config/engine/shard/config.go b/cmd/neofs-node/config/engine/shard/config.go index 147b1c337..bb395afd6 100644 --- a/cmd/neofs-node/config/engine/shard/config.go +++ b/cmd/neofs-node/config/engine/shard/config.go @@ -77,6 +77,8 @@ func (x *Config) Mode() (m shard.Mode) { m = shard.ModeReadWrite case "read-only": m = shard.ModeReadOnly + case "degraded": + m = shard.ModeDegraded default: panic(fmt.Sprintf("unknown shard mode: %s", s)) } diff --git a/pkg/local_object_storage/engine/engine.go b/pkg/local_object_storage/engine/engine.go index 24fb66206..f40f158c5 100644 --- a/pkg/local_object_storage/engine/engine.go +++ b/pkg/local_object_storage/engine/engine.go @@ -50,13 +50,13 @@ func (e *StorageEngine) reportShardError( return } - err = sh.SetMode(shard.ModeReadOnly) + err = sh.SetMode(shard.ModeDegraded) if err != nil { - e.log.Error("failed to move shard in read-only mode", + e.log.Error("failed to move shard in degraded mode", zap.Uint32("error count", errCount), zap.Error(err)) } else { - e.log.Info("shard is moved in read-only due to error threshold", + e.log.Info("shard is moved in degraded mode due to error threshold", zap.Stringer("shard_id", sh.ID()), zap.Uint32("error count", errCount)) } diff --git a/pkg/local_object_storage/engine/error_test.go b/pkg/local_object_storage/engine/error_test.go index 7e943e9e0..7fe345e66 100644 --- a/pkg/local_object_storage/engine/error_test.go +++ b/pkg/local_object_storage/engine/error_test.go @@ -118,7 +118,7 @@ func TestErrorReporting(t *testing.T) { for i := uint32(0); i < 2; i++ { _, err = e.Get(&GetPrm{addr: object.AddressOf(obj)}) require.Error(t, err) - checkShardState(t, e, id[0], errThreshold+i, shard.ModeReadOnly) + checkShardState(t, e, id[0], errThreshold+i, shard.ModeDegraded) checkShardState(t, e, id[1], 0, shard.ModeReadWrite) } @@ -185,7 +185,7 @@ func TestBlobstorFailback(t *testing.T) { require.True(t, errors.Is(err, object.ErrRangeOutOfBounds), "got: %v", err) } - checkShardState(t, e, id[0], 4, shard.ModeReadOnly) + checkShardState(t, e, id[0], 4, shard.ModeDegraded) checkShardState(t, e, id[1], 0, shard.ModeReadWrite) } diff --git a/pkg/local_object_storage/shard/delete.go b/pkg/local_object_storage/shard/delete.go index 1b4e33b99..76499d4b3 100644 --- a/pkg/local_object_storage/shard/delete.go +++ b/pkg/local_object_storage/shard/delete.go @@ -31,7 +31,7 @@ func (p *DeletePrm) WithAddresses(addr ...*addressSDK.Address) *DeletePrm { // Delete removes data from the shard's writeCache, metaBase and // blobStor. func (s *Shard) Delete(prm *DeletePrm) (*DeleteRes, error) { - if s.GetMode() == ModeReadOnly { + if s.GetMode() != ModeReadWrite { return nil, ErrReadOnlyMode } diff --git a/pkg/local_object_storage/shard/gc.go b/pkg/local_object_storage/shard/gc.go index 1e7dfce70..57b84b8a8 100644 --- a/pkg/local_object_storage/shard/gc.go +++ b/pkg/local_object_storage/shard/gc.go @@ -174,7 +174,7 @@ func (gc *gc) stop() { // with GC-marked graves. // Does nothing if shard is in "read-only" mode. func (s *Shard) removeGarbage() { - if s.GetMode() == ModeReadOnly { + if s.GetMode() != ModeReadWrite { return } diff --git a/pkg/local_object_storage/shard/inhume.go b/pkg/local_object_storage/shard/inhume.go index e38c564ae..2b4247cfc 100644 --- a/pkg/local_object_storage/shard/inhume.go +++ b/pkg/local_object_storage/shard/inhume.go @@ -51,7 +51,7 @@ func (p *InhumePrm) MarkAsGarbage(addr ...*addressSDK.Address) *InhumePrm { // // Returns ErrReadOnlyMode error if shard is in "read-only" mode. func (s *Shard) Inhume(prm *InhumePrm) (*InhumeRes, error) { - if s.GetMode() == ModeReadOnly { + if s.GetMode() != ModeReadWrite { return nil, ErrReadOnlyMode } diff --git a/pkg/local_object_storage/shard/lock.go b/pkg/local_object_storage/shard/lock.go index b80462110..916b38b3a 100644 --- a/pkg/local_object_storage/shard/lock.go +++ b/pkg/local_object_storage/shard/lock.go @@ -14,7 +14,7 @@ import ( // // Locked list should be unique. Panics if it is empty. func (s *Shard) Lock(idCnr cid.ID, locker oid.ID, locked []oid.ID) error { - if s.GetMode() == ModeReadOnly { + if s.GetMode() != ModeReadWrite { return ErrReadOnlyMode } diff --git a/pkg/local_object_storage/shard/mode.go b/pkg/local_object_storage/shard/mode.go index 9a459eec7..1a6e85bc4 100644 --- a/pkg/local_object_storage/shard/mode.go +++ b/pkg/local_object_storage/shard/mode.go @@ -21,6 +21,12 @@ const ( // ModeReadOnly is a Mode value for shard that does not // accept write operation but is readable. ModeReadOnly + + // ModeDegraded is a Mode value for shard that is set automatically + // after a certain number of errors is encountered. It is the same as + // `ModeReadOnly` but also enables fallback algorithms for getting object + // in case metabase is corrupted. + ModeDegraded ) func (m Mode) String() string { @@ -31,6 +37,8 @@ func (m Mode) String() string { return "READ_WRITE" case ModeReadOnly: return "READ_ONLY" + case ModeDegraded: + return "DEGRADED" } } @@ -46,6 +54,8 @@ func (s *Shard) SetMode(m Mode) error { switch m { case ModeReadOnly: s.writeCache.SetMode(writecache.ModeReadOnly) + case ModeDegraded: + s.writeCache.SetMode(writecache.ModeDegraded) case ModeReadWrite: s.writeCache.SetMode(writecache.ModeReadWrite) } diff --git a/pkg/local_object_storage/shard/move.go b/pkg/local_object_storage/shard/move.go index caf01f97b..826adbbf0 100644 --- a/pkg/local_object_storage/shard/move.go +++ b/pkg/local_object_storage/shard/move.go @@ -27,7 +27,7 @@ func (p *ToMoveItPrm) WithAddress(addr *addressSDK.Address) *ToMoveItPrm { // ToMoveIt calls metabase.ToMoveIt method to mark object as relocatable to // another shard. func (s *Shard) ToMoveIt(prm *ToMoveItPrm) (*ToMoveItRes, error) { - if s.GetMode() == ModeReadOnly { + if s.GetMode() != ModeReadWrite { return nil, ErrReadOnlyMode } diff --git a/pkg/local_object_storage/shard/put.go b/pkg/local_object_storage/shard/put.go index 4fab4616e..f7f50d7ce 100644 --- a/pkg/local_object_storage/shard/put.go +++ b/pkg/local_object_storage/shard/put.go @@ -33,7 +33,7 @@ func (p *PutPrm) WithObject(obj *object.Object) *PutPrm { // // Returns ErrReadOnlyMode error if shard is in "read-only" mode. func (s *Shard) Put(prm *PutPrm) (*PutRes, error) { - if s.GetMode() == ModeReadOnly { + if s.GetMode() != ModeReadWrite { return nil, ErrReadOnlyMode } diff --git a/pkg/local_object_storage/writecache/delete.go b/pkg/local_object_storage/writecache/delete.go index bfdeb46db..3a90f1b51 100644 --- a/pkg/local_object_storage/writecache/delete.go +++ b/pkg/local_object_storage/writecache/delete.go @@ -16,7 +16,7 @@ import ( func (c *cache) Delete(addr *addressSDK.Address) error { c.modeMtx.RLock() defer c.modeMtx.RUnlock() - if c.mode == ModeReadOnly { + if c.readOnly() { return ErrReadOnly } diff --git a/pkg/local_object_storage/writecache/flush.go b/pkg/local_object_storage/writecache/flush.go index d7f45c366..c66a13bdd 100644 --- a/pkg/local_object_storage/writecache/flush.go +++ b/pkg/local_object_storage/writecache/flush.go @@ -65,7 +65,7 @@ func (c *cache) flush() { sz := 0 c.modeMtx.RLock() - if c.mode == ModeReadOnly { + if c.readOnly() { c.modeMtx.RUnlock() time.Sleep(time.Second) continue @@ -127,7 +127,7 @@ func (c *cache) flushBigObjects() { select { case <-tick.C: c.modeMtx.RLock() - if c.mode == ModeReadOnly { + if c.readOnly() { c.modeMtx.RUnlock() break } diff --git a/pkg/local_object_storage/writecache/iterate.go b/pkg/local_object_storage/writecache/iterate.go index 596db0deb..3c4af31bc 100644 --- a/pkg/local_object_storage/writecache/iterate.go +++ b/pkg/local_object_storage/writecache/iterate.go @@ -36,7 +36,7 @@ func (p *IterationPrm) WithIgnoreErrors(ignore bool) *IterationPrm { func (c *cache) Iterate(prm *IterationPrm) error { c.modeMtx.RLock() defer c.modeMtx.RUnlock() - if c.mode != ModeReadOnly { + if !c.readOnly() { return nil } diff --git a/pkg/local_object_storage/writecache/mode.go b/pkg/local_object_storage/writecache/mode.go index fce239f5a..8abd32d1d 100644 --- a/pkg/local_object_storage/writecache/mode.go +++ b/pkg/local_object_storage/writecache/mode.go @@ -14,6 +14,9 @@ const ( // ModeReadOnly is a mode in which write-cache doesn't flush anything to a metabase. ModeReadOnly + + // ModeDegraded is similar to a shard's degraded mode. + ModeDegraded ) // ErrReadOnly is returned when Put/Write is performed in a read-only mode. @@ -50,3 +53,9 @@ func (c *cache) SetMode(m Mode) { time.Sleep(time.Second) } } + +// readOnly returns true if current mode is read-only. +// `c.modeMtx` must be taken. +func (c *cache) readOnly() bool { + return c.mode != ModeReadWrite +} diff --git a/pkg/local_object_storage/writecache/persist.go b/pkg/local_object_storage/writecache/persist.go index bf638c353..7dfc6c86a 100644 --- a/pkg/local_object_storage/writecache/persist.go +++ b/pkg/local_object_storage/writecache/persist.go @@ -21,7 +21,7 @@ func (c *cache) persistLoop() { select { case <-tick.C: c.modeMtx.RLock() - if c.mode == ModeReadOnly { + if c.readOnly() { c.modeMtx.RUnlock() continue } diff --git a/pkg/local_object_storage/writecache/put.go b/pkg/local_object_storage/writecache/put.go index 4c60b5b89..1f4e6b515 100644 --- a/pkg/local_object_storage/writecache/put.go +++ b/pkg/local_object_storage/writecache/put.go @@ -15,7 +15,7 @@ var ErrBigObject = errors.New("too big object") func (c *cache) Put(o *objectSDK.Object) error { c.modeMtx.RLock() defer c.modeMtx.RUnlock() - if c.mode == ModeReadOnly { + if c.readOnly() { return ErrReadOnly } diff --git a/pkg/services/control/server/list_shards.go b/pkg/services/control/server/list_shards.go index b02a45633..3cc89284b 100644 --- a/pkg/services/control/server/list_shards.go +++ b/pkg/services/control/server/list_shards.go @@ -40,6 +40,8 @@ func (s *Server) ListShards(_ context.Context, req *control.ListShardsRequest) ( mode = control.ShardMode_READ_WRITE case shard.ModeReadOnly: mode = control.ShardMode_READ_ONLY + case shard.ModeDegraded: + mode = control.ShardMode_DEGRADED default: mode = control.ShardMode_SHARD_MODE_UNDEFINED } diff --git a/pkg/services/control/server/set_shard_mode.go b/pkg/services/control/server/set_shard_mode.go index 01edcfdbd..9e3b7308c 100644 --- a/pkg/services/control/server/set_shard_mode.go +++ b/pkg/services/control/server/set_shard_mode.go @@ -29,6 +29,8 @@ func (s *Server) SetShardMode(_ context.Context, req *control.SetShardModeReques mode = shard.ModeReadWrite case control.ShardMode_READ_ONLY: mode = shard.ModeReadOnly + case control.ShardMode_DEGRADED: + mode = shard.ModeDegraded default: return nil, status.Error(codes.Internal, fmt.Sprintf("unknown shard mode: %s", requestedMode)) } diff --git a/pkg/services/control/types.pb.go b/pkg/services/control/types.pb.go index 3674b0c78..16a8b75b5 100644 --- a/pkg/services/control/types.pb.go +++ b/pkg/services/control/types.pb.go @@ -144,6 +144,8 @@ const ( ShardMode_READ_WRITE ShardMode = 1 // Read-only. ShardMode_READ_ONLY ShardMode = 2 + // Degraded. + ShardMode_DEGRADED ShardMode = 3 ) // Enum value maps for ShardMode. @@ -152,11 +154,13 @@ var ( 0: "SHARD_MODE_UNDEFINED", 1: "READ_WRITE", 2: "READ_ONLY", + 3: "DEGRADED", } ShardMode_value = map[string]int32{ "SHARD_MODE_UNDEFINED": 0, "READ_WRITE": 1, "READ_ONLY": 2, + "DEGRADED": 3, } ) @@ -636,16 +640,16 @@ var file_pkg_services_control_types_proto_rawDesc = []byte{ 0x41, 0x54, 0x55, 0x53, 0x5f, 0x55, 0x4e, 0x44, 0x45, 0x46, 0x49, 0x4e, 0x45, 0x44, 0x10, 0x00, 0x12, 0x0c, 0x0a, 0x08, 0x53, 0x54, 0x41, 0x52, 0x54, 0x49, 0x4e, 0x47, 0x10, 0x01, 0x12, 0x09, 0x0a, 0x05, 0x52, 0x45, 0x41, 0x44, 0x59, 0x10, 0x02, 0x12, 0x11, 0x0a, 0x0d, 0x53, 0x48, 0x55, - 0x54, 0x54, 0x49, 0x4e, 0x47, 0x5f, 0x44, 0x4f, 0x57, 0x4e, 0x10, 0x03, 0x2a, 0x44, 0x0a, 0x09, + 0x54, 0x54, 0x49, 0x4e, 0x47, 0x5f, 0x44, 0x4f, 0x57, 0x4e, 0x10, 0x03, 0x2a, 0x52, 0x0a, 0x09, 0x53, 0x68, 0x61, 0x72, 0x64, 0x4d, 0x6f, 0x64, 0x65, 0x12, 0x18, 0x0a, 0x14, 0x53, 0x48, 0x41, 0x52, 0x44, 0x5f, 0x4d, 0x4f, 0x44, 0x45, 0x5f, 0x55, 0x4e, 0x44, 0x45, 0x46, 0x49, 0x4e, 0x45, 0x44, 0x10, 0x00, 0x12, 0x0e, 0x0a, 0x0a, 0x52, 0x45, 0x41, 0x44, 0x5f, 0x57, 0x52, 0x49, 0x54, 0x45, 0x10, 0x01, 0x12, 0x0d, 0x0a, 0x09, 0x52, 0x45, 0x41, 0x44, 0x5f, 0x4f, 0x4e, 0x4c, 0x59, - 0x10, 0x02, 0x42, 0x36, 0x5a, 0x34, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, - 0x2f, 0x6e, 0x73, 0x70, 0x63, 0x63, 0x2d, 0x64, 0x65, 0x76, 0x2f, 0x6e, 0x65, 0x6f, 0x66, 0x73, - 0x2d, 0x6e, 0x6f, 0x64, 0x65, 0x2f, 0x70, 0x6b, 0x67, 0x2f, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, - 0x65, 0x73, 0x2f, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, - 0x6f, 0x33, + 0x10, 0x02, 0x12, 0x0c, 0x0a, 0x08, 0x44, 0x45, 0x47, 0x52, 0x41, 0x44, 0x45, 0x44, 0x10, 0x03, + 0x42, 0x36, 0x5a, 0x34, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x6e, + 0x73, 0x70, 0x63, 0x63, 0x2d, 0x64, 0x65, 0x76, 0x2f, 0x6e, 0x65, 0x6f, 0x66, 0x73, 0x2d, 0x6e, + 0x6f, 0x64, 0x65, 0x2f, 0x70, 0x6b, 0x67, 0x2f, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x73, + 0x2f, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( diff --git a/pkg/services/control/types.proto b/pkg/services/control/types.proto index bbc34c179..a786c7b12 100644 --- a/pkg/services/control/types.proto +++ b/pkg/services/control/types.proto @@ -151,4 +151,7 @@ enum ShardMode { // Read-only. READ_ONLY = 2; + + // Degraded. + DEGRADED = 3; }