From efc71d35db8c8897c2bfa80e3e09490582dea3c1 Mon Sep 17 00:00:00 2001 From: Alexander Chuprov Date: Mon, 13 May 2024 23:20:29 +0300 Subject: [PATCH] [#1121] metrics: Change mode of shard components Signed-off-by: Alexander Chuprov --- cmd/frostfs-node/config/engine/config_test.go | 4 +- .../engine/control_test.go | 2 +- pkg/local_object_storage/engine/writecache.go | 4 +- pkg/local_object_storage/metabase/control.go | 6 +-- pkg/local_object_storage/metabase/metrics.go | 4 +- pkg/local_object_storage/metabase/mode.go | 12 ++++- pkg/local_object_storage/metabase/shard_id.go | 2 +- pkg/local_object_storage/metrics/metabase.go | 2 +- pkg/local_object_storage/metrics/pilorama.go | 4 +- pkg/local_object_storage/pilorama/boltdb.go | 14 ++++- pkg/local_object_storage/pilorama/metrics.go | 4 +- pkg/local_object_storage/shard/control.go | 8 +-- .../shard/control_test.go | 8 +-- .../shard/metrics_test.go | 4 +- pkg/local_object_storage/shard/mode/mode.go | 52 ++++++++++++++----- .../writecache/cachebbolt.go | 2 +- pkg/local_object_storage/writecache/flush.go | 2 +- .../writecache/metrics.go | 4 +- pkg/local_object_storage/writecache/mode.go | 12 ++++- pkg/metrics/pilorama.go | 4 +- 20 files changed, 104 insertions(+), 50 deletions(-) diff --git a/cmd/frostfs-node/config/engine/config_test.go b/cmd/frostfs-node/config/engine/config_test.go index b5c926fc..b073b066 100644 --- a/cmd/frostfs-node/config/engine/config_test.go +++ b/cmd/frostfs-node/config/engine/config_test.go @@ -116,7 +116,7 @@ func TestEngineSection(t *testing.T) { require.Equal(t, 15, gc.ExpiredCollectorWorkerCount()) require.Equal(t, false, sc.RefillMetabase()) - require.Equal(t, mode.ReadOnly, sc.Mode()) + require.Equal(t, mode.Mode(mode.ReadOnly), sc.Mode()) require.Equal(t, 100, sc.RefillMetabaseWorkersCount()) case 1: require.Equal(t, "tmp/1/blob/pilorama.db", pl.Path()) @@ -168,7 +168,7 @@ func TestEngineSection(t *testing.T) { require.Equal(t, gcconfig.ExpiredCollectorWorkersCountDefault, gc.ExpiredCollectorWorkerCount()) require.Equal(t, true, sc.RefillMetabase()) - require.Equal(t, mode.ReadWrite, sc.Mode()) + require.Equal(t, mode.Mode(mode.ReadWrite), sc.Mode()) require.Equal(t, shardconfig.RefillMetabaseWorkersCountDefault, sc.RefillMetabaseWorkersCount()) } return nil diff --git a/pkg/local_object_storage/engine/control_test.go b/pkg/local_object_storage/engine/control_test.go index 6bf6beac..4f8f4a3d 100644 --- a/pkg/local_object_storage/engine/control_test.go +++ b/pkg/local_object_storage/engine/control_test.go @@ -140,7 +140,7 @@ func testEngineFailInitAndReload(t *testing.T, degradedMode bool, opts []shard.O require.NoError(t, err) if degradedMode { require.NoError(t, e.Init(context.Background())) - require.Equal(t, mode.DegradedReadOnly, e.DumpInfo().Shards[0].Mode) + require.Equal(t, mode.Mode(mode.DegradedReadOnly), e.DumpInfo().Shards[0].Mode) return } else { require.Error(t, e.Init(context.Background())) diff --git a/pkg/local_object_storage/engine/writecache.go b/pkg/local_object_storage/engine/writecache.go index 670cfcbf..02357671 100644 --- a/pkg/local_object_storage/engine/writecache.go +++ b/pkg/local_object_storage/engine/writecache.go @@ -171,8 +171,8 @@ func (m *writeCacheMetrics) SetEstimateSize(db, fstree uint64) { m.metrics.SetEstimateSize(m.shardID, m.path, writecache.StorageTypeFSTree.String(), fstree) } -func (m *writeCacheMetrics) SetMode(mode mode.Mode) { - m.metrics.SetMode(m.shardID, mode.String()) +func (m *writeCacheMetrics) SetMode(mod mode.ComponentMode) { + m.metrics.SetMode(m.shardID, mod.String()) } func (m *writeCacheMetrics) SetActualCounters(db, fstree uint64) { diff --git a/pkg/local_object_storage/metabase/control.go b/pkg/local_object_storage/metabase/control.go index cd53f0cd..cbefdeeb 100644 --- a/pkg/local_object_storage/metabase/control.go +++ b/pkg/local_object_storage/metabase/control.go @@ -42,7 +42,7 @@ func (db *DB) Open(_ context.Context, mode mode.Mode) error { db.modeMtx.Lock() defer db.modeMtx.Unlock() db.mode = mode - db.metrics.SetMode(mode) + db.metrics.SetMode(db.getComponentMode(mode)) if mode.NoMetabase() { return nil @@ -239,8 +239,8 @@ func (db *DB) Reload(opts ...Option) (bool, error) { return false, err } - db.mode = mode.Degraded - db.metrics.SetMode(mode.Degraded) + db.mode = mode.Disabled + db.metrics.SetMode(mode.Disabled) db.info.Path = c.info.Path if err := db.openBolt(); err != nil { return false, metaerr.Wrap(fmt.Errorf("%w: %v", ErrDegradedMode, err)) diff --git a/pkg/local_object_storage/metabase/metrics.go b/pkg/local_object_storage/metabase/metrics.go index fc971bd8..d673560c 100644 --- a/pkg/local_object_storage/metabase/metrics.go +++ b/pkg/local_object_storage/metabase/metrics.go @@ -9,7 +9,7 @@ import ( type Metrics interface { SetParentID(parentID string) - SetMode(m mode.Mode) + SetMode(m mode.ComponentMode) Close() AddMethodDuration(method string, d time.Duration, success bool) @@ -18,6 +18,6 @@ type Metrics interface { type noopMetrics struct{} func (m *noopMetrics) SetParentID(string) {} -func (m *noopMetrics) SetMode(mode.Mode) {} +func (m *noopMetrics) SetMode(mode.ComponentMode) {} func (m *noopMetrics) Close() {} func (m *noopMetrics) AddMethodDuration(string, time.Duration, bool) {} diff --git a/pkg/local_object_storage/metabase/mode.go b/pkg/local_object_storage/metabase/mode.go index b382e99c..6727f6b3 100644 --- a/pkg/local_object_storage/metabase/mode.go +++ b/pkg/local_object_storage/metabase/mode.go @@ -6,6 +6,16 @@ import ( "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/shard/mode" ) +func (db *DB) getComponentMode(m mode.Mode) mode.ComponentMode { + if m.NoMetabase() { + return mode.Disabled + } + if m.ReadOnly() { + return mode.ReadOnly + } + return mode.ReadWrite +} + // SetMode sets the metabase mode of operation. // If the mode assumes no operation metabase, the database is closed. func (db *DB) SetMode(m mode.Mode) error { @@ -35,6 +45,6 @@ func (db *DB) SetMode(m mode.Mode) error { } db.mode = m - db.metrics.SetMode(m) + db.metrics.SetMode(db.getComponentMode(m)) return nil } diff --git a/pkg/local_object_storage/metabase/shard_id.go b/pkg/local_object_storage/metabase/shard_id.go index 7ae336a6..291e9549 100644 --- a/pkg/local_object_storage/metabase/shard_id.go +++ b/pkg/local_object_storage/metabase/shard_id.go @@ -69,7 +69,7 @@ func (db *DB) SetShardID(id []byte, mode metamode.Mode) error { err := db.writeShardID(id) if err == nil { - db.metrics.SetMode(mode) + db.metrics.SetMode(db.getComponentMode(mode)) } if cErr := db.close(); cErr != nil { diff --git a/pkg/local_object_storage/metrics/metabase.go b/pkg/local_object_storage/metrics/metabase.go index d0fb3193..1a02f270 100644 --- a/pkg/local_object_storage/metrics/metabase.go +++ b/pkg/local_object_storage/metrics/metabase.go @@ -26,7 +26,7 @@ func (m *metabaseMetrics) SetParentID(parentID string) { m.shardID = parentID } -func (m *metabaseMetrics) SetMode(mode mode.Mode) { +func (m *metabaseMetrics) SetMode(mode mode.ComponentMode) { m.m.SetMode(m.shardID, m.path, mode.String()) } diff --git a/pkg/local_object_storage/metrics/pilorama.go b/pkg/local_object_storage/metrics/pilorama.go index 21f027a6..1b715c2b 100644 --- a/pkg/local_object_storage/metrics/pilorama.go +++ b/pkg/local_object_storage/metrics/pilorama.go @@ -24,8 +24,8 @@ func (m *piloramaMetrics) SetParentID(id string) { m.shardID = id } -func (m *piloramaMetrics) SetMode(mode mode.Mode) { - m.m.SetMode(m.shardID, mode) +func (m *piloramaMetrics) SetMode(mod mode.ComponentMode) { + m.m.SetMode(m.shardID, mod) } func (m *piloramaMetrics) Close() { diff --git a/pkg/local_object_storage/pilorama/boltdb.go b/pkg/local_object_storage/pilorama/boltdb.go index 29a9306b..2fd7c7db 100644 --- a/pkg/local_object_storage/pilorama/boltdb.go +++ b/pkg/local_object_storage/pilorama/boltdb.go @@ -109,7 +109,7 @@ func (t *boltForest) SetMode(m mode.Mode) error { } t.mode = m - t.metrics.SetMode(m) + t.metrics.SetMode(t.getComponentMode(m)) return nil } @@ -123,6 +123,16 @@ func (t *boltForest) Open(_ context.Context, mode mode.Mode) error { return t.openBolt(mode) } +func (t *boltForest) getComponentMode(m mode.Mode) mode.ComponentMode { + if m.NoMetabase() { + return mode.Disabled + } + if m.ReadOnly() { + return mode.ReadOnly + } + return mode.ReadWrite +} + func (t *boltForest) openBolt(mode mode.Mode) error { readOnly := mode.ReadOnly() err := util.MkdirAllX(filepath.Dir(t.path), t.perm) @@ -143,7 +153,7 @@ func (t *boltForest) openBolt(mode mode.Mode) error { t.db.MaxBatchSize = t.maxBatchSize t.db.MaxBatchDelay = t.maxBatchDelay - t.metrics.SetMode(mode) + t.metrics.SetMode(t.getComponentMode(mode)) return nil } diff --git a/pkg/local_object_storage/pilorama/metrics.go b/pkg/local_object_storage/pilorama/metrics.go index 543ad3e3..6ffc479e 100644 --- a/pkg/local_object_storage/pilorama/metrics.go +++ b/pkg/local_object_storage/pilorama/metrics.go @@ -9,7 +9,7 @@ import ( type Metrics interface { SetParentID(id string) - SetMode(m mode.Mode) + SetMode(m mode.ComponentMode) Close() AddMethodDuration(method string, d time.Duration, success bool) @@ -18,6 +18,6 @@ type Metrics interface { type noopMetrics struct{} func (m *noopMetrics) SetParentID(string) {} -func (m *noopMetrics) SetMode(mode.Mode) {} +func (m *noopMetrics) SetMode(mode.ComponentMode) {} func (m *noopMetrics) Close() {} func (m *noopMetrics) AddMethodDuration(string, time.Duration, bool) {} diff --git a/pkg/local_object_storage/shard/control.go b/pkg/local_object_storage/shard/control.go index 6712822a..a5f938bb 100644 --- a/pkg/local_object_storage/shard/control.go +++ b/pkg/local_object_storage/shard/control.go @@ -22,22 +22,22 @@ import ( func (s *Shard) handleMetabaseFailure(stage string, err error) error { s.log.Error(logs.ShardMetabaseFailureSwitchingMode, zap.String("stage", stage), - zap.Stringer("mode", mode.ReadOnly), + zap.Stringer("mode", mode.Mode(mode.ReadOnly)), zap.Error(err)) - err = s.SetMode(mode.ReadOnly) + err = s.SetMode(mode.Mode(mode.ReadOnly)) if err == nil { return nil } s.log.Error(logs.ShardCantMoveShardToReadonlySwitchMode, zap.String("stage", stage), - zap.Stringer("mode", mode.DegradedReadOnly), + zap.Stringer("mode", mode.Mode(mode.DegradedReadOnly)), zap.Error(err)) err = s.SetMode(mode.DegradedReadOnly) if err != nil { - return fmt.Errorf("could not switch to mode %s", mode.DegradedReadOnly) + return fmt.Errorf("could not switch to mode %s", mode.Mode(mode.DegradedReadOnly)) } return nil } diff --git a/pkg/local_object_storage/shard/control_test.go b/pkg/local_object_storage/shard/control_test.go index 44fee163..6c8a9b2c 100644 --- a/pkg/local_object_storage/shard/control_test.go +++ b/pkg/local_object_storage/shard/control_test.go @@ -85,7 +85,7 @@ func TestShardOpen(t *testing.T) { sh := newShard() require.NoError(t, sh.Open(context.Background())) require.NoError(t, sh.Init(context.Background())) - require.Equal(t, mode.ReadWrite, sh.GetMode()) + require.Equal(t, mode.Mode(mode.ReadWrite), sh.GetMode()) require.NoError(t, sh.Close()) // Metabase can be opened in read-only => start in ReadOnly mode. @@ -94,9 +94,9 @@ func TestShardOpen(t *testing.T) { sh = newShard() require.NoError(t, sh.Open(context.Background())) require.NoError(t, sh.Init(context.Background())) - require.Equal(t, mode.ReadOnly, sh.GetMode()) + require.Equal(t, mode.Mode(mode.ReadOnly), sh.GetMode()) require.Error(t, sh.SetMode(mode.ReadWrite)) - require.Equal(t, mode.ReadOnly, sh.GetMode()) + require.Equal(t, mode.Mode(mode.ReadOnly), sh.GetMode()) require.NoError(t, sh.Close()) // Metabase is corrupted => start in DegradedReadOnly mode. @@ -105,7 +105,7 @@ func TestShardOpen(t *testing.T) { sh = newShard() require.NoError(t, sh.Open(context.Background())) require.NoError(t, sh.Init(context.Background())) - require.Equal(t, mode.DegradedReadOnly, sh.GetMode()) + require.Equal(t, mode.Mode(mode.DegradedReadOnly), sh.GetMode()) require.NoError(t, sh.Close()) } diff --git a/pkg/local_object_storage/shard/metrics_test.go b/pkg/local_object_storage/shard/metrics_test.go index 38d465f3..f7c61e75 100644 --- a/pkg/local_object_storage/shard/metrics_test.go +++ b/pkg/local_object_storage/shard/metrics_test.go @@ -200,9 +200,9 @@ func TestCounters(t *testing.T) { defer func() { require.NoError(t, sh.Close()) }() sh.SetMode(mode.ReadOnly) - require.Equal(t, mode.ReadOnly, mm.mode) + require.Equal(t, mode.Mode(mode.ReadOnly), mm.mode) sh.SetMode(mode.ReadWrite) - require.Equal(t, mode.ReadWrite, mm.mode) + require.Equal(t, mode.Mode(mode.ReadWrite), mm.mode) const objNumber = 10 oo := make([]*objectSDK.Object, objNumber) diff --git a/pkg/local_object_storage/shard/mode/mode.go b/pkg/local_object_storage/shard/mode/mode.go index 49c888d6..c9313125 100644 --- a/pkg/local_object_storage/shard/mode/mode.go +++ b/pkg/local_object_storage/shard/mode/mode.go @@ -5,15 +5,17 @@ import "math" // Mode represents enumeration of Shard work modes. type Mode uint32 +// ComponentMode represents the enumeration of Shard component work modes. +type ComponentMode uint32 + const ( // ReadWrite is a Mode value for shard that is available // for read and write operations. Default shard mode. - ReadWrite Mode = 0 + ReadWrite = 0b000 - // DegradedReadOnly is a Mode value for shard that is set automatically - // after a certain number of errors is encountered. It is the same as - // `mode.Degraded` but also is read-only. - DegradedReadOnly = Degraded | ReadOnly + // ReadOnly is a Mode value for shard that does not + // accept write operation but is readable. + ReadOnly = 0b001 // New iota cycle specific to Mode starts here // Disabled mode is a mode where a shard is disabled. // An existing shard can't have this mode, but it can be used in @@ -22,16 +24,34 @@ const ( ) const ( - // ReadOnly is a Mode value for shard that does not - // accept write operation but is readable. - ReadOnly Mode = 1 << iota + // DegradedReadOnly is a Mode value for shard that is set automatically + // after a certain number of errors is encountered. It is the same as + // `mode.Degraded` but also is read-only. + DegradedReadOnly = Degraded | ReadOnly // Degraded is a Mode value for shard when the metabase is unavailable. // It is hard to perform some modifying operations in this mode, thus it can only be set by an administrator. - Degraded + Degraded = 0b010 ) func (m Mode) String() string { + switch m { + default: + return "UNDEFINED" + case Mode(ReadWrite): + return "READ_WRITE" + case Mode(ReadOnly): + return "READ_ONLY" + case Degraded: + return "DEGRADED_READ_WRITE" + case DegradedReadOnly: + return "DEGRADED_READ_ONLY" + case Mode(Disabled): + return "DISABLED" + } +} + +func (m ComponentMode) String() string { switch m { default: return "UNDEFINED" @@ -39,12 +59,8 @@ func (m Mode) String() string { return "READ_WRITE" case ReadOnly: return "READ_ONLY" - case Degraded: - return "DEGRADED_READ_WRITE" - case DegradedReadOnly: - return "DEGRADED_READ_ONLY" case Disabled: - return "DISABLED" + return "CLOSED" } } @@ -54,10 +70,18 @@ func (m Mode) NoMetabase() bool { } // ReadOnly returns true iff m prohibits modifying operations with shard. +func (m ComponentMode) ReadOnly() bool { + return m&ReadOnly != 0 +} + func (m Mode) ReadOnly() bool { return m&ReadOnly != 0 } func (m Mode) Disabled() bool { + return m == math.MaxUint32 +} + +func (m ComponentMode) Disabled() bool { return m == Disabled } diff --git a/pkg/local_object_storage/writecache/cachebbolt.go b/pkg/local_object_storage/writecache/cachebbolt.go index fdba8d40..8d8453fb 100644 --- a/pkg/local_object_storage/writecache/cachebbolt.go +++ b/pkg/local_object_storage/writecache/cachebbolt.go @@ -112,7 +112,7 @@ func (c *cache) Open(_ context.Context, mode mode.Mode) error { // Init runs necessary services. func (c *cache) Init() error { - c.metrics.SetMode(c.mode) + c.metrics.SetMode(c.getComponentMode(c.mode)) ctx, cancel := context.WithCancel(context.Background()) c.cancel = cancel c.runFlushLoop(ctx) diff --git a/pkg/local_object_storage/writecache/flush.go b/pkg/local_object_storage/writecache/flush.go index c3e19c4d..a9248fb7 100644 --- a/pkg/local_object_storage/writecache/flush.go +++ b/pkg/local_object_storage/writecache/flush.go @@ -294,7 +294,7 @@ func (c *cache) Flush(ctx context.Context, ignoreErrors, seal bool) error { if err := c.setMode(ctx, m, ignoreErrors); err != nil { return err } - c.metrics.SetMode(m) + c.metrics.SetMode(c.getComponentMode(m)) } return nil } diff --git a/pkg/local_object_storage/writecache/metrics.go b/pkg/local_object_storage/writecache/metrics.go index 962e22ee..e68b6d8b 100644 --- a/pkg/local_object_storage/writecache/metrics.go +++ b/pkg/local_object_storage/writecache/metrics.go @@ -27,7 +27,7 @@ type Metrics interface { Evict(st StorageType) SetEstimateSize(db, fstree uint64) - SetMode(m mode.Mode) + SetMode(m mode.ComponentMode) SetActualCounters(db, fstree uint64) SetPath(path string) Close() @@ -49,7 +49,7 @@ func (metricsStub) Put(time.Duration, bool, StorageType) {} func (metricsStub) SetEstimateSize(uint64, uint64) {} -func (metricsStub) SetMode(mode.Mode) {} +func (metricsStub) SetMode(mode.ComponentMode) {} func (metricsStub) SetActualCounters(uint64, uint64) {} diff --git a/pkg/local_object_storage/writecache/mode.go b/pkg/local_object_storage/writecache/mode.go index a10e593a..7512b44c 100644 --- a/pkg/local_object_storage/writecache/mode.go +++ b/pkg/local_object_storage/writecache/mode.go @@ -12,6 +12,16 @@ import ( "go.opentelemetry.io/otel/trace" ) +func (c *cache) getComponentMode(m mode.Mode) mode.ComponentMode { + if m.NoMetabase() { + return mode.Disabled + } + if m.ReadOnly() { + return mode.ReadOnly + } + return mode.ReadWrite +} + // SetMode sets write-cache mode of operation. // When shard is put in read-only mode all objects in memory are flushed to disk // and all background jobs are suspended. @@ -27,7 +37,7 @@ func (c *cache) SetMode(m mode.Mode) error { err := c.setMode(ctx, m, true) if err == nil { - c.metrics.SetMode(m) + c.metrics.SetMode(c.getComponentMode(m)) } return err } diff --git a/pkg/metrics/pilorama.go b/pkg/metrics/pilorama.go index 41672a4b..c669275f 100644 --- a/pkg/metrics/pilorama.go +++ b/pkg/metrics/pilorama.go @@ -10,7 +10,7 @@ import ( ) type PiloramaMetrics interface { - SetMode(shardID string, m mode.Mode) + SetMode(shardID string, m mode.ComponentMode) Close(shardID string) AddMethodDuration(shardID string, method string, d time.Duration, success bool) @@ -33,7 +33,7 @@ type piloramaMetrics struct { reqDuration *prometheus.HistogramVec } -func (m *piloramaMetrics) SetMode(shardID string, mode mode.Mode) { +func (m *piloramaMetrics) SetMode(shardID string, mode mode.ComponentMode) { m.mode.SetMode(shardID, mode.String()) }