From 263c6fdc50693e5df0b453c593e2a9a7808a7909 Mon Sep 17 00:00:00 2001 From: Anton Nikiforov Date: Thu, 1 Jun 2023 17:28:04 +0300 Subject: [PATCH] [#372] node: Add metrics for the error counter in the engine Signed-off-by: Anton Nikiforov --- pkg/local_object_storage/engine/engine.go | 2 ++ pkg/local_object_storage/engine/metrics.go | 3 +++ pkg/local_object_storage/engine/shards.go | 15 ++++++++++++ .../shard/metrics_test.go | 13 ++++++++++ pkg/local_object_storage/shard/shard.go | 24 +++++++++++++++++++ pkg/metrics/engine.go | 14 +++++++++++ 6 files changed, 71 insertions(+) diff --git a/pkg/local_object_storage/engine/engine.go b/pkg/local_object_storage/engine/engine.go index 21e863005..7c36811e7 100644 --- a/pkg/local_object_storage/engine/engine.go +++ b/pkg/local_object_storage/engine/engine.go @@ -132,6 +132,7 @@ func (e *StorageEngine) reportShardErrorBackground(id string, msg string, err er } errCount := sh.errorCount.Add(1) + sh.Shard.IncErrorCounter() e.reportShardErrorWithFlags(sh.Shard, errCount, false, msg, err) } @@ -150,6 +151,7 @@ func (e *StorageEngine) reportShardError( } errCount := sh.errorCount.Add(1) + sh.Shard.IncErrorCounter() e.reportShardErrorWithFlags(sh.Shard, errCount, true, msg, err, fields...) 
} diff --git a/pkg/local_object_storage/engine/metrics.go b/pkg/local_object_storage/engine/metrics.go index 1be888eae..f9e9191cd 100644 --- a/pkg/local_object_storage/engine/metrics.go +++ b/pkg/local_object_storage/engine/metrics.go @@ -26,6 +26,9 @@ type MetricRegister interface { AddToContainerSize(cnrID string, size int64) AddToPayloadCounter(shardID string, size int64) + IncErrorCounter(shardID string) + ClearErrorCounter(shardID string) + DeleteErrorCounter(shardID string) WriteCache() metrics.WriteCacheMetrics GC() metrics.GCMetrics diff --git a/pkg/local_object_storage/engine/shards.go b/pkg/local_object_storage/engine/shards.go index 07d22d3fe..c4c356a75 100644 --- a/pkg/local_object_storage/engine/shards.go +++ b/pkg/local_object_storage/engine/shards.go @@ -62,6 +62,18 @@ func (m *metricsWithID) AddToPayloadSize(size int64) { m.mw.AddToPayloadCounter(m.id, size) } +func (m *metricsWithID) IncErrorCounter() { + m.mw.IncErrorCounter(m.id) +} + +func (m *metricsWithID) ClearErrorCounter() { + m.mw.ClearErrorCounter(m.id) +} + +func (m *metricsWithID) DeleteErrorCounter() { + m.mw.DeleteErrorCounter(m.id) +} + // AddShard adds a new shard to the storage engine. // // Returns any error encountered that did not allow adding a shard. 
@@ -174,6 +186,8 @@ func (e *StorageEngine) removeShards(ids ...string) { continue } + sh.DeleteErrorCounter() + ss = append(ss, sh) delete(e.shards, id) @@ -281,6 +295,7 @@ func (e *StorageEngine) SetShardMode(id *shard.ID, m mode.Mode, resetErrorCounte if id.String() == shID { if resetErrorCounter { sh.errorCount.Store(0) + sh.Shard.ClearErrorCounter() } return sh.SetMode(m) } diff --git a/pkg/local_object_storage/shard/metrics_test.go b/pkg/local_object_storage/shard/metrics_test.go index 16f6989c4..f1581b6d4 100644 --- a/pkg/local_object_storage/shard/metrics_test.go +++ b/pkg/local_object_storage/shard/metrics_test.go @@ -23,6 +23,7 @@ type metricsStore struct { cnrSize map[string]int64 pldSize int64 readOnly bool + errCounter int64 } func (m metricsStore) SetShardID(_ string) {} @@ -68,6 +69,18 @@ func (m *metricsStore) AddToPayloadSize(size int64) { m.pldSize += size } +func (m *metricsStore) IncErrorCounter() { + m.errCounter += 1 +} + +func (m *metricsStore) ClearErrorCounter() { + m.errCounter = 0 +} + +func (m *metricsStore) DeleteErrorCounter() { + m.errCounter = 0 +} + const physical = "phy" const logical = "logic" diff --git a/pkg/local_object_storage/shard/shard.go b/pkg/local_object_storage/shard/shard.go index 2123bca1f..b740fc572 100644 --- a/pkg/local_object_storage/shard/shard.go +++ b/pkg/local_object_storage/shard/shard.go @@ -75,6 +75,12 @@ type MetricsWriter interface { SetShardID(id string) // SetReadonly must set shard readonly state. SetReadonly(readonly bool) + // IncErrorCounter increments the error counter. + IncErrorCounter() + // ClearErrorCounter clears the error counter. + ClearErrorCounter() + // DeleteErrorCounter deletes the error counter. 
+ DeleteErrorCounter() } type cfg struct { @@ -428,3 +434,21 @@ func (s *Shard) addToPayloadSize(size int64) { s.cfg.metricsWriter.AddToPayloadSize(size) } } + +func (s *Shard) IncErrorCounter() { + if s.cfg.metricsWriter != nil { + s.cfg.metricsWriter.IncErrorCounter() + } +} + +func (s *Shard) ClearErrorCounter() { + if s.cfg.metricsWriter != nil { + s.cfg.metricsWriter.ClearErrorCounter() + } +} + +func (s *Shard) DeleteErrorCounter() { + if s.cfg.metricsWriter != nil { + s.cfg.metricsWriter.DeleteErrorCounter() + } +} diff --git a/pkg/metrics/engine.go b/pkg/metrics/engine.go index 4e78f4ac2..7992da9f8 100644 --- a/pkg/metrics/engine.go +++ b/pkg/metrics/engine.go @@ -24,6 +24,7 @@ type ( listObjectsDuration prometheus.Counter containerSize *prometheus.GaugeVec payloadSize *prometheus.GaugeVec + errorCounter *prometheus.GaugeVec } ) @@ -44,6 +45,7 @@ func newEngineMetrics() engineMetrics { listObjectsDuration: newEngineMethodDurationCounter("list_objects"), containerSize: newEngineGaugeVector("container_size", "Accumulated size of all objects in a container", []string{containerIDLabelKey}), payloadSize: newEngineGaugeVector("payload_size", "Accumulated size of all objects in a shard", []string{shardIDLabelKey}), + errorCounter: newEngineGaugeVector("error_counter", "Shard's error counter", []string{shardIDLabelKey}), } } @@ -123,3 +125,15 @@ func (m engineMetrics) AddToContainerSize(cnrID string, size int64) { func (m engineMetrics) AddToPayloadCounter(shardID string, size int64) { m.payloadSize.With(prometheus.Labels{shardIDLabelKey: shardID}).Add(float64(size)) } + +func (m engineMetrics) IncErrorCounter(shardID string) { + m.errorCounter.With(prometheus.Labels{shardIDLabelKey: shardID}).Inc() +} + +func (m engineMetrics) ClearErrorCounter(shardID string) { + m.errorCounter.With(prometheus.Labels{shardIDLabelKey: shardID}).Set(0) +} + +func (m engineMetrics) DeleteErrorCounter(shardID string) { + m.errorCounter.Delete(prometheus.Labels{shardIDLabelKey: 
shardID}) +}