node: Add metrics for the error counter in the engine #418

Merged
realloc merged 1 commit from acid-ant/frostfs-node:feature/372-add-err-counter-shard into master 2023-06-07 13:04:48 +00:00
6 changed files with 71 additions and 0 deletions

View file

@ -132,6 +132,7 @@ func (e *StorageEngine) reportShardErrorBackground(id string, msg string, err er
} }
errCount := sh.errorCount.Add(1) errCount := sh.errorCount.Add(1)
sh.Shard.IncErrorCounter()
e.reportShardErrorWithFlags(sh.Shard, errCount, false, msg, err) e.reportShardErrorWithFlags(sh.Shard, errCount, false, msg, err)
} }
@ -150,6 +151,7 @@ func (e *StorageEngine) reportShardError(
} }
errCount := sh.errorCount.Add(1) errCount := sh.errorCount.Add(1)
sh.Shard.IncErrorCounter()
e.reportShardErrorWithFlags(sh.Shard, errCount, true, msg, err, fields...) e.reportShardErrorWithFlags(sh.Shard, errCount, true, msg, err, fields...)
} }

View file

@ -26,6 +26,9 @@ type MetricRegister interface {
AddToContainerSize(cnrID string, size int64) AddToContainerSize(cnrID string, size int64)
AddToPayloadCounter(shardID string, size int64) AddToPayloadCounter(shardID string, size int64)
IncErrorCounter(shardID string)
ClearErrorCounter(shardID string)
DeleteErrorCounter(shardID string)
WriteCache() metrics.WriteCacheMetrics WriteCache() metrics.WriteCacheMetrics
GC() metrics.GCMetrics GC() metrics.GCMetrics

View file

@ -62,6 +62,18 @@ func (m *metricsWithID) AddToPayloadSize(size int64) {
m.mw.AddToPayloadCounter(m.id, size) m.mw.AddToPayloadCounter(m.id, size)
} }
// IncErrorCounter forwards an error-counter increment to the wrapped
// metrics register, passing along the id stored in this wrapper.
func (m *metricsWithID) IncErrorCounter() {
	id := m.id
	m.mw.IncErrorCounter(id)
}
// ClearErrorCounter asks the wrapped metrics register to clear the error
// counter identified by the id stored in this wrapper.
func (m *metricsWithID) ClearErrorCounter() {
	id := m.id
	m.mw.ClearErrorCounter(id)
}
// DeleteErrorCounter asks the wrapped metrics register to delete the error
// counter identified by the id stored in this wrapper.
func (m *metricsWithID) DeleteErrorCounter() {
	id := m.id
	m.mw.DeleteErrorCounter(id)
}
// AddShard adds a new shard to the storage engine. // AddShard adds a new shard to the storage engine.
// //
// Returns any error encountered that did not allow adding a shard. // Returns any error encountered that did not allow adding a shard.
@ -174,6 +186,8 @@ func (e *StorageEngine) removeShards(ids ...string) {
continue continue
} }
sh.DeleteErrorCounter()
ss = append(ss, sh) ss = append(ss, sh)
delete(e.shards, id) delete(e.shards, id)
@ -281,6 +295,7 @@ func (e *StorageEngine) SetShardMode(id *shard.ID, m mode.Mode, resetErrorCounte
if id.String() == shID { if id.String() == shID {
if resetErrorCounter { if resetErrorCounter {
sh.errorCount.Store(0) sh.errorCount.Store(0)
sh.Shard.ClearErrorCounter()
} }
return sh.SetMode(m) return sh.SetMode(m)
} }

View file

@ -23,6 +23,7 @@ type metricsStore struct {
cnrSize map[string]int64 cnrSize map[string]int64
pldSize int64 pldSize int64
readOnly bool readOnly bool
errCounter int64
} }
func (m metricsStore) SetShardID(_ string) {} func (m metricsStore) SetShardID(_ string) {}
@ -68,6 +69,18 @@ func (m *metricsStore) AddToPayloadSize(size int64) {
m.pldSize += size m.pldSize += size
} }
// IncErrorCounter bumps the stored error counter by one.
func (m *metricsStore) IncErrorCounter() {
	m.errCounter++
}
// ClearErrorCounter resets the stored error counter back to zero.
func (m *metricsStore) ClearErrorCounter() {
	m.errCounter = 0
}
// DeleteErrorCounter drops the stored error counter; for this in-memory
// store that means setting it to zero.
func (m *metricsStore) DeleteErrorCounter() {
	m.errCounter = 0
}
const physical = "phy" const physical = "phy"
const logical = "logic" const logical = "logic"

View file

@ -75,6 +75,12 @@ type MetricsWriter interface {
SetShardID(id string) SetShardID(id string)
// SetReadonly must set shard readonly state. // SetReadonly must set shard readonly state.
SetReadonly(readonly bool) SetReadonly(readonly bool)
// IncErrorCounter increments the error counter.
IncErrorCounter()
// ClearErrorCounter clears the error counter.
ClearErrorCounter()
// DeleteErrorCounter deletes the error counter.
DeleteErrorCounter()
} }
type cfg struct { type cfg struct {
@ -428,3 +434,21 @@ func (s *Shard) addToPayloadSize(size int64) {
s.cfg.metricsWriter.AddToPayloadSize(size) s.cfg.metricsWriter.AddToPayloadSize(size)
} }
} }
// IncErrorCounter reports one more error to the shard's metrics writer.
// It does nothing when no metrics writer is configured.
func (s *Shard) IncErrorCounter() {
	mw := s.cfg.metricsWriter
	if mw == nil {
		return
	}
	mw.IncErrorCounter()
}
// ClearErrorCounter tells the shard's metrics writer to clear its error
// counter. It does nothing when no metrics writer is configured.
func (s *Shard) ClearErrorCounter() {
	mw := s.cfg.metricsWriter
	if mw == nil {
		return
	}
	mw.ClearErrorCounter()
}
// DeleteErrorCounter tells the shard's metrics writer to delete its error
// counter. It does nothing when no metrics writer is configured.
func (s *Shard) DeleteErrorCounter() {
	mw := s.cfg.metricsWriter
	if mw == nil {
		return
	}
	mw.DeleteErrorCounter()
}

View file

@ -24,6 +24,7 @@ type (
listObjectsDuration prometheus.Counter listObjectsDuration prometheus.Counter
containerSize *prometheus.GaugeVec containerSize *prometheus.GaugeVec
payloadSize *prometheus.GaugeVec payloadSize *prometheus.GaugeVec
errorCounter *prometheus.GaugeVec
} }
) )
@ -44,6 +45,7 @@ func newEngineMetrics() engineMetrics {
listObjectsDuration: newEngineMethodDurationCounter("list_objects"), listObjectsDuration: newEngineMethodDurationCounter("list_objects"),
containerSize: newEngineGaugeVector("container_size", "Accumulated size of all objects in a container", []string{containerIDLabelKey}), containerSize: newEngineGaugeVector("container_size", "Accumulated size of all objects in a container", []string{containerIDLabelKey}),
payloadSize: newEngineGaugeVector("payload_size", "Accumulated size of all objects in a shard", []string{shardIDLabelKey}), payloadSize: newEngineGaugeVector("payload_size", "Accumulated size of all objects in a shard", []string{shardIDLabelKey}),
errorCounter: newEngineGaugeVector("error_counter", "Shard's error counter", []string{shardIDLabelKey}),
} }
} }
@ -123,3 +125,15 @@ func (m engineMetrics) AddToContainerSize(cnrID string, size int64) {
func (m engineMetrics) AddToPayloadCounter(shardID string, size int64) { func (m engineMetrics) AddToPayloadCounter(shardID string, size int64) {
m.payloadSize.With(prometheus.Labels{shardIDLabelKey: shardID}).Add(float64(size)) m.payloadSize.With(prometheus.Labels{shardIDLabelKey: shardID}).Add(float64(size))
} }
// IncErrorCounter adds one to the error gauge labelled with shardID.
func (m engineMetrics) IncErrorCounter(shardID string) {
	labels := prometheus.Labels{shardIDLabelKey: shardID}
	m.errorCounter.With(labels).Inc()
}
// ClearErrorCounter sets the error gauge labelled with shardID to zero.
func (m engineMetrics) ClearErrorCounter(shardID string) {
	labels := prometheus.Labels{shardIDLabelKey: shardID}
	m.errorCounter.With(labels).Set(0)
}
// DeleteErrorCounter removes the error gauge series labelled with shardID
// from the vector.
func (m engineMetrics) DeleteErrorCounter(shardID string) {
	labels := prometheus.Labels{shardIDLabelKey: shardID}
	m.errorCounter.Delete(labels)
}