node: Add metrics for the error counter in the engine #418

Merged
realloc merged 1 commit from acid-ant/frostfs-node:feature/372-add-err-counter-shard into master 2023-06-07 13:04:48 +00:00
6 changed files with 71 additions and 0 deletions

View file

@ -132,6 +132,7 @@ func (e *StorageEngine) reportShardErrorBackground(id string, msg string, err er
}
errCount := sh.errorCount.Add(1)
sh.Shard.IncErrorCounter()
e.reportShardErrorWithFlags(sh.Shard, errCount, false, msg, err)
}
@ -150,6 +151,7 @@ func (e *StorageEngine) reportShardError(
}
errCount := sh.errorCount.Add(1)
sh.Shard.IncErrorCounter()
e.reportShardErrorWithFlags(sh.Shard, errCount, true, msg, err, fields...)
}

View file

@ -26,6 +26,9 @@ type MetricRegister interface {
AddToContainerSize(cnrID string, size int64)
AddToPayloadCounter(shardID string, size int64)
IncErrorCounter(shardID string)
ClearErrorCounter(shardID string)
DeleteErrorCounter(shardID string)
WriteCache() metrics.WriteCacheMetrics
GC() metrics.GCMetrics

View file

@ -62,6 +62,18 @@ func (m *metricsWithID) AddToPayloadSize(size int64) {
m.mw.AddToPayloadCounter(m.id, size)
}
// IncErrorCounter forwards an error-counter increment to the wrapped metric
// register, passing along the ID stored in this wrapper.
func (m *metricsWithID) IncErrorCounter() {
	shardID := m.id
	m.mw.IncErrorCounter(shardID)
}
// ClearErrorCounter asks the wrapped metric register to clear the error
// counter for the ID stored in this wrapper.
func (m *metricsWithID) ClearErrorCounter() {
	shardID := m.id
	m.mw.ClearErrorCounter(shardID)
}
// DeleteErrorCounter asks the wrapped metric register to delete the error
// counter for the ID stored in this wrapper.
func (m *metricsWithID) DeleteErrorCounter() {
	shardID := m.id
	m.mw.DeleteErrorCounter(shardID)
}
// AddShard adds a new shard to the storage engine.
//
// Returns any error encountered that did not allow adding a shard.
@ -174,6 +186,8 @@ func (e *StorageEngine) removeShards(ids ...string) {
continue
}
sh.DeleteErrorCounter()
ss = append(ss, sh)
delete(e.shards, id)
@ -281,6 +295,7 @@ func (e *StorageEngine) SetShardMode(id *shard.ID, m mode.Mode, resetErrorCounte
if id.String() == shID {
if resetErrorCounter {
sh.errorCount.Store(0)
sh.Shard.ClearErrorCounter()
}
return sh.SetMode(m)
}

View file

@ -23,6 +23,7 @@ type metricsStore struct {
cnrSize map[string]int64
pldSize int64
readOnly bool
errCounter int64
}
// SetShardID is a no-op: this store does not record the shard ID it is given.
func (m metricsStore) SetShardID(_ string) {
	// Intentionally empty.
}
@ -68,6 +69,18 @@ func (m *metricsStore) AddToPayloadSize(size int64) {
m.pldSize += size
}
// IncErrorCounter adds one to the stored error counter.
func (m *metricsStore) IncErrorCounter() {
	m.errCounter++
}
// ClearErrorCounter resets the stored error counter to zero.
func (m *metricsStore) ClearErrorCounter() {
m.errCounter = 0
}
// DeleteErrorCounter sets the stored error counter to zero; for this type the
// effect is the same as ClearErrorCounter.
func (m *metricsStore) DeleteErrorCounter() {
m.errCounter = 0
}
// Short string tags for the two kinds named by these constants.
const (
	physical = "phy"
	logical  = "logic"
)

View file

@ -75,6 +75,12 @@ type MetricsWriter interface {
SetShardID(id string)
// SetReadonly must set shard readonly state.
SetReadonly(readonly bool)
// IncErrorCounter increments the error counter.
IncErrorCounter()
// ClearErrorCounter clears the error counter.
ClearErrorCounter()
// DeleteErrorCounter deletes the error counter.
DeleteErrorCounter()
}
type cfg struct {
@ -428,3 +434,21 @@ func (s *Shard) addToPayloadSize(size int64) {
s.cfg.metricsWriter.AddToPayloadSize(size)
}
}
// IncErrorCounter increments the error counter through the shard's configured
// metrics writer. It does nothing when no metrics writer is set.
func (s *Shard) IncErrorCounter() {
	mw := s.cfg.metricsWriter
	if mw == nil {
		return
	}
	mw.IncErrorCounter()
}
// ClearErrorCounter clears the error counter through the shard's configured
// metrics writer. It does nothing when no metrics writer is set.
func (s *Shard) ClearErrorCounter() {
	mw := s.cfg.metricsWriter
	if mw == nil {
		return
	}
	mw.ClearErrorCounter()
}
// DeleteErrorCounter deletes the error counter through the shard's configured
// metrics writer. It does nothing when no metrics writer is set.
func (s *Shard) DeleteErrorCounter() {
	mw := s.cfg.metricsWriter
	if mw == nil {
		return
	}
	mw.DeleteErrorCounter()
}

View file

@ -24,6 +24,7 @@ type (
listObjectsDuration prometheus.Counter
containerSize *prometheus.GaugeVec
payloadSize *prometheus.GaugeVec
errorCounter *prometheus.GaugeVec
}
)
@ -44,6 +45,7 @@ func newEngineMetrics() engineMetrics {
listObjectsDuration: newEngineMethodDurationCounter("list_objects"),
containerSize: newEngineGaugeVector("container_size", "Accumulated size of all objects in a container", []string{containerIDLabelKey}),
payloadSize: newEngineGaugeVector("payload_size", "Accumulated size of all objects in a shard", []string{shardIDLabelKey}),
errorCounter: newEngineGaugeVector("error_counter", "Shard's error counter", []string{shardIDLabelKey}),
}
}
@ -123,3 +125,15 @@ func (m engineMetrics) AddToContainerSize(cnrID string, size int64) {
// AddToPayloadCounter adds size to the payloadSize gauge labelled with shardID.
func (m engineMetrics) AddToPayloadCounter(shardID string, size int64) {
	labels := prometheus.Labels{shardIDLabelKey: shardID}
	delta := float64(size)
	m.payloadSize.With(labels).Add(delta)
}
// IncErrorCounter increments by one the errorCounter gauge labelled with
// shardID.
func (m engineMetrics) IncErrorCounter(shardID string) {
	labels := prometheus.Labels{shardIDLabelKey: shardID}
	m.errorCounter.With(labels).Inc()
}
// ClearErrorCounter sets the errorCounter gauge labelled with shardID to zero.
func (m engineMetrics) ClearErrorCounter(shardID string) {
	labels := prometheus.Labels{shardIDLabelKey: shardID}
	m.errorCounter.With(labels).Set(0)
}
// DeleteErrorCounter removes the errorCounter entry labelled with shardID
// from the gauge vector. The result reported by Delete is not inspected.
func (m engineMetrics) DeleteErrorCounter(shardID string) {
	labels := prometheus.Labels{shardIDLabelKey: shardID}
	m.errorCounter.Delete(labels)
}