forked from TrueCloudLab/frostfs-node
[#372] node: Add metrics for the error counter in the engine
Signed-off-by: Anton Nikiforov <an.nikiforov@yadro.com>
This commit is contained in:
parent
189a367ef2
commit
263c6fdc50
6 changed files with 71 additions and 0 deletions
|
@ -132,6 +132,7 @@ func (e *StorageEngine) reportShardErrorBackground(id string, msg string, err er
|
|||
}
|
||||
|
||||
errCount := sh.errorCount.Add(1)
|
||||
sh.Shard.IncErrorCounter()
|
||||
e.reportShardErrorWithFlags(sh.Shard, errCount, false, msg, err)
|
||||
}
|
||||
|
||||
|
@ -150,6 +151,7 @@ func (e *StorageEngine) reportShardError(
|
|||
}
|
||||
|
||||
errCount := sh.errorCount.Add(1)
|
||||
sh.Shard.IncErrorCounter()
|
||||
e.reportShardErrorWithFlags(sh.Shard, errCount, true, msg, err, fields...)
|
||||
}
|
||||
|
||||
|
|
|
@ -26,6 +26,9 @@ type MetricRegister interface {
|
|||
|
||||
AddToContainerSize(cnrID string, size int64)
|
||||
AddToPayloadCounter(shardID string, size int64)
|
||||
IncErrorCounter(shardID string)
|
||||
ClearErrorCounter(shardID string)
|
||||
DeleteErrorCounter(shardID string)
|
||||
|
||||
WriteCache() metrics.WriteCacheMetrics
|
||||
GC() metrics.GCMetrics
|
||||
|
|
|
@ -62,6 +62,18 @@ func (m *metricsWithID) AddToPayloadSize(size int64) {
|
|||
m.mw.AddToPayloadCounter(m.id, size)
|
||||
}
|
||||
|
||||
func (m *metricsWithID) IncErrorCounter() {
|
||||
m.mw.IncErrorCounter(m.id)
|
||||
}
|
||||
|
||||
func (m *metricsWithID) ClearErrorCounter() {
|
||||
m.mw.ClearErrorCounter(m.id)
|
||||
}
|
||||
|
||||
func (m *metricsWithID) DeleteErrorCounter() {
|
||||
m.mw.DeleteErrorCounter(m.id)
|
||||
}
|
||||
|
||||
// AddShard adds a new shard to the storage engine.
|
||||
//
|
||||
// Returns any error encountered that did not allow adding a shard.
|
||||
|
@ -174,6 +186,8 @@ func (e *StorageEngine) removeShards(ids ...string) {
|
|||
continue
|
||||
}
|
||||
|
||||
sh.DeleteErrorCounter()
|
||||
|
||||
ss = append(ss, sh)
|
||||
delete(e.shards, id)
|
||||
|
||||
|
@ -281,6 +295,7 @@ func (e *StorageEngine) SetShardMode(id *shard.ID, m mode.Mode, resetErrorCounte
|
|||
if id.String() == shID {
|
||||
if resetErrorCounter {
|
||||
sh.errorCount.Store(0)
|
||||
sh.Shard.ClearErrorCounter()
|
||||
}
|
||||
return sh.SetMode(m)
|
||||
}
|
||||
|
|
|
@ -23,6 +23,7 @@ type metricsStore struct {
|
|||
cnrSize map[string]int64
|
||||
pldSize int64
|
||||
readOnly bool
|
||||
errCounter int64
|
||||
}
|
||||
|
||||
// SetShardID is a no-op: the shard ID argument is ignored.
func (m metricsStore) SetShardID(_ string) {}
|
||||
|
@ -68,6 +69,18 @@ func (m *metricsStore) AddToPayloadSize(size int64) {
|
|||
m.pldSize += size
|
||||
}
|
||||
|
||||
func (m *metricsStore) IncErrorCounter() {
|
||||
m.errCounter += 1
|
||||
}
|
||||
|
||||
func (m *metricsStore) ClearErrorCounter() {
|
||||
m.errCounter = 0
|
||||
}
|
||||
|
||||
func (m *metricsStore) DeleteErrorCounter() {
|
||||
m.errCounter = 0
|
||||
}
|
||||
|
||||
// String keys "phy" and "logic".
// NOTE(review): presumably these distinguish physical from logical object
// counters — confirm against the code that uses them.
const (
	physical = "phy"
	logical  = "logic"
)
|
||||
|
||||
|
|
|
@ -75,6 +75,12 @@ type MetricsWriter interface {
|
|||
SetShardID(id string)
|
||||
// SetReadonly must set shard readonly state.
|
||||
SetReadonly(readonly bool)
|
||||
// IncErrorCounter increments the error counter.
|
||||
IncErrorCounter()
|
||||
// ClearErrorCounter clears the error counter.
|
||||
ClearErrorCounter()
|
||||
// DeleteErrorCounter deletes the error counter.
|
||||
DeleteErrorCounter()
|
||||
}
|
||||
|
||||
type cfg struct {
|
||||
|
@ -428,3 +434,21 @@ func (s *Shard) addToPayloadSize(size int64) {
|
|||
s.cfg.metricsWriter.AddToPayloadSize(size)
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Shard) IncErrorCounter() {
|
||||
if s.cfg.metricsWriter != nil {
|
||||
s.cfg.metricsWriter.IncErrorCounter()
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Shard) ClearErrorCounter() {
|
||||
if s.cfg.metricsWriter != nil {
|
||||
s.cfg.metricsWriter.ClearErrorCounter()
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Shard) DeleteErrorCounter() {
|
||||
if s.cfg.metricsWriter != nil {
|
||||
s.cfg.metricsWriter.DeleteErrorCounter()
|
||||
}
|
||||
}
|
||||
|
|
|
@ -24,6 +24,7 @@ type (
|
|||
listObjectsDuration prometheus.Counter
|
||||
containerSize *prometheus.GaugeVec
|
||||
payloadSize *prometheus.GaugeVec
|
||||
errorCounter *prometheus.GaugeVec
|
||||
}
|
||||
)
|
||||
|
||||
|
@ -44,6 +45,7 @@ func newEngineMetrics() engineMetrics {
|
|||
listObjectsDuration: newEngineMethodDurationCounter("list_objects"),
|
||||
containerSize: newEngineGaugeVector("container_size", "Accumulated size of all objects in a container", []string{containerIDLabelKey}),
|
||||
payloadSize: newEngineGaugeVector("payload_size", "Accumulated size of all objects in a shard", []string{shardIDLabelKey}),
|
||||
errorCounter: newEngineGaugeVector("error_counter", "Shard's error counter", []string{shardIDLabelKey}),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -123,3 +125,15 @@ func (m engineMetrics) AddToContainerSize(cnrID string, size int64) {
|
|||
func (m engineMetrics) AddToPayloadCounter(shardID string, size int64) {
|
||||
m.payloadSize.With(prometheus.Labels{shardIDLabelKey: shardID}).Add(float64(size))
|
||||
}
|
||||
|
||||
func (m engineMetrics) IncErrorCounter(shardID string) {
|
||||
m.errorCounter.With(prometheus.Labels{shardIDLabelKey: shardID}).Inc()
|
||||
}
|
||||
|
||||
func (m engineMetrics) ClearErrorCounter(shardID string) {
|
||||
m.errorCounter.With(prometheus.Labels{shardIDLabelKey: shardID}).Set(0)
|
||||
}
|
||||
|
||||
func (m engineMetrics) DeleteErrorCounter(shardID string) {
|
||||
m.errorCounter.Delete(prometheus.Labels{shardIDLabelKey: shardID})
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue