forked from TrueCloudLab/frostfs-node
[#372] node: Add metrics for the error counter in the engine
Signed-off-by: Anton Nikiforov <an.nikiforov@yadro.com>
This commit is contained in:
parent
189a367ef2
commit
263c6fdc50
6 changed files with 71 additions and 0 deletions
|
@ -132,6 +132,7 @@ func (e *StorageEngine) reportShardErrorBackground(id string, msg string, err er
|
||||||
}
|
}
|
||||||
|
|
||||||
errCount := sh.errorCount.Add(1)
|
errCount := sh.errorCount.Add(1)
|
||||||
|
sh.Shard.IncErrorCounter()
|
||||||
e.reportShardErrorWithFlags(sh.Shard, errCount, false, msg, err)
|
e.reportShardErrorWithFlags(sh.Shard, errCount, false, msg, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -150,6 +151,7 @@ func (e *StorageEngine) reportShardError(
|
||||||
}
|
}
|
||||||
|
|
||||||
errCount := sh.errorCount.Add(1)
|
errCount := sh.errorCount.Add(1)
|
||||||
|
sh.Shard.IncErrorCounter()
|
||||||
e.reportShardErrorWithFlags(sh.Shard, errCount, true, msg, err, fields...)
|
e.reportShardErrorWithFlags(sh.Shard, errCount, true, msg, err, fields...)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -26,6 +26,9 @@ type MetricRegister interface {
|
||||||
|
|
||||||
AddToContainerSize(cnrID string, size int64)
|
AddToContainerSize(cnrID string, size int64)
|
||||||
AddToPayloadCounter(shardID string, size int64)
|
AddToPayloadCounter(shardID string, size int64)
|
||||||
|
IncErrorCounter(shardID string)
|
||||||
|
ClearErrorCounter(shardID string)
|
||||||
|
DeleteErrorCounter(shardID string)
|
||||||
|
|
||||||
WriteCache() metrics.WriteCacheMetrics
|
WriteCache() metrics.WriteCacheMetrics
|
||||||
GC() metrics.GCMetrics
|
GC() metrics.GCMetrics
|
||||||
|
|
|
@ -62,6 +62,18 @@ func (m *metricsWithID) AddToPayloadSize(size int64) {
|
||||||
m.mw.AddToPayloadCounter(m.id, size)
|
m.mw.AddToPayloadCounter(m.id, size)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// IncErrorCounter forwards the call to m.mw.IncErrorCounter, passing m.id.
// NOTE(review): m.id is presumably the shard ID this wrapper is bound to — confirm.
func (m *metricsWithID) IncErrorCounter() {
|
||||||
|
m.mw.IncErrorCounter(m.id)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ClearErrorCounter forwards the call to m.mw.ClearErrorCounter, passing m.id.
// NOTE(review): m.id is presumably the shard ID this wrapper is bound to — confirm.
func (m *metricsWithID) ClearErrorCounter() {
|
||||||
|
m.mw.ClearErrorCounter(m.id)
|
||||||
|
}
|
||||||
|
|
||||||
|
// DeleteErrorCounter forwards the call to m.mw.DeleteErrorCounter, passing m.id.
// NOTE(review): m.id is presumably the shard ID this wrapper is bound to — confirm.
func (m *metricsWithID) DeleteErrorCounter() {
|
||||||
|
m.mw.DeleteErrorCounter(m.id)
|
||||||
|
}
|
||||||
|
|
||||||
// AddShard adds a new shard to the storage engine.
|
// AddShard adds a new shard to the storage engine.
|
||||||
//
|
//
|
||||||
// Returns any error encountered that did not allow adding a shard.
|
// Returns any error encountered that did not allow adding a shard.
|
||||||
|
@ -174,6 +186,8 @@ func (e *StorageEngine) removeShards(ids ...string) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
sh.DeleteErrorCounter()
|
||||||
|
|
||||||
ss = append(ss, sh)
|
ss = append(ss, sh)
|
||||||
delete(e.shards, id)
|
delete(e.shards, id)
|
||||||
|
|
||||||
|
@ -281,6 +295,7 @@ func (e *StorageEngine) SetShardMode(id *shard.ID, m mode.Mode, resetErrorCounte
|
||||||
if id.String() == shID {
|
if id.String() == shID {
|
||||||
if resetErrorCounter {
|
if resetErrorCounter {
|
||||||
sh.errorCount.Store(0)
|
sh.errorCount.Store(0)
|
||||||
|
sh.Shard.ClearErrorCounter()
|
||||||
}
|
}
|
||||||
return sh.SetMode(m)
|
return sh.SetMode(m)
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,6 +23,7 @@ type metricsStore struct {
|
||||||
cnrSize map[string]int64
|
cnrSize map[string]int64
|
||||||
pldSize int64
|
pldSize int64
|
||||||
readOnly bool
|
readOnly bool
|
||||||
|
errCounter int64
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m metricsStore) SetShardID(_ string) {}
|
func (m metricsStore) SetShardID(_ string) {}
|
||||||
|
@ -68,6 +69,18 @@ func (m *metricsStore) AddToPayloadSize(size int64) {
|
||||||
m.pldSize += size
|
m.pldSize += size
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// IncErrorCounter adds one to the in-memory errCounter field.
// The update is a plain, unsynchronized increment.
func (m *metricsStore) IncErrorCounter() {
|
||||||
|
m.errCounter += 1
|
||||||
|
}
|
||||||
|
|
||||||
|
// ClearErrorCounter resets the in-memory errCounter field to zero.
func (m *metricsStore) ClearErrorCounter() {
|
||||||
|
m.errCounter = 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// DeleteErrorCounter resets the in-memory errCounter field to zero; in this
// store, deleting the counter is indistinguishable from clearing it.
func (m *metricsStore) DeleteErrorCounter() {
|
||||||
|
m.errCounter = 0
|
||||||
|
}
|
||||||
|
|
||||||
const physical = "phy"
|
const physical = "phy"
|
||||||
const logical = "logic"
|
const logical = "logic"
|
||||||
|
|
||||||
|
|
|
@ -75,6 +75,12 @@ type MetricsWriter interface {
|
||||||
SetShardID(id string)
|
SetShardID(id string)
|
||||||
// SetReadonly must set shard readonly state.
|
// SetReadonly must set shard readonly state.
|
||||||
SetReadonly(readonly bool)
|
SetReadonly(readonly bool)
|
||||||
|
// IncErrorCounter increments the error counter.
|
||||||
|
IncErrorCounter()
|
||||||
|
// ClearErrorCounter clears the error counter.
|
||||||
|
ClearErrorCounter()
|
||||||
|
// DeleteErrorCounter deletes the error counter.
|
||||||
|
DeleteErrorCounter()
|
||||||
}
|
}
|
||||||
|
|
||||||
type cfg struct {
|
type cfg struct {
|
||||||
|
@ -428,3 +434,21 @@ func (s *Shard) addToPayloadSize(size int64) {
|
||||||
s.cfg.metricsWriter.AddToPayloadSize(size)
|
s.cfg.metricsWriter.AddToPayloadSize(size)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// IncErrorCounter calls IncErrorCounter on the shard's configured metrics
// writer. It is a no-op when no metrics writer is set (s.cfg.metricsWriter is nil).
func (s *Shard) IncErrorCounter() {
|
||||||
|
if s.cfg.metricsWriter != nil {
|
||||||
|
s.cfg.metricsWriter.IncErrorCounter()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ClearErrorCounter calls ClearErrorCounter on the shard's configured metrics
// writer. It is a no-op when no metrics writer is set (s.cfg.metricsWriter is nil).
func (s *Shard) ClearErrorCounter() {
|
||||||
|
if s.cfg.metricsWriter != nil {
|
||||||
|
s.cfg.metricsWriter.ClearErrorCounter()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// DeleteErrorCounter calls DeleteErrorCounter on the shard's configured metrics
// writer. It is a no-op when no metrics writer is set (s.cfg.metricsWriter is nil).
func (s *Shard) DeleteErrorCounter() {
|
||||||
|
if s.cfg.metricsWriter != nil {
|
||||||
|
s.cfg.metricsWriter.DeleteErrorCounter()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -24,6 +24,7 @@ type (
|
||||||
listObjectsDuration prometheus.Counter
|
listObjectsDuration prometheus.Counter
|
||||||
containerSize *prometheus.GaugeVec
|
containerSize *prometheus.GaugeVec
|
||||||
payloadSize *prometheus.GaugeVec
|
payloadSize *prometheus.GaugeVec
|
||||||
|
errorCounter *prometheus.GaugeVec
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -44,6 +45,7 @@ func newEngineMetrics() engineMetrics {
|
||||||
listObjectsDuration: newEngineMethodDurationCounter("list_objects"),
|
listObjectsDuration: newEngineMethodDurationCounter("list_objects"),
|
||||||
containerSize: newEngineGaugeVector("container_size", "Accumulated size of all objects in a container", []string{containerIDLabelKey}),
|
containerSize: newEngineGaugeVector("container_size", "Accumulated size of all objects in a container", []string{containerIDLabelKey}),
|
||||||
payloadSize: newEngineGaugeVector("payload_size", "Accumulated size of all objects in a shard", []string{shardIDLabelKey}),
|
payloadSize: newEngineGaugeVector("payload_size", "Accumulated size of all objects in a shard", []string{shardIDLabelKey}),
|
||||||
|
errorCounter: newEngineGaugeVector("error_counter", "Shard's error counter", []string{shardIDLabelKey}),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -123,3 +125,15 @@ func (m engineMetrics) AddToContainerSize(cnrID string, size int64) {
|
||||||
func (m engineMetrics) AddToPayloadCounter(shardID string, size int64) {
|
func (m engineMetrics) AddToPayloadCounter(shardID string, size int64) {
|
||||||
m.payloadSize.With(prometheus.Labels{shardIDLabelKey: shardID}).Add(float64(size))
|
m.payloadSize.With(prometheus.Labels{shardIDLabelKey: shardID}).Add(float64(size))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// IncErrorCounter increments by one the errorCounter gauge whose shard-ID
// label equals shardID.
func (m engineMetrics) IncErrorCounter(shardID string) {
|
||||||
|
m.errorCounter.With(prometheus.Labels{shardIDLabelKey: shardID}).Inc()
|
||||||
|
}
|
||||||
|
|
||||||
|
// ClearErrorCounter sets to zero the errorCounter gauge whose shard-ID label
// equals shardID. The labeled series itself is kept; only its value is reset.
func (m engineMetrics) ClearErrorCounter(shardID string) {
|
||||||
|
m.errorCounter.With(prometheus.Labels{shardIDLabelKey: shardID}).Set(0)
|
||||||
|
}
|
||||||
|
|
||||||
|
// DeleteErrorCounter asks the errorCounter gauge vector to delete the series
// whose shard-ID label equals shardID. Any result of Delete is not inspected.
func (m engineMetrics) DeleteErrorCounter(shardID string) {
|
||||||
|
m.errorCounter.Delete(prometheus.Labels{shardIDLabelKey: shardID})
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in a new issue