From 1b364d8cf4db87b07909331584c86ee0347e1bdf Mon Sep 17 00:00:00 2001 From: Dmitrii Stepanov Date: Tue, 13 Jun 2023 19:48:15 +0300 Subject: [PATCH] [#424] metrics: Refactor engine metrics Use histogram vector to measure request duration. Fix naming like in Prometheus best practice. Signed-off-by: Dmitrii Stepanov --- pkg/local_object_storage/engine/container.go | 4 +- pkg/local_object_storage/engine/delete.go | 2 +- pkg/local_object_storage/engine/get.go | 2 +- pkg/local_object_storage/engine/head.go | 2 +- pkg/local_object_storage/engine/inhume.go | 2 +- pkg/local_object_storage/engine/metrics.go | 18 +-- pkg/local_object_storage/engine/put.go | 2 +- pkg/local_object_storage/engine/range.go | 2 +- pkg/local_object_storage/engine/select.go | 4 +- pkg/local_object_storage/engine/shards.go | 6 +- .../shard/metrics_test.go | 2 +- pkg/local_object_storage/shard/shard.go | 8 +- pkg/metrics/engine.go | 114 ++++-------------- 13 files changed, 46 insertions(+), 122 deletions(-) diff --git a/pkg/local_object_storage/engine/container.go b/pkg/local_object_storage/engine/container.go index 034837110..061e2fea0 100644 --- a/pkg/local_object_storage/engine/container.go +++ b/pkg/local_object_storage/engine/container.go @@ -67,7 +67,7 @@ func ContainerSize(e *StorageEngine, id cid.ID) (uint64, error) { func (e *StorageEngine) containerSize(prm ContainerSizePrm) (res ContainerSizeRes, err error) { if e.metrics != nil { - defer elapsed(e.metrics.AddEstimateContainerSizeDuration)() + defer elapsed("EstimateContainerSize", e.metrics.AddMethodDuration)() } e.iterateOverUnsortedShards(func(sh hashedShard) (stop bool) { @@ -115,7 +115,7 @@ func ListContainers(e *StorageEngine) ([]cid.ID, error) { func (e *StorageEngine) listContainers() (ListContainersRes, error) { if e.metrics != nil { - defer elapsed(e.metrics.AddListContainersDuration)() + defer elapsed("ListContainers", e.metrics.AddMethodDuration)() } uniqueIDs := make(map[string]cid.ID) diff --git a/pkg/local_object_storage/engine/delete.go b/pkg/local_object_storage/engine/delete.go index 2125fad72..4d6d838bc 100644 --- a/pkg/local_object_storage/engine/delete.go +++ b/pkg/local_object_storage/engine/delete.go @@ -67,7 +67,7 @@ func (e *StorageEngine) Delete(ctx context.Context, prm DeletePrm) (res DeleteRe func (e *StorageEngine) delete(ctx context.Context, prm DeletePrm) (DeleteRes, error) { if e.metrics != nil { - defer elapsed(e.metrics.AddDeleteDuration)() + defer elapsed("Delete", e.metrics.AddMethodDuration)() } var locked struct { diff --git a/pkg/local_object_storage/engine/get.go b/pkg/local_object_storage/engine/get.go index d376198ba..bd094770a 100644 --- a/pkg/local_object_storage/engine/get.go +++ b/pkg/local_object_storage/engine/get.go @@ -64,7 +64,7 @@ func (e *StorageEngine) Get(ctx context.Context, prm GetPrm) (res GetRes, err er func (e *StorageEngine) get(ctx context.Context, prm GetPrm) (GetRes, error) { if e.metrics != nil { - defer elapsed(e.metrics.AddGetDuration)() + defer elapsed("Get", e.metrics.AddMethodDuration)() } var errNotFound apistatus.ObjectNotFound diff --git a/pkg/local_object_storage/engine/head.go b/pkg/local_object_storage/engine/head.go index 5da97bab5..ca51015db 100644 --- a/pkg/local_object_storage/engine/head.go +++ b/pkg/local_object_storage/engine/head.go @@ -68,7 +68,7 @@ func (e *StorageEngine) head(ctx context.Context, prm HeadPrm) (HeadRes, error) defer span.End() if e.metrics != nil { - defer elapsed(e.metrics.AddHeadDuration)() + defer elapsed("Head", e.metrics.AddMethodDuration)() } var ( diff --git a/pkg/local_object_storage/engine/inhume.go b/pkg/local_object_storage/engine/inhume.go index 57ac52751..0b9ae602b 100644 --- a/pkg/local_object_storage/engine/inhume.go +++ b/pkg/local_object_storage/engine/inhume.go @@ -78,7 +78,7 @@ func (e *StorageEngine) Inhume(ctx context.Context, prm InhumePrm) (res InhumeRe func (e *StorageEngine) inhume(ctx context.Context, prm InhumePrm) (InhumeRes, error) { if e.metrics != nil { - defer elapsed(e.metrics.AddInhumeDuration)() + defer elapsed("Inhume", e.metrics.AddMethodDuration)() } var shPrm shard.InhumePrm diff --git a/pkg/local_object_storage/engine/metrics.go b/pkg/local_object_storage/engine/metrics.go index f9e9191cd..dd1240ea7 100644 --- a/pkg/local_object_storage/engine/metrics.go +++ b/pkg/local_object_storage/engine/metrics.go @@ -7,17 +7,7 @@ import ( ) type MetricRegister interface { - AddListContainersDuration(d time.Duration) - AddEstimateContainerSizeDuration(d time.Duration) - AddDeleteDuration(d time.Duration) - AddExistsDuration(d time.Duration) - AddGetDuration(d time.Duration) - AddHeadDuration(d time.Duration) - AddInhumeDuration(d time.Duration) - AddPutDuration(d time.Duration) - AddRangeDuration(d time.Duration) - AddSearchDuration(d time.Duration) - AddListObjectsDuration(d time.Duration) + AddMethodDuration(method string, d time.Duration) SetObjectCounter(shardID, objectType string, v uint64) AddToObjectCounter(shardID, objectType string, delta int) @@ -28,17 +18,17 @@ type MetricRegister interface { AddToPayloadCounter(shardID string, size int64) IncErrorCounter(shardID string) ClearErrorCounter(shardID string) - DeleteErrorCounter(shardID string) + DeleteShardMetrics(shardID string) WriteCache() metrics.WriteCacheMetrics GC() metrics.GCMetrics } -func elapsed(addFunc func(d time.Duration)) func() { +func elapsed(method string, addFunc func(method string, d time.Duration)) func() { t := time.Now() return func() { - addFunc(time.Since(t)) + addFunc(method, time.Since(t)) } } diff --git a/pkg/local_object_storage/engine/put.go b/pkg/local_object_storage/engine/put.go index 4ac7f90f9..98c4504e6 100644 --- a/pkg/local_object_storage/engine/put.go +++ b/pkg/local_object_storage/engine/put.go @@ -57,7 +57,7 @@ func (e *StorageEngine) Put(ctx context.Context, prm PutPrm) (err error) { func (e *StorageEngine) put(ctx context.Context, prm PutPrm) error { if e.metrics != nil { - defer elapsed(e.metrics.AddPutDuration)() + defer elapsed("Put", e.metrics.AddMethodDuration)() } addr := object.AddressOf(prm.obj) diff --git a/pkg/local_object_storage/engine/range.go b/pkg/local_object_storage/engine/range.go index 29ec8b2bc..328df4587 100644 --- a/pkg/local_object_storage/engine/range.go +++ b/pkg/local_object_storage/engine/range.go @@ -80,7 +80,7 @@ func (e *StorageEngine) getRange(ctx context.Context, prm RngPrm) (RngRes, error defer span.End() if e.metrics != nil { - defer elapsed(e.metrics.AddRangeDuration)() + defer elapsed("GetRange", e.metrics.AddMethodDuration)() } var errNotFound apistatus.ObjectNotFound diff --git a/pkg/local_object_storage/engine/select.go b/pkg/local_object_storage/engine/select.go index 48d2be674..9f651845f 100644 --- a/pkg/local_object_storage/engine/select.go +++ b/pkg/local_object_storage/engine/select.go @@ -60,7 +60,7 @@ func (e *StorageEngine) Select(ctx context.Context, prm SelectPrm) (res SelectRe func (e *StorageEngine) _select(ctx context.Context, prm SelectPrm) (SelectRes, error) { if e.metrics != nil { - defer elapsed(e.metrics.AddSearchDuration)() + defer elapsed("Search", e.metrics.AddMethodDuration)() } addrList := make([]oid.Address, 0) @@ -109,7 +109,7 @@ func (e *StorageEngine) List(limit uint64) (res SelectRes, err error) { func (e *StorageEngine) list(limit uint64) (SelectRes, error) { if e.metrics != nil { - defer elapsed(e.metrics.AddListObjectsDuration)() + defer elapsed("ListObjects", e.metrics.AddMethodDuration)() } addrList := make([]oid.Address, 0, limit) diff --git a/pkg/local_object_storage/engine/shards.go b/pkg/local_object_storage/engine/shards.go index fd9605e9b..5f4f13635 100644 --- a/pkg/local_object_storage/engine/shards.go +++ b/pkg/local_object_storage/engine/shards.go @@ -70,8 +70,8 @@ func (m *metricsWithID) ClearErrorCounter() { m.mw.ClearErrorCounter(m.id) } -func (m *metricsWithID) DeleteErrorCounter() { - m.mw.DeleteErrorCounter(m.id) +func (m *metricsWithID) DeleteShardMetrics() { + m.mw.DeleteShardMetrics(m.id) } // AddShard adds a new shard to the storage engine. @@ -186,7 +186,7 @@ func (e *StorageEngine) removeShards(ids ...string) { continue } - sh.DeleteErrorCounter() + sh.DeleteShardMetrics() ss = append(ss, sh) delete(e.shards, id) diff --git a/pkg/local_object_storage/shard/metrics_test.go b/pkg/local_object_storage/shard/metrics_test.go index f1581b6d4..f9e1bc760 100644 --- a/pkg/local_object_storage/shard/metrics_test.go +++ b/pkg/local_object_storage/shard/metrics_test.go @@ -77,7 +77,7 @@ func (m *metricsStore) ClearErrorCounter() { m.errCounter = 0 } -func (m *metricsStore) DeleteErrorCounter() { +func (m *metricsStore) DeleteShardMetrics() { m.errCounter = 0 } diff --git a/pkg/local_object_storage/shard/shard.go b/pkg/local_object_storage/shard/shard.go index b740fc572..8c4db87b5 100644 --- a/pkg/local_object_storage/shard/shard.go +++ b/pkg/local_object_storage/shard/shard.go @@ -79,8 +79,8 @@ type MetricsWriter interface { IncErrorCounter() // ClearErrorCounter clear error counter. ClearErrorCounter() - // DeleteErrorCounter delete error counter. - DeleteErrorCounter() + // DeleteShardMetrics deletes shard metrics from registry. + DeleteShardMetrics() } type cfg struct { @@ -447,8 +447,8 @@ func (s *Shard) ClearErrorCounter() { } } -func (s *Shard) DeleteErrorCounter() { +func (s *Shard) DeleteShardMetrics() { if s.cfg.metricsWriter != nil { - s.cfg.metricsWriter.DeleteErrorCounter() + s.cfg.metricsWriter.DeleteShardMetrics() } } diff --git a/pkg/metrics/engine.go b/pkg/metrics/engine.go index 7992da9f8..4b32344e4 100644 --- a/pkg/metrics/engine.go +++ b/pkg/metrics/engine.go @@ -1,8 +1,6 @@ package metrics import ( - "fmt" - "strings" "time" "git.frostfs.info/TrueCloudLab/frostfs-observability/metrics" @@ -11,60 +9,33 @@ import ( type ( engineMetrics struct { - listContainersDuration prometheus.Counter - estimateContainerSizeDuration prometheus.Counter - deleteDuration prometheus.Counter - existsDuration prometheus.Counter - getDuration prometheus.Counter - headDuration prometheus.Counter - inhumeDuration prometheus.Counter - putDuration prometheus.Counter - rangeDuration prometheus.Counter - searchDuration prometheus.Counter - listObjectsDuration prometheus.Counter - containerSize *prometheus.GaugeVec - payloadSize *prometheus.GaugeVec - errorCounter *prometheus.GaugeVec + methodDuration *prometheus.HistogramVec + + containerSize *prometheus.GaugeVec + payloadSize *prometheus.GaugeVec + errorCounter *prometheus.GaugeVec } ) -const engineSubsystem = "engine" +const ( + engineSubsystem = "engine" + engineMethod = "method" +) func newEngineMetrics() engineMetrics { return engineMetrics{ - listContainersDuration: newEngineMethodDurationCounter("list_containers_"), - estimateContainerSizeDuration: newEngineCounter("estimate_container_size_duration", "Accumulated duration of engine container size estimate operations"), - deleteDuration: newEngineMethodDurationCounter("delete"), - existsDuration: newEngineMethodDurationCounter("exists"), - getDuration: newEngineMethodDurationCounter("get"), - headDuration: newEngineMethodDurationCounter("head"), - inhumeDuration: newEngineMethodDurationCounter("inhume"), - putDuration: newEngineMethodDurationCounter("put"), - rangeDuration: newEngineMethodDurationCounter("range"), - searchDuration: newEngineMethodDurationCounter("search"), - listObjectsDuration: newEngineMethodDurationCounter("list_objects"), - containerSize: newEngineGaugeVector("container_size", "Accumulated size of all objects in a container", []string{containerIDLabelKey}), - payloadSize: newEngineGaugeVector("payload_size", "Accumulated size of all objects in a shard", []string{shardIDLabelKey}), - errorCounter: newEngineGaugeVector("error_counter", "Shard's error counter", []string{shardIDLabelKey}), + containerSize: newEngineGaugeVector("container_size_bytes", "Accumulated size of all objects in a container", []string{containerIDLabelKey}), + payloadSize: newEngineGaugeVector("payload_size_bytes", "Accumulated size of all objects in a shard", []string{shardIDLabelKey}), + errorCounter: newEngineGaugeVector("error_counter", "Shard's error counter", []string{shardIDLabelKey}), + methodDuration: metrics.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: namespace, + Subsystem: engineSubsystem, + Name: "request_duration_seconds", + Help: "Duration of Engine requests", + }, []string{engineMethod}), } } -func newEngineCounter(name, help string) prometheus.Counter { - return metrics.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Subsystem: engineSubsystem, - Name: name, - Help: help, - }) -} - -func newEngineMethodDurationCounter(method string) prometheus.Counter { - return newEngineCounter( - fmt.Sprintf("%s_duration", method), - fmt.Sprintf("Accumulated duration of engine %s operations", strings.ReplaceAll(method, "_", " ")), - ) -} - func newEngineGaugeVector(name, help string, labels []string) *prometheus.GaugeVec { return metrics.NewGaugeVec(prometheus.GaugeOpts{ Namespace: namespace, @@ -74,48 +45,10 @@ func newEngineGaugeVector(name, help string, labels []string) *prometheus.GaugeV }, labels) } -func (m engineMetrics) AddListContainersDuration(d time.Duration) { - m.listObjectsDuration.Add(float64(d)) -} - -func (m engineMetrics) AddEstimateContainerSizeDuration(d time.Duration) { - m.estimateContainerSizeDuration.Add(float64(d)) -} - -func (m engineMetrics) AddDeleteDuration(d time.Duration) { - m.deleteDuration.Add(float64(d)) -} - -func (m engineMetrics) AddExistsDuration(d time.Duration) { - m.existsDuration.Add(float64(d)) -} - -func (m engineMetrics) AddGetDuration(d time.Duration) { - m.getDuration.Add(float64(d)) -} - -func (m engineMetrics) AddHeadDuration(d time.Duration) { - m.headDuration.Add(float64(d)) -} - -func (m engineMetrics) AddInhumeDuration(d time.Duration) { - m.inhumeDuration.Add(float64(d)) -} - -func (m engineMetrics) AddPutDuration(d time.Duration) { - m.putDuration.Add(float64(d)) -} - -func (m engineMetrics) AddRangeDuration(d time.Duration) { - m.rangeDuration.Add(float64(d)) -} - -func (m engineMetrics) AddSearchDuration(d time.Duration) { - m.searchDuration.Add(float64(d)) -} - -func (m engineMetrics) AddListObjectsDuration(d time.Duration) { - m.listObjectsDuration.Add(float64(d)) +func (m *engineMetrics) AddMethodDuration(method string, d time.Duration) { + m.methodDuration.With(prometheus.Labels{ + engineMethod: method, + }).Observe(d.Seconds()) } func (m engineMetrics) AddToContainerSize(cnrID string, size int64) { @@ -134,6 +67,7 @@ func (m engineMetrics) ClearErrorCounter(shardID string) { m.errorCounter.With(prometheus.Labels{shardIDLabelKey: shardID}).Set(0) } -func (m engineMetrics) DeleteErrorCounter(shardID string) { +func (m engineMetrics) DeleteShardMetrics(shardID string) { m.errorCounter.Delete(prometheus.Labels{shardIDLabelKey: shardID}) + m.payloadSize.Delete(prometheus.Labels{shardIDLabelKey: shardID}) }