From c09144ecf155758ec65d92fa8d4506ce0db68ee4 Mon Sep 17 00:00:00 2001 From: Dmitrii Stepanov Date: Wed, 31 May 2023 12:25:32 +0300 Subject: [PATCH] [#412] node: Replace metrics package Use observability module. Signed-off-by: Dmitrii Stepanov --- cmd/frostfs-ir/metrics.go | 2 +- cmd/frostfs-node/metrics.go | 2 +- pkg/metrics/desc.go | 109 --------------------------------- pkg/metrics/desc_test.go | 65 -------------------- pkg/metrics/engine.go | 79 ++++++++++-------------- pkg/metrics/gc.go | 34 +++++----- pkg/metrics/innerring.go | 23 +++---- pkg/metrics/node.go | 15 ++--- pkg/metrics/object.go | 100 +++++++++++------------------- pkg/metrics/registry.go | 42 ------------- pkg/metrics/replicator.go | 33 +++++----- pkg/metrics/state.go | 15 +++-- pkg/metrics/treeservice.go | 25 +++----- pkg/metrics/writecache.go | 64 ++++++++----------- scripts/export-metrics/main.go | 13 ++-- 15 files changed, 162 insertions(+), 459 deletions(-) delete mode 100644 pkg/metrics/desc.go delete mode 100644 pkg/metrics/desc_test.go delete mode 100644 pkg/metrics/registry.go diff --git a/cmd/frostfs-ir/metrics.go b/cmd/frostfs-ir/metrics.go index 39b432c7..dd982b78 100644 --- a/cmd/frostfs-ir/metrics.go +++ b/cmd/frostfs-ir/metrics.go @@ -1,7 +1,7 @@ package main import ( - "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/metrics" + "git.frostfs.info/TrueCloudLab/frostfs-observability/metrics" ) func newMetricsComponent() *httpComponent { diff --git a/cmd/frostfs-node/metrics.go b/cmd/frostfs-node/metrics.go index cf621086..19b4af51 100644 --- a/cmd/frostfs-node/metrics.go +++ b/cmd/frostfs-node/metrics.go @@ -2,7 +2,7 @@ package main import ( metricsconfig "git.frostfs.info/TrueCloudLab/frostfs-node/cmd/frostfs-node/config/metrics" - "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/metrics" + "git.frostfs.info/TrueCloudLab/frostfs-observability/metrics" ) func metricsComponent(c *cfg) (*httpComponent, bool) { diff --git a/pkg/metrics/desc.go b/pkg/metrics/desc.go deleted file mode 
100644 index 612435b2..00000000 --- a/pkg/metrics/desc.go +++ /dev/null @@ -1,109 +0,0 @@ -package metrics - -import ( - "github.com/prometheus/client_golang/prometheus" - dto "github.com/prometheus/client_model/go" -) - -type metric[T prometheus.Collector] struct { - value T - desc Description -} - -// Descriptions contains metric description suitable for further processing. -// The only reason for it to exist is `prometheus.Desc` disallowing field access directly. -// https://github.com/prometheus/client_golang/pull/326 -// https://github.com/prometheus/client_golang/issues/516 -// https://github.com/prometheus/client_golang/issues/222 -type Description struct { - Name string `json:"name"` - Help string `json:"help"` - Type string `json:"type"` - ConstantLabels prometheus.Labels `json:"constant_labels,omitempty"` - VariableLabels []string `json:"variable_labels,omitempty"` -} - -func newGauge(opts prometheus.GaugeOpts) metric[prometheus.Gauge] { - return metric[prometheus.Gauge]{ - value: prometheus.NewGauge(opts), - desc: Description{ - Name: prometheus.BuildFQName(opts.Namespace, opts.Subsystem, opts.Name), - Type: dto.MetricType_GAUGE.String(), - Help: opts.Help, - ConstantLabels: opts.ConstLabels, - }, - } -} - -func newGaugeVec(opts prometheus.GaugeOpts, labelNames []string) metric[*prometheus.GaugeVec] { - return metric[*prometheus.GaugeVec]{ - value: prometheus.NewGaugeVec(opts, labelNames), - desc: Description{ - Name: prometheus.BuildFQName(opts.Namespace, opts.Subsystem, opts.Name), - Type: dto.MetricType_GAUGE.String(), - Help: opts.Help, - ConstantLabels: opts.ConstLabels, - VariableLabels: labelNames, - }, - } -} - -func newGaugeFunc(opts prometheus.GaugeOpts, f func() float64) metric[prometheus.GaugeFunc] { - return metric[prometheus.GaugeFunc]{ - value: prometheus.NewGaugeFunc(opts, f), - desc: Description{ - Name: prometheus.BuildFQName(opts.Namespace, opts.Subsystem, opts.Name), - Type: dto.MetricType_GAUGE.String(), - Help: opts.Help, - 
ConstantLabels: opts.ConstLabels, - }, - } -} - -func newCounter(opts prometheus.CounterOpts) metric[prometheus.Counter] { - return metric[prometheus.Counter]{ - value: prometheus.NewCounter(opts), - desc: Description{ - Name: prometheus.BuildFQName(opts.Namespace, opts.Subsystem, opts.Name), - Type: dto.MetricType_COUNTER.String(), - Help: opts.Help, - ConstantLabels: opts.ConstLabels, - }, - } -} - -func newCounterVec(opts prometheus.CounterOpts, labels []string) metric[*prometheus.CounterVec] { - return metric[*prometheus.CounterVec]{ - value: prometheus.NewCounterVec(opts, labels), - desc: Description{ - Name: prometheus.BuildFQName(opts.Namespace, opts.Subsystem, opts.Name), - Type: dto.MetricType_COUNTER.String(), - Help: opts.Help, - ConstantLabels: opts.ConstLabels, - VariableLabels: labels, - }, - } -} - -func newHistogramVec(opts prometheus.HistogramOpts, labelNames []string) metric[*prometheus.HistogramVec] { - return metric[*prometheus.HistogramVec]{ - value: prometheus.NewHistogramVec(opts, labelNames), - desc: Description{ - Name: prometheus.BuildFQName(opts.Namespace, opts.Subsystem, opts.Name), - Type: dto.MetricType_HISTOGRAM.String(), - Help: opts.Help, - ConstantLabels: opts.ConstLabels, - VariableLabels: labelNames, - }, - } -} - -// DescribeAll returns descriptions for all registered metrics. 
-func DescribeAll() ([]Description, error) { - registeredDescriptionsMtx.Lock() - defer registeredDescriptionsMtx.Unlock() - - ds := make([]Description, len(registeredDescriptions)) - copy(ds, registeredDescriptions) - return ds, nil -} diff --git a/pkg/metrics/desc_test.go b/pkg/metrics/desc_test.go deleted file mode 100644 index 28b5e213..00000000 --- a/pkg/metrics/desc_test.go +++ /dev/null @@ -1,65 +0,0 @@ -package metrics - -import ( - "strings" - "testing" - - "github.com/prometheus/client_golang/prometheus" - "github.com/stretchr/testify/require" -) - -func TestDescribeAll(t *testing.T) { - const ( - namespace = "my_ns" - subsystem = "mysub" - ) - mustRegister(newCounter(prometheus.CounterOpts{ - Namespace: namespace, - Subsystem: subsystem, - Name: "my_counter", - })) - - labels := []string{"label1", "label2"} - mustRegister(newGaugeVec(prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: subsystem, - Name: "my_gauge", - }, labels)) - - constLabels := prometheus.Labels{ - "const1": "abc", - "const2": "xyz", - } - mustRegister(newCounter(prometheus.CounterOpts{ - Namespace: namespace, - Subsystem: subsystem, - Name: "with_const_labels", - ConstLabels: constLabels, - })) - - descriptions, err := DescribeAll() - require.NoError(t, err) - - seen := make(map[string]bool) - for i := range descriptions { - if !strings.HasPrefix(descriptions[i].Name, namespace) { - continue - } - - require.False(t, seen[descriptions[i].Name], "metric %s was seen twice", descriptions[i].Name) - seen[descriptions[i].Name] = true - - switch descriptions[i].Name { - case prometheus.BuildFQName(namespace, subsystem, "my_counter"): - require.True(t, len(descriptions[i].VariableLabels) == 0) - case prometheus.BuildFQName(namespace, subsystem, "my_gauge"): - require.Equal(t, labels, descriptions[i].VariableLabels) - case prometheus.BuildFQName(namespace, subsystem, "with_const_labels"): - require.Equal(t, len(constLabels), len(descriptions[i].ConstantLabels)) - require.Equal(t, 
constLabels, descriptions[i].ConstantLabels) - default: - require.FailNow(t, "unexpected metric name: %s", descriptions[i].Name) - } - } - require.Equal(t, 3, len(seen), "not all registered metrics were iterated over") -} diff --git a/pkg/metrics/engine.go b/pkg/metrics/engine.go index 28fc1e02..4e78f4ac 100644 --- a/pkg/metrics/engine.go +++ b/pkg/metrics/engine.go @@ -5,24 +5,25 @@ import ( "strings" "time" + "git.frostfs.info/TrueCloudLab/frostfs-observability/metrics" "github.com/prometheus/client_golang/prometheus" ) type ( engineMetrics struct { - listContainersDuration metric[prometheus.Counter] - estimateContainerSizeDuration metric[prometheus.Counter] - deleteDuration metric[prometheus.Counter] - existsDuration metric[prometheus.Counter] - getDuration metric[prometheus.Counter] - headDuration metric[prometheus.Counter] - inhumeDuration metric[prometheus.Counter] - putDuration metric[prometheus.Counter] - rangeDuration metric[prometheus.Counter] - searchDuration metric[prometheus.Counter] - listObjectsDuration metric[prometheus.Counter] - containerSize metric[*prometheus.GaugeVec] - payloadSize metric[*prometheus.GaugeVec] + listContainersDuration prometheus.Counter + estimateContainerSizeDuration prometheus.Counter + deleteDuration prometheus.Counter + existsDuration prometheus.Counter + getDuration prometheus.Counter + headDuration prometheus.Counter + inhumeDuration prometheus.Counter + putDuration prometheus.Counter + rangeDuration prometheus.Counter + searchDuration prometheus.Counter + listObjectsDuration prometheus.Counter + containerSize *prometheus.GaugeVec + payloadSize *prometheus.GaugeVec } ) @@ -46,8 +47,8 @@ func newEngineMetrics() engineMetrics { } } -func newEngineCounter(name, help string) metric[prometheus.Counter] { - return newCounter(prometheus.CounterOpts{ +func newEngineCounter(name, help string) prometheus.Counter { + return metrics.NewCounter(prometheus.CounterOpts{ Namespace: namespace, Subsystem: engineSubsystem, Name: name, @@ 
-55,15 +56,15 @@ func newEngineCounter(name, help string) metric[prometheus.Counter] { }) } -func newEngineMethodDurationCounter(method string) metric[prometheus.Counter] { +func newEngineMethodDurationCounter(method string) prometheus.Counter { return newEngineCounter( fmt.Sprintf("%s_duration", method), fmt.Sprintf("Accumulated duration of engine %s operations", strings.ReplaceAll(method, "_", " ")), ) } -func newEngineGaugeVector(name, help string, labels []string) metric[*prometheus.GaugeVec] { - return newGaugeVec(prometheus.GaugeOpts{ +func newEngineGaugeVector(name, help string, labels []string) *prometheus.GaugeVec { + return metrics.NewGaugeVec(prometheus.GaugeOpts{ Namespace: namespace, Subsystem: engineSubsystem, Name: name, @@ -71,70 +72,54 @@ func newEngineGaugeVector(name, help string, labels []string) metric[*prometheus }, labels) } -func (m engineMetrics) register() { - mustRegister(m.listContainersDuration) - mustRegister(m.estimateContainerSizeDuration) - mustRegister(m.deleteDuration) - mustRegister(m.existsDuration) - mustRegister(m.getDuration) - mustRegister(m.headDuration) - mustRegister(m.inhumeDuration) - mustRegister(m.putDuration) - mustRegister(m.rangeDuration) - mustRegister(m.searchDuration) - mustRegister(m.listObjectsDuration) - mustRegister(m.containerSize) - mustRegister(m.payloadSize) -} - func (m engineMetrics) AddListContainersDuration(d time.Duration) { - m.listObjectsDuration.value.Add(float64(d)) + m.listObjectsDuration.Add(float64(d)) } func (m engineMetrics) AddEstimateContainerSizeDuration(d time.Duration) { - m.estimateContainerSizeDuration.value.Add(float64(d)) + m.estimateContainerSizeDuration.Add(float64(d)) } func (m engineMetrics) AddDeleteDuration(d time.Duration) { - m.deleteDuration.value.Add(float64(d)) + m.deleteDuration.Add(float64(d)) } func (m engineMetrics) AddExistsDuration(d time.Duration) { - m.existsDuration.value.Add(float64(d)) + m.existsDuration.Add(float64(d)) } func (m engineMetrics) 
AddGetDuration(d time.Duration) { - m.getDuration.value.Add(float64(d)) + m.getDuration.Add(float64(d)) } func (m engineMetrics) AddHeadDuration(d time.Duration) { - m.headDuration.value.Add(float64(d)) + m.headDuration.Add(float64(d)) } func (m engineMetrics) AddInhumeDuration(d time.Duration) { - m.inhumeDuration.value.Add(float64(d)) + m.inhumeDuration.Add(float64(d)) } func (m engineMetrics) AddPutDuration(d time.Duration) { - m.putDuration.value.Add(float64(d)) + m.putDuration.Add(float64(d)) } func (m engineMetrics) AddRangeDuration(d time.Duration) { - m.rangeDuration.value.Add(float64(d)) + m.rangeDuration.Add(float64(d)) } func (m engineMetrics) AddSearchDuration(d time.Duration) { - m.searchDuration.value.Add(float64(d)) + m.searchDuration.Add(float64(d)) } func (m engineMetrics) AddListObjectsDuration(d time.Duration) { - m.listObjectsDuration.value.Add(float64(d)) + m.listObjectsDuration.Add(float64(d)) } func (m engineMetrics) AddToContainerSize(cnrID string, size int64) { - m.containerSize.value.With(prometheus.Labels{containerIDLabelKey: cnrID}).Add(float64(size)) + m.containerSize.With(prometheus.Labels{containerIDLabelKey: cnrID}).Add(float64(size)) } func (m engineMetrics) AddToPayloadCounter(shardID string, size int64) { - m.payloadSize.value.With(prometheus.Labels{shardIDLabelKey: shardID}).Add(float64(size)) + m.payloadSize.With(prometheus.Labels{shardIDLabelKey: shardID}).Add(float64(size)) } diff --git a/pkg/metrics/gc.go b/pkg/metrics/gc.go index c4d5ecb5..2457c0c6 100644 --- a/pkg/metrics/gc.go +++ b/pkg/metrics/gc.go @@ -4,6 +4,7 @@ import ( "fmt" "time" + "git.frostfs.info/TrueCloudLab/frostfs-observability/metrics" "github.com/prometheus/client_golang/prometheus" ) @@ -25,40 +26,33 @@ type GCMetrics interface { } type gcMetrics struct { - runDuration metric[*prometheus.CounterVec] - deletedCounter metric[*prometheus.CounterVec] - expCollectDuration metric[*prometheus.CounterVec] - inhumedCounter metric[*prometheus.CounterVec] -} - -func 
(m *gcMetrics) register() { - mustRegister(m.runDuration) - mustRegister(m.deletedCounter) - mustRegister(m.expCollectDuration) - mustRegister(m.inhumedCounter) + runDuration *prometheus.CounterVec + deletedCounter *prometheus.CounterVec + expCollectDuration *prometheus.CounterVec + inhumedCounter *prometheus.CounterVec } func newGCMetrics() *gcMetrics { return &gcMetrics{ - runDuration: newCounterVec(prometheus.CounterOpts{ + runDuration: metrics.NewCounterVec(prometheus.CounterOpts{ Namespace: namespace, Subsystem: gcSubsystem, Name: "delete_duration_seconds", Help: "The total time of GC runs to delete objects from disk", }, []string{gcShardID, gcSuccess}), - deletedCounter: newCounterVec(prometheus.CounterOpts{ + deletedCounter: metrics.NewCounterVec(prometheus.CounterOpts{ Namespace: namespace, Subsystem: gcSubsystem, Name: "deleted_objects_count", Help: "Total count of objects GC deleted or failed to delete from disk", }, []string{gcShardID, gcStatus}), - expCollectDuration: newCounterVec(prometheus.CounterOpts{ + expCollectDuration: metrics.NewCounterVec(prometheus.CounterOpts{ Namespace: namespace, Subsystem: gcSubsystem, Name: "marking_duration_seconds", Help: "The total time of GC runs to mark expired objects as removed", }, []string{gcShardID, gcSuccess, gcObjectType}), - inhumedCounter: newCounterVec(prometheus.CounterOpts{ + inhumedCounter: metrics.NewCounterVec(prometheus.CounterOpts{ Namespace: namespace, Subsystem: gcSubsystem, Name: "marked_for_removal_objects_count", @@ -68,19 +62,19 @@ func newGCMetrics() *gcMetrics { } func (m *gcMetrics) AddRunDuration(shardID string, d time.Duration, success bool) { - m.runDuration.value.With(prometheus.Labels{ + m.runDuration.With(prometheus.Labels{ gcShardID: shardID, gcSuccess: fmt.Sprintf("%v", success), }).Add(d.Seconds()) } func (m *gcMetrics) AddDeletedCount(shardID string, deleted, failed uint64) { - m.deletedCounter.value.With( + m.deletedCounter.With( prometheus.Labels{ gcShardID: shardID, gcStatus: 
gcDeleted, }).Add(float64(deleted)) - m.deletedCounter.value.With( + m.deletedCounter.With( prometheus.Labels{ gcShardID: shardID, gcStatus: gcFailed, @@ -88,7 +82,7 @@ func (m *gcMetrics) AddDeletedCount(shardID string, deleted, failed uint64) { } func (m *gcMetrics) AddExpiredObjectCollectionDuration(shardID string, d time.Duration, success bool, objectType string) { - m.expCollectDuration.value.With(prometheus.Labels{ + m.expCollectDuration.With(prometheus.Labels{ gcShardID: shardID, gcSuccess: fmt.Sprintf("%v", success), gcObjectType: objectType, @@ -96,7 +90,7 @@ func (m *gcMetrics) AddExpiredObjectCollectionDuration(shardID string, d time.Du } func (m *gcMetrics) AddInhumedObjectCount(shardID string, count uint64, objectType string) { - m.inhumedCounter.value.With( + m.inhumedCounter.With( prometheus.Labels{ gcShardID: shardID, gcObjectType: objectType, diff --git a/pkg/metrics/innerring.go b/pkg/metrics/innerring.go index bff9184e..9d8b76bf 100644 --- a/pkg/metrics/innerring.go +++ b/pkg/metrics/innerring.go @@ -4,6 +4,7 @@ import ( "strconv" "time" + "git.frostfs.info/TrueCloudLab/frostfs-observability/metrics" "github.com/prometheus/client_golang/prometheus" ) @@ -15,27 +16,27 @@ const ( // InnerRingServiceMetrics contains metrics collected by inner ring. type InnerRingServiceMetrics struct { - epoch metric[prometheus.Gauge] - health metric[prometheus.Gauge] - eventDuration metric[*prometheus.HistogramVec] + epoch prometheus.Gauge + health prometheus.Gauge + eventDuration *prometheus.HistogramVec } // NewInnerRingMetrics returns new instance of metrics collectors for inner ring. 
func NewInnerRingMetrics() *InnerRingServiceMetrics { var ( - epoch = newGauge(prometheus.GaugeOpts{ + epoch = metrics.NewGauge(prometheus.GaugeOpts{ Namespace: namespace, Subsystem: innerRingSubsystem, Name: "epoch", Help: "Current epoch as seen by inner-ring node.", }) - health = newGauge(prometheus.GaugeOpts{ + health = metrics.NewGauge(prometheus.GaugeOpts{ Namespace: namespace, Subsystem: innerRingSubsystem, Name: "health", Help: "Current inner-ring node state.", }) - eventDuration = newHistogramVec(prometheus.HistogramOpts{ + eventDuration = metrics.NewHistogramVec(prometheus.HistogramOpts{ Namespace: namespace, Subsystem: innerRingSubsystem, Name: "event_duration_seconds", @@ -43,10 +44,6 @@ func NewInnerRingMetrics() *InnerRingServiceMetrics { }, []string{innerRingLabelType, innerRingLabelSuccess}) ) - mustRegister(epoch) - mustRegister(health) - mustRegister(eventDuration) - return &InnerRingServiceMetrics{ epoch: epoch, health: health, @@ -56,16 +53,16 @@ func NewInnerRingMetrics() *InnerRingServiceMetrics { // SetEpoch updates epoch metrics. func (m InnerRingServiceMetrics) SetEpoch(epoch uint64) { - m.epoch.value.Set(float64(epoch)) + m.epoch.Set(float64(epoch)) } // SetHealth updates health metrics. 
func (m InnerRingServiceMetrics) SetHealth(s int32) { - m.health.value.Set(float64(s)) + m.health.Set(float64(s)) } func (m InnerRingServiceMetrics) AddEvent(d time.Duration, typ string, success bool) { - m.eventDuration.value.With(prometheus.Labels{ + m.eventDuration.With(prometheus.Labels{ innerRingLabelType: typ, innerRingLabelSuccess: strconv.FormatBool(success), }).Observe(d.Seconds()) diff --git a/pkg/metrics/node.go b/pkg/metrics/node.go index 526e460c..8819ba15 100644 --- a/pkg/metrics/node.go +++ b/pkg/metrics/node.go @@ -2,6 +2,7 @@ package metrics import ( "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/services/tree" + "git.frostfs.info/TrueCloudLab/frostfs-observability/metrics" "github.com/prometheus/client_golang/prometheus" ) @@ -15,39 +16,31 @@ type NodeMetrics struct { writeCacheMetrics *writeCacheMetrics treeService *treeServiceMetrics - epoch metric[prometheus.Gauge] + epoch prometheus.Gauge gc *gcMetrics } func NewNodeMetrics() *NodeMetrics { objectService := newObjectServiceMetrics() - objectService.register() engine := newEngineMetrics() - engine.register() state := newStateMetrics() - state.register() replicator := newReplicatorMetrics() - replicator.register() treeService := newTreeServiceMetrics() - treeService.register() - epoch := newGauge(prometheus.GaugeOpts{ + epoch := metrics.NewGauge(prometheus.GaugeOpts{ Namespace: namespace, Subsystem: innerRingSubsystem, Name: "epoch", Help: "Current epoch as seen by inner-ring node.", }) - mustRegister(epoch) writeCacheMetrics := newWriteCacheMetrics() - writeCacheMetrics.register() gc := newGCMetrics() - gc.register() return &NodeMetrics{ objectServiceMetrics: objectService, @@ -63,7 +56,7 @@ func NewNodeMetrics() *NodeMetrics { // SetEpoch updates epoch metric. func (m *NodeMetrics) SetEpoch(epoch uint64) { - m.epoch.value.Set(float64(epoch)) + m.epoch.Set(float64(epoch)) } // WriteCache returns WriteCache metrics. 
diff --git a/pkg/metrics/object.go b/pkg/metrics/object.go index 5ec57574..87916414 100644 --- a/pkg/metrics/object.go +++ b/pkg/metrics/object.go @@ -5,6 +5,7 @@ import ( "strings" "time" + "git.frostfs.info/TrueCloudLab/frostfs-observability/metrics" "github.com/prometheus/client_golang/prometheus" ) @@ -12,8 +13,8 @@ const objectSubsystem = "object" type ( methodCount struct { - success metric[prometheus.Counter] - total metric[prometheus.Counter] + success prometheus.Counter + total prometheus.Counter } objectServiceMetrics struct { @@ -25,19 +26,19 @@ type ( rangeCounter methodCount rangeHashCounter methodCount - getDuration metric[prometheus.Counter] - putDuration metric[prometheus.Counter] - headDuration metric[prometheus.Counter] - searchDuration metric[prometheus.Counter] - deleteDuration metric[prometheus.Counter] - rangeDuration metric[prometheus.Counter] - rangeHashDuration metric[prometheus.Counter] + getDuration prometheus.Counter + putDuration prometheus.Counter + headDuration prometheus.Counter + searchDuration prometheus.Counter + deleteDuration prometheus.Counter + rangeDuration prometheus.Counter + rangeHashDuration prometheus.Counter - putPayload metric[prometheus.Counter] - getPayload metric[prometheus.Counter] + putPayload prometheus.Counter + getPayload prometheus.Counter - shardMetrics metric[*prometheus.GaugeVec] - shardsReadonly metric[*prometheus.GaugeVec] + shardMetrics *prometheus.GaugeVec + shardsReadonly *prometheus.GaugeVec } ) @@ -49,13 +50,13 @@ const ( func newObjectMethodCallCounter(name string) methodCount { return methodCount{ - success: newCounter(prometheus.CounterOpts{ + success: metrics.NewCounter(prometheus.CounterOpts{ Namespace: namespace, Subsystem: objectSubsystem, Name: fmt.Sprintf("%s_req_count_success", name), Help: fmt.Sprintf("The number of successful %s requests processed", name), }), - total: newCounter(prometheus.CounterOpts{ + total: metrics.NewCounter(prometheus.CounterOpts{ Namespace: namespace, Subsystem: 
objectSubsystem, Name: fmt.Sprintf("%s_req_count", name), @@ -64,15 +65,10 @@ func newObjectMethodCallCounter(name string) methodCount { } } -func (m methodCount) mustRegister() { - mustRegister(m.success) - mustRegister(m.total) -} - func (m methodCount) Inc(success bool) { - m.total.value.Inc() + m.total.Inc() if success { - m.success.value.Inc() + m.success.Inc() } } @@ -99,8 +95,8 @@ func newObjectServiceMetrics() objectServiceMetrics { } } -func newObjectMethodPayloadCounter(method string) metric[prometheus.Counter] { - return newCounter(prometheus.CounterOpts{ +func newObjectMethodPayloadCounter(method string) prometheus.Counter { + return metrics.NewCounter(prometheus.CounterOpts{ Namespace: namespace, Subsystem: objectSubsystem, Name: fmt.Sprintf("%s_payload", method), @@ -108,8 +104,8 @@ func newObjectMethodPayloadCounter(method string) metric[prometheus.Counter] { }) } -func newObjectMethodDurationCounter(method string) metric[prometheus.Counter] { - return newCounter(prometheus.CounterOpts{ +func newObjectMethodDurationCounter(method string) prometheus.Counter { + return metrics.NewCounter(prometheus.CounterOpts{ Namespace: namespace, Subsystem: objectSubsystem, Name: fmt.Sprintf("%s_req_duration", method), @@ -117,8 +113,8 @@ func newObjectMethodDurationCounter(method string) metric[prometheus.Counter] { }) } -func newObjectGaugeVector(name, help string, labels []string) metric[*prometheus.GaugeVec] { - return newGaugeVec(prometheus.GaugeOpts{ +func newObjectGaugeVector(name, help string, labels []string) *prometheus.GaugeVec { + return metrics.NewGaugeVec(prometheus.GaugeOpts{ Namespace: namespace, Subsystem: objectSubsystem, Name: name, @@ -126,30 +122,6 @@ func newObjectGaugeVector(name, help string, labels []string) metric[*prometheus }, labels) } -func (m objectServiceMetrics) register() { - m.getCounter.mustRegister() - m.putCounter.mustRegister() - m.headCounter.mustRegister() - m.searchCounter.mustRegister() - m.deleteCounter.mustRegister() - 
m.rangeCounter.mustRegister() - m.rangeHashCounter.mustRegister() - - mustRegister(m.getDuration) - mustRegister(m.putDuration) - mustRegister(m.headDuration) - mustRegister(m.searchDuration) - mustRegister(m.deleteDuration) - mustRegister(m.rangeDuration) - mustRegister(m.rangeHashDuration) - - mustRegister(m.putPayload) - mustRegister(m.getPayload) - - mustRegister(m.shardMetrics) - mustRegister(m.shardsReadonly) -} - func (m objectServiceMetrics) IncGetReqCounter(success bool) { m.getCounter.Inc(success) } @@ -179,43 +151,43 @@ func (m objectServiceMetrics) IncRangeHashReqCounter(success bool) { } func (m objectServiceMetrics) AddGetReqDuration(d time.Duration) { - m.getDuration.value.Add(float64(d)) + m.getDuration.Add(float64(d)) } func (m objectServiceMetrics) AddPutReqDuration(d time.Duration) { - m.putDuration.value.Add(float64(d)) + m.putDuration.Add(float64(d)) } func (m objectServiceMetrics) AddHeadReqDuration(d time.Duration) { - m.headDuration.value.Add(float64(d)) + m.headDuration.Add(float64(d)) } func (m objectServiceMetrics) AddSearchReqDuration(d time.Duration) { - m.searchDuration.value.Add(float64(d)) + m.searchDuration.Add(float64(d)) } func (m objectServiceMetrics) AddDeleteReqDuration(d time.Duration) { - m.deleteDuration.value.Add(float64(d)) + m.deleteDuration.Add(float64(d)) } func (m objectServiceMetrics) AddRangeReqDuration(d time.Duration) { - m.rangeDuration.value.Add(float64(d)) + m.rangeDuration.Add(float64(d)) } func (m objectServiceMetrics) AddRangeHashReqDuration(d time.Duration) { - m.rangeHashDuration.value.Add(float64(d)) + m.rangeHashDuration.Add(float64(d)) } func (m objectServiceMetrics) AddPutPayload(ln int) { - m.putPayload.value.Add(float64(ln)) + m.putPayload.Add(float64(ln)) } func (m objectServiceMetrics) AddGetPayload(ln int) { - m.getPayload.value.Add(float64(ln)) + m.getPayload.Add(float64(ln)) } func (m objectServiceMetrics) AddToObjectCounter(shardID, objectType string, delta int) { - m.shardMetrics.value.With( + 
m.shardMetrics.With( prometheus.Labels{ shardIDLabelKey: shardID, counterTypeLabelKey: objectType, @@ -224,7 +196,7 @@ func (m objectServiceMetrics) AddToObjectCounter(shardID, objectType string, del } func (m objectServiceMetrics) SetObjectCounter(shardID, objectType string, v uint64) { - m.shardMetrics.value.With( + m.shardMetrics.With( prometheus.Labels{ shardIDLabelKey: shardID, counterTypeLabelKey: objectType, @@ -237,7 +209,7 @@ func (m objectServiceMetrics) SetReadonly(shardID string, readonly bool) { if readonly { flag = 1 } - m.shardsReadonly.value.With( + m.shardsReadonly.With( prometheus.Labels{ shardIDLabelKey: shardID, }, diff --git a/pkg/metrics/registry.go b/pkg/metrics/registry.go deleted file mode 100644 index eef613d0..00000000 --- a/pkg/metrics/registry.go +++ /dev/null @@ -1,42 +0,0 @@ -package metrics - -import ( - "net/http" - "sync" - - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/collectors" - "github.com/prometheus/client_golang/prometheus/promhttp" -) - -// Handler returns an http.Handler for the local registry. -func Handler() http.Handler { - promhttp.Handler() - return promhttp.InstrumentMetricHandler( - registry, - promhttp.HandlerFor(registry, promhttp.HandlerOpts{})) -} - -var ( - registry = prometheus.NewRegistry() - // registeredDescriptionsMtx protects collectors slice. - // It should not be acessed concurrently, but we can easily forget this in future, thus this mutex. 
- registeredDescriptionsMtx sync.Mutex - registeredDescriptions []Description -) - -func init() { - registry.MustRegister(collectors.NewProcessCollector(collectors.ProcessCollectorOpts{})) - registry.MustRegister(collectors.NewGoCollector()) -} - -func mustRegister[T prometheus.Collector](cs ...metric[T]) { - for i := range cs { - registry.MustRegister(cs[i].value) - } - registeredDescriptionsMtx.Lock() - for i := range cs { - registeredDescriptions = append(registeredDescriptions, cs[i].desc) - } - registeredDescriptionsMtx.Unlock() -} diff --git a/pkg/metrics/replicator.go b/pkg/metrics/replicator.go index 55f736c6..0deafe91 100644 --- a/pkg/metrics/replicator.go +++ b/pkg/metrics/replicator.go @@ -1,29 +1,32 @@ package metrics -import "github.com/prometheus/client_golang/prometheus" +import ( + "git.frostfs.info/TrueCloudLab/frostfs-observability/metrics" + "github.com/prometheus/client_golang/prometheus" +) const replicatorSubsystem = "replicator" type replicatorMetrics struct { - inFlightRequests metric[prometheus.Gauge] - processedObjects metric[prometheus.Counter] - totalReplicatedPayloadSize metric[prometheus.Counter] + inFlightRequests prometheus.Gauge + processedObjects prometheus.Counter + totalReplicatedPayloadSize prometheus.Counter } func (m replicatorMetrics) IncInFlightRequest() { - m.inFlightRequests.value.Inc() + m.inFlightRequests.Inc() } func (m replicatorMetrics) DecInFlightRequest() { - m.inFlightRequests.value.Dec() + m.inFlightRequests.Dec() } func (m replicatorMetrics) IncProcessedObjects() { - m.processedObjects.value.Inc() + m.processedObjects.Inc() } func (m replicatorMetrics) AddPayloadSize(size int64) { - m.totalReplicatedPayloadSize.value.Add(float64(size)) + m.totalReplicatedPayloadSize.Add(float64(size)) } func newReplicatorMetrics() replicatorMetrics { @@ -34,14 +37,8 @@ func newReplicatorMetrics() replicatorMetrics { } } -func (m replicatorMetrics) register() { - mustRegister(m.inFlightRequests) - mustRegister(m.processedObjects) 
- mustRegister(m.totalReplicatedPayloadSize) -} - -func newReplicatorCounter(name, help string) metric[prometheus.Counter] { - return newCounter(prometheus.CounterOpts{ +func newReplicatorCounter(name, help string) prometheus.Counter { + return metrics.NewCounter(prometheus.CounterOpts{ Namespace: namespace, Subsystem: replicatorSubsystem, Name: name, @@ -49,8 +46,8 @@ func newReplicatorCounter(name, help string) metric[prometheus.Counter] { }) } -func newReplicatorGauge(name, help string) metric[prometheus.Gauge] { - return newGauge(prometheus.GaugeOpts{ +func newReplicatorGauge(name, help string) prometheus.Gauge { + return metrics.NewGauge(prometheus.GaugeOpts{ Namespace: namespace, Subsystem: replicatorSubsystem, Name: name, diff --git a/pkg/metrics/state.go b/pkg/metrics/state.go index dce0402c..89384991 100644 --- a/pkg/metrics/state.go +++ b/pkg/metrics/state.go @@ -1,16 +1,19 @@ package metrics -import "github.com/prometheus/client_golang/prometheus" +import ( + "git.frostfs.info/TrueCloudLab/frostfs-observability/metrics" + "github.com/prometheus/client_golang/prometheus" +) const stateSubsystem = "state" type stateMetrics struct { - healthCheck metric[prometheus.Gauge] + healthCheck prometheus.Gauge } func newStateMetrics() stateMetrics { return stateMetrics{ - healthCheck: newGauge(prometheus.GaugeOpts{ + healthCheck: metrics.NewGauge(prometheus.GaugeOpts{ Namespace: namespace, Subsystem: stateSubsystem, Name: "health", @@ -19,10 +22,6 @@ func newStateMetrics() stateMetrics { } } -func (m stateMetrics) register() { - mustRegister(m.healthCheck) -} - func (m stateMetrics) SetHealth(s int32) { - m.healthCheck.value.Set(float64(s)) + m.healthCheck.Set(float64(s)) } diff --git a/pkg/metrics/treeservice.go b/pkg/metrics/treeservice.go index 135f6e6d..903ef349 100644 --- a/pkg/metrics/treeservice.go +++ b/pkg/metrics/treeservice.go @@ -4,33 +4,34 @@ import ( "fmt" "time" + "git.frostfs.info/TrueCloudLab/frostfs-observability/metrics" 
"github.com/prometheus/client_golang/prometheus" ) const treeServiceLabelSuccess = "success" type treeServiceMetrics struct { - replicateTaskDuration metric[*prometheus.HistogramVec] - replicateWaitDuration metric[*prometheus.HistogramVec] - syncOpDuration metric[*prometheus.HistogramVec] + replicateTaskDuration *prometheus.HistogramVec + replicateWaitDuration *prometheus.HistogramVec + syncOpDuration *prometheus.HistogramVec } func newTreeServiceMetrics() *treeServiceMetrics { const treeServiceSubsystem = "treeservice" return &treeServiceMetrics{ - replicateTaskDuration: newHistogramVec(prometheus.HistogramOpts{ + replicateTaskDuration: metrics.NewHistogramVec(prometheus.HistogramOpts{ Namespace: namespace, Subsystem: treeServiceSubsystem, Name: "replicate_task_duration_seconds", Help: "Duration of individual replication tasks executed as part of replication loops", }, []string{treeServiceLabelSuccess}), - replicateWaitDuration: newHistogramVec(prometheus.HistogramOpts{ + replicateWaitDuration: metrics.NewHistogramVec(prometheus.HistogramOpts{ Namespace: namespace, Subsystem: treeServiceSubsystem, Name: "replicate_wait_duration_seconds", Help: "Duration of overall waiting time for replication loops", }, []string{treeServiceLabelSuccess}), - syncOpDuration: newHistogramVec(prometheus.HistogramOpts{ + syncOpDuration: metrics.NewHistogramVec(prometheus.HistogramOpts{ Namespace: namespace, Subsystem: treeServiceSubsystem, Name: "sync_duration_seconds", @@ -39,26 +40,20 @@ func newTreeServiceMetrics() *treeServiceMetrics { } } -func (m *treeServiceMetrics) register() { - mustRegister(m.replicateTaskDuration) - mustRegister(m.replicateWaitDuration) - mustRegister(m.syncOpDuration) -} - func (m *treeServiceMetrics) AddReplicateTaskDuration(d time.Duration, success bool) { - m.replicateTaskDuration.value.With(prometheus.Labels{ + m.replicateTaskDuration.With(prometheus.Labels{ treeServiceLabelSuccess: fmt.Sprintf("%v", success), }).Observe(d.Seconds()) } func (m 
*treeServiceMetrics) AddReplicateWaitDuration(d time.Duration, success bool) { - m.replicateWaitDuration.value.With(prometheus.Labels{ + m.replicateWaitDuration.With(prometheus.Labels{ treeServiceLabelSuccess: fmt.Sprintf("%v", success), }).Observe(d.Seconds()) } func (m *treeServiceMetrics) AddSyncDuration(d time.Duration, success bool) { - m.syncOpDuration.value.With(prometheus.Labels{ + m.syncOpDuration.With(prometheus.Labels{ treeServiceLabelSuccess: fmt.Sprintf("%v", success), }).Observe(d.Seconds()) } diff --git a/pkg/metrics/writecache.go b/pkg/metrics/writecache.go index 74c33084..3c56aa2b 100644 --- a/pkg/metrics/writecache.go +++ b/pkg/metrics/writecache.go @@ -5,6 +5,7 @@ import ( "sync" "time" + "git.frostfs.info/TrueCloudLab/frostfs-observability/metrics" "github.com/prometheus/client_golang/prometheus" ) @@ -37,18 +38,18 @@ type WriteCacheMetrics interface { } type writeCacheMetrics struct { - getDuration metric[*prometheus.HistogramVec] - putDuration metric[*prometheus.HistogramVec] - deleteDuration metric[*prometheus.HistogramVec] + getDuration *prometheus.HistogramVec + putDuration *prometheus.HistogramVec + deleteDuration *prometheus.HistogramVec - flushCounter metric[*prometheus.CounterVec] - evictCounter metric[*prometheus.CounterVec] + flushCounter *prometheus.CounterVec + evictCounter *prometheus.CounterVec - actualCount metric[*prometheus.GaugeVec] + actualCount *prometheus.GaugeVec - estimatedSize metric[*prometheus.GaugeVec] + estimatedSize *prometheus.GaugeVec - modeMetrics map[shardIDMode]metric[prometheus.GaugeFunc] + modeMetrics map[shardIDMode]prometheus.GaugeFunc modeValues map[string]string modeMtx sync.RWMutex } @@ -63,46 +64,46 @@ func newWriteCacheMetrics() *writeCacheMetrics { actualCount: newWCGaugeVec("actual_objects_count", "Actual objects count in writecache", []string{wcShardID, wcStorage}), estimatedSize: newWCGaugeVec("estimated_size_bytes", "Estimated writecache size", []string{wcShardID, wcStorage}), modeMtx: 
sync.RWMutex{}, - modeMetrics: make(map[shardIDMode]metric[prometheus.GaugeFunc]), + modeMetrics: make(map[shardIDMode]prometheus.GaugeFunc), modeValues: make(map[string]string), } } func (m *writeCacheMetrics) AddGetDuration(shardID string, success bool, d time.Duration, storageType string) { - setWriteCacheDuration(m.getDuration.value, shardID, success, d, storageType) + setWriteCacheDuration(m.getDuration, shardID, success, d, storageType) } func (m *writeCacheMetrics) AddDeleteDuration(shardID string, success bool, d time.Duration, storageType string) { - setWriteCacheDuration(m.deleteDuration.value, shardID, success, d, storageType) + setWriteCacheDuration(m.deleteDuration, shardID, success, d, storageType) } func (m *writeCacheMetrics) AddPutDuration(shardID string, success bool, d time.Duration, storageType string) { - setWriteCacheDuration(m.putDuration.value, shardID, success, d, storageType) + setWriteCacheDuration(m.putDuration, shardID, success, d, storageType) } func (m *writeCacheMetrics) IncActualCount(shardID string, storageType string) { - m.actualCount.value.With(prometheus.Labels{ + m.actualCount.With(prometheus.Labels{ wcShardID: shardID, wcStorage: storageType, }).Inc() } func (m *writeCacheMetrics) DecActualCount(shardID string, storageType string) { - m.actualCount.value.With(prometheus.Labels{ + m.actualCount.With(prometheus.Labels{ wcShardID: shardID, wcStorage: storageType, }).Dec() } func (m *writeCacheMetrics) SetActualCount(shardID string, count uint64, storageType string) { - m.actualCount.value.With(prometheus.Labels{ + m.actualCount.With(prometheus.Labels{ wcShardID: shardID, wcStorage: storageType, }).Set(float64(count)) } func (m *writeCacheMetrics) SetEstimateSize(shardID string, size uint64, storageType string) { - m.estimatedSize.value.With(prometheus.Labels{ + m.estimatedSize.With(prometheus.Labels{ wcShardID: shardID, wcStorage: storageType, }).Set(float64(size)) @@ -121,7 +122,7 @@ func (m *writeCacheMetrics) SetMode(shardID 
string, mode string) { return } - metric := newGaugeFunc( + metric := metrics.NewGaugeFunc( prometheus.GaugeOpts{ Namespace: namespace, Subsystem: wcSubsystem, @@ -141,12 +142,11 @@ func (m *writeCacheMetrics) SetMode(shardID string, mode string) { } return 0 }) - mustRegister(metric) m.modeMetrics[key] = metric } func (m *writeCacheMetrics) IncFlushCounter(shardID string, success bool, storageType string) { - m.flushCounter.value.With(prometheus.Labels{ + m.flushCounter.With(prometheus.Labels{ wcShardID: shardID, wcSuccess: fmt.Sprintf("%v", success), wcStorage: storageType, @@ -154,22 +154,12 @@ func (m *writeCacheMetrics) IncFlushCounter(shardID string, success bool, storag } func (m *writeCacheMetrics) IncEvictCounter(shardID string, storageType string) { - m.evictCounter.value.With(prometheus.Labels{ + m.evictCounter.With(prometheus.Labels{ wcShardID: shardID, wcStorage: storageType, }).Inc() } -func (m *writeCacheMetrics) register() { - mustRegister(m.getDuration) - mustRegister(m.putDuration) - mustRegister(m.deleteDuration) - mustRegister(m.actualCount) - mustRegister(m.estimatedSize) - mustRegister(m.flushCounter) - mustRegister(m.evictCounter) -} - func setWriteCacheDuration(m *prometheus.HistogramVec, shardID string, success bool, d time.Duration, storageType string) { m.With( prometheus.Labels{ @@ -180,17 +170,17 @@ func setWriteCacheDuration(m *prometheus.HistogramVec, shardID string, success b ).Observe(float64(d)) } -func newWCMethodDurationCounter(method string) metric[*prometheus.HistogramVec] { - return newHistogramVec(prometheus.HistogramOpts{ +func newWCMethodDurationCounter(method string) *prometheus.HistogramVec { + return metrics.NewHistogramVec(prometheus.HistogramOpts{ Namespace: namespace, Subsystem: wcSubsystem, Name: fmt.Sprintf("%s_req_duration_seconds", method), Help: fmt.Sprintf("Accumulated %s request process duration", method), }, []string{wcShardID, wcSuccess, wcStorage}) } -func
newWCOperationCounterVec(operation string, labels []string) metric[*prometheus.CounterVec] { - return newCounterVec(prometheus.CounterOpts{ +func newWCOperationCounterVec(operation string, labels []string) *prometheus.CounterVec { + return metrics.NewCounterVec(prometheus.CounterOpts{ Namespace: namespace, Subsystem: wcSubsystem, Name: fmt.Sprintf("%s_operation_count", operation), @@ -198,8 +188,8 @@ func newWCOperationCounterVec(operation string, labels []string) metric[*prometh }, labels) } -func newWCGaugeVec(name, help string, labels []string) metric[*prometheus.GaugeVec] { - return newGaugeVec(prometheus.GaugeOpts{ +func newWCGaugeVec(name, help string, labels []string) *prometheus.GaugeVec { + return metrics.NewGaugeVec(prometheus.GaugeOpts{ Namespace: namespace, Subsystem: wcSubsystem, Name: name, diff --git a/scripts/export-metrics/main.go b/scripts/export-metrics/main.go index 694eea38..f29eca37 100644 --- a/scripts/export-metrics/main.go +++ b/scripts/export-metrics/main.go @@ -6,7 +6,8 @@ import ( "fmt" "os" - "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/metrics" + local_metrics "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/metrics" + "git.frostfs.info/TrueCloudLab/frostfs-observability/metrics" ) var ( @@ -25,10 +26,10 @@ func main() { var filename string switch { case *node != "": - _ = metrics.NewNodeMetrics() + _ = local_metrics.NewNodeMetrics() filename = *node case *ir != "": - _ = metrics.NewInnerRingMetrics() + _ = local_metrics.NewInnerRingMetrics() filename = *ir default: @@ -36,11 +37,7 @@ func main() { os.Exit(1) } - ds, err := metrics.DescribeAll() - if err != nil { - fmt.Fprintf(os.Stderr, "Could not parse metric descriptions: %v\n", err) - os.Exit(1) - } + ds := metrics.DescribeAll() data, err := json.Marshal(ds) if err != nil {