From c750c87a618cd2b6f64d66fbb1b34673aee8ac8f Mon Sep 17 00:00:00 2001 From: Artem Tataurov Date: Wed, 16 Aug 2023 17:08:50 +0300 Subject: [PATCH] [#51] metrics: Add a metric of currently used nodes Signed-off-by: Artem Tataurov --- CHANGELOG.md | 1 + metrics/desc.go | 8 ++++++++ metrics/pool.go | 10 ++++++++++ 3 files changed, 19 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7dacb465c..00451eaed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ This document outlines major changes between releases. - Fix parsing signed headers in presigned urls (#182) ### Added +- Add a metric with addresses of nodes of the same and highest priority that are currently healthy (#51) - Support dump metrics descriptions (#80) - Add `copies_numbers` section to `placement_policy` in config file and support vectors of copies numbers (#70, #101) - Support impersonate bearer token (#81, #105) diff --git a/metrics/desc.go b/metrics/desc.go index 3354da178..0a748ac15 100644 --- a/metrics/desc.go +++ b/metrics/desc.go @@ -48,6 +48,14 @@ var appMetricsDesc = map[string]map[string]Description{ Help: "Average request duration (in milliseconds) for specific method on node in pool", VariableLabels: []string{"node", "method"}, }, + currentNodesMetric: Description{ + Type: dto.MetricType_GAUGE, + Namespace: namespace, + Subsystem: poolSubsystem, + Name: currentNodesMetric, + Help: "Addresses of nodes of the same and highest priority that are currently healthy", + VariableLabels: []string{"address"}, + }, }, billingSubsystem: { userRequestsMetric: Description{ diff --git a/metrics/pool.go b/metrics/pool.go index 411730083..538d64462 100644 --- a/metrics/pool.go +++ b/metrics/pool.go @@ -15,6 +15,7 @@ const ( overallNodeRequestsMetric = "overall_node_requests" currentErrorMetric = "current_errors" avgRequestDurationMetric = "avg_request_duration" + currentNodesMetric = "current_nodes" ) const ( @@ -42,6 +43,7 @@ type poolMetricsCollector struct { overallNodeRequests *prometheus.GaugeVec currentErrors *prometheus.GaugeVec requestDuration *prometheus.GaugeVec + currentNodes *prometheus.GaugeVec } func newPoolMetricsCollector(scraper StatisticScraper) *poolMetricsCollector { @@ -52,6 +54,7 @@ func newPoolMetricsCollector(scraper StatisticScraper) *poolMetricsCollector { overallNodeRequests: mustNewGaugeVec(appMetricsDesc[poolSubsystem][overallNodeRequestsMetric]), currentErrors: mustNewGaugeVec(appMetricsDesc[poolSubsystem][currentErrorMetric]), requestDuration: mustNewGaugeVec(appMetricsDesc[poolSubsystem][avgRequestDurationMetric]), + currentNodes: mustNewGaugeVec(appMetricsDesc[poolSubsystem][currentNodesMetric]), } } @@ -62,6 +65,7 @@ func (m *poolMetricsCollector) Collect(ch chan<- prometheus.Metric) { m.overallNodeRequests.Collect(ch) m.currentErrors.Collect(ch) m.requestDuration.Collect(ch) + m.currentNodes.Collect(ch) } func (m *poolMetricsCollector) Describe(descs chan<- *prometheus.Desc) { @@ -70,6 +74,7 @@ func (m *poolMetricsCollector) Describe(descs chan<- *prometheus.Desc) { m.overallNodeRequests.Describe(descs) m.currentErrors.Describe(descs) m.requestDuration.Describe(descs) + m.currentNodes.Describe(descs) } func (m *poolMetricsCollector) updateStatistic() { @@ -79,6 +84,7 @@ func (m *poolMetricsCollector) updateStatistic() { m.overallNodeRequests.Reset() m.currentErrors.Reset() m.requestDuration.Reset() + m.currentNodes.Reset() for _, node := range stat.Nodes() { m.overallNodeErrors.WithLabelValues(node.Address()).Set(float64(node.OverallErrors())) @@ -88,6 +94,10 @@ func (m *poolMetricsCollector) updateStatistic() { m.updateRequestsDuration(node) } + for _, addr := range stat.CurrentNodes() { + m.currentNodes.WithLabelValues(addr).Set(1) + } + m.overallErrors.Set(float64(stat.OverallErrors())) }