[#51] metrics: Add a metric of currently used nodes

Signed-off-by: Artem Tataurov <a.tataurov@yadro.com>
2023-08-16 17:08:50 +03:00 · 2023-08-16 17:08:50 +03:00 · c750c87a61
commit c750c87a61
parent 94a42fa273
3 changed files with 19 additions and 0 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -15,6 +15,7 @@ This document outlines major changes between releases.
 - Fix parsing signed headers in presigned urls (#182)

 ### Added
+- Add a metric exposing the addresses of the currently healthy nodes that share the highest priority (#51)
 - Support dump metrics descriptions (#80)
 - Add `copies_numbers` section to `placement_policy` in config file and support vectors of copies numbers (#70, #101)
 - Support impersonate bearer token (#81, #105)
--- a/metrics/desc.go
+++ b/metrics/desc.go
@ -48,6 +48,14 @@ var appMetricsDesc = map[string]map[string]Description{
 			Help:           "Average request duration (in milliseconds) for specific method on node in pool",
 			VariableLabels: []string{"node", "method"},
 		},
+		currentNodesMetric: Description{
+			Type:           dto.MetricType_GAUGE,
+			Namespace:      namespace,
+			Subsystem:      poolSubsystem,
+			Name:           currentNodesMetric,
+			Help:           "Addresses of nodes of the same and highest priority that are currently healthy",
+			VariableLabels: []string{"address"},
+		},
 	},
 	billingSubsystem: {
 		userRequestsMetric: Description{
--- a/metrics/pool.go
+++ b/metrics/pool.go
@ -15,6 +15,7 @@ const (
 	overallNodeRequestsMetric = "overall_node_requests"
 	currentErrorMetric        = "current_errors"
 	avgRequestDurationMetric  = "avg_request_duration"
+	currentNodesMetric        = "current_nodes"
 )

 const (
@ -42,6 +43,7 @@ type poolMetricsCollector struct {
 	overallNodeRequests *prometheus.GaugeVec
 	currentErrors       *prometheus.GaugeVec
 	requestDuration     *prometheus.GaugeVec
+	currentNodes        *prometheus.GaugeVec
 }

 func newPoolMetricsCollector(scraper StatisticScraper) *poolMetricsCollector {
@ -52,6 +54,7 @@ func newPoolMetricsCollector(scraper StatisticScraper) *poolMetricsCollector {
 		overallNodeRequests: mustNewGaugeVec(appMetricsDesc[poolSubsystem][overallNodeRequestsMetric]),
 		currentErrors:       mustNewGaugeVec(appMetricsDesc[poolSubsystem][currentErrorMetric]),
 		requestDuration:     mustNewGaugeVec(appMetricsDesc[poolSubsystem][avgRequestDurationMetric]),
+		currentNodes:        mustNewGaugeVec(appMetricsDesc[poolSubsystem][currentNodesMetric]),
 	}
 }

@ -62,6 +65,7 @@ func (m *poolMetricsCollector) Collect(ch chan<- prometheus.Metric) {
 	m.overallNodeRequests.Collect(ch)
 	m.currentErrors.Collect(ch)
 	m.requestDuration.Collect(ch)
+	m.currentNodes.Collect(ch)
 }

 func (m *poolMetricsCollector) Describe(descs chan<- *prometheus.Desc) {
@ -70,6 +74,7 @@ func (m *poolMetricsCollector) Describe(descs chan<- *prometheus.Desc) {
 	m.overallNodeRequests.Describe(descs)
 	m.currentErrors.Describe(descs)
 	m.requestDuration.Describe(descs)
+	m.currentNodes.Describe(descs)
 }

 func (m *poolMetricsCollector) updateStatistic() {
@ -79,6 +84,7 @@ func (m *poolMetricsCollector) updateStatistic() {
 	m.overallNodeRequests.Reset()
 	m.currentErrors.Reset()
 	m.requestDuration.Reset()
+	m.currentNodes.Reset()

 	for _, node := range stat.Nodes() {
 		m.overallNodeErrors.WithLabelValues(node.Address()).Set(float64(node.OverallErrors()))
@ -88,6 +94,10 @@ func (m *poolMetricsCollector) updateStatistic() {
 		m.updateRequestsDuration(node)
 	}

+	for _, addr := range stat.CurrentNodes() {
+		m.currentNodes.WithLabelValues(addr).Set(1)
+	}
+
 	m.overallErrors.Set(float64(stat.OverallErrors()))
 }