[#51] metrics: Add a metric of currently used nodes
/ DCO (pull_request) Successful in 1m18s Details
/ Builds (1.19) (pull_request) Successful in 2m23s Details
/ Builds (1.20) (pull_request) Successful in 2m18s Details
/ Vulncheck (pull_request) Successful in 5m26s Details
/ Lint (pull_request) Failing after 2m4s Details
/ Tests (1.19) (pull_request) Successful in 7m42s Details
/ Tests (1.20) (pull_request) Successful in 2m11s Details

Signed-off-by: Artem Tataurov <a.tataurov@yadro.com>
pull/186/head
Artem Tataurov 2023-08-16 17:08:50 +03:00
parent 94a42fa273
commit c750c87a61
3 changed files with 19 additions and 0 deletions

View File

@ -15,6 +15,7 @@ This document outlines major changes between releases.
- Fix parsing signed headers in presigned urls (#182)
### Added
- Add a metric with addresses of the currently healthy nodes that share the same, highest priority (#51)
- Support dumping metrics descriptions (#80)
- Add `copies_numbers` section to `placement_policy` in config file and support vectors of copies numbers (#70, #101)
- Support impersonate bearer token (#81, #105)

View File

@ -48,6 +48,14 @@ var appMetricsDesc = map[string]map[string]Description{
Help: "Average request duration (in milliseconds) for specific method on node in pool",
VariableLabels: []string{"node", "method"},
},
currentNodesMetric: Description{
Type: dto.MetricType_GAUGE,
Namespace: namespace,
Subsystem: poolSubsystem,
Name: currentNodesMetric,
Help: "Addresses of nodes of the same and highest priority that are currently healthy",
VariableLabels: []string{"address"},
},
},
billingSubsystem: {
userRequestsMetric: Description{

View File

@ -15,6 +15,7 @@ const (
overallNodeRequestsMetric = "overall_node_requests"
currentErrorMetric = "current_errors"
avgRequestDurationMetric = "avg_request_duration"
currentNodesMetric = "current_nodes"
)
const (
@ -42,6 +43,7 @@ type poolMetricsCollector struct {
overallNodeRequests *prometheus.GaugeVec
currentErrors *prometheus.GaugeVec
requestDuration *prometheus.GaugeVec
currentNodes *prometheus.GaugeVec
}
func newPoolMetricsCollector(scraper StatisticScraper) *poolMetricsCollector {
@ -52,6 +54,7 @@ func newPoolMetricsCollector(scraper StatisticScraper) *poolMetricsCollector {
overallNodeRequests: mustNewGaugeVec(appMetricsDesc[poolSubsystem][overallNodeRequestsMetric]),
currentErrors: mustNewGaugeVec(appMetricsDesc[poolSubsystem][currentErrorMetric]),
requestDuration: mustNewGaugeVec(appMetricsDesc[poolSubsystem][avgRequestDurationMetric]),
currentNodes: mustNewGaugeVec(appMetricsDesc[poolSubsystem][currentNodesMetric]),
}
}
@ -62,6 +65,7 @@ func (m *poolMetricsCollector) Collect(ch chan<- prometheus.Metric) {
m.overallNodeRequests.Collect(ch)
m.currentErrors.Collect(ch)
m.requestDuration.Collect(ch)
m.currentNodes.Collect(ch)
}
func (m *poolMetricsCollector) Describe(descs chan<- *prometheus.Desc) {
@ -70,6 +74,7 @@ func (m *poolMetricsCollector) Describe(descs chan<- *prometheus.Desc) {
m.overallNodeRequests.Describe(descs)
m.currentErrors.Describe(descs)
m.requestDuration.Describe(descs)
m.currentNodes.Describe(descs)
}
func (m *poolMetricsCollector) updateStatistic() {
@ -79,6 +84,7 @@ func (m *poolMetricsCollector) updateStatistic() {
m.overallNodeRequests.Reset()
m.currentErrors.Reset()
m.requestDuration.Reset()
m.currentNodes.Reset()
for _, node := range stat.Nodes() {
m.overallNodeErrors.WithLabelValues(node.Address()).Set(float64(node.OverallErrors()))
@ -88,6 +94,10 @@ func (m *poolMetricsCollector) updateStatistic() {
m.updateRequestsDuration(node)
}
for _, addr := range stat.CurrentNodes() {
m.currentNodes.WithLabelValues(addr).Set(1)
}
m.overallErrors.Set(float64(stat.OverallErrors()))
}