forked from TrueCloudLab/frostfs-http-gw
[#29] metrics: Support dump descriptions
Signed-off-by: Denis Kirillov <d.kirillov@yadro.com>
This commit is contained in:
parent
959213520e
commit
cc37c34396
5 changed files with 211 additions and 69 deletions
8
Makefile
8
Makefile
|
@ -9,6 +9,8 @@ BUILD ?= $(shell date -u --iso=seconds)
|
|||
HUB_IMAGE ?= truecloudlab/frostfs-http-gw
|
||||
HUB_TAG ?= "$(shell echo ${VERSION} | sed 's/^v//')"
|
||||
|
||||
METRICS_DUMP_OUT ?= ./metrics-dump.json
|
||||
|
||||
# List of binaries to build. For now just one.
|
||||
BINDIR = bin
|
||||
DIRS = $(BINDIR)
|
||||
|
@ -143,4 +145,10 @@ debpackage:
|
|||
debclean:
|
||||
dh clean
|
||||
|
||||
# Dump metrics (use METRICS_DUMP_OUT variable to override default out file './metrics-dump.json')
|
||||
.PHONY: dump-metrics
|
||||
dump-metrics:
|
||||
@go test ./metrics -run TestDescribeAll --tags=dump_metrics --out=$(abspath $(METRICS_DUMP_OUT))
|
||||
|
||||
|
||||
include help.mk
|
||||
|
|
12
app.go
12
app.go
|
@ -243,6 +243,17 @@ func (m *gateMetrics) SetHealth(status metrics.HealthStatus) {
|
|||
m.provider.SetHealth(status)
|
||||
}
|
||||
|
||||
func (m *gateMetrics) SetVersion(ver string) {
|
||||
m.mu.RLock()
|
||||
if !m.enabled {
|
||||
m.mu.RUnlock()
|
||||
return
|
||||
}
|
||||
m.mu.RUnlock()
|
||||
|
||||
m.provider.SetVersion(ver)
|
||||
}
|
||||
|
||||
func (m *gateMetrics) Shutdown() {
|
||||
m.mu.Lock()
|
||||
if m.enabled {
|
||||
|
@ -325,6 +336,7 @@ func getKeyFromWallet(w *wallet.Wallet, addrStr string, password *string) (*ecds
|
|||
func (a *app) Wait() {
|
||||
a.log.Info("starting application", zap.String("app_name", "frostfs-http-gw"), zap.String("version", Version))
|
||||
|
||||
a.metrics.SetVersion(Version)
|
||||
a.setHealthStatus()
|
||||
|
||||
<-a.webDone // wait for web-server to be stopped
|
||||
|
|
136
metrics/desc.go
Normal file
136
metrics/desc.go
Normal file
|
@ -0,0 +1,136 @@
|
|||
package metrics
|
||||
|
||||
import (
	"encoding/json"
	"sort"

	"github.com/prometheus/client_golang/prometheus"
)
|
||||
|
||||
var appMetricsDesc = map[string]map[string]Description{
|
||||
poolSubsystem: {
|
||||
overallErrorsMetric: Description{
|
||||
Namespace: namespace,
|
||||
Subsystem: poolSubsystem,
|
||||
Name: overallErrorsMetric,
|
||||
Help: "Total number of errors in pool",
|
||||
},
|
||||
overallNodeErrorsMetric: Description{
|
||||
Namespace: namespace,
|
||||
Subsystem: poolSubsystem,
|
||||
Name: overallNodeErrorsMetric,
|
||||
Help: "Total number of errors for connection in pool",
|
||||
VariableLabels: []string{"node"},
|
||||
},
|
||||
overallNodeRequestsMetric: Description{
|
||||
Namespace: namespace,
|
||||
Subsystem: poolSubsystem,
|
||||
Name: overallNodeRequestsMetric,
|
||||
Help: "Total number of requests to specific node in pool",
|
||||
VariableLabels: []string{"node"},
|
||||
},
|
||||
currentErrorMetric: Description{
|
||||
Namespace: namespace,
|
||||
Subsystem: poolSubsystem,
|
||||
Name: currentErrorMetric,
|
||||
Help: "Number of errors on current connections that will be reset after the threshold",
|
||||
VariableLabels: []string{"node"},
|
||||
},
|
||||
avgRequestDurationMetric: Description{
|
||||
Namespace: namespace,
|
||||
Subsystem: poolSubsystem,
|
||||
Name: avgRequestDurationMetric,
|
||||
Help: "Average request duration (in milliseconds) for specific method on node in pool",
|
||||
VariableLabels: []string{"node", "method"},
|
||||
},
|
||||
},
|
||||
stateSubsystem: {
|
||||
healthMetric: Description{
|
||||
Namespace: namespace,
|
||||
Subsystem: stateSubsystem,
|
||||
Name: healthMetric,
|
||||
Help: "Current HTTP gateway state",
|
||||
},
|
||||
versionInfoMetric: Description{
|
||||
Namespace: namespace,
|
||||
Subsystem: stateSubsystem,
|
||||
Name: versionInfoMetric,
|
||||
Help: "Version of current FrostFS HTTP Gate instance",
|
||||
VariableLabels: []string{"version"},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
type Description struct {
|
||||
Namespace string
|
||||
Subsystem string
|
||||
Name string
|
||||
Help string
|
||||
ConstantLabels []KeyValue
|
||||
VariableLabels []string
|
||||
}
|
||||
|
||||
// KeyValue is a single label name/value pair.
// It is encoded to JSON with the lowercase "key" and "value" fields.
type KeyValue struct {
	Key   string `json:"key"`
	Value string `json:"value"`
}
|
||||
|
||||
func (d *Description) MarshalJSON() ([]byte, error) {
|
||||
return json.Marshal(&struct {
|
||||
FQName string `json:"name"`
|
||||
Help string `json:"help"`
|
||||
ConstantLabels []KeyValue `json:"constant_labels"`
|
||||
VariableLabels []string `json:"variable_labels"`
|
||||
}{
|
||||
FQName: d.BuildFQName(),
|
||||
Help: d.Help,
|
||||
ConstantLabels: d.ConstantLabels,
|
||||
VariableLabels: d.VariableLabels,
|
||||
})
|
||||
}
|
||||
|
||||
func (d *Description) BuildFQName() string {
|
||||
return prometheus.BuildFQName(d.Namespace, d.Subsystem, d.Name)
|
||||
}
|
||||
|
||||
func (d *Description) ConstLabelsMap() map[string]string {
|
||||
constsLabels := make(map[string]string, len(d.ConstantLabels))
|
||||
for _, kv := range d.ConstantLabels {
|
||||
constsLabels[kv.Key] = kv.Value
|
||||
}
|
||||
return constsLabels
|
||||
}
|
||||
|
||||
// DescribeAll returns descriptions for metrics.
|
||||
func DescribeAll() []Description {
|
||||
var list []Description
|
||||
for _, m := range appMetricsDesc {
|
||||
for _, description := range m {
|
||||
list = append(list, description)
|
||||
}
|
||||
}
|
||||
|
||||
return list
|
||||
}
|
||||
|
||||
func newOpts(description Description) prometheus.Opts {
|
||||
return prometheus.Opts{
|
||||
Namespace: description.Namespace,
|
||||
Subsystem: description.Subsystem,
|
||||
Name: description.Name,
|
||||
Help: description.Help,
|
||||
ConstLabels: description.ConstLabelsMap(),
|
||||
}
|
||||
}
|
||||
|
||||
func newGauge(description Description) prometheus.Gauge {
|
||||
return prometheus.NewGauge(
|
||||
prometheus.GaugeOpts(newOpts(description)),
|
||||
)
|
||||
}
|
||||
|
||||
func newGaugeVec(description Description) *prometheus.GaugeVec {
|
||||
return prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts(newOpts(description)),
|
||||
description.VariableLabels,
|
||||
)
|
||||
}
|
26
metrics/desc_test.go
Normal file
26
metrics/desc_test.go
Normal file
|
@ -0,0 +1,26 @@
|
|||
//go:build dump_metrics
|
||||
|
||||
package metrics
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"flag"
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
var metricsPath = flag.String("out", "", "File to export http gateway metrics to.")
|
||||
|
||||
func TestDescribeAll(t *testing.T) {
|
||||
flag.Parse()
|
||||
|
||||
require.NotEmpty(t, metricsPath, "flag 'out' must be provided to dump metrics description")
|
||||
|
||||
data, err := json.Marshal(DescribeAll())
|
||||
require.NoError(t, err)
|
||||
|
||||
err = os.WriteFile(*metricsPath, data, 0644)
|
||||
require.NoError(t, err)
|
||||
}
|
|
@ -13,7 +13,22 @@ const (
|
|||
namespace = "frostfs_http_gw"
|
||||
stateSubsystem = "state"
|
||||
poolSubsystem = "pool"
|
||||
)
|
||||
|
||||
// Names of the metrics described under the state subsystem.
const (
	healthMetric      = "health"
	versionInfoMetric = "version_info"
)
|
||||
|
||||
// Names of the metrics described under the pool subsystem.
const (
	overallErrorsMetric       = "overall_errors"
	overallNodeErrorsMetric   = "overall_node_errors"
	overallNodeRequestsMetric = "overall_node_requests"
	currentErrorMetric        = "current_errors"
	avgRequestDurationMetric  = "avg_request_duration"
)
|
||||
|
||||
const (
|
||||
methodGetBalance = "get_balance"
|
||||
methodPutContainer = "put_container"
|
||||
methodGetContainer = "get_container"
|
||||
|
@ -48,6 +63,7 @@ type GateMetrics struct {
|
|||
|
||||
type stateMetrics struct {
|
||||
healthCheck prometheus.Gauge
|
||||
versionInfo *prometheus.GaugeVec
|
||||
}
|
||||
|
||||
type poolMetricsCollector struct {
|
||||
|
@ -80,93 +96,37 @@ func (g *GateMetrics) Unregister() {
|
|||
|
||||
func newStateMetrics() *stateMetrics {
|
||||
return &stateMetrics{
|
||||
healthCheck: prometheus.NewGauge(prometheus.GaugeOpts{
|
||||
Namespace: namespace,
|
||||
Subsystem: stateSubsystem,
|
||||
Name: "health",
|
||||
Help: "Current HTTP gateway state",
|
||||
}),
|
||||
healthCheck: newGauge(appMetricsDesc[stateSubsystem][healthMetric]),
|
||||
versionInfo: newGaugeVec(appMetricsDesc[stateSubsystem][versionInfoMetric]),
|
||||
}
|
||||
}
|
||||
|
||||
func (m stateMetrics) register() {
|
||||
prometheus.MustRegister(m.healthCheck)
|
||||
prometheus.MustRegister(m.versionInfo)
|
||||
}
|
||||
|
||||
func (m stateMetrics) unregister() {
|
||||
prometheus.Unregister(m.healthCheck)
|
||||
prometheus.Unregister(m.versionInfo)
|
||||
}
|
||||
|
||||
func (m stateMetrics) SetHealth(s HealthStatus) {
|
||||
m.healthCheck.Set(float64(s))
|
||||
}
|
||||
|
||||
func (m stateMetrics) SetVersion(ver string) {
|
||||
m.versionInfo.WithLabelValues(ver).Set(1)
|
||||
}
|
||||
|
||||
func newPoolMetricsCollector(p *pool.Pool) *poolMetricsCollector {
|
||||
overallErrors := prometheus.NewGauge(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: namespace,
|
||||
Subsystem: poolSubsystem,
|
||||
Name: "overall_errors",
|
||||
Help: "Total number of errors in pool",
|
||||
},
|
||||
)
|
||||
|
||||
overallNodeErrors := prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: namespace,
|
||||
Subsystem: poolSubsystem,
|
||||
Name: "overall_node_errors",
|
||||
Help: "Total number of errors for connection in pool",
|
||||
},
|
||||
[]string{
|
||||
"node",
|
||||
},
|
||||
)
|
||||
|
||||
overallNodeRequests := prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: namespace,
|
||||
Subsystem: poolSubsystem,
|
||||
Name: "overall_node_requests",
|
||||
Help: "Total number of requests to specific node in pool",
|
||||
},
|
||||
[]string{
|
||||
"node",
|
||||
},
|
||||
)
|
||||
|
||||
currentErrors := prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: namespace,
|
||||
Subsystem: poolSubsystem,
|
||||
Name: "current_errors",
|
||||
Help: "Number of errors on current connections that will be reset after the threshold",
|
||||
},
|
||||
[]string{
|
||||
"node",
|
||||
},
|
||||
)
|
||||
|
||||
requestsDuration := prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: namespace,
|
||||
Subsystem: poolSubsystem,
|
||||
Name: "avg_request_duration",
|
||||
Help: "Average request duration (in milliseconds) for specific method on node in pool",
|
||||
},
|
||||
[]string{
|
||||
"node",
|
||||
"method",
|
||||
},
|
||||
)
|
||||
|
||||
return &poolMetricsCollector{
|
||||
pool: p,
|
||||
overallErrors: overallErrors,
|
||||
overallNodeErrors: overallNodeErrors,
|
||||
overallNodeRequests: overallNodeRequests,
|
||||
currentErrors: currentErrors,
|
||||
requestDuration: requestsDuration,
|
||||
overallErrors: newGauge(appMetricsDesc[poolSubsystem][overallErrorsMetric]),
|
||||
overallNodeErrors: newGaugeVec(appMetricsDesc[poolSubsystem][overallNodeErrorsMetric]),
|
||||
overallNodeRequests: newGaugeVec(appMetricsDesc[poolSubsystem][overallNodeRequestsMetric]),
|
||||
currentErrors: newGaugeVec(appMetricsDesc[poolSubsystem][currentErrorMetric]),
|
||||
requestDuration: newGaugeVec(appMetricsDesc[poolSubsystem][avgRequestDurationMetric]),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue