diff --git a/Makefile b/Makefile index c8a1c23..eb74c3e 100755 --- a/Makefile +++ b/Makefile @@ -9,6 +9,8 @@ BUILD ?= $(shell date -u --iso=seconds) HUB_IMAGE ?= truecloudlab/frostfs-http-gw HUB_TAG ?= "$(shell echo ${VERSION} | sed 's/^v//')" +METRICS_DUMP_OUT ?= ./metrics-dump.json + # List of binaries to build. For now just one. BINDIR = bin DIRS = $(BINDIR) @@ -143,4 +145,10 @@ debpackage: debclean: dh clean +# Dump metrics (use METRICS_DUMP_OUT variable to override default out file './metrics-dump.json') +.PHONY: dump-metrics +dump-metrics: + @go test ./metrics -run TestDescribeAll --tags=dump_metrics --out=$(abspath $(METRICS_DUMP_OUT)) + + include help.mk diff --git a/app.go b/app.go index 0f49477..8522ef4 100644 --- a/app.go +++ b/app.go @@ -243,6 +243,17 @@ func (m *gateMetrics) SetHealth(status metrics.HealthStatus) { m.provider.SetHealth(status) } +func (m *gateMetrics) SetVersion(ver string) { + m.mu.RLock() + if !m.enabled { + m.mu.RUnlock() + return + } + m.mu.RUnlock() + + m.provider.SetVersion(ver) +} + func (m *gateMetrics) Shutdown() { m.mu.Lock() if m.enabled { @@ -325,6 +336,7 @@ func getKeyFromWallet(w *wallet.Wallet, addrStr string, password *string) (*ecds func (a *app) Wait() { a.log.Info("starting application", zap.String("app_name", "frostfs-http-gw"), zap.String("version", Version)) + a.metrics.SetVersion(Version) a.setHealthStatus() <-a.webDone // wait for web-server to be stopped diff --git a/metrics/desc.go b/metrics/desc.go new file mode 100644 index 0000000..d904f8a --- /dev/null +++ b/metrics/desc.go @@ -0,0 +1,136 @@ +package metrics + +import ( + "encoding/json" + + "github.com/prometheus/client_golang/prometheus" +) + +var appMetricsDesc = map[string]map[string]Description{ + poolSubsystem: { + overallErrorsMetric: Description{ + Namespace: namespace, + Subsystem: poolSubsystem, + Name: overallErrorsMetric, + Help: "Total number of errors in pool", + }, + overallNodeErrorsMetric: Description{ + Namespace: namespace, + Subsystem: poolSubsystem, + Name: overallNodeErrorsMetric, + Help: "Total number of errors for connection in pool", + VariableLabels: []string{"node"}, + }, + overallNodeRequestsMetric: Description{ + Namespace: namespace, + Subsystem: poolSubsystem, + Name: overallNodeRequestsMetric, + Help: "Total number of requests to specific node in pool", + VariableLabels: []string{"node"}, + }, + currentErrorMetric: Description{ + Namespace: namespace, + Subsystem: poolSubsystem, + Name: currentErrorMetric, + Help: "Number of errors on current connections that will be reset after the threshold", + VariableLabels: []string{"node"}, + }, + avgRequestDurationMetric: Description{ + Namespace: namespace, + Subsystem: poolSubsystem, + Name: avgRequestDurationMetric, + Help: "Average request duration (in milliseconds) for specific method on node in pool", + VariableLabels: []string{"node", "method"}, + }, + }, + stateSubsystem: { + healthMetric: Description{ + Namespace: namespace, + Subsystem: stateSubsystem, + Name: healthMetric, + Help: "Current HTTP gateway state", + }, + versionInfoMetric: Description{ + Namespace: namespace, + Subsystem: stateSubsystem, + Name: versionInfoMetric, + Help: "Version of current FrostFS HTTP Gate instance", + VariableLabels: []string{"version"}, + }, + }, +} + +type Description struct { + Namespace string + Subsystem string + Name string + Help string + ConstantLabels []KeyValue + VariableLabels []string +} + +type KeyValue struct { + Key string `json:"key"` + Value string `json:"value"` +} + +func (d *Description) MarshalJSON() ([]byte, error) { + return json.Marshal(&struct { + FQName string `json:"name"` + Help string `json:"help"` + ConstantLabels []KeyValue `json:"constant_labels"` + VariableLabels []string `json:"variable_labels"` + }{ + FQName: d.BuildFQName(), + Help: d.Help, + ConstantLabels: d.ConstantLabels, + VariableLabels: d.VariableLabels, + }) +} + +func (d *Description) BuildFQName() string { + return prometheus.BuildFQName(d.Namespace, d.Subsystem, d.Name) +} + +func (d *Description) ConstLabelsMap() map[string]string { + constsLabels := make(map[string]string, len(d.ConstantLabels)) + for _, kv := range d.ConstantLabels { + constsLabels[kv.Key] = kv.Value + } + return constsLabels +} + +// DescribeAll returns descriptions for metrics. +func DescribeAll() []Description { + var list []Description + for _, m := range appMetricsDesc { + for _, description := range m { + list = append(list, description) + } + } + + return list +} + +func newOpts(description Description) prometheus.Opts { + return prometheus.Opts{ + Namespace: description.Namespace, + Subsystem: description.Subsystem, + Name: description.Name, + Help: description.Help, + ConstLabels: description.ConstLabelsMap(), + } +} + +func newGauge(description Description) prometheus.Gauge { + return prometheus.NewGauge( + prometheus.GaugeOpts(newOpts(description)), + ) +} + +func newGaugeVec(description Description) *prometheus.GaugeVec { + return prometheus.NewGaugeVec( + prometheus.GaugeOpts(newOpts(description)), + description.VariableLabels, + ) +} diff --git a/metrics/desc_test.go b/metrics/desc_test.go new file mode 100644 index 0000000..3c2d06b --- /dev/null +++ b/metrics/desc_test.go @@ -0,0 +1,26 @@ +//go:build dump_metrics + +package metrics + +import ( + "encoding/json" + "flag" + "os" + "testing" + + "github.com/stretchr/testify/require" +) + +var metricsPath = flag.String("out", "", "File to export http gateway metrics to.") + +func TestDescribeAll(t *testing.T) { + flag.Parse() + + require.NotEmpty(t, metricsPath, "flag 'out' must be provided to dump metrics description") + + data, err := json.Marshal(DescribeAll()) + require.NoError(t, err) + + err = os.WriteFile(*metricsPath, data, 0644) + require.NoError(t, err) +} diff --git a/metrics/metrics.go b/metrics/metrics.go index 4797fd6..e2b33da 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -13,7 +13,22 @@ const ( namespace = "frostfs_http_gw" stateSubsystem = "state" poolSubsystem = "pool" +) +const ( + healthMetric = "health" + versionInfoMetric = "version_info" +) + +const ( + overallErrorsMetric = "overall_errors" + overallNodeErrorsMetric = "overall_node_errors" + overallNodeRequestsMetric = "overall_node_requests" + currentErrorMetric = "current_errors" + avgRequestDurationMetric = "avg_request_duration" +) + +const ( methodGetBalance = "get_balance" methodPutContainer = "put_container" methodGetContainer = "get_container" @@ -48,6 +63,7 @@ type GateMetrics struct { type stateMetrics struct { healthCheck prometheus.Gauge + versionInfo *prometheus.GaugeVec } type poolMetricsCollector struct { @@ -80,93 +96,37 @@ func (g *GateMetrics) Unregister() { func newStateMetrics() *stateMetrics { return &stateMetrics{ - healthCheck: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: stateSubsystem, - Name: "health", - Help: "Current HTTP gateway state", - }), + healthCheck: newGauge(appMetricsDesc[stateSubsystem][healthMetric]), + versionInfo: newGaugeVec(appMetricsDesc[stateSubsystem][versionInfoMetric]), } } func (m stateMetrics) register() { prometheus.MustRegister(m.healthCheck) + prometheus.MustRegister(m.versionInfo) } func (m stateMetrics) unregister() { prometheus.Unregister(m.healthCheck) + prometheus.Unregister(m.versionInfo) } func (m stateMetrics) SetHealth(s HealthStatus) { m.healthCheck.Set(float64(s)) } +func (m stateMetrics) SetVersion(ver string) { + m.versionInfo.WithLabelValues(ver).Set(1) +} + func newPoolMetricsCollector(p *pool.Pool) *poolMetricsCollector { - overallErrors := prometheus.NewGauge( - prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: poolSubsystem, - Name: "overall_errors", - Help: "Total number of errors in pool", - }, - ) - - overallNodeErrors := prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: poolSubsystem, - Name: "overall_node_errors", - Help: "Total number of errors for connection in pool", - }, - []string{ - "node", - }, - ) - - overallNodeRequests := prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: poolSubsystem, - Name: "overall_node_requests", - Help: "Total number of requests to specific node in pool", - }, - []string{ - "node", - }, - ) - - currentErrors := prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: poolSubsystem, - Name: "current_errors", - Help: "Number of errors on current connections that will be reset after the threshold", - }, - []string{ - "node", - }, - ) - - requestsDuration := prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: poolSubsystem, - Name: "avg_request_duration", - Help: "Average request duration (in milliseconds) for specific method on node in pool", - }, - []string{ - "node", - "method", - }, - ) - return &poolMetricsCollector{ pool: p, - overallErrors: overallErrors, - overallNodeErrors: overallNodeErrors, - overallNodeRequests: overallNodeRequests, - currentErrors: currentErrors, - requestDuration: requestsDuration, + overallErrors: newGauge(appMetricsDesc[poolSubsystem][overallErrorsMetric]), + overallNodeErrors: newGaugeVec(appMetricsDesc[poolSubsystem][overallNodeErrorsMetric]), + overallNodeRequests: newGaugeVec(appMetricsDesc[poolSubsystem][overallNodeRequestsMetric]), + currentErrors: newGaugeVec(appMetricsDesc[poolSubsystem][currentErrorMetric]), + requestDuration: newGaugeVec(appMetricsDesc[poolSubsystem][avgRequestDurationMetric]), } }