forked from TrueCloudLab/frostfs-http-gw
[#29] metrics: Support dump descriptions
Signed-off-by: Denis Kirillov <d.kirillov@yadro.com>
This commit is contained in:
parent
959213520e
commit
cc37c34396
5 changed files with 211 additions and 69 deletions
8
Makefile
8
Makefile
|
@ -9,6 +9,8 @@ BUILD ?= $(shell date -u --iso=seconds)
|
||||||
HUB_IMAGE ?= truecloudlab/frostfs-http-gw
|
HUB_IMAGE ?= truecloudlab/frostfs-http-gw
|
||||||
HUB_TAG ?= "$(shell echo ${VERSION} | sed 's/^v//')"
|
HUB_TAG ?= "$(shell echo ${VERSION} | sed 's/^v//')"
|
||||||
|
|
||||||
|
METRICS_DUMP_OUT ?= ./metrics-dump.json
|
||||||
|
|
||||||
# List of binaries to build. For now just one.
|
# List of binaries to build. For now just one.
|
||||||
BINDIR = bin
|
BINDIR = bin
|
||||||
DIRS = $(BINDIR)
|
DIRS = $(BINDIR)
|
||||||
|
@ -143,4 +145,10 @@ debpackage:
|
||||||
debclean:
|
debclean:
|
||||||
dh clean
|
dh clean
|
||||||
|
|
||||||
|
# Dump metrics descriptions as JSON (use the METRICS_DUMP_OUT variable to override the default output file './metrics-dump.json')
|
||||||
|
.PHONY: dump-metrics
|
||||||
|
dump-metrics:
|
||||||
|
@go test ./metrics -run TestDescribeAll --tags=dump_metrics --out=$(abspath $(METRICS_DUMP_OUT))
|
||||||
|
|
||||||
|
|
||||||
include help.mk
|
include help.mk
|
||||||
|
|
12
app.go
12
app.go
|
@ -243,6 +243,17 @@ func (m *gateMetrics) SetHealth(status metrics.HealthStatus) {
|
||||||
m.provider.SetHealth(status)
|
m.provider.SetHealth(status)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (m *gateMetrics) SetVersion(ver string) {
|
||||||
|
m.mu.RLock()
|
||||||
|
if !m.enabled {
|
||||||
|
m.mu.RUnlock()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
m.mu.RUnlock()
|
||||||
|
|
||||||
|
m.provider.SetVersion(ver)
|
||||||
|
}
|
||||||
|
|
||||||
func (m *gateMetrics) Shutdown() {
|
func (m *gateMetrics) Shutdown() {
|
||||||
m.mu.Lock()
|
m.mu.Lock()
|
||||||
if m.enabled {
|
if m.enabled {
|
||||||
|
@ -325,6 +336,7 @@ func getKeyFromWallet(w *wallet.Wallet, addrStr string, password *string) (*ecds
|
||||||
func (a *app) Wait() {
|
func (a *app) Wait() {
|
||||||
a.log.Info("starting application", zap.String("app_name", "frostfs-http-gw"), zap.String("version", Version))
|
a.log.Info("starting application", zap.String("app_name", "frostfs-http-gw"), zap.String("version", Version))
|
||||||
|
|
||||||
|
a.metrics.SetVersion(Version)
|
||||||
a.setHealthStatus()
|
a.setHealthStatus()
|
||||||
|
|
||||||
<-a.webDone // wait for web-server to be stopped
|
<-a.webDone // wait for web-server to be stopped
|
||||||
|
|
136
metrics/desc.go
Normal file
136
metrics/desc.go
Normal file
|
@ -0,0 +1,136 @@
|
||||||
|
package metrics
|
||||||
|
|
||||||
|
import (
	"encoding/json"
	"sort"

	"github.com/prometheus/client_golang/prometheus"
)
|
||||||
|
|
||||||
|
var appMetricsDesc = map[string]map[string]Description{
|
||||||
|
poolSubsystem: {
|
||||||
|
overallErrorsMetric: Description{
|
||||||
|
Namespace: namespace,
|
||||||
|
Subsystem: poolSubsystem,
|
||||||
|
Name: overallErrorsMetric,
|
||||||
|
Help: "Total number of errors in pool",
|
||||||
|
},
|
||||||
|
overallNodeErrorsMetric: Description{
|
||||||
|
Namespace: namespace,
|
||||||
|
Subsystem: poolSubsystem,
|
||||||
|
Name: overallNodeErrorsMetric,
|
||||||
|
Help: "Total number of errors for connection in pool",
|
||||||
|
VariableLabels: []string{"node"},
|
||||||
|
},
|
||||||
|
overallNodeRequestsMetric: Description{
|
||||||
|
Namespace: namespace,
|
||||||
|
Subsystem: poolSubsystem,
|
||||||
|
Name: overallNodeRequestsMetric,
|
||||||
|
Help: "Total number of requests to specific node in pool",
|
||||||
|
VariableLabels: []string{"node"},
|
||||||
|
},
|
||||||
|
currentErrorMetric: Description{
|
||||||
|
Namespace: namespace,
|
||||||
|
Subsystem: poolSubsystem,
|
||||||
|
Name: currentErrorMetric,
|
||||||
|
Help: "Number of errors on current connections that will be reset after the threshold",
|
||||||
|
VariableLabels: []string{"node"},
|
||||||
|
},
|
||||||
|
avgRequestDurationMetric: Description{
|
||||||
|
Namespace: namespace,
|
||||||
|
Subsystem: poolSubsystem,
|
||||||
|
Name: avgRequestDurationMetric,
|
||||||
|
Help: "Average request duration (in milliseconds) for specific method on node in pool",
|
||||||
|
VariableLabels: []string{"node", "method"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
stateSubsystem: {
|
||||||
|
healthMetric: Description{
|
||||||
|
Namespace: namespace,
|
||||||
|
Subsystem: stateSubsystem,
|
||||||
|
Name: healthMetric,
|
||||||
|
Help: "Current HTTP gateway state",
|
||||||
|
},
|
||||||
|
versionInfoMetric: Description{
|
||||||
|
Namespace: namespace,
|
||||||
|
Subsystem: stateSubsystem,
|
||||||
|
Name: versionInfoMetric,
|
||||||
|
Help: "Version of current FrostFS HTTP Gate instance",
|
||||||
|
VariableLabels: []string{"version"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
type Description struct {
|
||||||
|
Namespace string
|
||||||
|
Subsystem string
|
||||||
|
Name string
|
||||||
|
Help string
|
||||||
|
ConstantLabels []KeyValue
|
||||||
|
VariableLabels []string
|
||||||
|
}
|
||||||
|
|
||||||
|
// KeyValue is a single label pair. It is encoded to JSON as an object with
// the fields "key" and "value".
type KeyValue struct {
	Key   string `json:"key"`
	Value string `json:"value"`
}
|
||||||
|
|
||||||
|
func (d *Description) MarshalJSON() ([]byte, error) {
|
||||||
|
return json.Marshal(&struct {
|
||||||
|
FQName string `json:"name"`
|
||||||
|
Help string `json:"help"`
|
||||||
|
ConstantLabels []KeyValue `json:"constant_labels"`
|
||||||
|
VariableLabels []string `json:"variable_labels"`
|
||||||
|
}{
|
||||||
|
FQName: d.BuildFQName(),
|
||||||
|
Help: d.Help,
|
||||||
|
ConstantLabels: d.ConstantLabels,
|
||||||
|
VariableLabels: d.VariableLabels,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func (d *Description) BuildFQName() string {
|
||||||
|
return prometheus.BuildFQName(d.Namespace, d.Subsystem, d.Name)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (d *Description) ConstLabelsMap() map[string]string {
|
||||||
|
constsLabels := make(map[string]string, len(d.ConstantLabels))
|
||||||
|
for _, kv := range d.ConstantLabels {
|
||||||
|
constsLabels[kv.Key] = kv.Value
|
||||||
|
}
|
||||||
|
return constsLabels
|
||||||
|
}
|
||||||
|
|
||||||
|
// DescribeAll returns descriptions for metrics.
|
||||||
|
func DescribeAll() []Description {
|
||||||
|
var list []Description
|
||||||
|
for _, m := range appMetricsDesc {
|
||||||
|
for _, description := range m {
|
||||||
|
list = append(list, description)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return list
|
||||||
|
}
|
||||||
|
|
||||||
|
func newOpts(description Description) prometheus.Opts {
|
||||||
|
return prometheus.Opts{
|
||||||
|
Namespace: description.Namespace,
|
||||||
|
Subsystem: description.Subsystem,
|
||||||
|
Name: description.Name,
|
||||||
|
Help: description.Help,
|
||||||
|
ConstLabels: description.ConstLabelsMap(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func newGauge(description Description) prometheus.Gauge {
|
||||||
|
return prometheus.NewGauge(
|
||||||
|
prometheus.GaugeOpts(newOpts(description)),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
func newGaugeVec(description Description) *prometheus.GaugeVec {
|
||||||
|
return prometheus.NewGaugeVec(
|
||||||
|
prometheus.GaugeOpts(newOpts(description)),
|
||||||
|
description.VariableLabels,
|
||||||
|
)
|
||||||
|
}
|
26
metrics/desc_test.go
Normal file
26
metrics/desc_test.go
Normal file
|
@ -0,0 +1,26 @@
|
||||||
|
//go:build dump_metrics
|
||||||
|
|
||||||
|
package metrics
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"flag"
|
||||||
|
"os"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
// metricsPath is the value of the "out" flag: the file the metrics
// descriptions are written to. It defaults to an empty string.
var metricsPath = flag.String("out", "", "File to export http gateway metrics to.")
|
||||||
|
|
||||||
|
func TestDescribeAll(t *testing.T) {
|
||||||
|
flag.Parse()
|
||||||
|
|
||||||
|
require.NotEmpty(t, metricsPath, "flag 'out' must be provided to dump metrics description")
|
||||||
|
|
||||||
|
data, err := json.Marshal(DescribeAll())
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
err = os.WriteFile(*metricsPath, data, 0644)
|
||||||
|
require.NoError(t, err)
|
||||||
|
}
|
|
@ -13,7 +13,22 @@ const (
|
||||||
namespace = "frostfs_http_gw"
|
namespace = "frostfs_http_gw"
|
||||||
stateSubsystem = "state"
|
stateSubsystem = "state"
|
||||||
poolSubsystem = "pool"
|
poolSubsystem = "pool"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Metric names of the state subsystem.
const (
	// healthMetric is the name of the gateway health gauge.
	healthMetric = "health"
	// versionInfoMetric is the name of the gateway version gauge.
	versionInfoMetric = "version_info"
)
|
||||||
|
|
||||||
|
// Metric names of the pool subsystem.
const (
	// overallErrorsMetric names the total error count of the pool.
	overallErrorsMetric = "overall_errors"
	// overallNodeErrorsMetric names the per-node total error count.
	overallNodeErrorsMetric = "overall_node_errors"
	// overallNodeRequestsMetric names the per-node total request count.
	overallNodeRequestsMetric = "overall_node_requests"
	// currentErrorMetric names the per-node current error count.
	currentErrorMetric = "current_errors"
	// avgRequestDurationMetric names the average request duration per node and method.
	avgRequestDurationMetric = "avg_request_duration"
)
|
||||||
|
|
||||||
|
const (
|
||||||
methodGetBalance = "get_balance"
|
methodGetBalance = "get_balance"
|
||||||
methodPutContainer = "put_container"
|
methodPutContainer = "put_container"
|
||||||
methodGetContainer = "get_container"
|
methodGetContainer = "get_container"
|
||||||
|
@ -48,6 +63,7 @@ type GateMetrics struct {
|
||||||
|
|
||||||
type stateMetrics struct {
|
type stateMetrics struct {
|
||||||
healthCheck prometheus.Gauge
|
healthCheck prometheus.Gauge
|
||||||
|
versionInfo *prometheus.GaugeVec
|
||||||
}
|
}
|
||||||
|
|
||||||
type poolMetricsCollector struct {
|
type poolMetricsCollector struct {
|
||||||
|
@ -80,93 +96,37 @@ func (g *GateMetrics) Unregister() {
|
||||||
|
|
||||||
func newStateMetrics() *stateMetrics {
|
func newStateMetrics() *stateMetrics {
|
||||||
return &stateMetrics{
|
return &stateMetrics{
|
||||||
healthCheck: prometheus.NewGauge(prometheus.GaugeOpts{
|
healthCheck: newGauge(appMetricsDesc[stateSubsystem][healthMetric]),
|
||||||
Namespace: namespace,
|
versionInfo: newGaugeVec(appMetricsDesc[stateSubsystem][versionInfoMetric]),
|
||||||
Subsystem: stateSubsystem,
|
|
||||||
Name: "health",
|
|
||||||
Help: "Current HTTP gateway state",
|
|
||||||
}),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m stateMetrics) register() {
|
func (m stateMetrics) register() {
|
||||||
prometheus.MustRegister(m.healthCheck)
|
prometheus.MustRegister(m.healthCheck)
|
||||||
|
prometheus.MustRegister(m.versionInfo)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m stateMetrics) unregister() {
|
func (m stateMetrics) unregister() {
|
||||||
prometheus.Unregister(m.healthCheck)
|
prometheus.Unregister(m.healthCheck)
|
||||||
|
prometheus.Unregister(m.versionInfo)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m stateMetrics) SetHealth(s HealthStatus) {
|
func (m stateMetrics) SetHealth(s HealthStatus) {
|
||||||
m.healthCheck.Set(float64(s))
|
m.healthCheck.Set(float64(s))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (m stateMetrics) SetVersion(ver string) {
|
||||||
|
m.versionInfo.WithLabelValues(ver).Set(1)
|
||||||
|
}
|
||||||
|
|
||||||
func newPoolMetricsCollector(p *pool.Pool) *poolMetricsCollector {
|
func newPoolMetricsCollector(p *pool.Pool) *poolMetricsCollector {
|
||||||
overallErrors := prometheus.NewGauge(
|
|
||||||
prometheus.GaugeOpts{
|
|
||||||
Namespace: namespace,
|
|
||||||
Subsystem: poolSubsystem,
|
|
||||||
Name: "overall_errors",
|
|
||||||
Help: "Total number of errors in pool",
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
overallNodeErrors := prometheus.NewGaugeVec(
|
|
||||||
prometheus.GaugeOpts{
|
|
||||||
Namespace: namespace,
|
|
||||||
Subsystem: poolSubsystem,
|
|
||||||
Name: "overall_node_errors",
|
|
||||||
Help: "Total number of errors for connection in pool",
|
|
||||||
},
|
|
||||||
[]string{
|
|
||||||
"node",
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
overallNodeRequests := prometheus.NewGaugeVec(
|
|
||||||
prometheus.GaugeOpts{
|
|
||||||
Namespace: namespace,
|
|
||||||
Subsystem: poolSubsystem,
|
|
||||||
Name: "overall_node_requests",
|
|
||||||
Help: "Total number of requests to specific node in pool",
|
|
||||||
},
|
|
||||||
[]string{
|
|
||||||
"node",
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
currentErrors := prometheus.NewGaugeVec(
|
|
||||||
prometheus.GaugeOpts{
|
|
||||||
Namespace: namespace,
|
|
||||||
Subsystem: poolSubsystem,
|
|
||||||
Name: "current_errors",
|
|
||||||
Help: "Number of errors on current connections that will be reset after the threshold",
|
|
||||||
},
|
|
||||||
[]string{
|
|
||||||
"node",
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
requestsDuration := prometheus.NewGaugeVec(
|
|
||||||
prometheus.GaugeOpts{
|
|
||||||
Namespace: namespace,
|
|
||||||
Subsystem: poolSubsystem,
|
|
||||||
Name: "avg_request_duration",
|
|
||||||
Help: "Average request duration (in milliseconds) for specific method on node in pool",
|
|
||||||
},
|
|
||||||
[]string{
|
|
||||||
"node",
|
|
||||||
"method",
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
return &poolMetricsCollector{
|
return &poolMetricsCollector{
|
||||||
pool: p,
|
pool: p,
|
||||||
overallErrors: overallErrors,
|
overallErrors: newGauge(appMetricsDesc[poolSubsystem][overallErrorsMetric]),
|
||||||
overallNodeErrors: overallNodeErrors,
|
overallNodeErrors: newGaugeVec(appMetricsDesc[poolSubsystem][overallNodeErrorsMetric]),
|
||||||
overallNodeRequests: overallNodeRequests,
|
overallNodeRequests: newGaugeVec(appMetricsDesc[poolSubsystem][overallNodeRequestsMetric]),
|
||||||
currentErrors: currentErrors,
|
currentErrors: newGaugeVec(appMetricsDesc[poolSubsystem][currentErrorMetric]),
|
||||||
requestDuration: requestsDuration,
|
requestDuration: newGaugeVec(appMetricsDesc[poolSubsystem][avgRequestDurationMetric]),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue