2023-04-07 14:28:21 +00:00
|
|
|
package metrics
|
|
|
|
|
|
|
|
import (
|
|
|
|
"encoding/json"
|
|
|
|
|
|
|
|
"github.com/prometheus/client_golang/prometheus"
|
2023-04-10 08:40:58 +00:00
|
|
|
dto "github.com/prometheus/client_model/go"
|
2023-04-07 14:28:21 +00:00
|
|
|
)
|
|
|
|
|
2023-04-07 14:58:36 +00:00
|
|
|
var appMetricsDesc = map[string]map[string]Description{
|
2023-04-07 14:28:21 +00:00
|
|
|
poolSubsystem: {
|
|
|
|
overallErrorsMetric: Description{
|
2023-04-10 08:40:58 +00:00
|
|
|
Type: dto.MetricType_GAUGE,
|
2023-04-07 14:28:21 +00:00
|
|
|
Namespace: namespace,
|
|
|
|
Subsystem: poolSubsystem,
|
|
|
|
Name: overallErrorsMetric,
|
|
|
|
Help: "Total number of errors in pool",
|
|
|
|
},
|
|
|
|
overallNodeErrorsMetric: Description{
|
2023-04-10 08:40:58 +00:00
|
|
|
Type: dto.MetricType_GAUGE,
|
2023-04-07 14:28:21 +00:00
|
|
|
Namespace: namespace,
|
|
|
|
Subsystem: poolSubsystem,
|
|
|
|
Name: overallNodeErrorsMetric,
|
|
|
|
Help: "Total number of errors for connection in pool",
|
|
|
|
VariableLabels: []string{"node"},
|
|
|
|
},
|
|
|
|
overallNodeRequestsMetric: Description{
|
2023-04-10 08:40:58 +00:00
|
|
|
Type: dto.MetricType_GAUGE,
|
2023-04-07 14:28:21 +00:00
|
|
|
Namespace: namespace,
|
|
|
|
Subsystem: poolSubsystem,
|
|
|
|
Name: overallNodeRequestsMetric,
|
|
|
|
Help: "Total number of requests to specific node in pool",
|
|
|
|
VariableLabels: []string{"node"},
|
|
|
|
},
|
|
|
|
currentErrorMetric: Description{
|
2023-04-10 08:40:58 +00:00
|
|
|
Type: dto.MetricType_GAUGE,
|
2023-04-07 14:28:21 +00:00
|
|
|
Namespace: namespace,
|
|
|
|
Subsystem: poolSubsystem,
|
|
|
|
Name: currentErrorMetric,
|
|
|
|
Help: "Number of errors on current connections that will be reset after the threshold",
|
|
|
|
VariableLabels: []string{"node"},
|
|
|
|
},
|
|
|
|
avgRequestDurationMetric: Description{
|
2023-04-10 08:40:58 +00:00
|
|
|
Type: dto.MetricType_GAUGE,
|
2023-04-07 14:28:21 +00:00
|
|
|
Namespace: namespace,
|
|
|
|
Subsystem: poolSubsystem,
|
|
|
|
Name: avgRequestDurationMetric,
|
|
|
|
Help: "Average request duration (in milliseconds) for specific method on node in pool",
|
|
|
|
VariableLabels: []string{"node", "method"},
|
|
|
|
},
|
2023-08-16 14:08:50 +00:00
|
|
|
currentNodesMetric: Description{
|
|
|
|
Type: dto.MetricType_GAUGE,
|
|
|
|
Namespace: namespace,
|
|
|
|
Subsystem: poolSubsystem,
|
|
|
|
Name: currentNodesMetric,
|
|
|
|
Help: "Addresses of nodes of the same and highest priority that are currently healthy",
|
|
|
|
VariableLabels: []string{"address"},
|
|
|
|
},
|
2023-04-07 14:28:21 +00:00
|
|
|
},
|
|
|
|
billingSubsystem: {
|
|
|
|
userRequestsMetric: Description{
|
2023-04-10 08:40:58 +00:00
|
|
|
Type: dto.MetricType_GAUGE,
|
2023-04-07 14:28:21 +00:00
|
|
|
Namespace: namespace,
|
|
|
|
Subsystem: billingSubsystem,
|
|
|
|
Name: userRequestsMetric,
|
|
|
|
Help: "Accumulated user requests",
|
2023-12-01 11:16:19 +00:00
|
|
|
VariableLabels: []string{"user", "bucket", "cid", "operation", "namespace"},
|
2023-04-07 14:28:21 +00:00
|
|
|
},
|
|
|
|
userTrafficMetric: Description{
|
2023-04-10 08:40:58 +00:00
|
|
|
Type: dto.MetricType_GAUGE,
|
2023-04-07 14:28:21 +00:00
|
|
|
Namespace: namespace,
|
|
|
|
Subsystem: billingSubsystem,
|
|
|
|
Name: userTrafficMetric,
|
|
|
|
Help: "Accumulated user traffic",
|
2023-12-01 11:16:19 +00:00
|
|
|
VariableLabels: []string{"user", "bucket", "cid", "direction", "namespace"},
|
2023-04-07 14:28:21 +00:00
|
|
|
},
|
|
|
|
},
|
|
|
|
stateSubsystem: {
|
|
|
|
healthMetric: Description{
|
2023-04-10 08:40:58 +00:00
|
|
|
Type: dto.MetricType_GAUGE,
|
2023-04-07 14:28:21 +00:00
|
|
|
Namespace: namespace,
|
|
|
|
Subsystem: stateSubsystem,
|
|
|
|
Name: healthMetric,
|
|
|
|
Help: "Current S3 gateway state",
|
|
|
|
},
|
|
|
|
versionInfoMetric: Description{
|
2023-04-10 08:40:58 +00:00
|
|
|
Type: dto.MetricType_GAUGE,
|
2023-04-07 14:28:21 +00:00
|
|
|
Namespace: namespace,
|
|
|
|
Subsystem: stateSubsystem,
|
|
|
|
Name: versionInfoMetric,
|
|
|
|
Help: "Version of current FrostFS S3 Gate instance",
|
|
|
|
VariableLabels: []string{"version"},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
statisticSubsystem: {
|
|
|
|
requestsSecondsMetric: Description{
|
2023-04-10 08:40:58 +00:00
|
|
|
Type: dto.MetricType_HISTOGRAM,
|
2023-04-07 14:28:21 +00:00
|
|
|
Namespace: namespace,
|
|
|
|
Subsystem: statisticSubsystem,
|
|
|
|
Name: requestsSecondsMetric,
|
|
|
|
Help: "Time taken by requests served by current FrostFS S3 Gate instance",
|
|
|
|
VariableLabels: []string{"api"},
|
|
|
|
},
|
|
|
|
requestsCurrentMetric: Description{
|
2023-04-10 08:40:58 +00:00
|
|
|
Type: dto.MetricType_GAUGE,
|
2023-04-07 14:28:21 +00:00
|
|
|
Namespace: namespace,
|
|
|
|
Subsystem: statisticSubsystem,
|
|
|
|
Name: requestsCurrentMetric,
|
|
|
|
Help: "Total number of running s3 requests in current FrostFS S3 Gate instance",
|
|
|
|
VariableLabels: []string{"api"},
|
|
|
|
},
|
|
|
|
requestsTotalMetric: Description{
|
2023-04-10 08:40:58 +00:00
|
|
|
Type: dto.MetricType_GAUGE,
|
2023-04-07 14:28:21 +00:00
|
|
|
Namespace: namespace,
|
|
|
|
Subsystem: statisticSubsystem,
|
|
|
|
Name: requestsTotalMetric,
|
|
|
|
Help: "Total number of s3 requests in current FrostFS S3 Gate instance",
|
|
|
|
VariableLabels: []string{"api"},
|
|
|
|
},
|
|
|
|
errorsTotalMetric: Description{
|
2023-04-10 08:40:58 +00:00
|
|
|
Type: dto.MetricType_GAUGE,
|
2023-04-07 14:28:21 +00:00
|
|
|
Namespace: namespace,
|
|
|
|
Subsystem: statisticSubsystem,
|
|
|
|
Name: errorsTotalMetric,
|
|
|
|
Help: "Total number of s3 errors in current FrostFS S3 Gate instance",
|
|
|
|
VariableLabels: []string{"api"},
|
|
|
|
},
|
2023-08-14 14:56:22 +00:00
|
|
|
bytesTotalMetric: Description{
|
|
|
|
Type: dto.MetricType_GAUGE,
|
|
|
|
Namespace: namespace,
|
|
|
|
Subsystem: statisticSubsystem,
|
|
|
|
Name: bytesTotalMetric,
|
|
|
|
Help: "Total number of bytes sent/received by current FrostFS S3 Gate instance",
|
|
|
|
VariableLabels: []string{"direction"},
|
2023-04-07 14:28:21 +00:00
|
|
|
},
|
|
|
|
},
|
2023-08-29 15:17:56 +00:00
|
|
|
serverSubsystem: {
|
|
|
|
httpHealthMetric: Description{
|
|
|
|
Type: dto.MetricType_GAUGE,
|
|
|
|
Namespace: namespace,
|
|
|
|
Subsystem: serverSubsystem,
|
|
|
|
Name: httpHealthMetric,
|
|
|
|
Help: "HTTP Server endpoint health",
|
|
|
|
VariableLabels: []string{"endpoint"},
|
|
|
|
},
|
|
|
|
},
|
2024-07-30 16:40:58 +00:00
|
|
|
treePoolSubsystem: {
|
|
|
|
avgRequestDurationMetric: Description{
|
|
|
|
Type: dto.MetricType_GAUGE,
|
|
|
|
Namespace: namespace,
|
|
|
|
Subsystem: treePoolSubsystem,
|
|
|
|
Name: avgRequestDurationMetric,
|
|
|
|
Help: "Average request duration (in milliseconds) for specific method in tree pool",
|
|
|
|
VariableLabels: []string{"method"},
|
|
|
|
},
|
|
|
|
},
|
2023-04-07 14:28:21 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
type Description struct {
|
2023-04-10 08:40:58 +00:00
|
|
|
Type dto.MetricType
|
2023-04-07 14:28:21 +00:00
|
|
|
Namespace string
|
|
|
|
Subsystem string
|
|
|
|
Name string
|
|
|
|
Help string
|
2023-04-19 12:15:31 +00:00
|
|
|
ConstantLabels prometheus.Labels
|
2023-04-07 14:28:21 +00:00
|
|
|
VariableLabels []string
|
|
|
|
}
|
|
|
|
|
|
|
|
func (d *Description) MarshalJSON() ([]byte, error) {
|
|
|
|
return json.Marshal(&struct {
|
2023-04-19 12:15:31 +00:00
|
|
|
Type string `json:"type"`
|
|
|
|
FQName string `json:"name"`
|
|
|
|
Help string `json:"help"`
|
|
|
|
ConstantLabels prometheus.Labels `json:"constant_labels,omitempty"`
|
|
|
|
VariableLabels []string `json:"variable_labels,omitempty"`
|
2023-04-07 14:28:21 +00:00
|
|
|
}{
|
2023-04-10 08:40:58 +00:00
|
|
|
Type: d.Type.String(),
|
2023-04-07 14:28:21 +00:00
|
|
|
FQName: d.BuildFQName(),
|
|
|
|
Help: d.Help,
|
|
|
|
ConstantLabels: d.ConstantLabels,
|
|
|
|
VariableLabels: d.VariableLabels,
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
func (d *Description) BuildFQName() string {
|
|
|
|
return prometheus.BuildFQName(d.Namespace, d.Subsystem, d.Name)
|
|
|
|
}
|
|
|
|
|
|
|
|
// DescribeAll returns descriptions for metrics.
|
|
|
|
func DescribeAll() []Description {
|
|
|
|
var list []Description
|
2023-04-07 14:58:36 +00:00
|
|
|
for _, m := range appMetricsDesc {
|
2023-04-07 14:28:21 +00:00
|
|
|
for _, description := range m {
|
|
|
|
list = append(list, description)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return list
|
|
|
|
}
|
|
|
|
|
2023-04-07 14:58:36 +00:00
|
|
|
func newOpts(description Description) prometheus.Opts {
|
2023-04-07 14:28:21 +00:00
|
|
|
return prometheus.Opts{
|
|
|
|
Namespace: description.Namespace,
|
|
|
|
Subsystem: description.Subsystem,
|
|
|
|
Name: description.Name,
|
|
|
|
Help: description.Help,
|
2023-04-19 12:15:31 +00:00
|
|
|
ConstLabels: description.ConstantLabels,
|
2023-04-07 14:28:21 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-04-07 14:58:36 +00:00
|
|
|
func newDesc(description Description) *prometheus.Desc {
|
2023-04-07 14:28:21 +00:00
|
|
|
return prometheus.NewDesc(
|
|
|
|
description.BuildFQName(),
|
|
|
|
description.Help,
|
|
|
|
description.VariableLabels,
|
2023-04-19 12:15:31 +00:00
|
|
|
description.ConstantLabels)
|
2023-04-07 14:28:21 +00:00
|
|
|
}
|
|
|
|
|
2023-04-10 08:40:58 +00:00
|
|
|
func mustNewGauge(description Description) prometheus.Gauge {
|
|
|
|
if description.Type != dto.MetricType_GAUGE {
|
|
|
|
panic("invalid metric type")
|
|
|
|
}
|
2023-04-07 14:28:21 +00:00
|
|
|
return prometheus.NewGauge(
|
2023-04-07 14:58:36 +00:00
|
|
|
prometheus.GaugeOpts(newOpts(description)),
|
2023-04-07 14:28:21 +00:00
|
|
|
)
|
|
|
|
}
|
|
|
|
|
2023-04-10 08:40:58 +00:00
|
|
|
func mustNewGaugeVec(description Description) *prometheus.GaugeVec {
|
|
|
|
if description.Type != dto.MetricType_GAUGE {
|
|
|
|
panic("invalid metric type")
|
|
|
|
}
|
2023-04-07 14:28:21 +00:00
|
|
|
return prometheus.NewGaugeVec(
|
2023-04-07 14:58:36 +00:00
|
|
|
prometheus.GaugeOpts(newOpts(description)),
|
2023-04-07 14:28:21 +00:00
|
|
|
description.VariableLabels,
|
|
|
|
)
|
|
|
|
}
|
2023-04-10 08:40:58 +00:00
|
|
|
|
|
|
|
func mustNewHistogramVec(description Description, buckets []float64) *prometheus.HistogramVec {
|
|
|
|
if description.Type != dto.MetricType_HISTOGRAM {
|
|
|
|
panic("invalid metric type")
|
|
|
|
}
|
|
|
|
|
|
|
|
return prometheus.NewHistogramVec(
|
|
|
|
prometheus.HistogramOpts{
|
|
|
|
Namespace: description.Namespace,
|
|
|
|
Subsystem: description.Subsystem,
|
|
|
|
Name: description.Name,
|
|
|
|
Help: description.Name,
|
2023-04-19 12:15:31 +00:00
|
|
|
ConstLabels: description.ConstantLabels,
|
2023-04-10 08:40:58 +00:00
|
|
|
Buckets: buckets,
|
|
|
|
},
|
|
|
|
description.VariableLabels,
|
|
|
|
)
|
|
|
|
}
|