frostfs-s3-gw/metrics/desc.go
Pavel Pogodaev 51322cccdf [#502] Add Dropped logs (by sampling) metric
Signed-off-by: Pavel Pogodaev <p.pogodaev@yadro.com>
2024-12-03 12:16:56 +00:00

270 lines
8.3 KiB
Go

package metrics
import (
"encoding/json"
"github.com/prometheus/client_golang/prometheus"
dto "github.com/prometheus/client_model/go"
)
var appMetricsDesc = map[string]map[string]Description{
poolSubsystem: {
overallErrorsMetric: Description{
Type: dto.MetricType_GAUGE,
Namespace: namespace,
Subsystem: poolSubsystem,
Name: overallErrorsMetric,
Help: "Total number of errors in pool",
},
overallNodeErrorsMetric: Description{
Type: dto.MetricType_GAUGE,
Namespace: namespace,
Subsystem: poolSubsystem,
Name: overallNodeErrorsMetric,
Help: "Total number of errors for connection in pool",
VariableLabels: []string{"node"},
},
overallNodeRequestsMetric: Description{
Type: dto.MetricType_GAUGE,
Namespace: namespace,
Subsystem: poolSubsystem,
Name: overallNodeRequestsMetric,
Help: "Total number of requests to specific node in pool",
VariableLabels: []string{"node"},
},
currentErrorMetric: Description{
Type: dto.MetricType_GAUGE,
Namespace: namespace,
Subsystem: poolSubsystem,
Name: currentErrorMetric,
Help: "Number of errors on current connections that will be reset after the threshold",
VariableLabels: []string{"node"},
},
avgRequestDurationMetric: Description{
Type: dto.MetricType_GAUGE,
Namespace: namespace,
Subsystem: poolSubsystem,
Name: avgRequestDurationMetric,
Help: "Average request duration (in milliseconds) for specific method on node in pool",
VariableLabels: []string{"node", "method"},
},
currentNodesMetric: Description{
Type: dto.MetricType_GAUGE,
Namespace: namespace,
Subsystem: poolSubsystem,
Name: currentNodesMetric,
Help: "Addresses of nodes of the same and highest priority that are currently healthy",
VariableLabels: []string{"address"},
},
},
billingSubsystem: {
userRequestsMetric: Description{
Type: dto.MetricType_GAUGE,
Namespace: namespace,
Subsystem: billingSubsystem,
Name: userRequestsMetric,
Help: "Accumulated user requests",
VariableLabels: []string{"user", "bucket", "cid", "operation", "namespace"},
},
userTrafficMetric: Description{
Type: dto.MetricType_GAUGE,
Namespace: namespace,
Subsystem: billingSubsystem,
Name: userTrafficMetric,
Help: "Accumulated user traffic",
VariableLabels: []string{"user", "bucket", "cid", "direction", "namespace"},
},
},
stateSubsystem: {
healthMetric: Description{
Type: dto.MetricType_GAUGE,
Namespace: namespace,
Subsystem: stateSubsystem,
Name: healthMetric,
Help: "Current S3 gateway state",
},
versionInfoMetric: Description{
Type: dto.MetricType_GAUGE,
Namespace: namespace,
Subsystem: stateSubsystem,
Name: versionInfoMetric,
Help: "Version of current FrostFS S3 Gate instance",
VariableLabels: []string{"version"},
},
},
statisticSubsystem: {
droppedLogs: Description{
Type: dto.MetricType_COUNTER,
Namespace: namespace,
Subsystem: statisticSubsystem,
Name: droppedLogs,
Help: "Dropped logs (by sampling) count",
},
requestsSecondsMetric: Description{
Type: dto.MetricType_HISTOGRAM,
Namespace: namespace,
Subsystem: statisticSubsystem,
Name: requestsSecondsMetric,
Help: "Time taken by requests served by current FrostFS S3 Gate instance",
VariableLabels: []string{"api"},
},
requestsCurrentMetric: Description{
Type: dto.MetricType_GAUGE,
Namespace: namespace,
Subsystem: statisticSubsystem,
Name: requestsCurrentMetric,
Help: "Total number of running s3 requests in current FrostFS S3 Gate instance",
VariableLabels: []string{"api"},
},
requestsTotalMetric: Description{
Type: dto.MetricType_GAUGE,
Namespace: namespace,
Subsystem: statisticSubsystem,
Name: requestsTotalMetric,
Help: "Total number of s3 requests in current FrostFS S3 Gate instance",
VariableLabels: []string{"api"},
},
errorsTotalMetric: Description{
Type: dto.MetricType_GAUGE,
Namespace: namespace,
Subsystem: statisticSubsystem,
Name: errorsTotalMetric,
Help: "Total number of s3 errors in current FrostFS S3 Gate instance",
VariableLabels: []string{"api"},
},
bytesTotalMetric: Description{
Type: dto.MetricType_GAUGE,
Namespace: namespace,
Subsystem: statisticSubsystem,
Name: bytesTotalMetric,
Help: "Total number of bytes sent/received by current FrostFS S3 Gate instance",
VariableLabels: []string{"direction"},
},
},
serverSubsystem: {
httpHealthMetric: Description{
Type: dto.MetricType_GAUGE,
Namespace: namespace,
Subsystem: serverSubsystem,
Name: httpHealthMetric,
Help: "HTTP Server endpoint health",
VariableLabels: []string{"endpoint"},
},
},
treePoolSubsystem: {
avgRequestDurationMetric: Description{
Type: dto.MetricType_GAUGE,
Namespace: namespace,
Subsystem: treePoolSubsystem,
Name: avgRequestDurationMetric,
Help: "Average request duration (in milliseconds) for specific method in tree pool",
VariableLabels: []string{"method"},
},
},
}
type Description struct {
Type dto.MetricType
Namespace string
Subsystem string
Name string
Help string
ConstantLabels prometheus.Labels
VariableLabels []string
}
func (d *Description) MarshalJSON() ([]byte, error) {
return json.Marshal(&struct {
Type string `json:"type"`
FQName string `json:"name"`
Help string `json:"help"`
ConstantLabels prometheus.Labels `json:"constant_labels,omitempty"`
VariableLabels []string `json:"variable_labels,omitempty"`
}{
Type: d.Type.String(),
FQName: d.BuildFQName(),
Help: d.Help,
ConstantLabels: d.ConstantLabels,
VariableLabels: d.VariableLabels,
})
}
func (d *Description) BuildFQName() string {
return prometheus.BuildFQName(d.Namespace, d.Subsystem, d.Name)
}
// DescribeAll returns descriptions for metrics.
func DescribeAll() []Description {
var list []Description
for _, m := range appMetricsDesc {
for _, description := range m {
list = append(list, description)
}
}
return list
}
func newOpts(description Description) prometheus.Opts {
return prometheus.Opts{
Namespace: description.Namespace,
Subsystem: description.Subsystem,
Name: description.Name,
Help: description.Help,
ConstLabels: description.ConstantLabels,
}
}
func newDesc(description Description) *prometheus.Desc {
return prometheus.NewDesc(
description.BuildFQName(),
description.Help,
description.VariableLabels,
description.ConstantLabels)
}
func mustNewGauge(description Description) prometheus.Gauge {
if description.Type != dto.MetricType_GAUGE {
panic("invalid metric type")
}
return prometheus.NewGauge(
prometheus.GaugeOpts(newOpts(description)),
)
}
func mustNewGaugeVec(description Description) *prometheus.GaugeVec {
if description.Type != dto.MetricType_GAUGE {
panic("invalid metric type")
}
return prometheus.NewGaugeVec(
prometheus.GaugeOpts(newOpts(description)),
description.VariableLabels,
)
}
func mustNewHistogramVec(description Description, buckets []float64) *prometheus.HistogramVec {
if description.Type != dto.MetricType_HISTOGRAM {
panic("invalid metric type")
}
return prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: description.Namespace,
Subsystem: description.Subsystem,
Name: description.Name,
Help: description.Name,
ConstLabels: description.ConstantLabels,
Buckets: buckets,
},
description.VariableLabels,
)
}
func mustNewCounter(description Description) prometheus.Counter {
if description.Type != dto.MetricType_COUNTER {
panic("invalid metric type")
}
return prometheus.NewCounter(
prometheus.CounterOpts(newOpts(description)),
)
}