frostfs-http-gw/metrics/metrics.go
Nikita Zinkevich a4233b006c [#144] Update frostfs-sdk-go
Signed-off-by: Nikita Zinkevich <n.zinkevich@yadro.com>
2024-09-24 18:17:21 +03:00

241 lines
7.7 KiB
Go

package metrics
import (
"net/http"
"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/pool"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
"go.uber.org/zap"
)
const (
namespace = "frostfs_http_gw"
stateSubsystem = "state"
poolSubsystem = "pool"
serverSubsystem = "server"
)
const (
healthMetric = "health"
versionInfoMetric = "version_info"
)
const (
overallErrorsMetric = "overall_errors"
overallNodeErrorsMetric = "overall_node_errors"
overallNodeRequestsMetric = "overall_node_requests"
currentErrorMetric = "current_errors"
avgRequestDurationMetric = "avg_request_duration"
)
const (
methodGetBalance = "get_balance"
methodPutContainer = "put_container"
methodGetContainer = "get_container"
methodListContainer = "list_container"
methodDeleteContainer = "delete_container"
methodGetContainerEacl = "get_container_eacl"
methodSetContainerEacl = "set_container_eacl"
methodEndpointInfo = "endpoint_info"
methodNetworkInfo = "network_info"
methodPutObject = "put_object"
methodDeleteObject = "delete_object"
methodGetObject = "get_object"
methodHeadObject = "head_object"
methodRangeObject = "range_object"
methodCreateSession = "create_session"
)
// HealthStatus of the gate application.
type HealthStatus int32
const (
HealthStatusUndefined HealthStatus = 0
HealthStatusStarting HealthStatus = 1
HealthStatusReady HealthStatus = 2
HealthStatusShuttingDown HealthStatus = 3
)
type StatisticScraper interface {
Statistic() pool.Statistic
}
type serverMetrics struct {
endpointHealth *prometheus.GaugeVec
}
type GateMetrics struct {
stateMetrics
poolMetricsCollector
serverMetrics
}
type stateMetrics struct {
healthCheck prometheus.Gauge
versionInfo *prometheus.GaugeVec
}
type poolMetricsCollector struct {
scraper StatisticScraper
overallErrors prometheus.Gauge
overallNodeErrors *prometheus.GaugeVec
overallNodeRequests *prometheus.GaugeVec
currentErrors *prometheus.GaugeVec
requestDuration *prometheus.GaugeVec
}
// NewGateMetrics creates new metrics for http gate.
func NewGateMetrics(p StatisticScraper) *GateMetrics {
stateMetric := newStateMetrics()
stateMetric.register()
poolMetric := newPoolMetricsCollector(p)
poolMetric.register()
serverMetric := newServerMetrics()
serverMetric.register()
return &GateMetrics{
stateMetrics: *stateMetric,
poolMetricsCollector: *poolMetric,
serverMetrics: *serverMetric,
}
}
func (g *GateMetrics) Unregister() {
g.stateMetrics.unregister()
prometheus.Unregister(&g.poolMetricsCollector)
g.serverMetrics.unregister()
}
func newStateMetrics() *stateMetrics {
return &stateMetrics{
healthCheck: mustNewGauge(appMetricsDesc[stateSubsystem][healthMetric]),
versionInfo: mustNewGaugeVec(appMetricsDesc[stateSubsystem][versionInfoMetric]),
}
}
func (m stateMetrics) register() {
prometheus.MustRegister(m.healthCheck)
prometheus.MustRegister(m.versionInfo)
}
func (m stateMetrics) unregister() {
prometheus.Unregister(m.healthCheck)
prometheus.Unregister(m.versionInfo)
}
func (m stateMetrics) SetHealth(s HealthStatus) {
m.healthCheck.Set(float64(s))
}
func (m stateMetrics) SetVersion(ver string) {
m.versionInfo.WithLabelValues(ver).Set(1)
}
func newPoolMetricsCollector(p StatisticScraper) *poolMetricsCollector {
return &poolMetricsCollector{
scraper: p,
overallErrors: mustNewGauge(appMetricsDesc[poolSubsystem][overallErrorsMetric]),
overallNodeErrors: mustNewGaugeVec(appMetricsDesc[poolSubsystem][overallNodeErrorsMetric]),
overallNodeRequests: mustNewGaugeVec(appMetricsDesc[poolSubsystem][overallNodeRequestsMetric]),
currentErrors: mustNewGaugeVec(appMetricsDesc[poolSubsystem][currentErrorMetric]),
requestDuration: mustNewGaugeVec(appMetricsDesc[poolSubsystem][avgRequestDurationMetric]),
}
}
func (m *poolMetricsCollector) Collect(ch chan<- prometheus.Metric) {
m.updateStatistic()
m.overallErrors.Collect(ch)
m.overallNodeErrors.Collect(ch)
m.overallNodeRequests.Collect(ch)
m.currentErrors.Collect(ch)
m.requestDuration.Collect(ch)
}
func (m *poolMetricsCollector) Describe(descs chan<- *prometheus.Desc) {
m.overallErrors.Describe(descs)
m.overallNodeErrors.Describe(descs)
m.overallNodeRequests.Describe(descs)
m.currentErrors.Describe(descs)
m.requestDuration.Describe(descs)
}
func (m *poolMetricsCollector) register() {
prometheus.MustRegister(m)
}
func (m *poolMetricsCollector) updateStatistic() {
stat := m.scraper.Statistic()
m.overallNodeErrors.Reset()
m.overallNodeRequests.Reset()
m.currentErrors.Reset()
m.requestDuration.Reset()
for _, node := range stat.Nodes() {
m.overallNodeErrors.WithLabelValues(node.Address()).Set(float64(node.OverallErrors()))
m.overallNodeRequests.WithLabelValues(node.Address()).Set(float64(node.Requests()))
m.currentErrors.WithLabelValues(node.Address()).Set(float64(node.CurrentErrors()))
m.updateRequestsDuration(node)
}
m.overallErrors.Set(float64(stat.OverallErrors()))
}
func (m *poolMetricsCollector) updateRequestsDuration(node pool.NodeStatistic) {
m.requestDuration.WithLabelValues(node.Address(), methodGetBalance).Set(float64(node.AverageGetBalance().Milliseconds()))
m.requestDuration.WithLabelValues(node.Address(), methodPutContainer).Set(float64(node.AveragePutContainer().Milliseconds()))
m.requestDuration.WithLabelValues(node.Address(), methodGetContainer).Set(float64(node.AverageGetContainer().Milliseconds()))
m.requestDuration.WithLabelValues(node.Address(), methodListContainer).Set(float64(node.AverageListContainer().Milliseconds()))
m.requestDuration.WithLabelValues(node.Address(), methodDeleteContainer).Set(float64(node.AverageDeleteContainer().Milliseconds()))
m.requestDuration.WithLabelValues(node.Address(), methodEndpointInfo).Set(float64(node.AverageEndpointInfo().Milliseconds()))
m.requestDuration.WithLabelValues(node.Address(), methodNetworkInfo).Set(float64(node.AverageNetworkInfo().Milliseconds()))
m.requestDuration.WithLabelValues(node.Address(), methodPutObject).Set(float64(node.AveragePutObject().Milliseconds()))
m.requestDuration.WithLabelValues(node.Address(), methodDeleteObject).Set(float64(node.AverageDeleteObject().Milliseconds()))
m.requestDuration.WithLabelValues(node.Address(), methodGetObject).Set(float64(node.AverageGetObject().Milliseconds()))
m.requestDuration.WithLabelValues(node.Address(), methodHeadObject).Set(float64(node.AverageHeadObject().Milliseconds()))
m.requestDuration.WithLabelValues(node.Address(), methodRangeObject).Set(float64(node.AverageRangeObject().Milliseconds()))
m.requestDuration.WithLabelValues(node.Address(), methodCreateSession).Set(float64(node.AverageCreateSession().Milliseconds()))
}
func newServerMetrics() *serverMetrics {
return &serverMetrics{
endpointHealth: mustNewGaugeVec(appMetricsDesc[serverSubsystem][healthMetric]),
}
}
func (m serverMetrics) register() {
prometheus.MustRegister(m.endpointHealth)
}
func (m serverMetrics) unregister() {
prometheus.Unregister(m.endpointHealth)
}
func (m serverMetrics) MarkHealthy(endpoint string) {
m.endpointHealth.WithLabelValues(endpoint).Set(float64(1))
}
func (m serverMetrics) MarkUnhealthy(endpoint string) {
m.endpointHealth.WithLabelValues(endpoint).Set(float64(0))
}
// NewPrometheusService creates a new service for gathering prometheus metrics.
func NewPrometheusService(log *zap.Logger, cfg Config) *Service {
if log == nil {
return nil
}
return &Service{
Server: &http.Server{
Addr: cfg.Address,
Handler: promhttp.Handler(),
},
enabled: cfg.Enabled,
serviceType: "Prometheus",
log: log.With(zap.String("service", "Prometheus")),
}
}