package metrics import ( "net/http" "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/pool" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" "go.uber.org/zap" ) const ( namespace = "frostfs_http_gw" stateSubsystem = "state" poolSubsystem = "pool" serverSubsystem = "server" ) const ( healthMetric = "health" versionInfoMetric = "version_info" ) const ( overallErrorsMetric = "overall_errors" overallNodeErrorsMetric = "overall_node_errors" overallNodeRequestsMetric = "overall_node_requests" currentErrorMetric = "current_errors" avgRequestDurationMetric = "avg_request_duration" ) const ( methodGetBalance = "get_balance" methodPutContainer = "put_container" methodGetContainer = "get_container" methodListContainer = "list_container" methodDeleteContainer = "delete_container" methodGetContainerEacl = "get_container_eacl" methodSetContainerEacl = "set_container_eacl" methodEndpointInfo = "endpoint_info" methodNetworkInfo = "network_info" methodPutObject = "put_object" methodDeleteObject = "delete_object" methodGetObject = "get_object" methodHeadObject = "head_object" methodRangeObject = "range_object" methodCreateSession = "create_session" ) // HealthStatus of the gate application. type HealthStatus int32 const ( HealthStatusUndefined HealthStatus = 0 HealthStatusStarting HealthStatus = 1 HealthStatusReady HealthStatus = 2 HealthStatusShuttingDown HealthStatus = 3 ) type StatisticScraper interface { Statistic() pool.Statistic } type serverMetrics struct { endpointHealth *prometheus.GaugeVec } type GateMetrics struct { stateMetrics poolMetricsCollector serverMetrics } type stateMetrics struct { healthCheck prometheus.Gauge versionInfo *prometheus.GaugeVec } type poolMetricsCollector struct { scraper StatisticScraper overallErrors prometheus.Gauge overallNodeErrors *prometheus.GaugeVec overallNodeRequests *prometheus.GaugeVec currentErrors *prometheus.GaugeVec requestDuration *prometheus.GaugeVec } // NewGateMetrics creates new metrics for http gate. func NewGateMetrics(p StatisticScraper) *GateMetrics { stateMetric := newStateMetrics() stateMetric.register() poolMetric := newPoolMetricsCollector(p) poolMetric.register() serverMetric := newServerMetrics() serverMetric.register() return &GateMetrics{ stateMetrics: *stateMetric, poolMetricsCollector: *poolMetric, serverMetrics: *serverMetric, } } func (g *GateMetrics) Unregister() { g.stateMetrics.unregister() prometheus.Unregister(&g.poolMetricsCollector) g.serverMetrics.unregister() } func newStateMetrics() *stateMetrics { return &stateMetrics{ healthCheck: mustNewGauge(appMetricsDesc[stateSubsystem][healthMetric]), versionInfo: mustNewGaugeVec(appMetricsDesc[stateSubsystem][versionInfoMetric]), } } func (m stateMetrics) register() { prometheus.MustRegister(m.healthCheck) prometheus.MustRegister(m.versionInfo) } func (m stateMetrics) unregister() { prometheus.Unregister(m.healthCheck) prometheus.Unregister(m.versionInfo) } func (m stateMetrics) SetHealth(s HealthStatus) { m.healthCheck.Set(float64(s)) } func (m stateMetrics) SetVersion(ver string) { m.versionInfo.WithLabelValues(ver).Set(1) } func newPoolMetricsCollector(p StatisticScraper) *poolMetricsCollector { return &poolMetricsCollector{ scraper: p, overallErrors: mustNewGauge(appMetricsDesc[poolSubsystem][overallErrorsMetric]), overallNodeErrors: mustNewGaugeVec(appMetricsDesc[poolSubsystem][overallNodeErrorsMetric]), overallNodeRequests: mustNewGaugeVec(appMetricsDesc[poolSubsystem][overallNodeRequestsMetric]), currentErrors: mustNewGaugeVec(appMetricsDesc[poolSubsystem][currentErrorMetric]), requestDuration: mustNewGaugeVec(appMetricsDesc[poolSubsystem][avgRequestDurationMetric]), } } func (m *poolMetricsCollector) Collect(ch chan<- prometheus.Metric) { m.updateStatistic() m.overallErrors.Collect(ch) m.overallNodeErrors.Collect(ch) m.overallNodeRequests.Collect(ch) m.currentErrors.Collect(ch) m.requestDuration.Collect(ch) } func (m *poolMetricsCollector) Describe(descs chan<- *prometheus.Desc) { m.overallErrors.Describe(descs) m.overallNodeErrors.Describe(descs) m.overallNodeRequests.Describe(descs) m.currentErrors.Describe(descs) m.requestDuration.Describe(descs) } func (m *poolMetricsCollector) register() { prometheus.MustRegister(m) } func (m *poolMetricsCollector) updateStatistic() { stat := m.scraper.Statistic() m.overallNodeErrors.Reset() m.overallNodeRequests.Reset() m.currentErrors.Reset() m.requestDuration.Reset() for _, node := range stat.Nodes() { m.overallNodeErrors.WithLabelValues(node.Address()).Set(float64(node.OverallErrors())) m.overallNodeRequests.WithLabelValues(node.Address()).Set(float64(node.Requests())) m.currentErrors.WithLabelValues(node.Address()).Set(float64(node.CurrentErrors())) m.updateRequestsDuration(node) } m.overallErrors.Set(float64(stat.OverallErrors())) } func (m *poolMetricsCollector) updateRequestsDuration(node pool.NodeStatistic) { m.requestDuration.WithLabelValues(node.Address(), methodGetBalance).Set(float64(node.AverageGetBalance().Milliseconds())) m.requestDuration.WithLabelValues(node.Address(), methodPutContainer).Set(float64(node.AveragePutContainer().Milliseconds())) m.requestDuration.WithLabelValues(node.Address(), methodGetContainer).Set(float64(node.AverageGetContainer().Milliseconds())) m.requestDuration.WithLabelValues(node.Address(), methodListContainer).Set(float64(node.AverageListContainer().Milliseconds())) m.requestDuration.WithLabelValues(node.Address(), methodDeleteContainer).Set(float64(node.AverageDeleteContainer().Milliseconds())) m.requestDuration.WithLabelValues(node.Address(), methodGetContainerEacl).Set(float64(node.AverageGetContainerEACL().Milliseconds())) m.requestDuration.WithLabelValues(node.Address(), methodSetContainerEacl).Set(float64(node.AverageSetContainerEACL().Milliseconds())) m.requestDuration.WithLabelValues(node.Address(), methodEndpointInfo).Set(float64(node.AverageEndpointInfo().Milliseconds())) m.requestDuration.WithLabelValues(node.Address(), methodNetworkInfo).Set(float64(node.AverageNetworkInfo().Milliseconds())) m.requestDuration.WithLabelValues(node.Address(), methodPutObject).Set(float64(node.AveragePutObject().Milliseconds())) m.requestDuration.WithLabelValues(node.Address(), methodDeleteObject).Set(float64(node.AverageDeleteObject().Milliseconds())) m.requestDuration.WithLabelValues(node.Address(), methodGetObject).Set(float64(node.AverageGetObject().Milliseconds())) m.requestDuration.WithLabelValues(node.Address(), methodHeadObject).Set(float64(node.AverageHeadObject().Milliseconds())) m.requestDuration.WithLabelValues(node.Address(), methodRangeObject).Set(float64(node.AverageRangeObject().Milliseconds())) m.requestDuration.WithLabelValues(node.Address(), methodCreateSession).Set(float64(node.AverageCreateSession().Milliseconds())) } func newServerMetrics() *serverMetrics { return &serverMetrics{ endpointHealth: mustNewGaugeVec(appMetricsDesc[serverSubsystem][healthMetric]), } } func (m serverMetrics) register() { prometheus.MustRegister(m.endpointHealth) } func (m serverMetrics) unregister() { prometheus.Unregister(m.endpointHealth) } func (m serverMetrics) MarkHealthy(endpoint string) { m.endpointHealth.WithLabelValues(endpoint).Set(float64(1)) } func (m serverMetrics) MarkUnhealthy(endpoint string) { m.endpointHealth.WithLabelValues(endpoint).Set(float64(0)) } // NewPrometheusService creates a new service for gathering prometheus metrics. func NewPrometheusService(log *zap.Logger, cfg Config) *Service { if log == nil { return nil } return &Service{ Server: &http.Server{ Addr: cfg.Address, Handler: promhttp.Handler(), }, enabled: cfg.Enabled, serviceType: "Prometheus", log: log.With(zap.String("service", "Prometheus")), } }