2022-07-27 06:52:08 +00:00
|
|
|
package metrics
|
|
|
|
|
|
|
|
import (
|
|
|
|
"net/http"
|
|
|
|
|
2023-03-07 14:08:53 +00:00
|
|
|
"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/pool"
|
2022-07-27 06:52:08 +00:00
|
|
|
"github.com/prometheus/client_golang/prometheus"
|
|
|
|
"github.com/prometheus/client_golang/prometheus/promhttp"
|
|
|
|
"go.uber.org/zap"
|
|
|
|
)
|
|
|
|
|
|
|
|
// Namespace and subsystem identifiers.
//
// The *Subsystem values are used in this file as first-level keys into
// appMetricsDesc when metric descriptions are looked up.
// NOTE(review): namespace is not referenced in the visible part of this file;
// presumably it is the Prometheus namespace applied where appMetricsDesc is
// built — confirm.
const (
	namespace          = "frostfs_http_gw"
	stateSubsystem     = "state"
	poolSubsystem      = "pool"
	serverSubsystem    = "server"
	statisticSubsystem = "statistic"
)
|
|
|
|
|
|
|
|
// Metric names used as second-level keys into appMetricsDesc for the state,
// server and statistic subsystems.
const (
	healthMetric      = "health"
	versionInfoMetric = "version_info"
	droppedLogs       = "dropped_logs"
)
|
2022-07-28 13:44:58 +00:00
|
|
|
|
2023-04-07 15:14:31 +00:00
|
|
|
// Metric names used as second-level keys into appMetricsDesc for the pool
// subsystem.
const (
	overallErrorsMetric       = "overall_errors"
	overallNodeErrorsMetric   = "overall_node_errors"
	overallNodeRequestsMetric = "overall_node_requests"
	currentErrorMetric        = "current_errors"
	avgRequestDurationMetric  = "avg_request_duration"
)
|
|
|
|
|
|
|
|
// Method label values. Each one is passed as the second label value of the
// requestDuration gauge vector when per-node average durations are exported.
const (
	methodGetBalance      = "get_balance"
	methodPutContainer    = "put_container"
	methodGetContainer    = "get_container"
	methodListContainer   = "list_container"
	methodDeleteContainer = "delete_container"
	methodEndpointInfo    = "endpoint_info"
	methodNetworkInfo     = "network_info"
	methodPutObject       = "put_object"
	methodDeleteObject    = "delete_object"
	methodGetObject       = "get_object"
	methodHeadObject      = "head_object"
	methodRangeObject     = "range_object"
	methodCreateSession   = "create_session"
)
|
|
|
|
|
2023-04-17 13:28:27 +00:00
|
|
|
// HealthStatus of the gate application.
//
// The numeric value is what SetHealth writes into the health gauge, so the
// values below are spelled out explicitly rather than derived with iota.
type HealthStatus int32

// Known health states of the gate application.
const (
	HealthStatusUndefined    HealthStatus = 0
	HealthStatusStarting     HealthStatus = 1
	HealthStatusReady        HealthStatus = 2
	HealthStatusShuttingDown HealthStatus = 3
)
|
|
|
|
|
2023-04-10 09:22:11 +00:00
|
|
|
type StatisticScraper interface {
|
|
|
|
Statistic() pool.Statistic
|
|
|
|
}
|
|
|
|
|
2023-08-29 12:17:20 +00:00
|
|
|
type serverMetrics struct {
|
|
|
|
endpointHealth *prometheus.GaugeVec
|
|
|
|
}
|
|
|
|
|
2022-07-27 06:52:08 +00:00
|
|
|
type GateMetrics struct {
|
|
|
|
stateMetrics
|
2022-07-28 13:44:58 +00:00
|
|
|
poolMetricsCollector
|
2023-08-29 12:17:20 +00:00
|
|
|
serverMetrics
|
2024-11-24 10:32:40 +00:00
|
|
|
statisticMetrics
|
2022-07-27 06:52:08 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
type stateMetrics struct {
|
|
|
|
healthCheck prometheus.Gauge
|
2023-04-07 15:14:31 +00:00
|
|
|
versionInfo *prometheus.GaugeVec
|
2022-07-27 06:52:08 +00:00
|
|
|
}
|
|
|
|
|
2024-11-24 10:32:40 +00:00
|
|
|
type statisticMetrics struct {
|
|
|
|
droppedLogs prometheus.Counter
|
|
|
|
}
|
|
|
|
|
2022-07-28 13:44:58 +00:00
|
|
|
type poolMetricsCollector struct {
|
2023-04-10 09:22:11 +00:00
|
|
|
scraper StatisticScraper
|
2022-08-25 06:36:38 +00:00
|
|
|
overallErrors prometheus.Gauge
|
|
|
|
overallNodeErrors *prometheus.GaugeVec
|
|
|
|
overallNodeRequests *prometheus.GaugeVec
|
2022-07-28 13:44:58 +00:00
|
|
|
currentErrors *prometheus.GaugeVec
|
|
|
|
requestDuration *prometheus.GaugeVec
|
|
|
|
}
|
|
|
|
|
2022-07-27 06:52:08 +00:00
|
|
|
// NewGateMetrics creates new metrics for http gate.
|
2023-04-10 09:22:11 +00:00
|
|
|
func NewGateMetrics(p StatisticScraper) *GateMetrics {
|
2022-07-27 06:52:08 +00:00
|
|
|
stateMetric := newStateMetrics()
|
|
|
|
stateMetric.register()
|
|
|
|
|
2022-07-28 13:44:58 +00:00
|
|
|
poolMetric := newPoolMetricsCollector(p)
|
|
|
|
poolMetric.register()
|
|
|
|
|
2023-08-29 12:17:20 +00:00
|
|
|
serverMetric := newServerMetrics()
|
|
|
|
serverMetric.register()
|
|
|
|
|
2024-11-24 10:32:40 +00:00
|
|
|
statsMetric := newStatisticMetrics()
|
|
|
|
statsMetric.register()
|
|
|
|
|
2022-07-27 06:52:08 +00:00
|
|
|
return &GateMetrics{
|
2022-07-28 13:44:58 +00:00
|
|
|
stateMetrics: *stateMetric,
|
|
|
|
poolMetricsCollector: *poolMetric,
|
2023-08-29 12:17:20 +00:00
|
|
|
serverMetrics: *serverMetric,
|
2024-11-24 10:32:40 +00:00
|
|
|
statisticMetrics: *statsMetric,
|
2022-07-27 06:52:08 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-09-09 06:57:48 +00:00
|
|
|
func (g *GateMetrics) Unregister() {
|
|
|
|
g.stateMetrics.unregister()
|
|
|
|
prometheus.Unregister(&g.poolMetricsCollector)
|
2023-08-29 12:17:20 +00:00
|
|
|
g.serverMetrics.unregister()
|
2024-11-24 10:32:40 +00:00
|
|
|
g.statisticMetrics.unregister()
|
2022-09-09 06:57:48 +00:00
|
|
|
}
|
|
|
|
|
2022-07-27 06:52:08 +00:00
|
|
|
func newStateMetrics() *stateMetrics {
|
|
|
|
return &stateMetrics{
|
2023-04-10 09:22:11 +00:00
|
|
|
healthCheck: mustNewGauge(appMetricsDesc[stateSubsystem][healthMetric]),
|
|
|
|
versionInfo: mustNewGaugeVec(appMetricsDesc[stateSubsystem][versionInfoMetric]),
|
2022-07-27 06:52:08 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-11-24 10:32:40 +00:00
|
|
|
func newStatisticMetrics() *statisticMetrics {
|
|
|
|
return &statisticMetrics{
|
|
|
|
droppedLogs: mustNewCounter(appMetricsDesc[statisticSubsystem][droppedLogs]),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *statisticMetrics) register() {
|
|
|
|
prometheus.MustRegister(s.droppedLogs)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *statisticMetrics) unregister() {
|
|
|
|
prometheus.Unregister(s.droppedLogs)
|
|
|
|
}
|
|
|
|
|
2022-07-27 06:52:08 +00:00
|
|
|
func (m stateMetrics) register() {
|
|
|
|
prometheus.MustRegister(m.healthCheck)
|
2023-04-07 15:14:31 +00:00
|
|
|
prometheus.MustRegister(m.versionInfo)
|
2022-07-27 06:52:08 +00:00
|
|
|
}
|
|
|
|
|
2022-09-09 06:57:48 +00:00
|
|
|
func (m stateMetrics) unregister() {
|
|
|
|
prometheus.Unregister(m.healthCheck)
|
2023-04-07 15:14:31 +00:00
|
|
|
prometheus.Unregister(m.versionInfo)
|
2022-09-09 06:57:48 +00:00
|
|
|
}
|
|
|
|
|
2023-04-17 13:28:27 +00:00
|
|
|
func (m stateMetrics) SetHealth(s HealthStatus) {
|
2022-07-27 06:52:08 +00:00
|
|
|
m.healthCheck.Set(float64(s))
|
|
|
|
}
|
|
|
|
|
2023-04-07 15:14:31 +00:00
|
|
|
func (m stateMetrics) SetVersion(ver string) {
|
|
|
|
m.versionInfo.WithLabelValues(ver).Set(1)
|
|
|
|
}
|
2022-07-28 13:44:58 +00:00
|
|
|
|
2024-11-24 10:32:40 +00:00
|
|
|
func (s *statisticMetrics) DroppedLogsInc() {
|
|
|
|
if s == nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
s.droppedLogs.Inc()
|
|
|
|
}
|
|
|
|
|
2023-04-10 09:22:11 +00:00
|
|
|
func newPoolMetricsCollector(p StatisticScraper) *poolMetricsCollector {
|
2022-07-28 13:44:58 +00:00
|
|
|
return &poolMetricsCollector{
|
2023-04-10 09:22:11 +00:00
|
|
|
scraper: p,
|
|
|
|
overallErrors: mustNewGauge(appMetricsDesc[poolSubsystem][overallErrorsMetric]),
|
|
|
|
overallNodeErrors: mustNewGaugeVec(appMetricsDesc[poolSubsystem][overallNodeErrorsMetric]),
|
|
|
|
overallNodeRequests: mustNewGaugeVec(appMetricsDesc[poolSubsystem][overallNodeRequestsMetric]),
|
|
|
|
currentErrors: mustNewGaugeVec(appMetricsDesc[poolSubsystem][currentErrorMetric]),
|
|
|
|
requestDuration: mustNewGaugeVec(appMetricsDesc[poolSubsystem][avgRequestDurationMetric]),
|
2022-07-28 13:44:58 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (m *poolMetricsCollector) Collect(ch chan<- prometheus.Metric) {
|
|
|
|
m.updateStatistic()
|
|
|
|
m.overallErrors.Collect(ch)
|
|
|
|
m.overallNodeErrors.Collect(ch)
|
|
|
|
m.overallNodeRequests.Collect(ch)
|
|
|
|
m.currentErrors.Collect(ch)
|
|
|
|
m.requestDuration.Collect(ch)
|
|
|
|
}
|
|
|
|
|
2022-09-09 06:57:48 +00:00
|
|
|
func (m *poolMetricsCollector) Describe(descs chan<- *prometheus.Desc) {
|
2022-07-28 13:44:58 +00:00
|
|
|
m.overallErrors.Describe(descs)
|
|
|
|
m.overallNodeErrors.Describe(descs)
|
|
|
|
m.overallNodeRequests.Describe(descs)
|
|
|
|
m.currentErrors.Describe(descs)
|
|
|
|
m.requestDuration.Describe(descs)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (m *poolMetricsCollector) register() {
|
|
|
|
prometheus.MustRegister(m)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (m *poolMetricsCollector) updateStatistic() {
|
2023-04-10 09:22:11 +00:00
|
|
|
stat := m.scraper.Statistic()
|
2022-07-28 13:44:58 +00:00
|
|
|
|
2022-08-25 06:36:38 +00:00
|
|
|
m.overallNodeErrors.Reset()
|
|
|
|
m.overallNodeRequests.Reset()
|
2022-07-28 13:44:58 +00:00
|
|
|
m.currentErrors.Reset()
|
|
|
|
m.requestDuration.Reset()
|
|
|
|
|
|
|
|
for _, node := range stat.Nodes() {
|
2022-08-25 06:36:38 +00:00
|
|
|
m.overallNodeErrors.WithLabelValues(node.Address()).Set(float64(node.OverallErrors()))
|
|
|
|
m.overallNodeRequests.WithLabelValues(node.Address()).Set(float64(node.Requests()))
|
2022-07-28 13:44:58 +00:00
|
|
|
|
|
|
|
m.currentErrors.WithLabelValues(node.Address()).Set(float64(node.CurrentErrors()))
|
|
|
|
m.updateRequestsDuration(node)
|
|
|
|
}
|
|
|
|
|
2022-08-25 06:36:38 +00:00
|
|
|
m.overallErrors.Set(float64(stat.OverallErrors()))
|
2022-07-28 13:44:58 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (m *poolMetricsCollector) updateRequestsDuration(node pool.NodeStatistic) {
|
|
|
|
m.requestDuration.WithLabelValues(node.Address(), methodGetBalance).Set(float64(node.AverageGetBalance().Milliseconds()))
|
|
|
|
m.requestDuration.WithLabelValues(node.Address(), methodPutContainer).Set(float64(node.AveragePutContainer().Milliseconds()))
|
|
|
|
m.requestDuration.WithLabelValues(node.Address(), methodGetContainer).Set(float64(node.AverageGetContainer().Milliseconds()))
|
|
|
|
m.requestDuration.WithLabelValues(node.Address(), methodListContainer).Set(float64(node.AverageListContainer().Milliseconds()))
|
|
|
|
m.requestDuration.WithLabelValues(node.Address(), methodDeleteContainer).Set(float64(node.AverageDeleteContainer().Milliseconds()))
|
|
|
|
m.requestDuration.WithLabelValues(node.Address(), methodEndpointInfo).Set(float64(node.AverageEndpointInfo().Milliseconds()))
|
|
|
|
m.requestDuration.WithLabelValues(node.Address(), methodNetworkInfo).Set(float64(node.AverageNetworkInfo().Milliseconds()))
|
|
|
|
m.requestDuration.WithLabelValues(node.Address(), methodPutObject).Set(float64(node.AveragePutObject().Milliseconds()))
|
|
|
|
m.requestDuration.WithLabelValues(node.Address(), methodDeleteObject).Set(float64(node.AverageDeleteObject().Milliseconds()))
|
|
|
|
m.requestDuration.WithLabelValues(node.Address(), methodGetObject).Set(float64(node.AverageGetObject().Milliseconds()))
|
|
|
|
m.requestDuration.WithLabelValues(node.Address(), methodHeadObject).Set(float64(node.AverageHeadObject().Milliseconds()))
|
|
|
|
m.requestDuration.WithLabelValues(node.Address(), methodRangeObject).Set(float64(node.AverageRangeObject().Milliseconds()))
|
|
|
|
m.requestDuration.WithLabelValues(node.Address(), methodCreateSession).Set(float64(node.AverageCreateSession().Milliseconds()))
|
|
|
|
}
|
|
|
|
|
2023-08-29 12:17:20 +00:00
|
|
|
func newServerMetrics() *serverMetrics {
|
|
|
|
return &serverMetrics{
|
|
|
|
endpointHealth: mustNewGaugeVec(appMetricsDesc[serverSubsystem][healthMetric]),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (m serverMetrics) register() {
|
|
|
|
prometheus.MustRegister(m.endpointHealth)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (m serverMetrics) unregister() {
|
|
|
|
prometheus.Unregister(m.endpointHealth)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (m serverMetrics) MarkHealthy(endpoint string) {
|
|
|
|
m.endpointHealth.WithLabelValues(endpoint).Set(float64(1))
|
|
|
|
}
|
|
|
|
|
|
|
|
func (m serverMetrics) MarkUnhealthy(endpoint string) {
|
|
|
|
m.endpointHealth.WithLabelValues(endpoint).Set(float64(0))
|
|
|
|
}
|
|
|
|
|
2022-07-27 06:52:08 +00:00
|
|
|
// NewPrometheusService creates a new service for gathering prometheus metrics.
|
|
|
|
func NewPrometheusService(log *zap.Logger, cfg Config) *Service {
|
|
|
|
if log == nil {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
return &Service{
|
|
|
|
Server: &http.Server{
|
|
|
|
Addr: cfg.Address,
|
|
|
|
Handler: promhttp.Handler(),
|
|
|
|
},
|
|
|
|
enabled: cfg.Enabled,
|
|
|
|
serviceType: "Prometheus",
|
|
|
|
log: log.With(zap.String("service", "Prometheus")),
|
|
|
|
}
|
|
|
|
}
|