From dbc6804d273bf6b9df7b8f9a813e20e5c73835eb Mon Sep 17 00:00:00 2001 From: Marina Biryukova Date: Tue, 29 Aug 2023 15:17:20 +0300 Subject: [PATCH] [#77] Add metrics for HTTP endpoint status Signed-off-by: Marina Biryukova --- app.go | 36 ++++++++++++++++++++++++++++-------- metrics/desc.go | 10 ++++++++++ metrics/metrics.go | 39 ++++++++++++++++++++++++++++++++++++--- 3 files changed, 74 insertions(+), 11 deletions(-) diff --git a/app.go b/app.go index 864f39c..be9aaea 100644 --- a/app.go +++ b/app.go @@ -189,6 +189,13 @@ func newGateMetrics(logger *zap.Logger, provider *metrics.GateMetrics, enabled b } } +func (m *gateMetrics) isEnabled() bool { + m.mu.RLock() + defer m.mu.RUnlock() + + return m.enabled +} + func (m *gateMetrics) SetEnabled(enabled bool) { if !enabled { m.logger.Warn(logs.MetricsAreDisabled) @@ -200,23 +207,17 @@ func (m *gateMetrics) SetEnabled(enabled bool) { } func (m *gateMetrics) SetHealth(status metrics.HealthStatus) { - m.mu.RLock() - if !m.enabled { - m.mu.RUnlock() + if !m.isEnabled() { return } - m.mu.RUnlock() m.provider.SetHealth(status) } func (m *gateMetrics) SetVersion(ver string) { - m.mu.RLock() - if !m.enabled { - m.mu.RUnlock() + if !m.isEnabled() { return } - m.mu.RUnlock() m.provider.SetVersion(ver) } @@ -231,6 +232,22 @@ func (m *gateMetrics) Shutdown() { m.mu.Unlock() } +func (m *gateMetrics) MarkHealthy(endpoint string) { + if !m.isEnabled() { + return + } + + m.provider.MarkHealthy(endpoint) +} + +func (m *gateMetrics) MarkUnhealthy(endpoint string) { + if !m.isEnabled() { + return + } + + m.provider.MarkUnhealthy(endpoint) +} + func remove(list []string, element string) []string { for i, item := range list { if item == element { @@ -327,6 +344,7 @@ func (a *app) Serve() { go func(i int) { a.log.Info(logs.StartingServer, zap.String("address", a.servers[i].Address())) if err := a.webServer.Serve(a.servers[i].Listener()); err != nil && err != http.ErrServerClosed { + a.metrics.MarkUnhealthy(a.servers[i].Address()) a.log.Fatal(logs.ListenAndServe, zap.Error(err)) } }(i) @@ -508,9 +526,11 @@ func (a *app) initServers(ctx context.Context) { } srv, err := newServer(ctx, serverInfo) if err != nil { + a.metrics.MarkUnhealthy(serverInfo.Address) a.log.Warn(logs.FailedToAddServer, append(fields, zap.Error(err))...) continue } + a.metrics.MarkHealthy(serverInfo.Address) a.servers = append(a.servers, srv) a.log.Info(logs.AddServer, fields...) diff --git a/metrics/desc.go b/metrics/desc.go index f2ff4f4..e10050c 100644 --- a/metrics/desc.go +++ b/metrics/desc.go @@ -66,6 +66,16 @@ var appMetricsDesc = map[string]map[string]Description{ VariableLabels: []string{"version"}, }, }, + serverSubsystem: { + healthMetric: Description{ + Type: dto.MetricType_GAUGE, + Namespace: namespace, + Subsystem: serverSubsystem, + Name: healthMetric, + Help: "HTTP Server endpoint health", + VariableLabels: []string{"endpoint"}, + }, + }, } type Description struct { diff --git a/metrics/metrics.go b/metrics/metrics.go index cf22099..bfb66ee 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -10,9 +10,10 @@ import ( ) const ( - namespace = "frostfs_http_gw" - stateSubsystem = "state" - poolSubsystem = "pool" + namespace = "frostfs_http_gw" + stateSubsystem = "state" + poolSubsystem = "pool" + serverSubsystem = "server" ) const ( @@ -60,9 +61,14 @@ type StatisticScraper interface { Statistic() pool.Statistic } +type serverMetrics struct { + endpointHealth *prometheus.GaugeVec +} + type GateMetrics struct { stateMetrics poolMetricsCollector + serverMetrics } type stateMetrics struct { @@ -87,15 +93,20 @@ func NewGateMetrics(p StatisticScraper) *GateMetrics { poolMetric := newPoolMetricsCollector(p) poolMetric.register() + serverMetric := newServerMetrics() + serverMetric.register() + return &GateMetrics{ stateMetrics: *stateMetric, poolMetricsCollector: *poolMetric, + serverMetrics: *serverMetric, } } func (g *GateMetrics) Unregister() { g.stateMetrics.unregister() prometheus.Unregister(&g.poolMetricsCollector) + g.serverMetrics.unregister() } func newStateMetrics() *stateMetrics { @@ -192,6 +203,28 @@ func (m *poolMetricsCollector) updateRequestsDuration(node pool.NodeStatistic) { m.requestDuration.WithLabelValues(node.Address(), methodCreateSession).Set(float64(node.AverageCreateSession().Milliseconds())) } +func newServerMetrics() *serverMetrics { + return &serverMetrics{ + endpointHealth: mustNewGaugeVec(appMetricsDesc[serverSubsystem][healthMetric]), + } +} + +func (m serverMetrics) register() { + prometheus.MustRegister(m.endpointHealth) +} + +func (m serverMetrics) unregister() { + prometheus.Unregister(m.endpointHealth) +} + +func (m serverMetrics) MarkHealthy(endpoint string) { + m.endpointHealth.WithLabelValues(endpoint).Set(float64(1)) +} + +func (m serverMetrics) MarkUnhealthy(endpoint string) { + m.endpointHealth.WithLabelValues(endpoint).Set(float64(0)) +} + // NewPrometheusService creates a new service for gathering prometheus metrics. func NewPrometheusService(log *zap.Logger, cfg Config) *Service { if log == nil { -- 2.45.2