[#77] Add metrics for HTTP endpoint status
All checks were successful
/ DCO (pull_request) Successful in 1m2s
/ Vulncheck (pull_request) Successful in 1m18s
/ Lint (pull_request) Successful in 3m23s
/ Tests (1.20) (pull_request) Successful in 1m49s
/ Tests (1.21) (pull_request) Successful in 1m43s
/ Builds (1.20) (pull_request) Successful in 1m42s
/ Builds (1.21) (pull_request) Successful in 9m50s

Signed-off-by: Marina Biryukova <m.biryukova@yadro.com>
This commit is contained in:
Marina Biryukova 2023-08-29 15:17:20 +03:00
parent 7d47e88e36
commit dbc6804d27
3 changed files with 74 additions and 11 deletions

36
app.go
View file

@ -189,6 +189,13 @@ func newGateMetrics(logger *zap.Logger, provider *metrics.GateMetrics, enabled b
} }
} }
// isEnabled reports whether metrics are currently enabled.
// The flag is read under the read lock so it is safe to call concurrently
// with writers that hold m.mu.
func (m *gateMetrics) isEnabled() bool {
	m.mu.RLock()
	enabled := m.enabled
	m.mu.RUnlock()

	return enabled
}
func (m *gateMetrics) SetEnabled(enabled bool) { func (m *gateMetrics) SetEnabled(enabled bool) {
if !enabled { if !enabled {
m.logger.Warn(logs.MetricsAreDisabled) m.logger.Warn(logs.MetricsAreDisabled)
@ -200,23 +207,17 @@ func (m *gateMetrics) SetEnabled(enabled bool) {
} }
func (m *gateMetrics) SetHealth(status metrics.HealthStatus) { func (m *gateMetrics) SetHealth(status metrics.HealthStatus) {
m.mu.RLock() if !m.isEnabled() {
if !m.enabled {
m.mu.RUnlock()
return return
} }
m.mu.RUnlock()
m.provider.SetHealth(status) m.provider.SetHealth(status)
} }
func (m *gateMetrics) SetVersion(ver string) { func (m *gateMetrics) SetVersion(ver string) {
m.mu.RLock() if !m.isEnabled() {
if !m.enabled {
m.mu.RUnlock()
return return
} }
m.mu.RUnlock()
m.provider.SetVersion(ver) m.provider.SetVersion(ver)
} }
@ -231,6 +232,22 @@ func (m *gateMetrics) Shutdown() {
m.mu.Unlock() m.mu.Unlock()
} }
// MarkHealthy forwards the healthy mark for endpoint to the underlying
// provider. It does nothing when metrics are disabled.
func (m *gateMetrics) MarkHealthy(endpoint string) {
	if m.isEnabled() {
		m.provider.MarkHealthy(endpoint)
	}
}
// MarkUnhealthy forwards the unhealthy mark for endpoint to the underlying
// provider. It does nothing when metrics are disabled.
func (m *gateMetrics) MarkUnhealthy(endpoint string) {
	if m.isEnabled() {
		m.provider.MarkUnhealthy(endpoint)
	}
}
func remove(list []string, element string) []string { func remove(list []string, element string) []string {
for i, item := range list { for i, item := range list {
if item == element { if item == element {
@ -327,6 +344,7 @@ func (a *app) Serve() {
go func(i int) { go func(i int) {
a.log.Info(logs.StartingServer, zap.String("address", a.servers[i].Address())) a.log.Info(logs.StartingServer, zap.String("address", a.servers[i].Address()))
if err := a.webServer.Serve(a.servers[i].Listener()); err != nil && err != http.ErrServerClosed { if err := a.webServer.Serve(a.servers[i].Listener()); err != nil && err != http.ErrServerClosed {
a.metrics.MarkUnhealthy(a.servers[i].Address())
a.log.Fatal(logs.ListenAndServe, zap.Error(err)) a.log.Fatal(logs.ListenAndServe, zap.Error(err))
} }
}(i) }(i)
@ -508,9 +526,11 @@ func (a *app) initServers(ctx context.Context) {
} }
srv, err := newServer(ctx, serverInfo) srv, err := newServer(ctx, serverInfo)
if err != nil { if err != nil {
a.metrics.MarkUnhealthy(serverInfo.Address)
a.log.Warn(logs.FailedToAddServer, append(fields, zap.Error(err))...) a.log.Warn(logs.FailedToAddServer, append(fields, zap.Error(err))...)
continue continue
} }
a.metrics.MarkHealthy(serverInfo.Address)
a.servers = append(a.servers, srv) a.servers = append(a.servers, srv)
a.log.Info(logs.AddServer, fields...) a.log.Info(logs.AddServer, fields...)

View file

@ -66,6 +66,16 @@ var appMetricsDesc = map[string]map[string]Description{
VariableLabels: []string{"version"}, VariableLabels: []string{"version"},
}, },
}, },
serverSubsystem: {
healthMetric: Description{
Type: dto.MetricType_GAUGE,
Namespace: namespace,
Subsystem: serverSubsystem,
Name: healthMetric,
Help: "HTTP Server endpoint health",
VariableLabels: []string{"endpoint"},
},
},
} }
type Description struct { type Description struct {

View file

@ -13,6 +13,7 @@ const (
namespace = "frostfs_http_gw" namespace = "frostfs_http_gw"
stateSubsystem = "state" stateSubsystem = "state"
poolSubsystem = "pool" poolSubsystem = "pool"
serverSubsystem = "server"
) )
const ( const (
@ -60,9 +61,14 @@ type StatisticScraper interface {
Statistic() pool.Statistic Statistic() pool.Statistic
} }
// serverMetrics groups the Prometheus collectors that describe the state of
// the gateway's HTTP server endpoints.
type serverMetrics struct {
// endpointHealth is a gauge vector holding one health value per endpoint.
endpointHealth *prometheus.GaugeVec
}
type GateMetrics struct { type GateMetrics struct {
stateMetrics stateMetrics
poolMetricsCollector poolMetricsCollector
serverMetrics
} }
type stateMetrics struct { type stateMetrics struct {
@ -87,15 +93,20 @@ func NewGateMetrics(p StatisticScraper) *GateMetrics {
poolMetric := newPoolMetricsCollector(p) poolMetric := newPoolMetricsCollector(p)
poolMetric.register() poolMetric.register()
serverMetric := newServerMetrics()
serverMetric.register()
return &GateMetrics{ return &GateMetrics{
stateMetrics: *stateMetric, stateMetrics: *stateMetric,
poolMetricsCollector: *poolMetric, poolMetricsCollector: *poolMetric,
serverMetrics: *serverMetric,
} }
} }
func (g *GateMetrics) Unregister() { func (g *GateMetrics) Unregister() {
g.stateMetrics.unregister() g.stateMetrics.unregister()
prometheus.Unregister(&g.poolMetricsCollector) prometheus.Unregister(&g.poolMetricsCollector)
g.serverMetrics.unregister()
} }
func newStateMetrics() *stateMetrics { func newStateMetrics() *stateMetrics {
@ -192,6 +203,28 @@ func (m *poolMetricsCollector) updateRequestsDuration(node pool.NodeStatistic) {
m.requestDuration.WithLabelValues(node.Address(), methodCreateSession).Set(float64(node.AverageCreateSession().Milliseconds())) m.requestDuration.WithLabelValues(node.Address(), methodCreateSession).Set(float64(node.AverageCreateSession().Milliseconds()))
} }
// newServerMetrics builds the server metrics set. The endpoint health gauge
// vector is created from the description registered in appMetricsDesc under
// the server subsystem. The returned collectors are not registered yet.
func newServerMetrics() *serverMetrics {
	healthDesc := appMetricsDesc[serverSubsystem][healthMetric]

	metrics := new(serverMetrics)
	metrics.endpointHealth = mustNewGaugeVec(healthDesc)

	return metrics
}
// register adds the endpoint health gauge vector to the default Prometheus
// registry via prometheus.MustRegister, which panics if registration fails.
func (m serverMetrics) register() {
prometheus.MustRegister(m.endpointHealth)
}
// unregister removes the endpoint health gauge vector from the default
// Prometheus registry. The result of prometheus.Unregister is ignored.
func (m serverMetrics) unregister() {
prometheus.Unregister(m.endpointHealth)
}
// MarkHealthy sets the health gauge labelled with endpoint to 1.
func (m serverMetrics) MarkHealthy(endpoint string) {
	gauge := m.endpointHealth.WithLabelValues(endpoint)
	gauge.Set(1)
}
// MarkUnhealthy sets the health gauge labelled with endpoint to 0.
func (m serverMetrics) MarkUnhealthy(endpoint string) {
	gauge := m.endpointHealth.WithLabelValues(endpoint)
	gauge.Set(0)
}
// NewPrometheusService creates a new service for gathering prometheus metrics. // NewPrometheusService creates a new service for gathering prometheus metrics.
func NewPrometheusService(log *zap.Logger, cfg Config) *Service { func NewPrometheusService(log *zap.Logger, cfg Config) *Service {
if log == nil { if log == nil {