[#77] Add metrics for HTTP endpoint status
All checks were successful
/ DCO (pull_request) Successful in 1m2s
/ Vulncheck (pull_request) Successful in 1m18s
/ Lint (pull_request) Successful in 3m23s
/ Tests (1.20) (pull_request) Successful in 1m49s
/ Tests (1.21) (pull_request) Successful in 1m43s
/ Builds (1.20) (pull_request) Successful in 1m42s
/ Builds (1.21) (pull_request) Successful in 9m50s
All checks were successful
/ DCO (pull_request) Successful in 1m2s
/ Vulncheck (pull_request) Successful in 1m18s
/ Lint (pull_request) Successful in 3m23s
/ Tests (1.20) (pull_request) Successful in 1m49s
/ Tests (1.21) (pull_request) Successful in 1m43s
/ Builds (1.20) (pull_request) Successful in 1m42s
/ Builds (1.21) (pull_request) Successful in 9m50s
Signed-off-by: Marina Biryukova <m.biryukova@yadro.com>
This commit is contained in:
parent
7d47e88e36
commit
dbc6804d27
3 changed files with 74 additions and 11 deletions
36
app.go
36
app.go
|
@ -189,6 +189,13 @@ func newGateMetrics(logger *zap.Logger, provider *metrics.GateMetrics, enabled b
|
|||
}
|
||||
}
|
||||
|
||||
// isEnabled returns the current value of m.enabled.
// The field is read while holding m.mu's read lock; the deferred RUnlock
// releases the lock when the method returns.
func (m *gateMetrics) isEnabled() bool {
|
||||
m.mu.RLock()
|
||||
defer m.mu.RUnlock()
|
||||
|
||||
return m.enabled
|
||||
}
|
||||
|
||||
func (m *gateMetrics) SetEnabled(enabled bool) {
|
||||
if !enabled {
|
||||
m.logger.Warn(logs.MetricsAreDisabled)
|
||||
|
@ -200,23 +207,17 @@ func (m *gateMetrics) SetEnabled(enabled bool) {
|
|||
}
|
||||
|
||||
// SetHealth passes status to m.provider.SetHealth. When metrics are not
// enabled it returns without calling the provider.
// NOTE(review): the rows below appear to show both sides of a diff hunk — the
// inline m.mu.RLock/m.enabled/m.mu.RUnlock check and the isEnabled() guard that
// seems to replace it — confirm against the repository before reading this as
// a single function body.
func (m *gateMetrics) SetHealth(status metrics.HealthStatus) {
|
||||
m.mu.RLock()
|
||||
if !m.enabled {
|
||||
m.mu.RUnlock()
|
||||
if !m.isEnabled() {
|
||||
return
|
||||
}
|
||||
m.mu.RUnlock()
|
||||
|
||||
m.provider.SetHealth(status)
|
||||
}
|
||||
|
||||
// SetVersion passes ver to m.provider.SetVersion. When metrics are not
// enabled it returns without calling the provider.
// NOTE(review): the rows below appear to show both sides of a diff hunk — the
// inline m.mu.RLock/m.enabled/m.mu.RUnlock check and the isEnabled() guard that
// seems to replace it — confirm against the repository before reading this as
// a single function body.
func (m *gateMetrics) SetVersion(ver string) {
|
||||
m.mu.RLock()
|
||||
if !m.enabled {
|
||||
m.mu.RUnlock()
|
||||
if !m.isEnabled() {
|
||||
return
|
||||
}
|
||||
m.mu.RUnlock()
|
||||
|
||||
m.provider.SetVersion(ver)
|
||||
}
|
||||
|
@ -231,6 +232,22 @@ func (m *gateMetrics) Shutdown() {
|
|||
m.mu.Unlock()
|
||||
}
|
||||
|
||||
// MarkHealthy forwards endpoint to m.provider.MarkHealthy.
// It is a no-op when isEnabled reports false.
func (m *gateMetrics) MarkHealthy(endpoint string) {
|
||||
if !m.isEnabled() {
|
||||
return
|
||||
}
|
||||
|
||||
m.provider.MarkHealthy(endpoint)
|
||||
}
|
||||
|
||||
// MarkUnhealthy forwards endpoint to m.provider.MarkUnhealthy.
// It is a no-op when isEnabled reports false.
func (m *gateMetrics) MarkUnhealthy(endpoint string) {
|
||||
if !m.isEnabled() {
|
||||
return
|
||||
}
|
||||
|
||||
m.provider.MarkUnhealthy(endpoint)
|
||||
}
|
||||
|
||||
func remove(list []string, element string) []string {
|
||||
for i, item := range list {
|
||||
if item == element {
|
||||
|
@ -327,6 +344,7 @@ func (a *app) Serve() {
|
|||
go func(i int) {
|
||||
a.log.Info(logs.StartingServer, zap.String("address", a.servers[i].Address()))
|
||||
if err := a.webServer.Serve(a.servers[i].Listener()); err != nil && err != http.ErrServerClosed {
|
||||
a.metrics.MarkUnhealthy(a.servers[i].Address())
|
||||
a.log.Fatal(logs.ListenAndServe, zap.Error(err))
|
||||
}
|
||||
}(i)
|
||||
|
@ -508,9 +526,11 @@ func (a *app) initServers(ctx context.Context) {
|
|||
}
|
||||
srv, err := newServer(ctx, serverInfo)
|
||||
if err != nil {
|
||||
a.metrics.MarkUnhealthy(serverInfo.Address)
|
||||
a.log.Warn(logs.FailedToAddServer, append(fields, zap.Error(err))...)
|
||||
continue
|
||||
}
|
||||
a.metrics.MarkHealthy(serverInfo.Address)
|
||||
|
||||
a.servers = append(a.servers, srv)
|
||||
a.log.Info(logs.AddServer, fields...)
|
||||
|
|
|
@ -66,6 +66,16 @@ var appMetricsDesc = map[string]map[string]Description{
|
|||
VariableLabels: []string{"version"},
|
||||
},
|
||||
},
|
||||
serverSubsystem: {
|
||||
healthMetric: Description{
|
||||
Type: dto.MetricType_GAUGE,
|
||||
Namespace: namespace,
|
||||
Subsystem: serverSubsystem,
|
||||
Name: healthMetric,
|
||||
Help: "HTTP Server endpoint health",
|
||||
VariableLabels: []string{"endpoint"},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
type Description struct {
|
||||
|
|
|
@ -10,9 +10,10 @@ import (
|
|||
)
|
||||
|
||||
// Metric namespace and subsystem names; the metric descriptions reference them
// through their Namespace and Subsystem fields.
// NOTE(review): namespace, stateSubsystem and poolSubsystem each appear twice
// below, which looks like the removed and re-added rows of a diff hunk (the
// addition being serverSubsystem) — confirm against the repository.
const (
|
||||
namespace = "frostfs_http_gw"
|
||||
stateSubsystem = "state"
|
||||
poolSubsystem = "pool"
|
||||
namespace = "frostfs_http_gw"
|
||||
stateSubsystem = "state"
|
||||
poolSubsystem = "pool"
|
||||
serverSubsystem = "server"
|
||||
)
|
||||
|
||||
const (
|
||||
|
@ -60,9 +61,14 @@ type StatisticScraper interface {
|
|||
Statistic() pool.Statistic
|
||||
}
|
||||
|
||||
// serverMetrics holds the gauge vector that records the health of individual
// server endpoints.
type serverMetrics struct {
|
||||
// endpointHealth is set to 1 by MarkHealthy and to 0 by MarkUnhealthy for
// the given endpoint label value.
endpointHealth *prometheus.GaugeVec
|
||||
}
|
||||
|
||||
// GateMetrics groups the gateway's metric sets by embedding the state metrics,
// the pool metrics collector and the server metrics.
type GateMetrics struct {
|
||||
stateMetrics
|
||||
poolMetricsCollector
|
||||
serverMetrics
|
||||
}
|
||||
|
||||
type stateMetrics struct {
|
||||
|
@ -87,15 +93,20 @@ func NewGateMetrics(p StatisticScraper) *GateMetrics {
|
|||
poolMetric := newPoolMetricsCollector(p)
|
||||
poolMetric.register()
|
||||
|
||||
serverMetric := newServerMetrics()
|
||||
serverMetric.register()
|
||||
|
||||
return &GateMetrics{
|
||||
stateMetrics: *stateMetric,
|
||||
poolMetricsCollector: *poolMetric,
|
||||
serverMetrics: *serverMetric,
|
||||
}
|
||||
}
|
||||
|
||||
// Unregister calls unregister on the embedded state and server metrics and
// passes the pool metrics collector to prometheus.Unregister.
func (g *GateMetrics) Unregister() {
|
||||
g.stateMetrics.unregister()
|
||||
prometheus.Unregister(&g.poolMetricsCollector)
|
||||
g.serverMetrics.unregister()
|
||||
}
|
||||
|
||||
func newStateMetrics() *stateMetrics {
|
||||
|
@ -192,6 +203,28 @@ func (m *poolMetricsCollector) updateRequestsDuration(node pool.NodeStatistic) {
|
|||
m.requestDuration.WithLabelValues(node.Address(), methodCreateSession).Set(float64(node.AverageCreateSession().Milliseconds()))
|
||||
}
|
||||
|
||||
// newServerMetrics returns a serverMetrics whose endpointHealth gauge vector is
// built by mustNewGaugeVec from the appMetricsDesc entry for
// serverSubsystem/healthMetric. Registration is not done here; see register.
func newServerMetrics() *serverMetrics {
|
||||
return &serverMetrics{
|
||||
endpointHealth: mustNewGaugeVec(appMetricsDesc[serverSubsystem][healthMetric]),
|
||||
}
|
||||
}
|
||||
|
||||
// register registers the endpointHealth gauge vector via
// prometheus.MustRegister.
func (m serverMetrics) register() {
|
||||
prometheus.MustRegister(m.endpointHealth)
|
||||
}
|
||||
|
||||
// unregister removes the endpointHealth gauge vector via
// prometheus.Unregister.
func (m serverMetrics) unregister() {
|
||||
prometheus.Unregister(m.endpointHealth)
|
||||
}
|
||||
|
||||
// MarkHealthy sets the endpointHealth gauge labelled with endpoint to 1.
func (m serverMetrics) MarkHealthy(endpoint string) {
|
||||
m.endpointHealth.WithLabelValues(endpoint).Set(float64(1))
|
||||
}
|
||||
|
||||
// MarkUnhealthy sets the endpointHealth gauge labelled with endpoint to 0.
func (m serverMetrics) MarkUnhealthy(endpoint string) {
|
||||
m.endpointHealth.WithLabelValues(endpoint).Set(float64(0))
|
||||
}
|
||||
|
||||
// NewPrometheusService creates a new service for gathering prometheus metrics.
|
||||
func NewPrometheusService(log *zap.Logger, cfg Config) *Service {
|
||||
if log == nil {
|
||||
|
|
Loading…
Reference in a new issue