forked from TrueCloudLab/frostfs-http-gw
[#77] Add metrics for HTTP endpoint status
Signed-off-by: Marina Biryukova <m.biryukova@yadro.com>
This commit is contained in:
parent
7d47e88e36
commit
dbc6804d27
3 changed files with 74 additions and 11 deletions
36
app.go
36
app.go
|
@ -189,6 +189,13 @@ func newGateMetrics(logger *zap.Logger, provider *metrics.GateMetrics, enabled b
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (m *gateMetrics) isEnabled() bool {
|
||||||
|
m.mu.RLock()
|
||||||
|
defer m.mu.RUnlock()
|
||||||
|
|
||||||
|
return m.enabled
|
||||||
|
}
|
||||||
|
|
||||||
func (m *gateMetrics) SetEnabled(enabled bool) {
|
func (m *gateMetrics) SetEnabled(enabled bool) {
|
||||||
if !enabled {
|
if !enabled {
|
||||||
m.logger.Warn(logs.MetricsAreDisabled)
|
m.logger.Warn(logs.MetricsAreDisabled)
|
||||||
|
@ -200,23 +207,17 @@ func (m *gateMetrics) SetEnabled(enabled bool) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// SetHealth passes status on to the underlying provider. When the enabled
// flag is false it returns without touching the provider.
func (m *gateMetrics) SetHealth(status metrics.HealthStatus) {
|
func (m *gateMetrics) SetHealth(status metrics.HealthStatus) {
|
||||||
m.mu.RLock()
|
if !m.isEnabled() {
|
||||||
if !m.enabled {
|
|
||||||
m.mu.RUnlock()
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
m.mu.RUnlock()
|
|
||||||
|
|
||||||
m.provider.SetHealth(status)
|
m.provider.SetHealth(status)
|
||||||
}
|
}
|
||||||
|
|
||||||
// SetVersion passes ver on to the underlying provider. When the enabled
// flag is false it returns without touching the provider.
func (m *gateMetrics) SetVersion(ver string) {
|
func (m *gateMetrics) SetVersion(ver string) {
|
||||||
m.mu.RLock()
|
if !m.isEnabled() {
|
||||||
if !m.enabled {
|
|
||||||
m.mu.RUnlock()
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
m.mu.RUnlock()
|
|
||||||
|
|
||||||
m.provider.SetVersion(ver)
|
m.provider.SetVersion(ver)
|
||||||
}
|
}
|
||||||
|
@ -231,6 +232,22 @@ func (m *gateMetrics) Shutdown() {
|
||||||
m.mu.Unlock()
|
m.mu.Unlock()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (m *gateMetrics) MarkHealthy(endpoint string) {
|
||||||
|
if !m.isEnabled() {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
m.provider.MarkHealthy(endpoint)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *gateMetrics) MarkUnhealthy(endpoint string) {
|
||||||
|
if !m.isEnabled() {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
m.provider.MarkUnhealthy(endpoint)
|
||||||
|
}
|
||||||
|
|
||||||
func remove(list []string, element string) []string {
|
func remove(list []string, element string) []string {
|
||||||
for i, item := range list {
|
for i, item := range list {
|
||||||
if item == element {
|
if item == element {
|
||||||
|
@ -327,6 +344,7 @@ func (a *app) Serve() {
|
||||||
go func(i int) {
|
go func(i int) {
|
||||||
a.log.Info(logs.StartingServer, zap.String("address", a.servers[i].Address()))
|
a.log.Info(logs.StartingServer, zap.String("address", a.servers[i].Address()))
|
||||||
if err := a.webServer.Serve(a.servers[i].Listener()); err != nil && err != http.ErrServerClosed {
|
if err := a.webServer.Serve(a.servers[i].Listener()); err != nil && err != http.ErrServerClosed {
|
||||||
|
a.metrics.MarkUnhealthy(a.servers[i].Address())
|
||||||
a.log.Fatal(logs.ListenAndServe, zap.Error(err))
|
a.log.Fatal(logs.ListenAndServe, zap.Error(err))
|
||||||
}
|
}
|
||||||
}(i)
|
}(i)
|
||||||
|
@ -508,9 +526,11 @@ func (a *app) initServers(ctx context.Context) {
|
||||||
}
|
}
|
||||||
srv, err := newServer(ctx, serverInfo)
|
srv, err := newServer(ctx, serverInfo)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
a.metrics.MarkUnhealthy(serverInfo.Address)
|
||||||
a.log.Warn(logs.FailedToAddServer, append(fields, zap.Error(err))...)
|
a.log.Warn(logs.FailedToAddServer, append(fields, zap.Error(err))...)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
a.metrics.MarkHealthy(serverInfo.Address)
|
||||||
|
|
||||||
a.servers = append(a.servers, srv)
|
a.servers = append(a.servers, srv)
|
||||||
a.log.Info(logs.AddServer, fields...)
|
a.log.Info(logs.AddServer, fields...)
|
||||||
|
|
|
@ -66,6 +66,16 @@ var appMetricsDesc = map[string]map[string]Description{
|
||||||
VariableLabels: []string{"version"},
|
VariableLabels: []string{"version"},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
serverSubsystem: {
|
||||||
|
healthMetric: Description{
|
||||||
|
Type: dto.MetricType_GAUGE,
|
||||||
|
Namespace: namespace,
|
||||||
|
Subsystem: serverSubsystem,
|
||||||
|
Name: healthMetric,
|
||||||
|
Help: "HTTP Server endpoint health",
|
||||||
|
VariableLabels: []string{"endpoint"},
|
||||||
|
},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
type Description struct {
|
type Description struct {
|
||||||
|
|
|
@ -13,6 +13,7 @@ const (
|
||||||
namespace = "frostfs_http_gw"
|
namespace = "frostfs_http_gw"
|
||||||
stateSubsystem = "state"
|
stateSubsystem = "state"
|
||||||
poolSubsystem = "pool"
|
poolSubsystem = "pool"
|
||||||
|
serverSubsystem = "server"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
|
@ -60,9 +61,14 @@ type StatisticScraper interface {
|
||||||
Statistic() pool.Statistic
|
Statistic() pool.Statistic
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type serverMetrics struct {
|
||||||
|
endpointHealth *prometheus.GaugeVec
|
||||||
|
}
|
||||||
|
|
||||||
// GateMetrics bundles the gateway's metric groups by embedding the state
// metrics, the pool metrics collector and the server metrics.
type GateMetrics struct {
|
type GateMetrics struct {
|
||||||
stateMetrics
|
stateMetrics
|
||||||
poolMetricsCollector
|
poolMetricsCollector
|
||||||
|
serverMetrics
|
||||||
}
|
}
|
||||||
|
|
||||||
type stateMetrics struct {
|
type stateMetrics struct {
|
||||||
|
@ -87,15 +93,20 @@ func NewGateMetrics(p StatisticScraper) *GateMetrics {
|
||||||
poolMetric := newPoolMetricsCollector(p)
|
poolMetric := newPoolMetricsCollector(p)
|
||||||
poolMetric.register()
|
poolMetric.register()
|
||||||
|
|
||||||
|
serverMetric := newServerMetrics()
|
||||||
|
serverMetric.register()
|
||||||
|
|
||||||
return &GateMetrics{
|
return &GateMetrics{
|
||||||
stateMetrics: *stateMetric,
|
stateMetrics: *stateMetric,
|
||||||
poolMetricsCollector: *poolMetric,
|
poolMetricsCollector: *poolMetric,
|
||||||
|
serverMetrics: *serverMetric,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Unregister removes the metric groups held by g from prometheus: the
// state metrics, the pool metrics collector and the server metrics.
func (g *GateMetrics) Unregister() {
|
func (g *GateMetrics) Unregister() {
|
||||||
g.stateMetrics.unregister()
|
g.stateMetrics.unregister()
|
||||||
prometheus.Unregister(&g.poolMetricsCollector)
|
prometheus.Unregister(&g.poolMetricsCollector)
|
||||||
|
g.serverMetrics.unregister()
|
||||||
}
|
}
|
||||||
|
|
||||||
func newStateMetrics() *stateMetrics {
|
func newStateMetrics() *stateMetrics {
|
||||||
|
@ -192,6 +203,28 @@ func (m *poolMetricsCollector) updateRequestsDuration(node pool.NodeStatistic) {
|
||||||
m.requestDuration.WithLabelValues(node.Address(), methodCreateSession).Set(float64(node.AverageCreateSession().Milliseconds()))
|
m.requestDuration.WithLabelValues(node.Address(), methodCreateSession).Set(float64(node.AverageCreateSession().Milliseconds()))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func newServerMetrics() *serverMetrics {
|
||||||
|
return &serverMetrics{
|
||||||
|
endpointHealth: mustNewGaugeVec(appMetricsDesc[serverSubsystem][healthMetric]),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m serverMetrics) register() {
|
||||||
|
prometheus.MustRegister(m.endpointHealth)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m serverMetrics) unregister() {
|
||||||
|
prometheus.Unregister(m.endpointHealth)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m serverMetrics) MarkHealthy(endpoint string) {
|
||||||
|
m.endpointHealth.WithLabelValues(endpoint).Set(float64(1))
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m serverMetrics) MarkUnhealthy(endpoint string) {
|
||||||
|
m.endpointHealth.WithLabelValues(endpoint).Set(float64(0))
|
||||||
|
}
|
||||||
|
|
||||||
// NewPrometheusService creates a new service for gathering prometheus metrics.
|
// NewPrometheusService creates a new service for gathering prometheus metrics.
|
||||||
func NewPrometheusService(log *zap.Logger, cfg Config) *Service {
|
func NewPrometheusService(log *zap.Logger, cfg Config) *Service {
|
||||||
if log == nil {
|
if log == nil {
|
||||||
|
|
Loading…
Reference in a new issue