[#702] Metrics reload on SIGHUP

Signed-off-by: Denis Kirillov <denis@nspcc.ru>
This commit is contained in:
Denis Kirillov 2022-09-12 15:30:51 +03:00 committed by Alex Vanin
parent 63275f7876
commit 42893ec046
2 changed files with 112 additions and 24 deletions

View file

@ -9,6 +9,7 @@ import (
"os" "os"
"os/signal" "os/signal"
"strconv" "strconv"
"sync"
"syscall" "syscall"
"time" "time"
@ -39,13 +40,14 @@ type (
obj layer.Client obj layer.Client
api api.Handler api api.Handler
metrics GateMetricsCollector metrics *appMetrics
maxClients api.MaxClients maxClients api.MaxClients
webDone chan struct{} webDone chan struct{}
wrkDone chan struct{} wrkDone chan struct{}
services []*Service
settings *appSettings settings *appSettings
} }
@ -63,8 +65,16 @@ type (
CertFile string CertFile string
} }
// appMetrics wraps a GateMetricsCollector with a switch that can be flipped
// at runtime, so health reporting can be turned on and off without replacing
// the underlying provider.
appMetrics struct {
// logger receives the "metrics are disabled" warning.
logger *zap.Logger
// provider is the collector the calls are forwarded to.
provider GateMetricsCollector
// mu guards enabled.
mu sync.RWMutex
// enabled gates SetHealth: while false, health updates are dropped.
enabled bool
}
GateMetricsCollector interface { GateMetricsCollector interface {
SetHealth(int32) SetHealth(int32)
Unregister()
} }
) )
@ -80,8 +90,6 @@ func newApp(ctx context.Context, log *Logger, v *viper.Viper) *App {
obj layer.Client obj layer.Client
nc *notifications.Controller nc *notifications.Controller
gateMetrics GateMetricsCollector
prmPool pool.InitParameters prmPool pool.InitParameters
reBalance = defaultRebalanceInterval reBalance = defaultRebalanceInterval
@ -211,11 +219,7 @@ func newApp(ctx context.Context, log *Logger, v *viper.Viper) *App {
l.Fatal("could not initialize API handler", zap.Error(err)) l.Fatal("could not initialize API handler", zap.Error(err))
} }
if v.GetBool(cfgPrometheusEnabled) { app := &App{
gateMetrics = newGateMetrics(neofs.NewPoolStatistic(conns))
}
return &App{
ctr: ctr, ctr: ctr,
log: l, log: l,
cfg: v, cfg: v,
@ -223,13 +227,61 @@ func newApp(ctx context.Context, log *Logger, v *viper.Viper) *App {
tls: tls, tls: tls,
api: caller, api: caller,
metrics: gateMetrics,
webDone: make(chan struct{}, 1), webDone: make(chan struct{}, 1),
wrkDone: make(chan struct{}, 1), wrkDone: make(chan struct{}, 1),
maxClients: api.NewMaxClientsMiddleware(maxClientsCount, maxClientsDeadline), maxClients: api.NewMaxClientsMiddleware(maxClientsCount, maxClientsDeadline),
} }
app.initMetrics(neofs.NewPoolStatistic(conns))
return app
}
// initMetrics builds the gate metrics provider on top of scraper and stores
// it on the application, wrapped in an appMetrics whose initial state comes
// from the cfgPrometheusEnabled setting.
func (a *App) initMetrics(scraper StatisticScraper) {
	provider := newGateMetrics(scraper)
	enabled := a.cfg.GetBool(cfgPrometheusEnabled)

	a.metrics = newAppMetrics(a.log, provider, enabled)
}
// newAppMetrics wraps provider into an appMetrics whose initial on/off state
// is taken from enabled. A warning is logged when metrics start out disabled.
func newAppMetrics(logger *zap.Logger, provider GateMetricsCollector, enabled bool) *appMetrics {
	if !enabled {
		logger.Warn("metrics are disabled")
	}

	// The flag must be stored here: SetHealth drops every update while
	// enabled is false, so leaving it at the zero value would silence health
	// reporting until the first SetEnabled call.
	return &appMetrics{
		logger:   logger,
		provider: provider,
		enabled:  enabled,
	}
}
// SetEnabled switches health reporting on or off. Turning it off logs a
// warning before the flag is updated under the write lock.
func (m *appMetrics) SetEnabled(enabled bool) {
	if !enabled {
		m.logger.Warn("metrics are disabled")
	}

	m.mu.Lock()
	defer m.mu.Unlock()

	m.enabled = enabled
}
// SetHealth forwards status to the underlying provider, but only while
// metrics are enabled; otherwise the update is dropped.
func (m *appMetrics) SetHealth(status int32) {
	// Snapshot the flag under the read lock; the provider is called after
	// the lock has been released.
	m.mu.RLock()
	active := m.enabled
	m.mu.RUnlock()

	if active {
		m.provider.SetHealth(status)
	}
}
// Shutdown disables the metrics and unregisters the provider. The whole
// sequence runs under the write lock.
func (m *appMetrics) Shutdown() {
m.mu.Lock()
// If metrics were enabled, publish health status 0 one last time and turn
// the flag off.
if m.enabled {
m.provider.SetHealth(0)
m.enabled = false
}
// The provider is unregistered regardless of the enabled flag.
m.provider.Unregister()
m.mu.Unlock()
} }
func remove(list []string, element string) []string { func remove(list []string, element string) []string {
@ -252,15 +304,17 @@ func (a *App) Wait() {
zap.String("version", version.Version), zap.String("version", version.Version),
) )
if a.metrics != nil { a.setHealthStatus()
a.metrics.SetHealth(1)
}
<-a.webDone // wait for web-server to be stopped <-a.webDone // wait for web-server to be stopped
a.log.Info("application finished") a.log.Info("application finished")
} }
// setHealthStatus reports health status 1 through the application metrics.
func (a *App) setHealthStatus() {
	const status int32 = 1

	a.metrics.SetHealth(status)
}
// Serve runs HTTP server to handle S3 API requests. // Serve runs HTTP server to handle S3 API requests.
func (a *App) Serve(ctx context.Context) { func (a *App) Serve(ctx context.Context) {
var ( var (
@ -276,9 +330,6 @@ func (a *App) Serve(ctx context.Context) {
zap.Error(err)) zap.Error(err))
} }
pprof := NewPprofService(a.cfg, a.log)
prometheus := NewPrometheusService(a.cfg, a.log)
router := mux.NewRouter().SkipClean(true).UseEncodedPath() router := mux.NewRouter().SkipClean(true).UseEncodedPath()
// Attach S3 API: // Attach S3 API:
domains := a.cfg.GetStringSlice(cfgListenDomains) domains := a.cfg.GetStringSlice(cfgListenDomains)
@ -290,8 +341,7 @@ func (a *App) Serve(ctx context.Context) {
srv.Handler = router srv.Handler = router
srv.ErrorLog = zap.NewStdLog(a.log) srv.ErrorLog = zap.NewStdLog(a.log)
go pprof.Start() a.startServices()
go prometheus.Start()
go func() { go func() {
a.log.Info("starting server", a.log.Info("starting server",
@ -328,17 +378,21 @@ LOOP:
} }
} }
ctx, cancel := context.WithTimeout(context.Background(), defaultShutdownTimeout) ctx, cancel := shutdownContext()
defer cancel() defer cancel()
a.log.Info("stopping server", a.log.Info("stopping server", zap.Error(srv.Shutdown(ctx)))
zap.Error(srv.Shutdown(ctx)))
pprof.ShutDown(ctx) a.metrics.Shutdown()
prometheus.ShutDown(ctx) a.stopServices()
close(a.webDone) close(a.webDone)
} }
// shutdownContext returns a fresh context, detached from any caller context,
// that expires after defaultShutdownTimeout. The caller must invoke the
// returned cancel function.
func shutdownContext() (context.Context, context.CancelFunc) {
	parent := context.Background()

	return context.WithTimeout(parent, defaultShutdownTimeout)
}
func (a *App) configReload() { func (a *App) configReload() {
a.log.Info("SIGHUP config reload started") a.log.Info("SIGHUP config reload started")
@ -351,8 +405,14 @@ func (a *App) configReload() {
return return
} }
a.stopServices()
a.startServices()
a.updateSettings() a.updateSettings()
a.metrics.SetEnabled(a.cfg.GetBool(cfgPrometheusEnabled))
a.setHealthStatus()
a.log.Info("SIGHUP config reload completed") a.log.Info("SIGHUP config reload completed")
} }
@ -364,6 +424,25 @@ func (a *App) updateSettings() {
} }
} }
// startServices creates the pprof and Prometheus services from the current
// configuration, records them in a.services and starts each one in its own
// goroutine.
func (a *App) startServices() {
	// Forget the services of a previous run. On config reload stopServices
	// is called first, so everything still listed here has already been shut
	// down; keeping those entries would grow the slice on every reload and
	// make later stopServices calls shut the same services down again.
	a.services = a.services[:0]

	pprofService := NewPprofService(a.cfg, a.log)
	a.services = append(a.services, pprofService)
	go pprofService.Start()

	prometheusService := NewPrometheusService(a.cfg, a.log)
	a.services = append(a.services, prometheusService)
	go prometheusService.Start()
}
// stopServices shuts down every service recorded in a.services, one after
// another, sharing a single context bounded by the shutdown timeout.
func (a *App) stopServices() {
	ctx, cancel := shutdownContext()
	defer cancel()

	for i := range a.services {
		a.services[i].ShutDown(ctx)
	}
}
func getNotificationsOptions(v *viper.Viper, l *zap.Logger) *notifications.Options { func getNotificationsOptions(v *viper.Viper, l *zap.Logger) *notifications.Options {
cfg := notifications.Options{} cfg := notifications.Options{}
cfg.URL = v.GetString(cfgNATSEndpoint) cfg.URL = v.GetString(cfgNATSEndpoint)

View file

@ -67,6 +67,11 @@ func newGateMetrics(scraper StatisticScraper) *GateMetrics {
} }
} }
// Unregister removes the gate's collectors from Prometheus: first the state
// metrics, then the pool metrics collector.
func (g *GateMetrics) Unregister() {
	g.stateMetrics.unregister()

	poolCollector := &g.poolMetricsCollector
	prometheus.Unregister(poolCollector)
}
func newStateMetrics() *stateMetrics { func newStateMetrics() *stateMetrics {
return &stateMetrics{ return &stateMetrics{
healthCheck: prometheus.NewGauge(prometheus.GaugeOpts{ healthCheck: prometheus.NewGauge(prometheus.GaugeOpts{
@ -82,6 +87,10 @@ func (m stateMetrics) register() {
prometheus.MustRegister(m.healthCheck) prometheus.MustRegister(m.healthCheck)
} }
// unregister removes the health-check gauge from Prometheus.
func (m stateMetrics) unregister() {
	gauge := m.healthCheck

	prometheus.Unregister(gauge)
}
func (m stateMetrics) SetHealth(s int32) { func (m stateMetrics) SetHealth(s int32) {
m.healthCheck.Set(float64(s)) m.healthCheck.Set(float64(s))
} }
@ -164,7 +173,7 @@ func (m *poolMetricsCollector) Collect(ch chan<- prometheus.Metric) {
m.requestDuration.Collect(ch) m.requestDuration.Collect(ch)
} }
func (m poolMetricsCollector) Describe(descs chan<- *prometheus.Desc) { func (m *poolMetricsCollector) Describe(descs chan<- *prometheus.Desc) {
m.overallErrors.Describe(descs) m.overallErrors.Describe(descs)
m.overallNodeErrors.Describe(descs) m.overallNodeErrors.Describe(descs)
m.overallNodeRequests.Describe(descs) m.overallNodeRequests.Describe(descs)