[#591] Sync metrics and pprof configuration

Signed-off-by: Denis Kirillov <denis@nspcc.ru>
This commit is contained in:
Denis Kirillov 2022-07-26 16:29:07 +03:00 committed by Alex Vanin
parent 1fd943ee88
commit e5c1acf1e5
10 changed files with 202 additions and 116 deletions

View file

@ -9,6 +9,7 @@ import (
"strconv" "strconv"
"time" "time"
"github.com/gorilla/mux"
"github.com/nspcc-dev/neo-go/pkg/crypto/keys" "github.com/nspcc-dev/neo-go/pkg/crypto/keys"
"github.com/nspcc-dev/neofs-s3-gw/api" "github.com/nspcc-dev/neofs-s3-gw/api"
"github.com/nspcc-dev/neofs-s3-gw/api/auth" "github.com/nspcc-dev/neofs-s3-gw/api/auth"
@ -188,7 +189,7 @@ func newApp(ctx context.Context, l *zap.Logger, v *viper.Viper) *App {
l.Fatal("could not initialize API handler", zap.Error(err)) l.Fatal("could not initialize API handler", zap.Error(err))
} }
if v.GetBool(cfgEnableMetrics) { if v.GetBool(cfgPrometheusEnabled) {
gateMetrics = newGateMetrics() gateMetrics = newGateMetrics()
} }
@ -253,12 +254,10 @@ func (a *App) Server(ctx context.Context) {
zap.Error(err)) zap.Error(err))
} }
router := newS3Router() pprof := NewPprofService(a.cfg, a.log)
prometheus := NewPrometheusService(a.cfg, a.log)
// Attach app-specific routes:
attachMetrics(router, a.cfg, a.log)
attachProfiler(router, a.cfg, a.log)
router := mux.NewRouter().SkipClean(true).UseEncodedPath()
// Attach S3 API: // Attach S3 API:
domains := fetchDomains(a.cfg) domains := fetchDomains(a.cfg)
a.log.Info("fetch domains, prepare to use API", a.log.Info("fetch domains, prepare to use API",
@ -269,6 +268,9 @@ func (a *App) Server(ctx context.Context) {
srv.Handler = router srv.Handler = router
srv.ErrorLog = zap.NewStdLog(a.log) srv.ErrorLog = zap.NewStdLog(a.log)
go pprof.Start()
go prometheus.Start()
go func() { go func() {
a.log.Info("starting server", a.log.Info("starting server",
zap.String("bind", addr)) zap.String("bind", addr))
@ -298,6 +300,8 @@ func (a *App) Server(ctx context.Context) {
a.log.Info("stopping server", a.log.Info("stopping server",
zap.Error(srv.Shutdown(ctx))) zap.Error(srv.Shutdown(ctx)))
pprof.ShutDown(ctx)
prometheus.ShutDown(ctx)
close(a.webDone) close(a.webDone)
} }

View file

@ -1,46 +0,0 @@
package main
import (
"github.com/prometheus/client_golang/prometheus"
)
const (
// namespace is passed as the Namespace of the gateway's Prometheus metrics.
namespace = "neofs_s3_gw"
// stateSubsystem is passed as the Subsystem of the state metrics.
stateSubsystem = "state"
)
// GateMetrics groups the metrics of the gateway. It embeds stateMetrics, so
// the methods of stateMetrics (e.g. SetHealth) are available on it directly.
type GateMetrics struct {
stateMetrics
}
// stateMetrics holds the gauge describing the current gateway state.
type stateMetrics struct {
// healthCheck is the "health" gauge; it is written by SetHealth.
healthCheck prometheus.Gauge
}
// newGateMetrics builds the state metrics, registers them with Prometheus
// and returns them wrapped in a GateMetrics value.
func newGateMetrics() *GateMetrics {
	state := newStateMetrics()
	state.register()

	gate := new(GateMetrics)
	gate.stateMetrics = *state
	return gate
}
// newStateMetrics creates the state metrics with a fresh, not yet registered
// "health" gauge.
func newStateMetrics() *stateMetrics {
	healthOpts := prometheus.GaugeOpts{
		Namespace: namespace,
		Subsystem: stateSubsystem,
		Name:      "health",
		Help:      "Current S3 gateway state",
	}

	return &stateMetrics{healthCheck: prometheus.NewGauge(healthOpts)}
}
// register adds the health gauge to the default Prometheus registry via
// prometheus.MustRegister.
func (m stateMetrics) register() {
	gauge := m.healthCheck
	prometheus.MustRegister(gauge)
}
// SetHealth stores s, converted to float64, in the health gauge.
func (m stateMetrics) SetHealth(s int32) {
	value := float64(s)
	m.healthCheck.Set(value)
}

View file

@ -1,20 +1,68 @@
package main package main
import ( import (
"github.com/gorilla/mux" "net/http"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp" "github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/spf13/viper" "github.com/spf13/viper"
"go.uber.org/zap" "go.uber.org/zap"
) )
func attachMetrics(r *mux.Router, v *viper.Viper, l *zap.Logger) { const (
if !v.GetBool(cfgEnableMetrics) { namespace = "neofs_s3_gw"
return stateSubsystem = "state"
)
type GateMetrics struct {
stateMetrics
} }
l.Info("enable metrics") type stateMetrics struct {
r.PathPrefix(systemPath+"/metrics"). healthCheck prometheus.Gauge
Subrouter(). }
StrictSlash(true).
Handle("", promhttp.Handler()) func newGateMetrics() *GateMetrics {
stateMetric := newStateMetrics()
stateMetric.register()
return &GateMetrics{
stateMetrics: *stateMetric,
}
}
func newStateMetrics() *stateMetrics {
return &stateMetrics{
healthCheck: prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: namespace,
Subsystem: stateSubsystem,
Name: "health",
Help: "Current S3 gateway state",
}),
}
}
func (m stateMetrics) register() {
prometheus.MustRegister(m.healthCheck)
}
func (m stateMetrics) SetHealth(s int32) {
m.healthCheck.Set(float64(s))
}
// NewPrometheusService creates a new service for gathering prometheus metrics.
func NewPrometheusService(v *viper.Viper, log *zap.Logger) *Service {
if log == nil {
return nil
}
return &Service{
Server: &http.Server{
Addr: v.GetString(cfgPrometheusAddress),
Handler: promhttp.Handler(),
},
enabled: v.GetBool(cfgPrometheusEnabled),
serviceType: "Prometheus",
log: log.With(zap.String("service", "Prometheus")),
}
} }

View file

@ -1,32 +1,34 @@
package main package main
import ( import (
"net/http"
"net/http/pprof" "net/http/pprof"
"github.com/gorilla/mux"
"github.com/spf13/viper" "github.com/spf13/viper"
"go.uber.org/zap" "go.uber.org/zap"
) )
func attachProfiler(r *mux.Router, v *viper.Viper, l *zap.Logger) { // NewPprofService creates a new service for gathering pprof metrics.
if !v.GetBool(cfgEnableProfiler) { func NewPprofService(v *viper.Viper, l *zap.Logger) *Service {
return handler := http.NewServeMux()
} handler.HandleFunc("/debug/pprof/", pprof.Index)
handler.HandleFunc("/debug/pprof/cmdline", pprof.Cmdline)
l.Info("enable profiler") handler.HandleFunc("/debug/pprof/profile", pprof.Profile)
handler.HandleFunc("/debug/pprof/symbol", pprof.Symbol)
profiler := r.PathPrefix(systemPath + "/debug/pprof"). handler.HandleFunc("/debug/pprof/trace", pprof.Trace)
Subrouter().
StrictSlash(true)
profiler.HandleFunc("/", pprof.Index)
profiler.HandleFunc("/cmdline", pprof.Cmdline)
profiler.HandleFunc("/profile", pprof.Profile)
profiler.HandleFunc("/symbol", pprof.Symbol)
profiler.HandleFunc("/trace", pprof.Trace)
// Manually add support for paths linked to by index page at /debug/pprof/ // Manually add support for paths linked to by index page at /debug/pprof/
for _, item := range []string{"allocs", "block", "heap", "goroutine", "mutex", "threadcreate"} { for _, item := range []string{"allocs", "block", "heap", "goroutine", "mutex", "threadcreate"} {
profiler.Handle("/"+item, pprof.Handler(item)) handler.Handle("/debug/pprof/"+item, pprof.Handler(item))
}
return &Service{
Server: &http.Server{
Addr: v.GetString(cfgPProfAddress),
Handler: handler,
},
enabled: v.GetBool(cfgPProfEnabled),
serviceType: "Pprof",
log: l.With(zap.String("service", "Pprof")),
} }
} }

View file

@ -1,10 +0,0 @@
package main
import "github.com/gorilla/mux"
// systemPath is the URL path prefix under which the metrics and pprof routes
// are mounted on the router.
const systemPath = "/system"
// newS3Router returns a new mux router configured with SkipClean(true) and
// UseEncodedPath().
func newS3Router() *mux.Router {
	router := mux.NewRouter()
	router = router.SkipClean(true)
	return router.UseEncodedPath()
}

View file

@ -78,8 +78,11 @@ const ( // Settings.
cfgMaxClientsDeadline = "max_clients_deadline" cfgMaxClientsDeadline = "max_clients_deadline"
// Metrics / Profiler / Web. // Metrics / Profiler / Web.
cfgEnableMetrics = "metrics" cfgPrometheusEnabled = "prometheus.enabled"
cfgEnableProfiler = "pprof" cfgPrometheusAddress = "prometheus.address"
cfgPProfEnabled = "pprof.enabled"
cfgPProfAddress = "pprof.address"
cfgListenAddress = "listen_address" cfgListenAddress = "listen_address"
cfgListenDomains = "listen_domains" cfgListenDomains = "listen_domains"
@ -101,6 +104,8 @@ const ( // Settings.
cmdHelp = "help" cmdHelp = "help"
cmdVersion = "version" cmdVersion = "version"
cmdConfig = "config" cmdConfig = "config"
cmdPProf = "pprof"
cmdMetrics = "metrics"
// envPrefix is an environment variables prefix used for configuration. // envPrefix is an environment variables prefix used for configuration.
envPrefix = "S3_GW" envPrefix = "S3_GW"
@ -173,8 +178,8 @@ func newSettings() *viper.Viper {
flags.SetOutput(os.Stdout) flags.SetOutput(os.Stdout)
flags.SortFlags = false flags.SortFlags = false
flags.Bool(cfgEnableProfiler, false, "enable pprof") flags.Bool(cmdPProf, false, "enable pprof")
flags.Bool(cfgEnableMetrics, false, "enable prometheus metrics") flags.Bool(cmdMetrics, false, "enable prometheus metrics")
help := flags.BoolP(cmdHelp, "h", false, "show help") help := flags.BoolP(cmdHelp, "h", false, "show help")
versionFlag := flags.BoolP(cmdVersion, "v", false, "show version") versionFlag := flags.BoolP(cmdVersion, "v", false, "show version")
@ -206,6 +211,17 @@ func newSettings() *viper.Viper {
// logger: // logger:
v.SetDefault(cfgLoggerLevel, "debug") v.SetDefault(cfgLoggerLevel, "debug")
v.SetDefault(cfgPProfAddress, "localhost:8085")
v.SetDefault(cfgPrometheusAddress, "localhost:8086")
// Binding flags
if err := v.BindPFlag(cfgPProfEnabled, flags.Lookup(cmdPProf)); err != nil {
panic(err)
}
if err := v.BindPFlag(cfgPrometheusEnabled, flags.Lookup(cmdMetrics)); err != nil {
panic(err)
}
if err := v.BindPFlags(flags); err != nil { if err := v.BindPFlags(flags); err != nil {
panic(err) panic(err)
} }

38
cmd/s3-gw/service.go Normal file
View file

@ -0,0 +1,38 @@
package main
import (
"context"
"net/http"
"go.uber.org/zap"
)
// Service is an auxiliary HTTP server that can be switched on or off by
// configuration. In this package it backs both the Prometheus metrics
// endpoint and the pprof endpoint.
type Service struct {
// Embedded HTTP server; the constructors set its Addr and Handler.
*http.Server
// enabled gates Start: when false, the listener is never opened.
enabled bool
// log is the logger used by Start and ShutDown.
log *zap.Logger
// serviceType is a human-readable service name set by the constructors
// ("Prometheus" or "Pprof").
serviceType string
}
// Start runs the HTTP service on the configured address. It blocks until the
// server stops, so it is meant to be run in its own goroutine.
//
// Start is a no-op on a nil receiver (NewPrometheusService returns nil when
// it is given a nil logger). When the service is disabled it only logs that
// fact and returns without opening a listener.
func (ms *Service) Start() {
	if ms == nil {
		return
	}
	if !ms.enabled {
		ms.log.Info("service hasn't started since it's disabled")
		return
	}

	ms.log.Info("service is running", zap.String("endpoint", ms.Addr))

	// http.ErrServerClosed is what ListenAndServe returns after Shutdown or
	// Close, i.e. the normal way out; anything else is a real failure and is
	// reported together with its cause.
	if err := ms.ListenAndServe(); err != nil && err != http.ErrServerClosed {
		ms.log.Warn("service couldn't start on configured port", zap.Error(err))
	}
}
// ShutDown stops the service.
//
// It first attempts a graceful shutdown bounded by ctx. If that fails (for
// example because ctx has expired), the error is logged and the server is
// closed forcibly instead, so that the caller's remaining shutdown steps can
// still run. It panics only if the forced close fails as well.
//
// ShutDown is a no-op on a nil receiver (NewPrometheusService returns nil
// when it is given a nil logger).
func (ms *Service) ShutDown(ctx context.Context) {
	if ms == nil {
		return
	}

	ms.log.Info("shutting down service", zap.String("endpoint", ms.Addr))

	err := ms.Shutdown(ctx)
	if err == nil {
		return
	}
	ms.log.Error("can't gracefully shut down service", zap.Error(err))

	// Fall back to an immediate close of the listener and all connections.
	if err = ms.Close(); err != nil {
		ms.log.Panic("can't shut down service", zap.Error(err))
	}
}

View file

@ -42,8 +42,11 @@ S3_GW_RPC_ENDPOINT=http://morph-chain.neofs.devenv:30333/
S3_GW_RESOLVE_ORDER="nns dns" S3_GW_RESOLVE_ORDER="nns dns"
# Metrics # Metrics
S3_GW_METRICS=false S3_GW_PPROF_ENABLED=true
S3_GW_PPROF=false S3_GW_PPROF_ADDRESS=localhost:8085
S3_GW_PROMETHEUS_ENABLED=true
S3_GW_PROMETHEUS_ADDRESS=localhost:8086
# Timeout to connect to a node # Timeout to connect to a node
S3_GW_CONNECT_TIMEOUT=10s S3_GW_CONNECT_TIMEOUT=10s

View file

@ -45,8 +45,13 @@ resolve_order:
- nns - nns
# Metrics # Metrics
metrics: false pprof:
pprof: false enabled: true
address: localhost:8085
prometheus:
enabled: true
address: localhost:8086
# Timeout to connect to a node # Timeout to connect to a node
connect_timeout: 10s connect_timeout: 10s

View file

@ -121,7 +121,7 @@ There are some custom types used for brevity:
### Structure ### Structure
| Section | Description | | Section | Description |
|------------|-----------------------------------------| |--------------|-------------------------------------------------|
| no section | [General parameters](#general-section) | | no section | [General parameters](#general-section) |
| `wallet` | [Wallet configuration](#wallet-section) | | `wallet` | [Wallet configuration](#wallet-section) |
| `peers` | [Nodes configuration](#peers-section) | | `peers` | [Nodes configuration](#peers-section) |
@ -131,6 +131,8 @@ There are some custom types used for brevity:
| `cache` | [Cache configuration](#cache-section) | | `cache` | [Cache configuration](#cache-section) |
| `nats` | [NATS configuration](#nats-section) | | `nats` | [NATS configuration](#nats-section) |
| `cors` | [CORS configuration](#cors-section) | | `cors` | [CORS configuration](#cors-section) |
| `pprof` | [Pprof configuration](#pprof-section) |
| `prometheus` | [Prometheus configuration](#prometheus-section) |
### General section ### General section
@ -144,9 +146,6 @@ resolve_order:
- nns - nns
- dns - dns
metrics: false
pprof: false
connect_timeout: 10s connect_timeout: 10s
healthcheck_timeout: 15s healthcheck_timeout: 15s
rebalance_interval: 60s rebalance_interval: 60s
@ -162,9 +161,7 @@ default_policy: REP 3
| `address` | `string` | | Account address to get from wallet. If omitted default one will be used. | | `address` | `string` | | Account address to get from wallet. If omitted default one will be used. |
| `listen_address` | `string` | `0.0.0.0:8080` | The address that the gateway is listening on. | | `listen_address` | `string` | `0.0.0.0:8080` | The address that the gateway is listening on. |
| `rpc_endpoint` | `string` | | The address of the RPC host to which the gateway connects to resolve bucket names (required to use the `nns` resolver). | | `rpc_endpoint` | `string` | | The address of the RPC host to which the gateway connects to resolve bucket names (required to use the `nns` resolver). |
| `resolve_order` | `[]string` | `[dns]` | Order of bucket name resolvers to use. Available resolvers: `dns`, `nns`. | | `resolve_order` | `[]string` | `[dns]` | Order of bucket name resolvers to use. Available resolvers: `dns`, `nns`. | |
| `metrics` | `bool` | `false` | Flag to enable and expose the prometheus metrics. |
| `pprof` | `bool` | `false` | Flag to enable the profiler. |
| `connect_timeout` | `duration` | `10s` | Timeout to connect to a node. | | `connect_timeout` | `duration` | `10s` | Timeout to connect to a node. |
| `healthcheck_timeout` | `duration` | `15s` | Timeout to check node health during rebalance. | | `healthcheck_timeout` | `duration` | `15s` | Timeout to check node health during rebalance. |
| `rebalance_interval` | `duration` | `60s` | Interval to check node health. | | `rebalance_interval` | `duration` | `60s` | Interval to check node health. |
@ -335,3 +332,32 @@ cors:
|-------------------|-------|---------------|------------------------------------------------------| |-------------------|-------|---------------|------------------------------------------------------|
| `default_max_age` | `int` | `600` | Value of `Access-Control-Max-Age` header in seconds. | | `default_max_age` | `int` | `600` | Value of `Access-Control-Max-Age` header in seconds. |
### `pprof` section
Contains configuration for the `pprof` profiler.
```yaml
pprof:
enabled: true
address: localhost:8085
```
| Parameter | Type | Default value | Description |
|-----------|----------|------------------|-----------------------------------------|
| `enabled` | `bool` | `false` | Flag to enable the service. |
| `address` | `string` | `localhost:8085` | Address that service listener binds to. |
### `prometheus` section
Contains configuration for the `prometheus` metrics service.
```yaml
prometheus:
enabled: true
address: localhost:8086
```
| Parameter | Type | Default value | Description |
|-----------|----------|------------------|-----------------------------------------|
| `enabled` | `bool` | `false` | Flag to enable the service. |
| `address` | `string` | `localhost:8086` | Address that service listener binds to. |