From e5c1acf1e59d7ff9132f7bb5f059687e72ffce59 Mon Sep 17 00:00:00 2001 From: Denis Kirillov Date: Tue, 26 Jul 2022 16:29:07 +0300 Subject: [PATCH] [#591] Sync metrics and pprof configuration Signed-off-by: Denis Kirillov --- cmd/s3-gw/app.go | 16 ++++++---- cmd/s3-gw/app_healthy.go | 46 --------------------------- cmd/s3-gw/app_metrics.go | 66 +++++++++++++++++++++++++++++++++------ cmd/s3-gw/app_profiler.go | 38 +++++++++++----------- cmd/s3-gw/app_router.go | 10 ------ cmd/s3-gw/app_settings.go | 28 +++++++++++++---- cmd/s3-gw/service.go | 38 ++++++++++++++++++++++ config/config.env | 7 +++-- config/config.yaml | 9 ++++-- docs/configuration.md | 60 +++++++++++++++++++++++++---------- 10 files changed, 202 insertions(+), 116 deletions(-) delete mode 100644 cmd/s3-gw/app_healthy.go delete mode 100644 cmd/s3-gw/app_router.go create mode 100644 cmd/s3-gw/service.go diff --git a/cmd/s3-gw/app.go b/cmd/s3-gw/app.go index 4484243..24f9531 100644 --- a/cmd/s3-gw/app.go +++ b/cmd/s3-gw/app.go @@ -9,6 +9,7 @@ import ( "strconv" "time" + "github.com/gorilla/mux" "github.com/nspcc-dev/neo-go/pkg/crypto/keys" "github.com/nspcc-dev/neofs-s3-gw/api" "github.com/nspcc-dev/neofs-s3-gw/api/auth" @@ -188,7 +189,7 @@ func newApp(ctx context.Context, l *zap.Logger, v *viper.Viper) *App { l.Fatal("could not initialize API handler", zap.Error(err)) } - if v.GetBool(cfgEnableMetrics) { + if v.GetBool(cfgPrometheusEnabled) { gateMetrics = newGateMetrics() } @@ -253,12 +254,10 @@ func (a *App) Server(ctx context.Context) { zap.Error(err)) } - router := newS3Router() - - // Attach app-specific routes: - attachMetrics(router, a.cfg, a.log) - attachProfiler(router, a.cfg, a.log) + pprof := NewPprofService(a.cfg, a.log) + prometheus := NewPrometheusService(a.cfg, a.log) + router := mux.NewRouter().SkipClean(true).UseEncodedPath() // Attach S3 API: domains := fetchDomains(a.cfg) a.log.Info("fetch domains, prepare to use API", @@ -269,6 +268,9 @@ func (a *App) Server(ctx context.Context) { 
srv.Handler = router srv.ErrorLog = zap.NewStdLog(a.log) + go pprof.Start() + go prometheus.Start() + go func() { a.log.Info("starting server", zap.String("bind", addr)) @@ -298,6 +300,8 @@ func (a *App) Server(ctx context.Context) { a.log.Info("stopping server", zap.Error(srv.Shutdown(ctx))) + pprof.ShutDown(ctx) + prometheus.ShutDown(ctx) close(a.webDone) } diff --git a/cmd/s3-gw/app_healthy.go b/cmd/s3-gw/app_healthy.go deleted file mode 100644 index 71841fa..0000000 --- a/cmd/s3-gw/app_healthy.go +++ /dev/null @@ -1,46 +0,0 @@ -package main - -import ( - "github.com/prometheus/client_golang/prometheus" -) - -const ( - namespace = "neofs_s3_gw" - stateSubsystem = "state" -) - -type GateMetrics struct { - stateMetrics -} - -type stateMetrics struct { - healthCheck prometheus.Gauge -} - -func newGateMetrics() *GateMetrics { - stateMetric := newStateMetrics() - stateMetric.register() - - return &GateMetrics{ - stateMetrics: *stateMetric, - } -} - -func newStateMetrics() *stateMetrics { - return &stateMetrics{ - healthCheck: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: stateSubsystem, - Name: "health", - Help: "Current S3 gateway state", - }), - } -} - -func (m stateMetrics) register() { - prometheus.MustRegister(m.healthCheck) -} - -func (m stateMetrics) SetHealth(s int32) { - m.healthCheck.Set(float64(s)) -} diff --git a/cmd/s3-gw/app_metrics.go b/cmd/s3-gw/app_metrics.go index dea8d44..f21b804 100644 --- a/cmd/s3-gw/app_metrics.go +++ b/cmd/s3-gw/app_metrics.go @@ -1,20 +1,68 @@ package main import ( - "github.com/gorilla/mux" + "net/http" + + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" "github.com/spf13/viper" "go.uber.org/zap" ) -func attachMetrics(r *mux.Router, v *viper.Viper, l *zap.Logger) { - if !v.GetBool(cfgEnableMetrics) { - return +const ( + namespace = "neofs_s3_gw" + stateSubsystem = "state" +) + +type GateMetrics struct { + stateMetrics +} + +type 
stateMetrics struct { + healthCheck prometheus.Gauge +} + +func newGateMetrics() *GateMetrics { + stateMetric := newStateMetrics() + stateMetric.register() + + return &GateMetrics{ + stateMetrics: *stateMetric, + } +} + +func newStateMetrics() *stateMetrics { + return &stateMetrics{ + healthCheck: prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: stateSubsystem, + Name: "health", + Help: "Current S3 gateway state", + }), + } +} + +func (m stateMetrics) register() { + prometheus.MustRegister(m.healthCheck) +} + +func (m stateMetrics) SetHealth(s int32) { + m.healthCheck.Set(float64(s)) +} + +// NewPrometheusService creates a new service for gathering prometheus metrics. +func NewPrometheusService(v *viper.Viper, log *zap.Logger) *Service { + if log == nil { + return nil } - l.Info("enable metrics") - r.PathPrefix(systemPath+"/metrics"). - Subrouter(). - StrictSlash(true). - Handle("", promhttp.Handler()) + return &Service{ + Server: &http.Server{ + Addr: v.GetString(cfgPrometheusAddress), + Handler: promhttp.Handler(), + }, + enabled: v.GetBool(cfgPrometheusEnabled), + serviceType: "Prometheus", + log: log.With(zap.String("service", "Prometheus")), + } } diff --git a/cmd/s3-gw/app_profiler.go b/cmd/s3-gw/app_profiler.go index 58879f6..30793d0 100644 --- a/cmd/s3-gw/app_profiler.go +++ b/cmd/s3-gw/app_profiler.go @@ -1,32 +1,34 @@ package main import ( + "net/http" "net/http/pprof" - "github.com/gorilla/mux" "github.com/spf13/viper" "go.uber.org/zap" ) -func attachProfiler(r *mux.Router, v *viper.Viper, l *zap.Logger) { - if !v.GetBool(cfgEnableProfiler) { - return - } - - l.Info("enable profiler") - - profiler := r.PathPrefix(systemPath + "/debug/pprof"). - Subrouter(). 
- StrictSlash(true) - - profiler.HandleFunc("/", pprof.Index) - profiler.HandleFunc("/cmdline", pprof.Cmdline) - profiler.HandleFunc("/profile", pprof.Profile) - profiler.HandleFunc("/symbol", pprof.Symbol) - profiler.HandleFunc("/trace", pprof.Trace) +// NewPprofService creates a new service for gathering pprof metrics. +func NewPprofService(v *viper.Viper, l *zap.Logger) *Service { + handler := http.NewServeMux() + handler.HandleFunc("/debug/pprof/", pprof.Index) + handler.HandleFunc("/debug/pprof/cmdline", pprof.Cmdline) + handler.HandleFunc("/debug/pprof/profile", pprof.Profile) + handler.HandleFunc("/debug/pprof/symbol", pprof.Symbol) + handler.HandleFunc("/debug/pprof/trace", pprof.Trace) // Manually add support for paths linked to by index page at /debug/pprof/ for _, item := range []string{"allocs", "block", "heap", "goroutine", "mutex", "threadcreate"} { - profiler.Handle("/"+item, pprof.Handler(item)) + handler.Handle("/debug/pprof/"+item, pprof.Handler(item)) + } + + return &Service{ + Server: &http.Server{ + Addr: v.GetString(cfgPProfAddress), + Handler: handler, + }, + enabled: v.GetBool(cfgPProfEnabled), + serviceType: "Pprof", + log: l.With(zap.String("service", "Pprof")), } } diff --git a/cmd/s3-gw/app_router.go b/cmd/s3-gw/app_router.go deleted file mode 100644 index 3890e3c..0000000 --- a/cmd/s3-gw/app_router.go +++ /dev/null @@ -1,10 +0,0 @@ -package main - -import "github.com/gorilla/mux" - -const systemPath = "/system" - -func newS3Router() *mux.Router { - // Initialize router - return mux.NewRouter().SkipClean(true).UseEncodedPath() -} diff --git a/cmd/s3-gw/app_settings.go b/cmd/s3-gw/app_settings.go index 7357f11..ef59fc5 100644 --- a/cmd/s3-gw/app_settings.go +++ b/cmd/s3-gw/app_settings.go @@ -78,10 +78,13 @@ const ( // Settings. cfgMaxClientsDeadline = "max_clients_deadline" // Metrics / Profiler / Web. 
- cfgEnableMetrics = "metrics" - cfgEnableProfiler = "pprof" - cfgListenAddress = "listen_address" - cfgListenDomains = "listen_domains" + cfgPrometheusEnabled = "prometheus.enabled" + cfgPrometheusAddress = "prometheus.address" + cfgPProfEnabled = "pprof.enabled" + cfgPProfAddress = "pprof.address" + + cfgListenAddress = "listen_address" + cfgListenDomains = "listen_domains" // Peers. cfgPeers = "peers" @@ -101,6 +104,8 @@ const ( // Settings. cmdHelp = "help" cmdVersion = "version" cmdConfig = "config" + cmdPProf = "pprof" + cmdMetrics = "metrics" // envPrefix is an environment variables prefix used for configuration. envPrefix = "S3_GW" @@ -173,8 +178,8 @@ func newSettings() *viper.Viper { flags.SetOutput(os.Stdout) flags.SortFlags = false - flags.Bool(cfgEnableProfiler, false, "enable pprof") - flags.Bool(cfgEnableMetrics, false, "enable prometheus metrics") + flags.Bool(cmdPProf, false, "enable pprof") + flags.Bool(cmdMetrics, false, "enable prometheus metrics") help := flags.BoolP(cmdHelp, "h", false, "show help") versionFlag := flags.BoolP(cmdVersion, "v", false, "show version") @@ -206,6 +211,17 @@ func newSettings() *viper.Viper { // logger: v.SetDefault(cfgLoggerLevel, "debug") + v.SetDefault(cfgPProfAddress, "localhost:8085") + v.SetDefault(cfgPrometheusAddress, "localhost:8086") + + // Binding flags + if err := v.BindPFlag(cfgPProfEnabled, flags.Lookup(cmdPProf)); err != nil { + panic(err) + } + if err := v.BindPFlag(cfgPrometheusEnabled, flags.Lookup(cmdMetrics)); err != nil { + panic(err) + } + if err := v.BindPFlags(flags); err != nil { panic(err) } diff --git a/cmd/s3-gw/service.go b/cmd/s3-gw/service.go new file mode 100644 index 0000000..c3f91a0 --- /dev/null +++ b/cmd/s3-gw/service.go @@ -0,0 +1,38 @@ +package main + +import ( + "context" + "net/http" + + "go.uber.org/zap" +) + +// Service serves metrics. 
+type Service struct { + *http.Server + enabled bool + log *zap.Logger + serviceType string +} + +// Start runs http service with the exposed endpoint on the configured port. +func (ms *Service) Start() { + if ms.enabled { + ms.log.Info("service is running", zap.String("endpoint", ms.Addr)) + err := ms.ListenAndServe() + if err != nil && err != http.ErrServerClosed { + ms.log.Warn("service couldn't start on configured port") + } + } else { + ms.log.Info("service hasn't started since it's disabled") + } +} + +// ShutDown stops the service. +func (ms *Service) ShutDown(ctx context.Context) { + ms.log.Info("shutting down service", zap.String("endpoint", ms.Addr)) + err := ms.Shutdown(ctx) + if err != nil { + ms.log.Panic("can't shut down service") + } +} diff --git a/config/config.env b/config/config.env index 3026c86..27425ba 100644 --- a/config/config.env +++ b/config/config.env @@ -42,8 +42,11 @@ S3_GW_RPC_ENDPOINT=http://morph-chain.neofs.devenv:30333/ S3_GW_RESOLVE_ORDER="nns dns" # Metrics -S3_GW_METRICS=false -S3_GW_PPROF=false +S3_GW_PPROF_ENABLED=true +S3_GW_PPROF_ADDRESS=localhost:8085 + +S3_GW_PROMETHEUS_ENABLED=true +S3_GW_PROMETHEUS_ADDRESS=localhost:8086 # Timeout to connect to a node S3_GW_CONNECT_TIMEOUT=10s diff --git a/config/config.yaml b/config/config.yaml index fc413fc..b5956c7 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -45,8 +45,13 @@ resolve_order: - nns # Metrics -metrics: false -pprof: false +pprof: + enabled: true + address: localhost:8085 + +prometheus: + enabled: true + address: localhost:8086 # Timeout to connect to a node connect_timeout: 10s diff --git a/docs/configuration.md b/docs/configuration.md index bb44448..6d6d169 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -120,17 +120,19 @@ There are some custom types used for brevity: ### Structure -| Section | Description | -|------------|-----------------------------------------| -| no section | [General parameters](#general-section) | -| `wallet` | 
[Wallet configuration](#wallet-section) | -| `peers` | [Nodes configuration](#peers-section) | -| `tls` | [TLS configuration](#tls-section) | -| `logger` | [Logger configuration](#logger-section) | -| `tree` | [Tree configuration](#tree-section) | -| `cache` | [Cache configuration](#cache-section) | -| `nats` | [NATS configuration](#nats-section) | -| `cors` | [CORS configuration](#cors-section) | +| Section | Description | +|--------------|-------------------------------------------------| +| no section | [General parameters](#general-section) | +| `wallet` | [Wallet configuration](#wallet-section) | +| `peers` | [Nodes configuration](#peers-section) | +| `tls` | [TLS configuration](#tls-section) | +| `logger` | [Logger configuration](#logger-section) | +| `tree` | [Tree configuration](#tree-section) | +| `cache` | [Cache configuration](#cache-section) | +| `nats` | [NATS configuration](#nats-section) | +| `cors` | [CORS configuration](#cors-section) | +| `pprof` | [Pprof configuration](#pprof-section) | +| `prometheus` | [Prometheus configuration](#prometheus-section) | ### General section @@ -144,9 +146,6 @@ resolve_order: - nns - dns -metrics: false -pprof: false - connect_timeout: 10s healthcheck_timeout: 15s rebalance_interval: 60s @@ -162,9 +161,7 @@ default_policy: REP 3 | `address` | `string` | | Account address to get from wallet. If omitted default one will be used. | | `listen_address` | `string` | `0.0.0.0:8080` | The address that the gateway is listening on. | | `rpc_endpoint` | `string` | | The address of the RPC host to which the gateway connects to resolve bucket names (required to use the `nns` resolver). | -| `resolve_order` | `[]string` | `[dns]` | Order of bucket name resolvers to use. Available resolvers: `dns`, `nns`. | -| `metrics` | `bool` | `false` | Flag to enable and expose the prometheus metrics. | -| `pprof` | `bool` | `false` | Flag to enable the profiler. 
| +| `resolve_order` | `[]string` | `[dns]` | Order of bucket name resolvers to use. Available resolvers: `dns`, `nns`. | | `connect_timeout` | `duration` | `10s` | Timeout to connect to a node. | | `healthcheck_timeout` | `duration` | `15s` | Timeout to check node health during rebalance. | | `rebalance_interval` | `duration` | `60s` | Interval to check node health. | @@ -335,3 +332,32 @@ cors: |-------------------|-------|---------------|------------------------------------------------------| | `default_max_age` | `int` | `600` | Value of `Access-Control-Max-Age` header in seconds. | +### `pprof` section + +Contains configuration for the `pprof` profiler. + +```yaml +pprof: + enabled: true + address: localhost:8085 +``` + +| Parameter | Type | Default value | Description | +|-----------|----------|------------------|-----------------------------------------| +| `enabled` | `bool` | `false` | Flag to enable the service. | +| `address` | `string` | `localhost:8085` | Address that service listener binds to. | + +### `prometheus` section + +Contains configuration for the `prometheus` metrics service. + +```yaml +prometheus: + enabled: true + address: localhost:8086 +``` + +| Parameter | Type | Default value | Description | +|-----------|----------|------------------|-----------------------------------------| +| `enabled` | `bool` | `false` | Flag to enable the service. | +| `address` | `string` | `localhost:8086` | Address that service listener binds to. |