From 08898f4fb45431e2989dcdd14bad6dd5628e619d Mon Sep 17 00:00:00 2001 From: Denis Kirillov Date: Wed, 27 Jul 2022 17:52:25 +0300 Subject: [PATCH] [#615] Expose pool metrics Signed-off-by: Denis Kirillov --- cmd/s3-gw/app.go | 2 +- cmd/s3-gw/app_metrics.go | 165 ++++++++++++++++++++++++++++++++++++++- internal/neofs/neofs.go | 15 ++++ 3 files changed, 179 insertions(+), 3 deletions(-) diff --git a/cmd/s3-gw/app.go b/cmd/s3-gw/app.go index 4a43b88..393499f 100644 --- a/cmd/s3-gw/app.go +++ b/cmd/s3-gw/app.go @@ -196,7 +196,7 @@ func newApp(ctx context.Context, l *zap.Logger, v *viper.Viper) *App { } if v.GetBool(cfgPrometheusEnabled) { - gateMetrics = newGateMetrics() + gateMetrics = newGateMetrics(neofs.NewPoolStatistic(conns)) } return &App{ diff --git a/cmd/s3-gw/app_metrics.go b/cmd/s3-gw/app_metrics.go index f21b804..5ca423a 100644 --- a/cmd/s3-gw/app_metrics.go +++ b/cmd/s3-gw/app_metrics.go @@ -3,6 +3,7 @@ package main import ( "net/http" + "github.com/nspcc-dev/neofs-sdk-go/pool" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" "github.com/spf13/viper" @@ -12,22 +13,57 @@ import ( const ( namespace = "neofs_s3_gw" stateSubsystem = "state" + poolSubsystem = "pool" + + methodGetBalance = "get_balance" + methodPutContainer = "put_container" + methodGetContainer = "get_container" + methodListContainer = "list_container" + methodDeleteContainer = "delete_container" + methodGetContainerEacl = "get_container_eacl" + methodSetContainerEacl = "set_container_eacl" + methodEndpointInfo = "endpoint_info" + methodNetworkInfo = "network_info" + methodPutObject = "put_object" + methodDeleteObject = "delete_object" + methodGetObject = "get_object" + methodHeadObject = "head_object" + methodRangeObject = "range_object" + methodCreateSession = "create_session" ) +type StatisticScraper interface { + Statistic() pool.Statistic +} + type GateMetrics struct { stateMetrics + poolMetricsCollector } type stateMetrics struct { 
healthCheck prometheus.Gauge } -func newGateMetrics() *GateMetrics { +type poolMetricsCollector struct { + poolStatScraper StatisticScraper + overallErrors prometheus.Counter + overallNodeErrors *prometheus.CounterVec + overallNodeRequests *prometheus.CounterVec + currentErrors *prometheus.GaugeVec + requestDuration *prometheus.GaugeVec +} + +func newGateMetrics(scraper StatisticScraper) *GateMetrics { stateMetric := newStateMetrics() stateMetric.register() + poolMetric := newPoolMetricsCollector(scraper) + poolMetric.register() + return &GateMetrics{ - stateMetrics: *stateMetric, + stateMetrics: *stateMetric, + poolMetricsCollector: *poolMetric, } } @@ -50,6 +86,131 @@ func (m stateMetrics) SetHealth(s int32) { m.healthCheck.Set(float64(s)) } +func newPoolMetricsCollector(scraper StatisticScraper) *poolMetricsCollector { + overallErrors := prometheus.NewCounter( + prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: poolSubsystem, + Name: "overall_errors", + Help: "Total number of errors in pool", + }, + ) + + overallNodeErrors := prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: poolSubsystem, + Name: "overall_node_errors", + Help: "Total number of errors for connection in pool", + }, + []string{ + "node", + }, + ) + + overallNodeRequests := prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: poolSubsystem, + Name: "overall_node_requests", + Help: "Total number of requests to specific node in pool", + }, + []string{ + "node", + }, + ) + + currentErrors := prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: poolSubsystem, + Name: "current_errors", + Help: "Number of errors on current connections that will be reset after the threshold", + }, + []string{ + "node", + }, + ) + + requestsDuration := prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: poolSubsystem, + Name: "avg_request_duration", + Help: "Average request 
duration (in milliseconds) for specific method on node in pool", + }, + []string{ + "node", + "method", + }, + ) + + return &poolMetricsCollector{ + poolStatScraper: scraper, + overallErrors: overallErrors, + overallNodeErrors: overallNodeErrors, + overallNodeRequests: overallNodeRequests, + currentErrors: currentErrors, + requestDuration: requestsDuration, + } +} + +func (m *poolMetricsCollector) Collect(ch chan<- prometheus.Metric) { + m.updateStatistic() + m.overallErrors.Collect(ch) + m.overallNodeErrors.Collect(ch) + m.overallNodeRequests.Collect(ch) + m.currentErrors.Collect(ch) + m.requestDuration.Collect(ch) +} + +func (m poolMetricsCollector) Describe(descs chan<- *prometheus.Desc) { + m.overallErrors.Describe(descs) + m.overallNodeErrors.Describe(descs) + m.overallNodeRequests.Describe(descs) + m.currentErrors.Describe(descs) + m.requestDuration.Describe(descs) +} + +func (m *poolMetricsCollector) register() { + prometheus.MustRegister(m) +} + +func (m *poolMetricsCollector) updateStatistic() { + stat := m.poolStatScraper.Statistic() + + m.currentErrors.Reset() + m.requestDuration.Reset() + + for _, node := range stat.Nodes() { + m.overallNodeErrors.WithLabelValues(node.Address()).Add(float64(node.OverallErrors())) + m.overallNodeRequests.WithLabelValues(node.Address()).Add(float64(node.Requests())) + + m.currentErrors.WithLabelValues(node.Address()).Set(float64(node.CurrentErrors())) + m.updateRequestsDuration(node) + } + + m.overallErrors.Add(float64(stat.OverallErrors())) +} + +func (m *poolMetricsCollector) updateRequestsDuration(node pool.NodeStatistic) { + m.requestDuration.WithLabelValues(node.Address(), methodGetBalance).Set(float64(node.AverageGetBalance().Milliseconds())) + m.requestDuration.WithLabelValues(node.Address(), methodPutContainer).Set(float64(node.AveragePutContainer().Milliseconds())) + m.requestDuration.WithLabelValues(node.Address(), methodGetContainer).Set(float64(node.AverageGetContainer().Milliseconds())) + 
m.requestDuration.WithLabelValues(node.Address(), methodListContainer).Set(float64(node.AverageListContainer().Milliseconds())) + m.requestDuration.WithLabelValues(node.Address(), methodDeleteContainer).Set(float64(node.AverageDeleteContainer().Milliseconds())) + m.requestDuration.WithLabelValues(node.Address(), methodGetContainerEacl).Set(float64(node.AverageGetContainerEACL().Milliseconds())) + m.requestDuration.WithLabelValues(node.Address(), methodSetContainerEacl).Set(float64(node.AverageSetContainerEACL().Milliseconds())) + m.requestDuration.WithLabelValues(node.Address(), methodEndpointInfo).Set(float64(node.AverageEndpointInfo().Milliseconds())) + m.requestDuration.WithLabelValues(node.Address(), methodNetworkInfo).Set(float64(node.AverageNetworkInfo().Milliseconds())) + m.requestDuration.WithLabelValues(node.Address(), methodPutObject).Set(float64(node.AveragePutObject().Milliseconds())) + m.requestDuration.WithLabelValues(node.Address(), methodDeleteObject).Set(float64(node.AverageDeleteObject().Milliseconds())) + m.requestDuration.WithLabelValues(node.Address(), methodGetObject).Set(float64(node.AverageGetObject().Milliseconds())) + m.requestDuration.WithLabelValues(node.Address(), methodHeadObject).Set(float64(node.AverageHeadObject().Milliseconds())) + m.requestDuration.WithLabelValues(node.Address(), methodRangeObject).Set(float64(node.AverageRangeObject().Milliseconds())) + m.requestDuration.WithLabelValues(node.Address(), methodCreateSession).Set(float64(node.AverageCreateSession().Milliseconds())) +} + // NewPrometheusService creates a new service for gathering prometheus metrics. 
func NewPrometheusService(v *viper.Viper, log *zap.Logger) *Service { if log == nil { diff --git a/internal/neofs/neofs.go b/internal/neofs/neofs.go index 483b7b8..f74210d 100644 --- a/internal/neofs/neofs.go +++ b/internal/neofs/neofs.go @@ -579,3 +579,18 @@ func (x *AuthmateNeoFS) CreateObject(ctx context.Context, prm tokens.PrmObjectCr Payload: bytes.NewReader(prm.Payload), }) } + +// PoolStatistic is a mediator which provides access to pool.Pool statistics. +type PoolStatistic struct { + pool *pool.Pool +} + +// NewPoolStatistic creates a new PoolStatistic using the provided pool.Pool. +func NewPoolStatistic(p *pool.Pool) *PoolStatistic { + return &PoolStatistic{pool: p} +} + +// Statistic returns the statistic of the underlying pool.Pool. +func (x *PoolStatistic) Statistic() pool.Statistic { + return x.pool.Statistic() +}