[#615] Expose pool metrics

Signed-off-by: Denis Kirillov <denis@nspcc.ru>
This commit is contained in:
Denis Kirillov 2022-07-27 17:52:25 +03:00 committed by Kirillov Denis
parent 0949c7e782
commit 08898f4fb4
3 changed files with 179 additions and 3 deletions

View file

@ -196,7 +196,7 @@ func newApp(ctx context.Context, l *zap.Logger, v *viper.Viper) *App {
}
if v.GetBool(cfgPrometheusEnabled) {
gateMetrics = newGateMetrics()
gateMetrics = newGateMetrics(neofs.NewPoolStatistic(conns))
}
return &App{

View file

@ -3,6 +3,7 @@ package main
import (
"net/http"
"github.com/nspcc-dev/neofs-sdk-go/pool"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/spf13/viper"
@ -12,22 +13,57 @@ import (
const (
namespace = "neofs_s3_gw"
stateSubsystem = "state"
poolSubsystem = "pool"
methodGetBalance = "get_balance"
methodPutContainer = "put_container"
methodGetContainer = "get_container"
methodListContainer = "list_container"
methodDeleteContainer = "delete_container"
methodGetContainerEacl = "get_container_eacl"
methodSetContainerEacl = "set_container_eacl"
methodEndpointInfo = "endpoint_info"
methodNetworkInfo = "network_info"
methodPutObject = "put_object"
methodDeleteObject = "delete_object"
methodGetObject = "get_object"
methodHeadObject = "head_object"
methodRangeObject = "range_object"
methodCreateSession = "create_session"
)
type StatisticScraper interface {
Statistic() pool.Statistic
}
type GateMetrics struct {
stateMetrics
poolMetricsCollector
}
type stateMetrics struct {
healthCheck prometheus.Gauge
}
func newGateMetrics() *GateMetrics {
type poolMetricsCollector struct {
poolStatScraper StatisticScraper
overallErrors prometheus.Counter
overallNodeErrors *prometheus.CounterVec
overallNodeRequests *prometheus.CounterVec
currentErrors *prometheus.GaugeVec
requestDuration *prometheus.GaugeVec
}
func newGateMetrics(scraper StatisticScraper) *GateMetrics {
stateMetric := newStateMetrics()
stateMetric.register()
poolMetric := newPoolMetricsCollector(scraper)
poolMetric.register()
return &GateMetrics{
stateMetrics: *stateMetric,
stateMetrics: *stateMetric,
poolMetricsCollector: *poolMetric,
}
}
@ -50,6 +86,131 @@ func (m stateMetrics) SetHealth(s int32) {
m.healthCheck.Set(float64(s))
}
func newPoolMetricsCollector(scraper StatisticScraper) *poolMetricsCollector {
overallErrors := prometheus.NewCounter(
prometheus.CounterOpts{
Namespace: namespace,
Subsystem: poolSubsystem,
Name: "overall_errors",
Help: "Total number of errors in pool",
},
)
overallNodeErrors := prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: namespace,
Subsystem: poolSubsystem,
Name: "overall_node_errors",
Help: "Total number of errors for connection in pool",
},
[]string{
"node",
},
)
overallNodeRequests := prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: namespace,
Subsystem: poolSubsystem,
Name: "overall_node_requests",
Help: "Total number of requests to specific node in pool",
},
[]string{
"node",
},
)
currentErrors := prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: namespace,
Subsystem: poolSubsystem,
Name: "current_errors",
Help: "Number of errors on current connections that will be reset after the threshold",
},
[]string{
"node",
},
)
requestsDuration := prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: namespace,
Subsystem: poolSubsystem,
Name: "avg_request_duration",
Help: "Average request duration (in milliseconds) for specific method on node in pool",
},
[]string{
"node",
"method",
},
)
return &poolMetricsCollector{
poolStatScraper: scraper,
overallErrors: overallErrors,
overallNodeErrors: overallNodeErrors,
overallNodeRequests: overallNodeRequests,
currentErrors: currentErrors,
requestDuration: requestsDuration,
}
}
func (m *poolMetricsCollector) Collect(ch chan<- prometheus.Metric) {
m.updateStatistic()
m.overallErrors.Collect(ch)
m.overallNodeErrors.Collect(ch)
m.overallNodeRequests.Collect(ch)
m.currentErrors.Collect(ch)
m.requestDuration.Collect(ch)
}
func (m poolMetricsCollector) Describe(descs chan<- *prometheus.Desc) {
m.overallErrors.Describe(descs)
m.overallNodeErrors.Describe(descs)
m.overallNodeRequests.Describe(descs)
m.currentErrors.Describe(descs)
m.requestDuration.Describe(descs)
}
func (m *poolMetricsCollector) register() {
prometheus.MustRegister(m)
}
func (m *poolMetricsCollector) updateStatistic() {
stat := m.poolStatScraper.Statistic()
m.currentErrors.Reset()
m.requestDuration.Reset()
for _, node := range stat.Nodes() {
m.overallNodeErrors.WithLabelValues(node.Address()).Add(float64(node.OverallErrors()))
m.overallNodeRequests.WithLabelValues(node.Address()).Add(float64(node.Requests()))
m.currentErrors.WithLabelValues(node.Address()).Set(float64(node.CurrentErrors()))
m.updateRequestsDuration(node)
}
m.overallErrors.Add(float64(stat.OverallErrors()))
}
func (m *poolMetricsCollector) updateRequestsDuration(node pool.NodeStatistic) {
m.requestDuration.WithLabelValues(node.Address(), methodGetBalance).Set(float64(node.AverageGetBalance().Milliseconds()))
m.requestDuration.WithLabelValues(node.Address(), methodPutContainer).Set(float64(node.AveragePutContainer().Milliseconds()))
m.requestDuration.WithLabelValues(node.Address(), methodGetContainer).Set(float64(node.AverageGetContainer().Milliseconds()))
m.requestDuration.WithLabelValues(node.Address(), methodListContainer).Set(float64(node.AverageListContainer().Milliseconds()))
m.requestDuration.WithLabelValues(node.Address(), methodDeleteContainer).Set(float64(node.AverageDeleteContainer().Milliseconds()))
m.requestDuration.WithLabelValues(node.Address(), methodGetContainerEacl).Set(float64(node.AverageGetContainerEACL().Milliseconds()))
m.requestDuration.WithLabelValues(node.Address(), methodSetContainerEacl).Set(float64(node.AverageSetContainerEACL().Milliseconds()))
m.requestDuration.WithLabelValues(node.Address(), methodEndpointInfo).Set(float64(node.AverageEndpointInfo().Milliseconds()))
m.requestDuration.WithLabelValues(node.Address(), methodNetworkInfo).Set(float64(node.AverageNetworkInfo().Milliseconds()))
m.requestDuration.WithLabelValues(node.Address(), methodPutObject).Set(float64(node.AveragePutObject().Milliseconds()))
m.requestDuration.WithLabelValues(node.Address(), methodDeleteObject).Set(float64(node.AverageDeleteObject().Milliseconds()))
m.requestDuration.WithLabelValues(node.Address(), methodGetObject).Set(float64(node.AverageGetObject().Milliseconds()))
m.requestDuration.WithLabelValues(node.Address(), methodHeadObject).Set(float64(node.AverageHeadObject().Milliseconds()))
m.requestDuration.WithLabelValues(node.Address(), methodRangeObject).Set(float64(node.AverageRangeObject().Milliseconds()))
m.requestDuration.WithLabelValues(node.Address(), methodCreateSession).Set(float64(node.AverageCreateSession().Milliseconds()))
}
// NewPrometheusService creates a new service for gathering prometheus metrics.
func NewPrometheusService(v *viper.Viper, log *zap.Logger) *Service {
if log == nil {

View file

@ -579,3 +579,18 @@ func (x *AuthmateNeoFS) CreateObject(ctx context.Context, prm tokens.PrmObjectCr
Payload: bytes.NewReader(prm.Payload),
})
}
// PoolStatistic is a mediator which implements authmate.NeoFS through pool.Pool.
type PoolStatistic struct {
pool *pool.Pool
}
// NewPoolStatistic creates new PoolStatistic using provided pool.Pool.
func NewPoolStatistic(p *pool.Pool) *PoolStatistic {
return &PoolStatistic{pool: p}
}
// Statistic implements interface method.
func (x *PoolStatistic) Statistic() pool.Statistic {
return x.pool.Statistic()
}