From c64871ff6c75e650df7262f625d1050a22a03cfb Mon Sep 17 00:00:00 2001 From: Marina Biryukova Date: Tue, 30 Jul 2024 19:40:58 +0300 Subject: [PATCH] Add intermediate metrics Signed-off-by: Marina Biryukova --- cmd/s3-gw/app.go | 1 + go.mod | 4 ++- go.sum | 8 +++--- metrics/app.go | 3 ++- metrics/desc.go | 26 +++++++++++++++++++ metrics/gate.go | 12 ++++++++- metrics/pool.go | 62 ++++++++++++++++++++++++++++++--------------- metrics/treepool.go | 62 +++++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 151 insertions(+), 27 deletions(-) create mode 100644 metrics/treepool.go diff --git a/cmd/s3-gw/app.go b/cmd/s3-gw/app.go index f46c39dc..b2acdfa4 100644 --- a/cmd/s3-gw/app.go +++ b/cmd/s3-gw/app.go @@ -443,6 +443,7 @@ func (a *App) initMetrics() { cfg := metrics.AppMetricsConfig{ Logger: a.log, PoolStatistics: frostfs.NewPoolStatistic(a.pool), + TreeStatistic: a.treePool, Enabled: a.cfg.GetBool(cfgPrometheusEnabled), } diff --git a/go.mod b/go.mod index 9984415e..981ae5c3 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module git.frostfs.info/TrueCloudLab/frostfs-s3-gw go 1.21 require ( - git.frostfs.info/TrueCloudLab/frostfs-api-go/v2 v2.16.1-0.20240716113920-f517e3949164 + git.frostfs.info/TrueCloudLab/frostfs-api-go/v2 v2.16.1-0.20240726072425-3dfa2f4fd65e git.frostfs.info/TrueCloudLab/frostfs-contract v0.19.3-0.20240621131249-49e5270f673e git.frostfs.info/TrueCloudLab/frostfs-observability v0.0.0-20230531082742-c97d21411eb6 git.frostfs.info/TrueCloudLab/frostfs-sdk-go v0.0.0-20240722121227-fa89999d919c @@ -36,6 +36,8 @@ require ( google.golang.org/protobuf v1.33.0 ) +replace git.frostfs.info/TrueCloudLab/frostfs-sdk-go v0.0.0-20240722121227-fa89999d919c => git.frostfs.info/mbiryukova/frostfs-sdk-go v0.0.0-20240731115503-64dc65b6661d + require ( git.frostfs.info/TrueCloudLab/frostfs-crypto v0.6.0 // indirect git.frostfs.info/TrueCloudLab/hrw v1.2.1 // indirect diff --git a/go.sum b/go.sum index cb9e3b0f..e3f89eaa 100644 --- a/go.sum +++ b/go.sum @@ -36,16 +36,14 @@ cloud.google.com/go/storage v1.8.0/go.mod h1:Wv1Oy7z6Yz3DshWRJFhqM/UCfaWIRTdp0RX cloud.google.com/go/storage v1.10.0/go.mod h1:FLPqc6j+Ki4BU591ie1oL6qBQGu2Bl/tZ9ullr3+Kg0= cloud.google.com/go/storage v1.14.0/go.mod h1:GrKmX003DSIwi9o29oFT7YDnHYwZoctc3fOKtUw0Xmo= dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= -git.frostfs.info/TrueCloudLab/frostfs-api-go/v2 v2.16.1-0.20240716113920-f517e3949164 h1:XxvwQKJT/f16qS3df5PBQPRYKkhy0/A7zH6644QpKD0= -git.frostfs.info/TrueCloudLab/frostfs-api-go/v2 v2.16.1-0.20240716113920-f517e3949164/go.mod h1:OBDSr+DqV1z4VDouoX3YMleNc4DPBVBWTG3WDT2PK1o= +git.frostfs.info/TrueCloudLab/frostfs-api-go/v2 v2.16.1-0.20240726072425-3dfa2f4fd65e h1:gEWT+70E/RvGkxtSv+PlyUN2vtJVymhQa1mypvrXukM= +git.frostfs.info/TrueCloudLab/frostfs-api-go/v2 v2.16.1-0.20240726072425-3dfa2f4fd65e/go.mod h1:OBDSr+DqV1z4VDouoX3YMleNc4DPBVBWTG3WDT2PK1o= git.frostfs.info/TrueCloudLab/frostfs-contract v0.19.3-0.20240621131249-49e5270f673e h1:kcBqZBiFIUBATUqEuvVigtkJJWQ2Gug/eYXn967o3M4= git.frostfs.info/TrueCloudLab/frostfs-contract v0.19.3-0.20240621131249-49e5270f673e/go.mod h1:F/fe1OoIDKr5Bz99q4sriuHDuf3aZefZy9ZsCqEtgxc= git.frostfs.info/TrueCloudLab/frostfs-crypto v0.6.0 h1:FxqFDhQYYgpe41qsIHVOcdzSVCB8JNSfPG7Uk4r2oSk= git.frostfs.info/TrueCloudLab/frostfs-crypto v0.6.0/go.mod h1:RUIKZATQLJ+TaYQa60X2fTDwfuhMfm8Ar60bQ5fr+vU= git.frostfs.info/TrueCloudLab/frostfs-observability v0.0.0-20230531082742-c97d21411eb6 h1:aGQ6QaAnTerQ5Dq5b2/f9DUQtSqPkZZ/bkMx/HKuLCo= git.frostfs.info/TrueCloudLab/frostfs-observability v0.0.0-20230531082742-c97d21411eb6/go.mod h1:W8Nn08/l6aQ7UlIbpF7FsQou7TVpcRD1ZT1KG4TrFhE= -git.frostfs.info/TrueCloudLab/frostfs-sdk-go v0.0.0-20240722121227-fa89999d919c h1:8ZS6eUFnOhzUo9stFqwq1Zyq+Y5YNcYAidCGICcZVL4= -git.frostfs.info/TrueCloudLab/frostfs-sdk-go v0.0.0-20240722121227-fa89999d919c/go.mod h1:vluJ/+yQMcq8ZIZZSA7Te+JKClr0lgtRErjICvb8wto= git.frostfs.info/TrueCloudLab/hrw v1.2.1 h1:ccBRK21rFvY5R1WotI6LNoPlizk7qSvdfD8lNIRudVc= git.frostfs.info/TrueCloudLab/hrw v1.2.1/go.mod h1:C1Ygde2n843yTZEQ0FP69jYiuaYV0kriLvP4zm8JuvM= git.frostfs.info/TrueCloudLab/policy-engine v0.0.0-20240611102930-ac965e8d176a h1:Bk1fB4cQASPKgAVGCdlBOEp5ohZfDxqK6fZM8eP+Emo= @@ -56,6 +54,8 @@ git.frostfs.info/TrueCloudLab/tzhash v1.8.0 h1:UFMnUIk0Zh17m8rjGHJMqku2hCgaXDqjq git.frostfs.info/TrueCloudLab/tzhash v1.8.0/go.mod h1:dhY+oy274hV8wGvGL4MwwMpdL3GYvaX1a8GQZQHvlF8= git.frostfs.info/TrueCloudLab/zapjournald v0.0.0-20240124114243-cb2e66427d02 h1:HeY8n27VyPRQe49l/fzyVMkWEB2fsLJYKp64pwA7tz4= git.frostfs.info/TrueCloudLab/zapjournald v0.0.0-20240124114243-cb2e66427d02/go.mod h1:rQFJJdEOV7KbbMtQYR2lNfiZk+ONRDJSbMCTWxKt8Fw= +git.frostfs.info/mbiryukova/frostfs-sdk-go v0.0.0-20240731115503-64dc65b6661d h1:1b4r91VZqTC+v7h7B3vrnEqgGsYWipFPlzgMa/VAKrQ= +git.frostfs.info/mbiryukova/frostfs-sdk-go v0.0.0-20240731115503-64dc65b6661d/go.mod h1:DlJmgV4/qkFkx2ab+YWznlMijiF2yZHnrJswJOB7XGs= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= diff --git a/metrics/app.go b/metrics/app.go index 930331b0..1a2ef077 100644 --- a/metrics/app.go +++ b/metrics/app.go @@ -20,6 +20,7 @@ type AppMetrics struct { type AppMetricsConfig struct { Logger *zap.Logger PoolStatistics StatisticScraper + TreeStatistic TreePoolStatistic Registerer prometheus.Registerer Enabled bool } @@ -36,7 +37,7 @@ func NewAppMetrics(cfg AppMetricsConfig) *AppMetrics { return &AppMetrics{ logger: cfg.Logger, - gate: NewGateMetrics(cfg.PoolStatistics, registry), + gate: NewGateMetrics(cfg.PoolStatistics, cfg.TreeStatistic, registry), enabled: cfg.Enabled, } } diff --git a/metrics/desc.go b/metrics/desc.go index ae9fc0f4..54836c8d 100644 --- a/metrics/desc.go +++ b/metrics/desc.go @@ -48,6 +48,14 @@ var appMetricsDesc = map[string]map[string]Description{ Help: "Average request duration (in milliseconds) for specific method on node in pool", VariableLabels: []string{"node", "method"}, }, + interAvgRequestDurationMetric: Description{ + Type: dto.MetricType_GAUGE, + Namespace: namespace, + Subsystem: poolSubsystem, + Name: interAvgRequestDurationMetric, + Help: "Intermediate average request duration (in milliseconds) for specific method on node in pool", + VariableLabels: []string{"node", "method"}, + }, currentNodesMetric: Description{ Type: dto.MetricType_GAUGE, Namespace: namespace, @@ -144,6 +152,24 @@ var appMetricsDesc = map[string]map[string]Description{ VariableLabels: []string{"endpoint"}, }, }, + treePoolSubsystem: { + avgRequestDurationMetric: Description{ + Type: dto.MetricType_GAUGE, + Namespace: namespace, + Subsystem: treePoolSubsystem, + Name: avgRequestDurationMetric, + Help: "Average request duration (in milliseconds) for specific method in tree pool", + VariableLabels: []string{"method"}, + }, + interAvgRequestDurationMetric: Description{ + Type: dto.MetricType_GAUGE, + Namespace: namespace, + Subsystem: treePoolSubsystem, + Name: interAvgRequestDurationMetric, + Help: "Intermediate average request duration (in milliseconds) for specific method in tree pool", + VariableLabels: []string{"method"}, + }, + }, } type Description struct { diff --git a/metrics/gate.go b/metrics/gate.go index 5a928366..02d608bd 100644 --- a/metrics/gate.go +++ b/metrics/gate.go @@ -4,6 +4,7 @@ import ( "net/http" "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/pool" + "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/pool/tree" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" dto "github.com/prometheus/client_model/go" @@ -15,6 +16,10 @@ type StatisticScraper interface { Statistic() pool.Statistic } +type TreePoolStatistic interface { + Statistic() tree.Statistic +} + type GateMetrics struct { registry prometheus.Registerer State *StateMetrics @@ -22,9 +27,10 @@ type GateMetrics struct { Billing *billingMetrics Stats *APIStatMetrics HTTPServer *httpServerMetrics + TreePool *treePoolMetricsCollector } -func NewGateMetrics(scraper StatisticScraper, registry prometheus.Registerer) *GateMetrics { +func NewGateMetrics(scraper StatisticScraper, treeScraper TreePoolStatistic, registry prometheus.Registerer) *GateMetrics { stateMetric := newStateMetrics() registry.MustRegister(stateMetric) @@ -40,6 +46,9 @@ func NewGateMetrics(scraper StatisticScraper, registry prometheus.Registerer) *G serverMetric := newHTTPServerMetrics() registry.MustRegister(serverMetric) + treePoolMetric := newTreePoolMetricsCollector(treeScraper) + registry.MustRegister(treePoolMetric) + return &GateMetrics{ registry: registry, State: stateMetric, @@ -47,6 +56,7 @@ func NewGateMetrics(scraper StatisticScraper, registry prometheus.Registerer) *G Billing: billingMetric, Stats: statsMetric, HTTPServer: serverMetric, + TreePool: treePoolMetric, } } diff --git a/metrics/pool.go b/metrics/pool.go index 538d6446..7aef2f5d 100644 --- a/metrics/pool.go +++ b/metrics/pool.go @@ -10,12 +10,13 @@ const ( ) const ( - overallErrorsMetric = "overall_errors" - overallNodeErrorsMetric = "overall_node_errors" - overallNodeRequestsMetric = "overall_node_requests" - currentErrorMetric = "current_errors" - avgRequestDurationMetric = "avg_request_duration" - currentNodesMetric = "current_nodes" + overallErrorsMetric = "overall_errors" + overallNodeErrorsMetric = "overall_node_errors" + overallNodeRequestsMetric = "overall_node_requests" + currentErrorMetric = "current_errors" + avgRequestDurationMetric = "avg_request_duration" + interAvgRequestDurationMetric = "inter_avg_request_duration" + currentNodesMetric = "current_nodes" ) const ( @@ -37,24 +38,26 @@ const ( ) type poolMetricsCollector struct { - poolStatScraper StatisticScraper - overallErrors prometheus.Gauge - overallNodeErrors *prometheus.GaugeVec - overallNodeRequests *prometheus.GaugeVec - currentErrors *prometheus.GaugeVec - requestDuration *prometheus.GaugeVec - currentNodes *prometheus.GaugeVec + poolStatScraper StatisticScraper + overallErrors prometheus.Gauge + overallNodeErrors *prometheus.GaugeVec + overallNodeRequests *prometheus.GaugeVec + currentErrors *prometheus.GaugeVec + requestDuration *prometheus.GaugeVec + interRequestDuration *prometheus.GaugeVec + currentNodes *prometheus.GaugeVec } func newPoolMetricsCollector(scraper StatisticScraper) *poolMetricsCollector { return &poolMetricsCollector{ - poolStatScraper: scraper, - overallErrors: mustNewGauge(appMetricsDesc[poolSubsystem][overallErrorsMetric]), - overallNodeErrors: mustNewGaugeVec(appMetricsDesc[poolSubsystem][overallNodeErrorsMetric]), - overallNodeRequests: mustNewGaugeVec(appMetricsDesc[poolSubsystem][overallNodeRequestsMetric]), - currentErrors: mustNewGaugeVec(appMetricsDesc[poolSubsystem][currentErrorMetric]), - requestDuration: mustNewGaugeVec(appMetricsDesc[poolSubsystem][avgRequestDurationMetric]), - currentNodes: mustNewGaugeVec(appMetricsDesc[poolSubsystem][currentNodesMetric]), + poolStatScraper: scraper, + overallErrors: mustNewGauge(appMetricsDesc[poolSubsystem][overallErrorsMetric]), + overallNodeErrors: mustNewGaugeVec(appMetricsDesc[poolSubsystem][overallNodeErrorsMetric]), + overallNodeRequests: mustNewGaugeVec(appMetricsDesc[poolSubsystem][overallNodeRequestsMetric]), + currentErrors: mustNewGaugeVec(appMetricsDesc[poolSubsystem][currentErrorMetric]), + requestDuration: mustNewGaugeVec(appMetricsDesc[poolSubsystem][avgRequestDurationMetric]), + interRequestDuration: mustNewGaugeVec(appMetricsDesc[poolSubsystem][interAvgRequestDurationMetric]), + currentNodes: mustNewGaugeVec(appMetricsDesc[poolSubsystem][currentNodesMetric]), } } @@ -65,6 +68,7 @@ func (m *poolMetricsCollector) Collect(ch chan<- prometheus.Metric) { m.overallNodeRequests.Collect(ch) m.currentErrors.Collect(ch) m.requestDuration.Collect(ch) + m.interRequestDuration.Collect(ch) m.currentNodes.Collect(ch) } @@ -74,6 +78,7 @@ func (m *poolMetricsCollector) Describe(descs chan<- *prometheus.Desc) { m.overallNodeRequests.Describe(descs) m.currentErrors.Describe(descs) m.requestDuration.Describe(descs) + m.interRequestDuration.Describe(descs) m.currentNodes.Describe(descs) } @@ -84,6 +89,7 @@ func (m *poolMetricsCollector) updateStatistic() { m.overallNodeRequests.Reset() m.currentErrors.Reset() m.requestDuration.Reset() + m.interRequestDuration.Reset() m.currentNodes.Reset() for _, node := range stat.Nodes() { @@ -117,4 +123,20 @@ func (m *poolMetricsCollector) updateRequestsDuration(node pool.NodeStatistic) { m.requestDuration.WithLabelValues(node.Address(), methodHeadObject).Set(float64(node.AverageHeadObject().Milliseconds())) m.requestDuration.WithLabelValues(node.Address(), methodRangeObject).Set(float64(node.AverageRangeObject().Milliseconds())) m.requestDuration.WithLabelValues(node.Address(), methodCreateSession).Set(float64(node.AverageCreateSession().Milliseconds())) + + m.interRequestDuration.WithLabelValues(node.Address(), methodGetBalance).Set(float64(node.InterAverageGetBalance().Milliseconds())) + m.interRequestDuration.WithLabelValues(node.Address(), methodPutContainer).Set(float64(node.InterAveragePutContainer().Milliseconds())) + m.interRequestDuration.WithLabelValues(node.Address(), methodGetContainer).Set(float64(node.InterAverageGetContainer().Milliseconds())) + m.interRequestDuration.WithLabelValues(node.Address(), methodListContainer).Set(float64(node.InterAverageListContainer().Milliseconds())) + m.interRequestDuration.WithLabelValues(node.Address(), methodDeleteContainer).Set(float64(node.InterAverageDeleteContainer().Milliseconds())) + m.interRequestDuration.WithLabelValues(node.Address(), methodGetContainerEacl).Set(float64(node.InterAverageGetContainerEACL().Milliseconds())) + m.interRequestDuration.WithLabelValues(node.Address(), methodSetContainerEacl).Set(float64(node.InterAverageSetContainerEACL().Milliseconds())) + m.interRequestDuration.WithLabelValues(node.Address(), methodEndpointInfo).Set(float64(node.InterAverageEndpointInfo().Milliseconds())) + m.interRequestDuration.WithLabelValues(node.Address(), methodNetworkInfo).Set(float64(node.InterAverageNetworkInfo().Milliseconds())) + m.interRequestDuration.WithLabelValues(node.Address(), methodPutObject).Set(float64(node.InterAveragePutObject().Milliseconds())) + m.interRequestDuration.WithLabelValues(node.Address(), methodDeleteObject).Set(float64(node.InterAverageDeleteObject().Milliseconds())) + m.interRequestDuration.WithLabelValues(node.Address(), methodGetObject).Set(float64(node.InterAverageGetObject().Milliseconds())) + m.interRequestDuration.WithLabelValues(node.Address(), methodHeadObject).Set(float64(node.InterAverageHeadObject().Milliseconds())) + m.interRequestDuration.WithLabelValues(node.Address(), methodRangeObject).Set(float64(node.InterAverageRangeObject().Milliseconds())) + m.interRequestDuration.WithLabelValues(node.Address(), methodCreateSession).Set(float64(node.InterAverageCreateSession().Milliseconds())) } diff --git a/metrics/treepool.go b/metrics/treepool.go new file mode 100644 index 00000000..3261fc21 --- /dev/null +++ b/metrics/treepool.go @@ -0,0 +1,62 @@ +package metrics + +import ( + "github.com/prometheus/client_golang/prometheus" +) + +const ( + treePoolSubsystem = "tree_pool" + + methodGetNodes = "get_nodes" + methodGetSubTree = "get_sub_tree" + methodAddNode = "add_node" + methodAddNodeByPath = "add_node_by_path" + methodMoveNode = "move_node" + methodRemoveNode = "remove_node" +) + +type treePoolMetricsCollector struct { + statScraper TreePoolStatistic + requestDuration *prometheus.GaugeVec + interRequestDuration *prometheus.GaugeVec +} + +func newTreePoolMetricsCollector(stat TreePoolStatistic) *treePoolMetricsCollector { + return &treePoolMetricsCollector{ + statScraper: stat, + requestDuration: mustNewGaugeVec(appMetricsDesc[treePoolSubsystem][avgRequestDurationMetric]), + interRequestDuration: mustNewGaugeVec(appMetricsDesc[treePoolSubsystem][interAvgRequestDurationMetric]), + } +} + +func (m *treePoolMetricsCollector) Collect(ch chan<- prometheus.Metric) { + m.updateStatistic() + m.requestDuration.Collect(ch) + m.interRequestDuration.Collect(ch) +} + +func (m *treePoolMetricsCollector) Describe(descs chan<- *prometheus.Desc) { + m.requestDuration.Describe(descs) + m.interRequestDuration.Describe(descs) +} + +func (m *treePoolMetricsCollector) updateStatistic() { + stat := m.statScraper.Statistic() + + m.requestDuration.Reset() + m.interRequestDuration.Reset() + + m.requestDuration.WithLabelValues(methodGetNodes).Set(float64(stat.AverageGetNodes().Milliseconds())) + m.requestDuration.WithLabelValues(methodGetSubTree).Set(float64(stat.AverageGetSubTree().Milliseconds())) + m.requestDuration.WithLabelValues(methodAddNode).Set(float64(stat.AverageAddNode().Milliseconds())) + m.requestDuration.WithLabelValues(methodAddNodeByPath).Set(float64(stat.AverageAddNodeByPath().Milliseconds())) + m.requestDuration.WithLabelValues(methodMoveNode).Set(float64(stat.AverageMoveNode().Milliseconds())) + m.requestDuration.WithLabelValues(methodRemoveNode).Set(float64(stat.AverageRemoveNode().Milliseconds())) + + m.interRequestDuration.WithLabelValues(methodGetNodes).Set(float64(stat.InterAverageGetNodes().Milliseconds())) + m.interRequestDuration.WithLabelValues(methodGetSubTree).Set(float64(stat.InterAverageGetSubTree().Milliseconds())) + m.interRequestDuration.WithLabelValues(methodAddNode).Set(float64(stat.InterAverageAddNode().Milliseconds())) + m.interRequestDuration.WithLabelValues(methodAddNodeByPath).Set(float64(stat.InterAverageAddNodeByPath().Milliseconds())) + m.interRequestDuration.WithLabelValues(methodMoveNode).Set(float64(stat.InterAverageMoveNode().Milliseconds())) + m.interRequestDuration.WithLabelValues(methodRemoveNode).Set(float64(stat.InterAverageRemoveNode().Milliseconds())) +}