Add intermediate metrics

Signed-off-by: Marina Biryukova <m.biryukova@yadro.com>
This commit is contained in:
Marina Biryukova 2024-07-30 19:40:58 +03:00
parent 3bbc8cce39
commit c64871ff6c
8 changed files with 151 additions and 27 deletions

View file

@ -443,6 +443,7 @@ func (a *App) initMetrics() {
cfg := metrics.AppMetricsConfig{
Logger: a.log,
PoolStatistics: frostfs.NewPoolStatistic(a.pool),
TreeStatistic: a.treePool,
Enabled: a.cfg.GetBool(cfgPrometheusEnabled),
}

4
go.mod
View file

@ -3,7 +3,7 @@ module git.frostfs.info/TrueCloudLab/frostfs-s3-gw
go 1.21
require (
git.frostfs.info/TrueCloudLab/frostfs-api-go/v2 v2.16.1-0.20240716113920-f517e3949164
git.frostfs.info/TrueCloudLab/frostfs-api-go/v2 v2.16.1-0.20240726072425-3dfa2f4fd65e
git.frostfs.info/TrueCloudLab/frostfs-contract v0.19.3-0.20240621131249-49e5270f673e
git.frostfs.info/TrueCloudLab/frostfs-observability v0.0.0-20230531082742-c97d21411eb6
git.frostfs.info/TrueCloudLab/frostfs-sdk-go v0.0.0-20240722121227-fa89999d919c
@ -36,6 +36,8 @@ require (
google.golang.org/protobuf v1.33.0
)
replace git.frostfs.info/TrueCloudLab/frostfs-sdk-go v0.0.0-20240722121227-fa89999d919c => git.frostfs.info/mbiryukova/frostfs-sdk-go v0.0.0-20240731115503-64dc65b6661d
require (
git.frostfs.info/TrueCloudLab/frostfs-crypto v0.6.0 // indirect
git.frostfs.info/TrueCloudLab/hrw v1.2.1 // indirect

8
go.sum
View file

@ -36,16 +36,14 @@ cloud.google.com/go/storage v1.8.0/go.mod h1:Wv1Oy7z6Yz3DshWRJFhqM/UCfaWIRTdp0RX
cloud.google.com/go/storage v1.10.0/go.mod h1:FLPqc6j+Ki4BU591ie1oL6qBQGu2Bl/tZ9ullr3+Kg0=
cloud.google.com/go/storage v1.14.0/go.mod h1:GrKmX003DSIwi9o29oFT7YDnHYwZoctc3fOKtUw0Xmo=
dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU=
git.frostfs.info/TrueCloudLab/frostfs-api-go/v2 v2.16.1-0.20240716113920-f517e3949164 h1:XxvwQKJT/f16qS3df5PBQPRYKkhy0/A7zH6644QpKD0=
git.frostfs.info/TrueCloudLab/frostfs-api-go/v2 v2.16.1-0.20240716113920-f517e3949164/go.mod h1:OBDSr+DqV1z4VDouoX3YMleNc4DPBVBWTG3WDT2PK1o=
git.frostfs.info/TrueCloudLab/frostfs-api-go/v2 v2.16.1-0.20240726072425-3dfa2f4fd65e h1:gEWT+70E/RvGkxtSv+PlyUN2vtJVymhQa1mypvrXukM=
git.frostfs.info/TrueCloudLab/frostfs-api-go/v2 v2.16.1-0.20240726072425-3dfa2f4fd65e/go.mod h1:OBDSr+DqV1z4VDouoX3YMleNc4DPBVBWTG3WDT2PK1o=
git.frostfs.info/TrueCloudLab/frostfs-contract v0.19.3-0.20240621131249-49e5270f673e h1:kcBqZBiFIUBATUqEuvVigtkJJWQ2Gug/eYXn967o3M4=
git.frostfs.info/TrueCloudLab/frostfs-contract v0.19.3-0.20240621131249-49e5270f673e/go.mod h1:F/fe1OoIDKr5Bz99q4sriuHDuf3aZefZy9ZsCqEtgxc=
git.frostfs.info/TrueCloudLab/frostfs-crypto v0.6.0 h1:FxqFDhQYYgpe41qsIHVOcdzSVCB8JNSfPG7Uk4r2oSk=
git.frostfs.info/TrueCloudLab/frostfs-crypto v0.6.0/go.mod h1:RUIKZATQLJ+TaYQa60X2fTDwfuhMfm8Ar60bQ5fr+vU=
git.frostfs.info/TrueCloudLab/frostfs-observability v0.0.0-20230531082742-c97d21411eb6 h1:aGQ6QaAnTerQ5Dq5b2/f9DUQtSqPkZZ/bkMx/HKuLCo=
git.frostfs.info/TrueCloudLab/frostfs-observability v0.0.0-20230531082742-c97d21411eb6/go.mod h1:W8Nn08/l6aQ7UlIbpF7FsQou7TVpcRD1ZT1KG4TrFhE=
git.frostfs.info/TrueCloudLab/frostfs-sdk-go v0.0.0-20240722121227-fa89999d919c h1:8ZS6eUFnOhzUo9stFqwq1Zyq+Y5YNcYAidCGICcZVL4=
git.frostfs.info/TrueCloudLab/frostfs-sdk-go v0.0.0-20240722121227-fa89999d919c/go.mod h1:vluJ/+yQMcq8ZIZZSA7Te+JKClr0lgtRErjICvb8wto=
git.frostfs.info/TrueCloudLab/hrw v1.2.1 h1:ccBRK21rFvY5R1WotI6LNoPlizk7qSvdfD8lNIRudVc=
git.frostfs.info/TrueCloudLab/hrw v1.2.1/go.mod h1:C1Ygde2n843yTZEQ0FP69jYiuaYV0kriLvP4zm8JuvM=
git.frostfs.info/TrueCloudLab/policy-engine v0.0.0-20240611102930-ac965e8d176a h1:Bk1fB4cQASPKgAVGCdlBOEp5ohZfDxqK6fZM8eP+Emo=
@ -56,6 +54,8 @@ git.frostfs.info/TrueCloudLab/tzhash v1.8.0 h1:UFMnUIk0Zh17m8rjGHJMqku2hCgaXDqjq
git.frostfs.info/TrueCloudLab/tzhash v1.8.0/go.mod h1:dhY+oy274hV8wGvGL4MwwMpdL3GYvaX1a8GQZQHvlF8=
git.frostfs.info/TrueCloudLab/zapjournald v0.0.0-20240124114243-cb2e66427d02 h1:HeY8n27VyPRQe49l/fzyVMkWEB2fsLJYKp64pwA7tz4=
git.frostfs.info/TrueCloudLab/zapjournald v0.0.0-20240124114243-cb2e66427d02/go.mod h1:rQFJJdEOV7KbbMtQYR2lNfiZk+ONRDJSbMCTWxKt8Fw=
git.frostfs.info/mbiryukova/frostfs-sdk-go v0.0.0-20240731115503-64dc65b6661d h1:1b4r91VZqTC+v7h7B3vrnEqgGsYWipFPlzgMa/VAKrQ=
git.frostfs.info/mbiryukova/frostfs-sdk-go v0.0.0-20240731115503-64dc65b6661d/go.mod h1:DlJmgV4/qkFkx2ab+YWznlMijiF2yZHnrJswJOB7XGs=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU=

View file

@ -20,6 +20,7 @@ type AppMetrics struct {
// AppMetricsConfig groups the parameters accepted by NewAppMetrics.
type AppMetricsConfig struct {
// Logger is stored on the resulting AppMetrics.
Logger *zap.Logger
// PoolStatistics is handed to NewGateMetrics as the object pool statistics source.
PoolStatistics StatisticScraper
// TreeStatistic is handed to NewGateMetrics as the tree pool statistics source.
TreeStatistic TreePoolStatistic
// Registerer is presumably the Prometheus registerer the metrics are
// registered with — its handling is not visible here, confirm in NewAppMetrics.
Registerer prometheus.Registerer
// Enabled is copied to the resulting AppMetrics.
Enabled bool
}
@ -36,7 +37,7 @@ func NewAppMetrics(cfg AppMetricsConfig) *AppMetrics {
return &AppMetrics{
logger: cfg.Logger,
gate: NewGateMetrics(cfg.PoolStatistics, registry),
gate: NewGateMetrics(cfg.PoolStatistics, cfg.TreeStatistic, registry),
enabled: cfg.Enabled,
}
}

View file

@ -48,6 +48,14 @@ var appMetricsDesc = map[string]map[string]Description{
Help: "Average request duration (in milliseconds) for specific method on node in pool",
VariableLabels: []string{"node", "method"},
},
interAvgRequestDurationMetric: Description{
Type: dto.MetricType_GAUGE,
Namespace: namespace,
Subsystem: poolSubsystem,
Name: interAvgRequestDurationMetric,
Help: "Intermediate average request duration (in milliseconds) for specific method on node in pool",
VariableLabels: []string{"node", "method"},
},
currentNodesMetric: Description{
Type: dto.MetricType_GAUGE,
Namespace: namespace,
@ -144,6 +152,24 @@ var appMetricsDesc = map[string]map[string]Description{
VariableLabels: []string{"endpoint"},
},
},
treePoolSubsystem: {
avgRequestDurationMetric: Description{
Type: dto.MetricType_GAUGE,
Namespace: namespace,
Subsystem: treePoolSubsystem,
Name: avgRequestDurationMetric,
Help: "Average request duration (in milliseconds) for specific method in tree pool",
VariableLabels: []string{"method"},
},
interAvgRequestDurationMetric: Description{
Type: dto.MetricType_GAUGE,
Namespace: namespace,
Subsystem: treePoolSubsystem,
Name: interAvgRequestDurationMetric,
Help: "Intermediate average request duration (in milliseconds) for specific method in tree pool",
VariableLabels: []string{"method"},
},
},
}
type Description struct {

View file

@ -4,6 +4,7 @@ import (
"net/http"
"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/pool"
"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/pool/tree"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
dto "github.com/prometheus/client_model/go"
@ -15,6 +16,10 @@ type StatisticScraper interface {
Statistic() pool.Statistic
}
// TreePoolStatistic is the source of tree pool statistics consumed by the
// tree pool metrics collector.
type TreePoolStatistic interface {
// Statistic returns the tree pool statistics value.
Statistic() tree.Statistic
}
type GateMetrics struct {
registry prometheus.Registerer
State *StateMetrics
@ -22,9 +27,10 @@ type GateMetrics struct {
Billing *billingMetrics
Stats *APIStatMetrics
HTTPServer *httpServerMetrics
TreePool *treePoolMetricsCollector
}
func NewGateMetrics(scraper StatisticScraper, registry prometheus.Registerer) *GateMetrics {
func NewGateMetrics(scraper StatisticScraper, treeScraper TreePoolStatistic, registry prometheus.Registerer) *GateMetrics {
stateMetric := newStateMetrics()
registry.MustRegister(stateMetric)
@ -40,6 +46,9 @@ func NewGateMetrics(scraper StatisticScraper, registry prometheus.Registerer) *G
serverMetric := newHTTPServerMetrics()
registry.MustRegister(serverMetric)
treePoolMetric := newTreePoolMetricsCollector(treeScraper)
registry.MustRegister(treePoolMetric)
return &GateMetrics{
registry: registry,
State: stateMetric,
@ -47,6 +56,7 @@ func NewGateMetrics(scraper StatisticScraper, registry prometheus.Registerer) *G
Billing: billingMetric,
Stats: statsMetric,
HTTPServer: serverMetric,
TreePool: treePoolMetric,
}
}

View file

@ -10,12 +10,13 @@ const (
)
const (
overallErrorsMetric = "overall_errors"
overallNodeErrorsMetric = "overall_node_errors"
overallNodeRequestsMetric = "overall_node_requests"
currentErrorMetric = "current_errors"
avgRequestDurationMetric = "avg_request_duration"
currentNodesMetric = "current_nodes"
overallErrorsMetric = "overall_errors"
overallNodeErrorsMetric = "overall_node_errors"
overallNodeRequestsMetric = "overall_node_requests"
currentErrorMetric = "current_errors"
avgRequestDurationMetric = "avg_request_duration"
interAvgRequestDurationMetric = "inter_avg_request_duration"
currentNodesMetric = "current_nodes"
)
const (
@ -37,24 +38,26 @@ const (
)
type poolMetricsCollector struct {
poolStatScraper StatisticScraper
overallErrors prometheus.Gauge
overallNodeErrors *prometheus.GaugeVec
overallNodeRequests *prometheus.GaugeVec
currentErrors *prometheus.GaugeVec
requestDuration *prometheus.GaugeVec
currentNodes *prometheus.GaugeVec
poolStatScraper StatisticScraper
overallErrors prometheus.Gauge
overallNodeErrors *prometheus.GaugeVec
overallNodeRequests *prometheus.GaugeVec
currentErrors *prometheus.GaugeVec
requestDuration *prometheus.GaugeVec
interRequestDuration *prometheus.GaugeVec
currentNodes *prometheus.GaugeVec
}
func newPoolMetricsCollector(scraper StatisticScraper) *poolMetricsCollector {
return &poolMetricsCollector{
poolStatScraper: scraper,
overallErrors: mustNewGauge(appMetricsDesc[poolSubsystem][overallErrorsMetric]),
overallNodeErrors: mustNewGaugeVec(appMetricsDesc[poolSubsystem][overallNodeErrorsMetric]),
overallNodeRequests: mustNewGaugeVec(appMetricsDesc[poolSubsystem][overallNodeRequestsMetric]),
currentErrors: mustNewGaugeVec(appMetricsDesc[poolSubsystem][currentErrorMetric]),
requestDuration: mustNewGaugeVec(appMetricsDesc[poolSubsystem][avgRequestDurationMetric]),
currentNodes: mustNewGaugeVec(appMetricsDesc[poolSubsystem][currentNodesMetric]),
poolStatScraper: scraper,
overallErrors: mustNewGauge(appMetricsDesc[poolSubsystem][overallErrorsMetric]),
overallNodeErrors: mustNewGaugeVec(appMetricsDesc[poolSubsystem][overallNodeErrorsMetric]),
overallNodeRequests: mustNewGaugeVec(appMetricsDesc[poolSubsystem][overallNodeRequestsMetric]),
currentErrors: mustNewGaugeVec(appMetricsDesc[poolSubsystem][currentErrorMetric]),
requestDuration: mustNewGaugeVec(appMetricsDesc[poolSubsystem][avgRequestDurationMetric]),
interRequestDuration: mustNewGaugeVec(appMetricsDesc[poolSubsystem][interAvgRequestDurationMetric]),
currentNodes: mustNewGaugeVec(appMetricsDesc[poolSubsystem][currentNodesMetric]),
}
}
@ -65,6 +68,7 @@ func (m *poolMetricsCollector) Collect(ch chan<- prometheus.Metric) {
m.overallNodeRequests.Collect(ch)
m.currentErrors.Collect(ch)
m.requestDuration.Collect(ch)
m.interRequestDuration.Collect(ch)
m.currentNodes.Collect(ch)
}
@ -74,6 +78,7 @@ func (m *poolMetricsCollector) Describe(descs chan<- *prometheus.Desc) {
m.overallNodeRequests.Describe(descs)
m.currentErrors.Describe(descs)
m.requestDuration.Describe(descs)
m.interRequestDuration.Describe(descs)
m.currentNodes.Describe(descs)
}
@ -84,6 +89,7 @@ func (m *poolMetricsCollector) updateStatistic() {
m.overallNodeRequests.Reset()
m.currentErrors.Reset()
m.requestDuration.Reset()
m.interRequestDuration.Reset()
m.currentNodes.Reset()
for _, node := range stat.Nodes() {
@ -117,4 +123,20 @@ func (m *poolMetricsCollector) updateRequestsDuration(node pool.NodeStatistic) {
m.requestDuration.WithLabelValues(node.Address(), methodHeadObject).Set(float64(node.AverageHeadObject().Milliseconds()))
m.requestDuration.WithLabelValues(node.Address(), methodRangeObject).Set(float64(node.AverageRangeObject().Milliseconds()))
m.requestDuration.WithLabelValues(node.Address(), methodCreateSession).Set(float64(node.AverageCreateSession().Milliseconds()))
m.interRequestDuration.WithLabelValues(node.Address(), methodGetBalance).Set(float64(node.InterAverageGetBalance().Milliseconds()))
m.interRequestDuration.WithLabelValues(node.Address(), methodPutContainer).Set(float64(node.InterAveragePutContainer().Milliseconds()))
m.interRequestDuration.WithLabelValues(node.Address(), methodGetContainer).Set(float64(node.InterAverageGetContainer().Milliseconds()))
m.interRequestDuration.WithLabelValues(node.Address(), methodListContainer).Set(float64(node.InterAverageListContainer().Milliseconds()))
m.interRequestDuration.WithLabelValues(node.Address(), methodDeleteContainer).Set(float64(node.InterAverageDeleteContainer().Milliseconds()))
m.interRequestDuration.WithLabelValues(node.Address(), methodGetContainerEacl).Set(float64(node.InterAverageGetContainerEACL().Milliseconds()))
m.interRequestDuration.WithLabelValues(node.Address(), methodSetContainerEacl).Set(float64(node.InterAverageSetContainerEACL().Milliseconds()))
m.interRequestDuration.WithLabelValues(node.Address(), methodEndpointInfo).Set(float64(node.InterAverageEndpointInfo().Milliseconds()))
m.interRequestDuration.WithLabelValues(node.Address(), methodNetworkInfo).Set(float64(node.InterAverageNetworkInfo().Milliseconds()))
m.interRequestDuration.WithLabelValues(node.Address(), methodPutObject).Set(float64(node.InterAveragePutObject().Milliseconds()))
m.interRequestDuration.WithLabelValues(node.Address(), methodDeleteObject).Set(float64(node.InterAverageDeleteObject().Milliseconds()))
m.interRequestDuration.WithLabelValues(node.Address(), methodGetObject).Set(float64(node.InterAverageGetObject().Milliseconds()))
m.interRequestDuration.WithLabelValues(node.Address(), methodHeadObject).Set(float64(node.InterAverageHeadObject().Milliseconds()))
m.interRequestDuration.WithLabelValues(node.Address(), methodRangeObject).Set(float64(node.InterAverageRangeObject().Milliseconds()))
m.interRequestDuration.WithLabelValues(node.Address(), methodCreateSession).Set(float64(node.InterAverageCreateSession().Milliseconds()))
}

62
metrics/treepool.go Normal file
View file

@ -0,0 +1,62 @@
package metrics
import (
"github.com/prometheus/client_golang/prometheus"
)
const (
// treePoolSubsystem is the subsystem name of the tree pool metrics and the
// key of their descriptions in appMetricsDesc.
treePoolSubsystem = "tree_pool"
// Values of the "method" label reported for tree pool request durations.
methodGetNodes = "get_nodes"
methodGetSubTree = "get_sub_tree"
methodAddNode = "add_node"
methodAddNodeByPath = "add_node_by_path"
methodMoveNode = "move_node"
methodRemoveNode = "remove_node"
)
// treePoolMetricsCollector exposes tree pool request durations as Prometheus
// gauges. It implements Collect and Describe and refreshes its gauges from
// statScraper on every Collect call.
type treePoolMetricsCollector struct {
// statScraper supplies the statistics the gauges are filled from.
statScraper TreePoolStatistic
// requestDuration holds the average request duration per method.
requestDuration *prometheus.GaugeVec
// interRequestDuration holds the intermediate average request duration per method.
interRequestDuration *prometheus.GaugeVec
}
// newTreePoolMetricsCollector builds a collector that reads its data from stat.
// Both gauge vectors are created from the tree pool descriptions registered in
// appMetricsDesc.
func newTreePoolMetricsCollector(stat TreePoolStatistic) *treePoolMetricsCollector {
	descriptions := appMetricsDesc[treePoolSubsystem]

	collector := new(treePoolMetricsCollector)
	collector.statScraper = stat
	collector.requestDuration = mustNewGaugeVec(descriptions[avgRequestDurationMetric])
	collector.interRequestDuration = mustNewGaugeVec(descriptions[interAvgRequestDurationMetric])

	return collector
}
// Collect refreshes the gauges from the statistics source and then sends the
// average and intermediate average duration metrics, in that order, to ch.
func (m *treePoolMetricsCollector) Collect(ch chan<- prometheus.Metric) {
	m.updateStatistic()

	for _, vec := range []*prometheus.GaugeVec{m.requestDuration, m.interRequestDuration} {
		vec.Collect(ch)
	}
}
// Describe sends the descriptors of the average and intermediate average
// duration gauges, in that order, to descs.
func (m *treePoolMetricsCollector) Describe(descs chan<- *prometheus.Desc) {
	for _, vec := range []*prometheus.GaugeVec{m.requestDuration, m.interRequestDuration} {
		vec.Describe(descs)
	}
}
// updateStatistic takes the current statistics from the scraper, clears both
// gauge vectors and fills them again with one value per tree pool method.
// Durations are reported as whole milliseconds.
func (m *treePoolMetricsCollector) updateStatistic() {
	stat := m.statScraper.Statistic()

	m.requestDuration.Reset()
	m.interRequestDuration.Reset()

	// setAverage records the average duration (in ms) for a method.
	setAverage := func(method string, ms int64) {
		m.requestDuration.WithLabelValues(method).Set(float64(ms))
	}
	// setInterAverage records the intermediate average duration (in ms) for a method.
	setInterAverage := func(method string, ms int64) {
		m.interRequestDuration.WithLabelValues(method).Set(float64(ms))
	}

	setAverage(methodGetNodes, stat.AverageGetNodes().Milliseconds())
	setAverage(methodGetSubTree, stat.AverageGetSubTree().Milliseconds())
	setAverage(methodAddNode, stat.AverageAddNode().Milliseconds())
	setAverage(methodAddNodeByPath, stat.AverageAddNodeByPath().Milliseconds())
	setAverage(methodMoveNode, stat.AverageMoveNode().Milliseconds())
	setAverage(methodRemoveNode, stat.AverageRemoveNode().Milliseconds())

	setInterAverage(methodGetNodes, stat.InterAverageGetNodes().Milliseconds())
	setInterAverage(methodGetSubTree, stat.InterAverageGetSubTree().Milliseconds())
	setInterAverage(methodAddNode, stat.InterAverageAddNode().Milliseconds())
	setInterAverage(methodAddNodeByPath, stat.InterAverageAddNodeByPath().Milliseconds())
	setInterAverage(methodMoveNode, stat.InterAverageMoveNode().Milliseconds())
	setInterAverage(methodRemoveNode, stat.InterAverageRemoveNode().Milliseconds())
}