node: Add metrics for current GRPC endpoint status #580

Merged
fyrchik merged 1 commit from acid-ant/frostfs-node:bugfix/565-metrics-for-grpc-endpnt into master 2023-08-10 16:37:52 +00:00
5 changed files with 50 additions and 0 deletions

View file

@ -446,6 +446,8 @@ type cfgGRPC struct {
servers []*grpc.Server
endpoints []string
maxChunkSize uint64
maxAddrAmount uint64

View file

@ -69,11 +69,14 @@ func initGRPC(c *cfg) {
lis, err := net.Listen("tcp", sc.Endpoint())
if err != nil {
c.metricsCollector.GrpcServerMetrics().MarkUnhealthy(sc.Endpoint())

srv.Serve can fail too, what about setting status there?

`srv.Serve` can fail too, what about setting status there?

My fault, updated. I introduced a new slice because the listener returns a string representation of the address whose exact form depends on the implementation.

My fault, updated. I introduced a new slice because the listener returns a string representation of the address whose exact form depends on the implementation.

What are the differences between representations?

What are the differences between representations?

According to the docs, the exact form of the string is up to the implementation. It looks like there is no guarantee that String() will return a value equal to the argument passed to net.Listen(...)
https://github.com/golang/go/blob/master/src/net/net.go#L96

According to the docs, the exact form of the string is up to the implementation. It looks like there is no guarantee that String() will return a value equal to the argument passed to `net.Listen(...)` https://github.com/golang/go/blob/master/src/net/net.go#L96
c.log.Error(logs.FrostFSNodeCantListenGRPCEndpoint, zap.Error(err))
return
}
c.metricsCollector.GrpcServerMetrics().MarkHealthy(sc.Endpoint())
c.cfgGRPC.listeners = append(c.cfgGRPC.listeners, lis)
c.cfgGRPC.endpoints = append(c.cfgGRPC.endpoints, sc.Endpoint())
srv := grpc.NewServer(serverOpts...)
@ -96,6 +99,7 @@ func serveGRPC(c *cfg) {
srv := c.cfgGRPC.servers[i]
lis := c.cfgGRPC.listeners[i]
endpoint := c.cfgGRPC.endpoints[i]
go func() {
defer func() {
@ -111,6 +115,7 @@ func serveGRPC(c *cfg) {
)
if err := srv.Serve(lis); err != nil {
c.metricsCollector.GrpcServerMetrics().MarkUnhealthy(endpoint)
fmt.Println("gRPC server error", err)
}
}()

View file

@ -19,6 +19,7 @@ const (
stateSubsystem = "state"
treeServiceSubsystem = "treeservice"
writeCacheSubsystem = "writecache"
grpcServerSubsystem = "grpc_server"
successLabel = "success"
shardIDLabel = "shard_id"
@ -35,6 +36,7 @@ const (
containerIDLabelKey = "cid"
storageLabel = "storage"
operationLabel = "operation"
endpointLabel = "endpoint"
readWriteMode = "READ_WRITE"
readOnlyMode = "READ_ONLY"

35
pkg/metrics/grpc.go Normal file
View file

@ -0,0 +1,35 @@
package metrics
import (
"git.frostfs.info/TrueCloudLab/frostfs-observability/metrics"
"github.com/prometheus/client_golang/prometheus"
)
// GrpcServerMetrics records the health status of gRPC server endpoints,
// identified by their endpoint string.
type GrpcServerMetrics interface {
// MarkHealthy marks the given endpoint as healthy.
MarkHealthy(endpoint string)
// MarkUnhealthy marks the given endpoint as unhealthy.
MarkUnhealthy(endpoint string)
}
// grpcServerMetrics holds the Prometheus collectors for gRPC server endpoints.
type grpcServerMetrics struct {
// endpointHealth is a gauge vector; MarkHealthy sets a value of 1 and
// MarkUnhealthy sets a value of 0 for the labelled endpoint.
endpointHealth *prometheus.GaugeVec
}
func newGrpcServerMetrics() *grpcServerMetrics {
return &grpcServerMetrics{
endpointHealth: metrics.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: namespace,
Subsystem: grpcServerSubsystem,
Name: "health",
fyrchik marked this conversation as resolved Outdated

Maybe frostfs_grpc_server_health?

Maybe `frostfs_grpc_server_health`?

Agree, updated. Now it looks like this:

# TYPE frostfs_node_grpc_server_health gauge
frostfs_node_grpc_server_health{endpoint="s01.frostfs.devenv:8080"} 1
frostfs_node_grpc_server_health{endpoint="s01ssss.frostfs.devenv:8080"} 0
Agree, updated. Now it looks like this:
```
# TYPE frostfs_node_grpc_server_health gauge
frostfs_node_grpc_server_health{endpoint="s01.frostfs.devenv:8080"} 1
frostfs_node_grpc_server_health{endpoint="s01ssss.frostfs.devenv:8080"} 0
```
Help: "GRPC Server Endpoint health",
}, []string{endpointLabel}),
}
}
// MarkHealthy sets the health gauge of the given endpoint to 1.
func (m *grpcServerMetrics) MarkHealthy(endpoint string) {
	labels := prometheus.Labels{endpointLabel: endpoint}
	gauge := m.endpointHealth.With(labels)
	gauge.Set(1)
}
// MarkUnhealthy sets the health gauge of the given endpoint to 0.
func (m *grpcServerMetrics) MarkUnhealthy(endpoint string) {
	labels := prometheus.Labels{endpointLabel: endpoint}
	gauge := m.endpointHealth.With(labels)
	gauge.Set(0)
}

View file

@ -17,6 +17,7 @@ type NodeMetrics struct {
blobobvnizca *blobovnizca
metabase *metabaseMetrics
pilorama *piloramaMetrics
grpc *grpcServerMetrics
}
func NewNodeMetrics() *NodeMetrics {
@ -37,6 +38,7 @@ func NewNodeMetrics() *NodeMetrics {
blobobvnizca: newBlobovnizca(),
metabase: newMetabaseMetrics(),
pilorama: newPiloramaMetrics(),
grpc: newGrpcServerMetrics(),
}
}
@ -84,3 +86,7 @@ func (m *NodeMetrics) MetabaseMetrics() MetabaseMetrics {
// PiloramaMetrics returns the node's pilorama metrics through the
// PiloramaMetrics interface.
func (m *NodeMetrics) PiloramaMetrics() PiloramaMetrics {
return m.pilorama
}
// GrpcServerMetrics returns the node's gRPC server metrics through the
// GrpcServerMetrics interface.
func (m *NodeMetrics) GrpcServerMetrics() GrpcServerMetrics {
return m.grpc
}