From 4ad0ebb32fa6cd99303944f637edd5994400e68a Mon Sep 17 00:00:00 2001 From: Anton Nikiforov Date: Wed, 9 Aug 2023 09:23:03 +0300 Subject: [PATCH] [#565] Add metrics for current GRPC endpoint status Signed-off-by: Anton Nikiforov --- cmd/frostfs-node/config.go | 2 ++ cmd/frostfs-node/grpc.go | 5 +++++ pkg/metrics/consts.go | 2 ++ pkg/metrics/grpc.go | 35 +++++++++++++++++++++++++++++++++++ pkg/metrics/node.go | 6 ++++++ 5 files changed, 50 insertions(+) create mode 100644 pkg/metrics/grpc.go diff --git a/cmd/frostfs-node/config.go b/cmd/frostfs-node/config.go index f5c106f77..935aefe7f 100644 --- a/cmd/frostfs-node/config.go +++ b/cmd/frostfs-node/config.go @@ -446,6 +446,8 @@ type cfgGRPC struct { servers []*grpc.Server + endpoints []string + maxChunkSize uint64 maxAddrAmount uint64 diff --git a/cmd/frostfs-node/grpc.go b/cmd/frostfs-node/grpc.go index 1dd0f0729..10d387b13 100644 --- a/cmd/frostfs-node/grpc.go +++ b/cmd/frostfs-node/grpc.go @@ -69,11 +69,14 @@ func initGRPC(c *cfg) { lis, err := net.Listen("tcp", sc.Endpoint()) if err != nil { + c.metricsCollector.GrpcServerMetrics().MarkUnhealthy(sc.Endpoint()) c.log.Error(logs.FrostFSNodeCantListenGRPCEndpoint, zap.Error(err)) return } + c.metricsCollector.GrpcServerMetrics().MarkHealthy(sc.Endpoint()) c.cfgGRPC.listeners = append(c.cfgGRPC.listeners, lis) + c.cfgGRPC.endpoints = append(c.cfgGRPC.endpoints, sc.Endpoint()) srv := grpc.NewServer(serverOpts...) @@ -96,6 +99,7 @@ func serveGRPC(c *cfg) { srv := c.cfgGRPC.servers[i] lis := c.cfgGRPC.listeners[i] + endpoint := c.cfgGRPC.endpoints[i] go func() { defer func() { @@ -111,6 +115,7 @@ func serveGRPC(c *cfg) { ) if err := srv.Serve(lis); err != nil { + c.metricsCollector.GrpcServerMetrics().MarkUnhealthy(endpoint) fmt.Println("gRPC server error", err) } }() diff --git a/pkg/metrics/consts.go b/pkg/metrics/consts.go index ae3ac7839..691aaa375 100644 --- a/pkg/metrics/consts.go +++ b/pkg/metrics/consts.go @@ -19,6 +19,7 @@ const ( stateSubsystem = "state" treeServiceSubsystem = "treeservice" writeCacheSubsystem = "writecache" + grpcServerSubsystem = "grpc_server" successLabel = "success" shardIDLabel = "shard_id" @@ -35,6 +36,7 @@ const ( containerIDLabelKey = "cid" storageLabel = "storage" operationLabel = "operation" + endpointLabel = "endpoint" readWriteMode = "READ_WRITE" readOnlyMode = "READ_ONLY" diff --git a/pkg/metrics/grpc.go b/pkg/metrics/grpc.go new file mode 100644 index 000000000..a83f53998 --- /dev/null +++ b/pkg/metrics/grpc.go @@ -0,0 +1,35 @@ +package metrics + +import ( + "git.frostfs.info/TrueCloudLab/frostfs-observability/metrics" + "github.com/prometheus/client_golang/prometheus" +) + +type GrpcServerMetrics interface { + MarkHealthy(endpoint string) + MarkUnhealthy(endpoint string) +} + +type grpcServerMetrics struct { + endpointHealth *prometheus.GaugeVec +} + +func newGrpcServerMetrics() *grpcServerMetrics { + return &grpcServerMetrics{ + endpointHealth: metrics.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: grpcServerSubsystem, + Name: "health", + Help: "GRPC Server Endpoint health", + }, []string{endpointLabel}), + } +} + +func (m *grpcServerMetrics) MarkHealthy(endpoint string) { + m.endpointHealth.With(prometheus.Labels{endpointLabel: endpoint}).Set(float64(1)) +} + +func (m *grpcServerMetrics) MarkUnhealthy(endpoint string) { + m.endpointHealth.With(prometheus.Labels{endpointLabel: endpoint}).Set(float64(0)) +} diff --git a/pkg/metrics/node.go b/pkg/metrics/node.go index 45d50b5b0..ce650c220 100644 --- a/pkg/metrics/node.go +++ b/pkg/metrics/node.go @@ -17,6 +17,7 @@ type NodeMetrics struct { blobobvnizca *blobovnizca metabase *metabaseMetrics pilorama *piloramaMetrics + grpc *grpcServerMetrics } func NewNodeMetrics() *NodeMetrics { @@ -37,6 +38,7 @@ func NewNodeMetrics() *NodeMetrics { blobobvnizca: newBlobovnizca(), metabase: newMetabaseMetrics(), pilorama: newPiloramaMetrics(), + grpc: newGrpcServerMetrics(), } } @@ -84,3 +86,7 @@ func (m *NodeMetrics) MetabaseMetrics() MetabaseMetrics { func (m *NodeMetrics) PiloramaMetrics() PiloramaMetrics { return m.pilorama } + +func (m *NodeMetrics) GrpcServerMetrics() GrpcServerMetrics { + return m.grpc +}