[#565] Add metrics for current GRPC endpoint status

Signed-off-by: Anton Nikiforov <an.nikiforov@yadro.com>
This commit is contained in:
Anton Nikiforov 2023-08-09 09:23:03 +03:00 committed by Evgenii Stratonikov
parent c3e23a1448
commit 4ad0ebb32f
5 changed files with 50 additions and 0 deletions

View file

@ -446,6 +446,8 @@ type cfgGRPC struct {
servers []*grpc.Server
endpoints []string
maxChunkSize uint64
maxAddrAmount uint64

View file

@ -69,11 +69,14 @@ func initGRPC(c *cfg) {
lis, err := net.Listen("tcp", sc.Endpoint())
if err != nil {
c.metricsCollector.GrpcServerMetrics().MarkUnhealthy(sc.Endpoint())
c.log.Error(logs.FrostFSNodeCantListenGRPCEndpoint, zap.Error(err))
return
}
c.metricsCollector.GrpcServerMetrics().MarkHealthy(sc.Endpoint())
c.cfgGRPC.listeners = append(c.cfgGRPC.listeners, lis)
c.cfgGRPC.endpoints = append(c.cfgGRPC.endpoints, sc.Endpoint())
srv := grpc.NewServer(serverOpts...)
@ -96,6 +99,7 @@ func serveGRPC(c *cfg) {
srv := c.cfgGRPC.servers[i]
lis := c.cfgGRPC.listeners[i]
endpoint := c.cfgGRPC.endpoints[i]
go func() {
defer func() {
@ -111,6 +115,7 @@ func serveGRPC(c *cfg) {
)
if err := srv.Serve(lis); err != nil {
c.metricsCollector.GrpcServerMetrics().MarkUnhealthy(endpoint)
fmt.Println("gRPC server error", err)
}
}()

View file

@ -19,6 +19,7 @@ const (
stateSubsystem = "state"
treeServiceSubsystem = "treeservice"
writeCacheSubsystem = "writecache"
grpcServerSubsystem = "grpc_server"
successLabel = "success"
shardIDLabel = "shard_id"
@ -35,6 +36,7 @@ const (
containerIDLabelKey = "cid"
storageLabel = "storage"
operationLabel = "operation"
endpointLabel = "endpoint"
readWriteMode = "READ_WRITE"
readOnlyMode = "READ_ONLY"

35
pkg/metrics/grpc.go Normal file
View file

@ -0,0 +1,35 @@
package metrics
import (
"git.frostfs.info/TrueCloudLab/frostfs-observability/metrics"
"github.com/prometheus/client_golang/prometheus"
)
// GrpcServerMetrics is the interface used to report the health of gRPC
// server endpoints, keyed by the endpoint string.
type GrpcServerMetrics interface {
// MarkHealthy reports the given endpoint as healthy.
MarkHealthy(endpoint string)
// MarkUnhealthy reports the given endpoint as unhealthy.
MarkUnhealthy(endpoint string)
}
// grpcServerMetrics is the Prometheus-backed implementation of
// GrpcServerMetrics.
type grpcServerMetrics struct {
// endpointHealth is a gauge vector with one series per endpoint label
// value; the methods of this type set it to 1 (healthy) or 0 (unhealthy).
endpointHealth *prometheus.GaugeVec
}
// newGrpcServerMetrics creates the gRPC server metrics holder. It builds a
// single gauge vector named "health" from the package-level namespace and
// grpcServerSubsystem constants, with endpointLabel as its only label.
func newGrpcServerMetrics() *grpcServerMetrics {
	healthOpts := prometheus.GaugeOpts{
		Namespace: namespace,
		Subsystem: grpcServerSubsystem,
		Name:      "health",
		Help:      "GRPC Server Endpoint health",
	}
	labelNames := []string{endpointLabel}

	result := new(grpcServerMetrics)
	result.endpointHealth = metrics.NewGaugeVec(healthOpts, labelNames)
	return result
}
// MarkHealthy sets the health gauge of the given endpoint to 1.
func (m *grpcServerMetrics) MarkHealthy(endpoint string) {
	labels := prometheus.Labels{endpointLabel: endpoint}
	gauge := m.endpointHealth.With(labels)
	gauge.Set(1)
}
// MarkUnhealthy sets the health gauge of the given endpoint to 0.
func (m *grpcServerMetrics) MarkUnhealthy(endpoint string) {
	labels := prometheus.Labels{endpointLabel: endpoint}
	gauge := m.endpointHealth.With(labels)
	gauge.Set(0)
}

View file

@ -17,6 +17,7 @@ type NodeMetrics struct {
blobobvnizca *blobovnizca
metabase *metabaseMetrics
pilorama *piloramaMetrics
grpc *grpcServerMetrics
}
func NewNodeMetrics() *NodeMetrics {
@ -37,6 +38,7 @@ func NewNodeMetrics() *NodeMetrics {
blobobvnizca: newBlobovnizca(),
metabase: newMetabaseMetrics(),
pilorama: newPiloramaMetrics(),
grpc: newGrpcServerMetrics(),
}
}
@ -84,3 +86,7 @@ func (m *NodeMetrics) MetabaseMetrics() MetabaseMetrics {
// PiloramaMetrics returns the pilorama metrics stored in the node metrics.
func (m *NodeMetrics) PiloramaMetrics() PiloramaMetrics {
return m.pilorama
}
// GrpcServerMetrics returns the gRPC server metrics stored in the node
// metrics, exposed through the GrpcServerMetrics interface.
func (m *NodeMetrics) GrpcServerMetrics() GrpcServerMetrics {
return m.grpc
}