frostfs-node/cmd/frostfs-node/grpc.go

package main

import (
	"context"
	"crypto/tls"
	"errors"
	"fmt"
	"net"
	"time"

	grpcconfig "git.frostfs.info/TrueCloudLab/frostfs-node/cmd/frostfs-node/config/grpc"
	rpcconfig "git.frostfs.info/TrueCloudLab/frostfs-node/cmd/frostfs-node/config/rpc"
	"git.frostfs.info/TrueCloudLab/frostfs-node/internal/logs"
	qosInternal "git.frostfs.info/TrueCloudLab/frostfs-node/internal/qos"
	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/util/logger"
	metrics "git.frostfs.info/TrueCloudLab/frostfs-observability/metrics/grpc"
	tracing "git.frostfs.info/TrueCloudLab/frostfs-observability/tracing/grpc"
	"git.frostfs.info/TrueCloudLab/frostfs-qos/limiting"
	qos "git.frostfs.info/TrueCloudLab/frostfs-qos/tagging"
	"go.uber.org/zap"
	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials"
)
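
// maxRecvMsgSize caps the size of a single received gRPC message at 256 MiB.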
const maxRecvMsgSize = 256 << 20
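
// initGRPC opens a TCP listener and creates a gRPC server for every endpoint
// from the application configuration. Endpoints that could not be listened on
// are scheduled for background reconnection; if not a single endpoint came up,
// the node terminates.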
func initGRPC(ctx context.Context, c *cfg) {
	var endpointsToReconnect []string
	var successCount int
	grpcconfig.IterateEndpoints(c.appCfg, func(sc *grpcconfig.Config) {
		serverOpts, ok := getGrpcServerOpts(ctx, c, sc)
		if !ok {
			return
		}

		lis, err := net.Listen("tcp", sc.Endpoint())
		if err != nil {
			c.metricsCollector.GrpcServerMetrics().MarkUnhealthy(sc.Endpoint())
			c.log.Error(ctx, logs.FrostFSNodeCantListenGRPCEndpoint, zap.Error(err))
			endpointsToReconnect = append(endpointsToReconnect, sc.Endpoint())
			return
		}
		c.metricsCollector.GrpcServerMetrics().MarkHealthy(sc.Endpoint())

		srv := grpc.NewServer(serverOpts...)

		c.onShutdown(func() {
			stopGRPC(ctx, "FrostFS Public API", srv, c.log)
		})

		c.cfgGRPC.append(sc.Endpoint(), lis, srv)
		successCount++
	})
	if successCount == 0 {
		fatalOnErr(errors.New("could not listen to any gRPC endpoints"))
	}
	c.cfgGRPC.reconnectTimeout = grpcconfig.ReconnectTimeout(c.appCfg)

	for _, endpoint := range endpointsToReconnect {
		scheduleReconnect(ctx, endpoint, c)
	}
}
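
// scheduleReconnect retries the endpoint in a background goroutine, once per
// reconnect timeout, until the attempt succeeds or the node is done.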
func scheduleReconnect(ctx context.Context, endpoint string, c *cfg) {
	c.wg.Add(1)
	go func() {
		defer c.wg.Done()

		t := time.NewTicker(c.cfgGRPC.reconnectTimeout)
		for {
			select {
			case <-t.C:
				if tryReconnect(ctx, endpoint, c) {
					return
				}
			case <-c.done:
				return
			}
		}
	}()
}
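
// tryReconnect attempts to bring the endpoint back up. It returns true when
// the retry loop should stop: either the server is listening again or the
// endpoint is no longer present in the configuration.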
func tryReconnect(ctx context.Context, endpoint string, c *cfg) bool {
	c.log.Info(ctx, logs.FrostFSNodeGRPCReconnecting, zap.String("endpoint", endpoint))

	serverOpts, found := getGRPCEndpointOpts(ctx, endpoint, c)
	if !found {
		c.log.Warn(ctx, logs.FrostFSNodeGRPCServerConfigNotFound, zap.String("endpoint", endpoint))
		return true
	}

	lis, err := net.Listen("tcp", endpoint)
	if err != nil {
		c.metricsCollector.GrpcServerMetrics().MarkUnhealthy(endpoint)
		c.log.Error(ctx, logs.FrostFSNodeCantListenGRPCEndpoint, zap.Error(err))
		c.log.Warn(ctx, logs.FrostFSNodeGRPCReconnectFailed, zap.Duration("next_try_in", c.cfgGRPC.reconnectTimeout))
		return false
	}
	c.metricsCollector.GrpcServerMetrics().MarkHealthy(endpoint)

	srv := grpc.NewServer(serverOpts...)

	c.onShutdown(func() {
		stopGRPC(ctx, "FrostFS Public API", srv, c.log)
	})

	c.cfgGRPC.appendAndHandle(endpoint, lis, srv)

	c.log.Info(ctx, logs.FrostFSNodeGRPCReconnectedSuccessfully, zap.String("endpoint", endpoint))
	return true
}
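
// getGRPCEndpointOpts builds server options for the given endpoint from the
// current application configuration, holding a shared config lock while
// iterating over it.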
func getGRPCEndpointOpts(ctx context.Context, endpoint string, c *cfg) (result []grpc.ServerOption, found bool) {
	unlock := c.LockAppConfigShared()
	defer unlock()

	grpcconfig.IterateEndpoints(c.appCfg, func(sc *grpcconfig.Config) {
		if found {
			return
		}
		if sc.Endpoint() != endpoint {
			return
		}
		var ok bool
		result, ok = getGrpcServerOpts(ctx, c, sc)
		if !ok {
			return
		}
		found = true
	})
	return
}
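
// getGrpcServerOpts assembles the common server options: the message size
// limit, the QoS, metrics and tracing interceptor chains, the active-RPC
// limiter, and, when configured, TLS credentials.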
func getGrpcServerOpts(ctx context.Context, c *cfg, sc *grpcconfig.Config) ([]grpc.ServerOption, bool) {
	serverOpts := []grpc.ServerOption{
		grpc.MaxRecvMsgSize(maxRecvMsgSize),
		grpc.ChainUnaryInterceptor(
			qos.NewUnaryServerInterceptor(),
			metrics.NewUnaryServerInterceptor(),
			tracing.NewUnaryServerInterceptor(),
			qosInternal.NewMaxActiveRPCLimiterUnaryServerInterceptor(func() limiting.Limiter { return c.cfgGRPC.limiter.Load() }),
		),
		grpc.ChainStreamInterceptor(
			qos.NewStreamServerInterceptor(),
			metrics.NewStreamServerInterceptor(),
			tracing.NewStreamServerInterceptor(),
			qosInternal.NewMaxActiveRPCLimiterStreamServerInterceptor(func() limiting.Limiter { return c.cfgGRPC.limiter.Load() }),
		),
	}

	tlsCfg := sc.TLS()

	if tlsCfg != nil {
		cert, err := tls.LoadX509KeyPair(tlsCfg.CertificateFile(), tlsCfg.KeyFile())
		if err != nil {
			c.log.Error(ctx, logs.FrostFSNodeCouldNotReadCertificateFromFile, zap.Error(err))
			return nil, false
		}

		var cipherSuites []uint16
		if !tlsCfg.UseInsecureCrypto() {
			// This more or less follows the list in https://wiki.mozilla.org/Security/Server_Side_TLS,
			// with two exclusions:
			// 1. TLS 1.3 suites, which need not be specified here.
			// 2. Suites that use DH key exchange, which are not implemented by the stdlib.
			cipherSuites = []uint16{
				tls.TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,
				tls.TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,
				tls.TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,
				tls.TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,
				tls.TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256,
				tls.TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256,
			}
		}

		creds := credentials.NewTLS(&tls.Config{
			MinVersion:   tls.VersionTLS12,
			CipherSuites: cipherSuites,
			Certificates: []tls.Certificate{cert},
		})
		serverOpts = append(serverOpts, grpc.Creds(creds))
	}

	return serverOpts, true
}
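
// serveGRPC starts each registered server in its own goroutine. A serve error
// marks the endpoint unhealthy, drops its connection and schedules a
// reconnect.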
func serveGRPC(ctx context.Context, c *cfg) {
	c.cfgGRPC.performAndSave(func(e string, l net.Listener, s *grpc.Server) {
		c.wg.Add(1)

		go func() {
			defer func() {
				c.log.Info(ctx, logs.FrostFSNodeStopListeningGRPCEndpoint,
					zap.Stringer("endpoint", l.Addr()),
				)

				c.wg.Done()
			}()

			c.log.Info(ctx, logs.FrostFSNodeStartListeningEndpoint,
				zap.String("service", "gRPC"),
				zap.Stringer("endpoint", l.Addr()),
			)

			if err := s.Serve(l); err != nil {
				c.metricsCollector.GrpcServerMetrics().MarkUnhealthy(e)
				c.log.Error(ctx, logs.FrostFSNodeGRPCServerError, zap.Error(err))
				c.cfgGRPC.dropConnection(e)
				scheduleReconnect(ctx, e, c)
			}
		}()
	})
}
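
// stopGRPC stops the server gracefully, falling back to a forced stop if the
// graceful one does not finish within a minute.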
func stopGRPC(ctx context.Context, name string, s *grpc.Server, l *logger.Logger) {
	l = l.With(zap.String("name", name))

	l.Info(ctx, logs.FrostFSNodeStoppingGRPCServer)

	// GracefulStop() may freeze forever, see #1270
	done := make(chan struct{})
	go func() {
		s.GracefulStop()
		close(done)
	}()

	select {
	case <-done:
	case <-time.After(1 * time.Minute):
		l.Info(ctx, logs.FrostFSNodeGRPCCannotShutdownGracefullyForcingStop)
		s.Stop()
	}

	l.Info(ctx, logs.FrostFSNodeGRPCServerStoppedSuccessfully)
}
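
// initRPCLimiter builds a semaphore limiter from the RPC limits
// configuration, after validating it, and stores the limiter for the server
// interceptors.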
func initRPCLimiter(c *cfg) error {
	var limits []limiting.KeyLimit
	for _, l := range rpcconfig.Limits(c.appCfg) {
		limits = append(limits, limiting.KeyLimit{Keys: l.Methods, Limit: l.MaxOps})
	}

	if err := validateRPCLimits(c, limits); err != nil {
		return fmt.Errorf("validate RPC limits: %w", err)
	}

	limiter, err := limiting.NewSemaphoreLimiter(limits)
	if err != nil {
		return fmt.Errorf("create RPC limiter: %w", err)
	}

	c.cfgGRPC.limiter.Store(limiter)
	return nil
}
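
// validateRPCLimits checks that every method referenced by the configured
// limits is registered on one of the gRPC servers.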
func validateRPCLimits(c *cfg, limits []limiting.KeyLimit) error {
	availableMethods := getAvailableMethods(c.cfgGRPC.servers)
	for _, limit := range limits {
		for _, method := range limit.Keys {
			if _, ok := availableMethods[method]; !ok {
				return fmt.Errorf("set limit on an unknown method %q", method)
			}
		}
	}
	return nil
}
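
// getAvailableMethods collects the full method names registered across all
// servers.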
func getAvailableMethods(servers []grpcServer) map[string]struct{} {
	res := make(map[string]struct{})
	for _, server := range servers {
		for _, method := range getMethodsForServer(server.Server) {
			res[method] = struct{}{}
		}
	}
	return res
}
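
// getMethodsForServer lists a server's registered methods in full-method
// format, "/service/method".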
func getMethodsForServer(server *grpc.Server) []string {
	var res []string
	for service, info := range server.GetServiceInfo() {
		for _, method := range info.Methods {
			res = append(res, fmt.Sprintf("/%s/%s", service, method.Name))
		}
	}
	return res
}