From 652237500fb9e1618eb66310d3f2a51c599a033e Mon Sep 17 00:00:00 2001 From: Dmitrii Stepanov Date: Thu, 24 Apr 2025 16:54:47 +0300 Subject: [PATCH] [#1689] tree: Dial tree service after create connection on sync Background tree sync creates a gRPC connection with the `grpc.WithDefaultCallOptions(grpc.WaitForReady(true))` option. When a gRPC connection is created with this option, the client waits until a connection becomes available or the RPC's deadline is reached. Since background sync has no timeout in its context, an RPC call will hang forever if the client is in TRANSIENT_FAILURE. Change-Id: I17c8c1d2779bb81c541f47dd0e558e0b8ed2e7c1 Signed-off-by: Dmitrii Stepanov --- pkg/services/tree/cache.go | 36 ++++++------------------------------ pkg/services/tree/sync.go | 28 +++++++++++++++++++++++++++- 2 files changed, 33 insertions(+), 31 deletions(-) diff --git a/pkg/services/tree/cache.go b/pkg/services/tree/cache.go index 462c8554f8..a117007719 100644 --- a/pkg/services/tree/cache.go +++ b/pkg/services/tree/cache.go @@ -66,7 +66,12 @@ func (c *clientCache) get(ctx context.Context, netmapAddr string) (TreeServiceCl } } - cc, err := c.dialTreeService(ctx, netmapAddr) + var netAddr network.Address + if err := netAddr.FromString(netmapAddr); err != nil { + return nil, err + } + + cc, err := dialTreeService(ctx, netAddr, c.key, c.ds) lastTry := time.Now() c.Lock() @@ -83,32 +88,3 @@ func (c *clientCache) get(ctx context.Context, netmapAddr string) (TreeServiceCl return NewTreeServiceClient(cc), nil } - -func (c *clientCache) dialTreeService(ctx context.Context, netmapAddr string) (*grpc.ClientConn, error) { - var netAddr network.Address - if err := netAddr.FromString(netmapAddr); err != nil { - return nil, err - } - - cc, err := createConnection(netAddr, grpc.WithContextDialer(c.ds.GrpcContextDialer())) - if err != nil { - return nil, err - } - - ctx, cancel := context.WithTimeout(ctx, defaultClientConnectTimeout) - defer cancel() - - req := &HealthcheckRequest{ - Body: 
&HealthcheckRequest_Body{},
-	}
-	if err := SignMessage(req, c.key); err != nil {
-		return nil, err
-	}
-
-	// perform some request to check connection
-	if _, err := NewTreeServiceClient(cc).Healthcheck(ctx, req); err != nil {
-		_ = cc.Close()
-		return nil, err
-	}
-	return cc, nil
-}
diff --git a/pkg/services/tree/sync.go b/pkg/services/tree/sync.go
index d4040337d7..af355639f2 100644
--- a/pkg/services/tree/sync.go
+++ b/pkg/services/tree/sync.go
@@ -2,6 +2,7 @@ package tree
 
 import (
 	"context"
+	"crypto/ecdsa"
 	"crypto/sha256"
 	"crypto/tls"
 	"errors"
@@ -14,6 +15,7 @@ import (
 	"time"
 
 	"git.frostfs.info/TrueCloudLab/frostfs-node/internal/logs"
+	"git.frostfs.info/TrueCloudLab/frostfs-node/internal/net"
 	"git.frostfs.info/TrueCloudLab/frostfs-node/internal/qos"
 	containerCore "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/core/container"
 	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/pilorama"
@@ -304,7 +306,7 @@ func (s *Service) synchronizeTree(ctx context.Context, cid cid.ID, from uint64,
 					continue
 				}
 
-				cc, err := createConnection(a, grpc.WithContextDialer(s.ds.GrpcContextDialer()))
+				cc, err := dialTreeService(ctx, a, s.key, s.ds)
 				if err != nil {
 					s.log.Warn(ctx, logs.TreeFailedToConnectForTreeSynchronization, zap.Error(err), zap.String("address", addr))
 					continue
@@ -342,6 +344,30 @@ func (s *Service) synchronizeTree(ctx context.Context, cid cid.ID, from uint64,
 	return from
 }
 
+// dialTreeService creates a connection to netAddr and checks it with a signed, time-limited Healthcheck.
+func dialTreeService(ctx context.Context, netAddr network.Address, key *ecdsa.PrivateKey, ds *net.DialerSource) (*grpc.ClientConn, error) {
+	// Sign before creating the connection so that a signing failure cannot leak it.
+	req := &HealthcheckRequest{Body: &HealthcheckRequest_Body{}}
+	if err := SignMessage(req, key); err != nil {
+		return nil, err
+	}
+
+	cc, err := createConnection(netAddr, grpc.WithContextDialer(ds.GrpcContextDialer()))
+	if err != nil {
+		return nil, err
+	}
+
+	ctx, cancel := context.WithTimeout(ctx, defaultClientConnectTimeout)
+	defer cancel()
+
+	// perform some request to check connection; close the connection if it fails
+	if _, err := NewTreeServiceClient(cc).Healthcheck(ctx, req); err != nil {
+		_ = cc.Close()
+		return nil, err
+	}
+	return cc, nil
+}
+
 func createConnection(a network.Address, opts ...grpc.DialOption) (*grpc.ClientConn, error) {
 	host, isTLS, err := client.ParseURI(a.URIAddr())
 	if err != nil {