[#xxx] pool: Log error that caused healthy status change
Some checks failed
DCO / DCO (pull_request) Failing after 7m2s
Tests and linters / Tests (1.21) (pull_request) Successful in 7m50s
Tests and linters / Tests (1.22) (pull_request) Successful in 7m45s
Tests and linters / Lint (pull_request) Successful in 9m30s

Signed-off-by: Marina Biryukova <m.biryukova@yadro.com>
This commit is contained in:
Marina Biryukova 2024-07-22 11:17:26 +03:00
parent 7e94a6adf2
commit b9728d319b
2 changed files with 19 additions and 14 deletions

View file

@ -195,8 +195,8 @@ func (m *mockClient) dial(context.Context) error {
return nil
}
func (m *mockClient) restartIfUnhealthy(ctx context.Context) (healthy bool, changed bool) {
_, err := m.endpointInfo(ctx, prmEndpointInfo{})
func (m *mockClient) restartIfUnhealthy(ctx context.Context) (healthy bool, changed bool, err error) {
_, err = m.endpointInfo(ctx, prmEndpointInfo{})
healthy = err == nil
changed = healthy != m.isHealthy()
if healthy {

View file

@ -86,7 +86,7 @@ type client interface {
// see clientWrapper.dial.
dial(ctx context.Context) error
// see clientWrapper.restartIfUnhealthy.
restartIfUnhealthy(ctx context.Context) (bool, bool)
restartIfUnhealthy(ctx context.Context) (bool, bool, error)
// see clientWrapper.close.
close() error
}
@ -374,10 +374,11 @@ func (c *clientWrapper) dial(ctx context.Context) error {
// restartIfUnhealthy checks the health status of the client and recreates it if the status is unhealthy.
// It returns the current health status and a flag indicating whether the status was changed by this call.
func (c *clientWrapper) restartIfUnhealthy(ctx context.Context) (healthy, changed bool) {
// It also returns the error that caused the unhealthy status, or nil if the client is healthy.
func (c *clientWrapper) restartIfUnhealthy(ctx context.Context) (healthy, changed bool, err error) {
var wasHealthy bool
if _, err := c.endpointInfo(ctx, prmEndpointInfo{}); err == nil {
return true, false
if _, err = c.endpointInfo(ctx, prmEndpointInfo{}); err == nil {
return true, false, nil
} else if !errors.Is(err, errPoolClientUnhealthy) {
wasHealthy = true
}
@ -403,22 +404,22 @@ func (c *clientWrapper) restartIfUnhealthy(ctx context.Context) (healthy, change
GRPCDialOptions: c.prm.dialOptions,
}
if err := cl.Dial(ctx, prmDial); err != nil {
if err = cl.Dial(ctx, prmDial); err != nil {
c.setUnhealthyOnDial()
return false, wasHealthy
return false, wasHealthy, err
}
c.clientMutex.Lock()
c.client = &cl
c.clientMutex.Unlock()
if _, err := cl.EndpointInfo(ctx, sdkClient.PrmEndpointInfo{}); err != nil {
if _, err = cl.EndpointInfo(ctx, sdkClient.PrmEndpointInfo{}); err != nil {
c.setUnhealthy()
return false, wasHealthy
return false, wasHealthy, err
}
c.setHealthy()
return true, !wasHealthy
return true, !wasHealthy, nil
}
func (c *clientWrapper) getClient() (*sdkClient.Client, error) {
@ -2198,7 +2199,7 @@ func (p *Pool) updateInnerNodesHealth(ctx context.Context, i int, bufferWeights
tctx, c := context.WithTimeout(ctx, options.nodeRequestTimeout)
defer c()
healthy, changed := cli.restartIfUnhealthy(tctx)
healthy, changed, err := cli.restartIfUnhealthy(tctx)
if healthy {
bufferWeights[j] = options.nodesParams[i].weights[j]
} else {
@ -2207,8 +2208,12 @@ func (p *Pool) updateInnerNodesHealth(ctx context.Context, i int, bufferWeights
}
if changed {
p.log(zap.DebugLevel, "health has changed",
zap.String("address", cli.address()), zap.Bool("healthy", healthy))
fields := []zap.Field{zap.String("address", cli.address()), zap.Bool("healthy", healthy)}
if err != nil {
fields = append(fields, zap.String("error", err.Error()))
}
p.log(zap.DebugLevel, "health has changed", fields...)
healthyChanged.Store(true)
}
}(j, cli)