[#242] pool: Log error that caused healthy status change #242

Merged
fyrchik merged 1 commit from mbiryukova/frostfs-sdk-go:feature/log_unhealthy_error into master 2024-07-23 06:41:58 +00:00
2 changed files with 21 additions and 16 deletions

View file

@ -195,9 +195,9 @@ func (m *mockClient) dial(context.Context) error {
return nil
}
func (m *mockClient) restartIfUnhealthy(ctx context.Context) (healthy bool, changed bool) {
_, err := m.endpointInfo(ctx, prmEndpointInfo{})
healthy = err == nil
func (m *mockClient) restartIfUnhealthy(ctx context.Context) (changed bool, err error) {
_, err = m.endpointInfo(ctx, prmEndpointInfo{})
healthy := err == nil
changed = healthy != m.isHealthy()
if healthy {
m.setHealthy()

View file

@ -86,7 +86,7 @@ type client interface {
// see clientWrapper.dial.
dial(ctx context.Context) error
// see clientWrapper.restartIfUnhealthy.
restartIfUnhealthy(ctx context.Context) (bool, bool)
restartIfUnhealthy(ctx context.Context) (bool, error)
// see clientWrapper.close.
close() error
}
@ -373,11 +373,11 @@ func (c *clientWrapper) dial(ctx context.Context) error {
}
// restartIfUnhealthy checks healthy status of client and recreate it if status is unhealthy.
// Return current healthy status and indicating if status was changed by this function call.
func (c *clientWrapper) restartIfUnhealthy(ctx context.Context) (healthy, changed bool) {
// Indicating if status was changed by this function call and returns error that caused unhealthy status.
func (c *clientWrapper) restartIfUnhealthy(ctx context.Context) (changed bool, err error) {
var wasHealthy bool
dkirillov marked this conversation as resolved Outdated

What do you think about keeping signature of this method intact but adding logging of error (we know log and error here)?

Or at least consider drop healthy bool from return params. We can know this status from error.

What do you think about keeping signature of this method intact but adding logging of error (we know log and error here)? Or at least consider drop `healthy bool` from return params. We can know this status from error.

I would prefer the second option, because it's like in tree pool

I would prefer the second option, because it's like in tree pool
if _, err := c.endpointInfo(ctx, prmEndpointInfo{}); err == nil {
return true, false
if _, err = c.endpointInfo(ctx, prmEndpointInfo{}); err == nil {
return false, nil
} else if !errors.Is(err, errPoolClientUnhealthy) {
wasHealthy = true
}
@ -403,22 +403,22 @@ func (c *clientWrapper) restartIfUnhealthy(ctx context.Context) (healthy, change
GRPCDialOptions: c.prm.dialOptions,
}
if err := cl.Dial(ctx, prmDial); err != nil {
if err = cl.Dial(ctx, prmDial); err != nil {
c.setUnhealthyOnDial()
return false, wasHealthy
return wasHealthy, err
}
c.clientMutex.Lock()
c.client = &cl
c.clientMutex.Unlock()
if _, err := cl.EndpointInfo(ctx, sdkClient.PrmEndpointInfo{}); err != nil {
if _, err = cl.EndpointInfo(ctx, sdkClient.PrmEndpointInfo{}); err != nil {
c.setUnhealthy()
return false, wasHealthy
return wasHealthy, err
}
c.setHealthy()
return true, !wasHealthy
return !wasHealthy, nil
}
func (c *clientWrapper) getClient() (*sdkClient.Client, error) {
@ -2198,7 +2198,8 @@ func (p *Pool) updateInnerNodesHealth(ctx context.Context, i int, bufferWeights
tctx, c := context.WithTimeout(ctx, options.nodeRequestTimeout)
defer c()
healthy, changed := cli.restartIfUnhealthy(tctx)
changed, err := cli.restartIfUnhealthy(tctx)
healthy := err == nil
if healthy {
bufferWeights[j] = options.nodesParams[i].weights[j]
} else {
@ -2207,8 +2208,12 @@ func (p *Pool) updateInnerNodesHealth(ctx context.Context, i int, bufferWeights
}
if changed {
p.log(zap.DebugLevel, "health has changed",
zap.String("address", cli.address()), zap.Bool("healthy", healthy))
fields := []zap.Field{zap.String("address", cli.address()), zap.Bool("healthy", healthy)}
if err != nil {
fields = append(fields, zap.String("reason", err.Error()))
  1. It's more correct to use zap.Error formater instead stringifying the error :)
  2. For me "health has changed" with "error" fields seems unclear. It's optinally but I can suggest to log message like health was changed but client error occurred if err != nil
1. It's more correct to use `zap.Error` formater instead stringifying the error :) 2. For me "health has changed" with `"error"` fields seems unclear. It's optinally but I can suggest to log message like `health was changed but client error occurred` if `err != nil`

ЕМНИП We use zap.String because zap.Error produces unnecessary big stacktraces.

ЕМНИП We use `zap.String` because `zap.Error` produces unnecessary big stacktraces.

For error I suggest replacing it with a reason, it is not an error related to the operation being logged (health status changed), it is the reason.

For `error` I suggest replacing it with a `reason`, it is not an error related to the operation being logged (health status changed), it is the reason.
}
p.log(zap.DebugLevel, "health has changed", fields...)
healthyChanged.Store(true)
}
}(j, cli)