[#242] pool: Log error that caused healthy status change #242
2 changed files with 21 additions and 16 deletions
|
@ -195,9 +195,9 @@ func (m *mockClient) dial(context.Context) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *mockClient) restartIfUnhealthy(ctx context.Context) (healthy bool, changed bool) {
|
func (m *mockClient) restartIfUnhealthy(ctx context.Context) (changed bool, err error) {
|
||||||
_, err := m.endpointInfo(ctx, prmEndpointInfo{})
|
_, err = m.endpointInfo(ctx, prmEndpointInfo{})
|
||||||
healthy = err == nil
|
healthy := err == nil
|
||||||
changed = healthy != m.isHealthy()
|
changed = healthy != m.isHealthy()
|
||||||
if healthy {
|
if healthy {
|
||||||
m.setHealthy()
|
m.setHealthy()
|
||||||
|
|
31
pool/pool.go
31
pool/pool.go
|
@ -86,7 +86,7 @@ type client interface {
|
||||||
// see clientWrapper.dial.
|
// see clientWrapper.dial.
|
||||||
dial(ctx context.Context) error
|
dial(ctx context.Context) error
|
||||||
// see clientWrapper.restartIfUnhealthy.
|
// see clientWrapper.restartIfUnhealthy.
|
||||||
restartIfUnhealthy(ctx context.Context) (bool, bool)
|
restartIfUnhealthy(ctx context.Context) (bool, error)
|
||||||
// see clientWrapper.close.
|
// see clientWrapper.close.
|
||||||
close() error
|
close() error
|
||||||
}
|
}
|
||||||
|
@ -373,11 +373,11 @@ func (c *clientWrapper) dial(ctx context.Context) error {
|
||||||
}
|
}
|
||||||
|
|
||||||
// restartIfUnhealthy checks healthy status of client and recreate it if status is unhealthy.
|
// restartIfUnhealthy checks healthy status of client and recreate it if status is unhealthy.
|
||||||
// Return current healthy status and indicating if status was changed by this function call.
|
// Indicating if status was changed by this function call and returns error that caused unhealthy status.
|
||||||
func (c *clientWrapper) restartIfUnhealthy(ctx context.Context) (healthy, changed bool) {
|
func (c *clientWrapper) restartIfUnhealthy(ctx context.Context) (changed bool, err error) {
|
||||||
var wasHealthy bool
|
var wasHealthy bool
|
||||||
dkirillov marked this conversation as resolved
Outdated
|
|||||||
if _, err := c.endpointInfo(ctx, prmEndpointInfo{}); err == nil {
|
if _, err = c.endpointInfo(ctx, prmEndpointInfo{}); err == nil {
|
||||||
return true, false
|
return false, nil
|
||||||
} else if !errors.Is(err, errPoolClientUnhealthy) {
|
} else if !errors.Is(err, errPoolClientUnhealthy) {
|
||||||
wasHealthy = true
|
wasHealthy = true
|
||||||
}
|
}
|
||||||
|
@ -403,22 +403,22 @@ func (c *clientWrapper) restartIfUnhealthy(ctx context.Context) (healthy, change
|
||||||
GRPCDialOptions: c.prm.dialOptions,
|
GRPCDialOptions: c.prm.dialOptions,
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := cl.Dial(ctx, prmDial); err != nil {
|
if err = cl.Dial(ctx, prmDial); err != nil {
|
||||||
c.setUnhealthyOnDial()
|
c.setUnhealthyOnDial()
|
||||||
return false, wasHealthy
|
return wasHealthy, err
|
||||||
}
|
}
|
||||||
|
|
||||||
c.clientMutex.Lock()
|
c.clientMutex.Lock()
|
||||||
c.client = &cl
|
c.client = &cl
|
||||||
c.clientMutex.Unlock()
|
c.clientMutex.Unlock()
|
||||||
|
|
||||||
if _, err := cl.EndpointInfo(ctx, sdkClient.PrmEndpointInfo{}); err != nil {
|
if _, err = cl.EndpointInfo(ctx, sdkClient.PrmEndpointInfo{}); err != nil {
|
||||||
c.setUnhealthy()
|
c.setUnhealthy()
|
||||||
return false, wasHealthy
|
return wasHealthy, err
|
||||||
}
|
}
|
||||||
|
|
||||||
c.setHealthy()
|
c.setHealthy()
|
||||||
return true, !wasHealthy
|
return !wasHealthy, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *clientWrapper) getClient() (*sdkClient.Client, error) {
|
func (c *clientWrapper) getClient() (*sdkClient.Client, error) {
|
||||||
|
@ -2198,7 +2198,8 @@ func (p *Pool) updateInnerNodesHealth(ctx context.Context, i int, bufferWeights
|
||||||
tctx, c := context.WithTimeout(ctx, options.nodeRequestTimeout)
|
tctx, c := context.WithTimeout(ctx, options.nodeRequestTimeout)
|
||||||
defer c()
|
defer c()
|
||||||
|
|
||||||
healthy, changed := cli.restartIfUnhealthy(tctx)
|
changed, err := cli.restartIfUnhealthy(tctx)
|
||||||
|
healthy := err == nil
|
||||||
if healthy {
|
if healthy {
|
||||||
bufferWeights[j] = options.nodesParams[i].weights[j]
|
bufferWeights[j] = options.nodesParams[i].weights[j]
|
||||||
} else {
|
} else {
|
||||||
|
@ -2207,8 +2208,12 @@ func (p *Pool) updateInnerNodesHealth(ctx context.Context, i int, bufferWeights
|
||||||
}
|
}
|
||||||
|
|
||||||
if changed {
|
if changed {
|
||||||
p.log(zap.DebugLevel, "health has changed",
|
fields := []zap.Field{zap.String("address", cli.address()), zap.Bool("healthy", healthy)}
|
||||||
zap.String("address", cli.address()), zap.Bool("healthy", healthy))
|
if err != nil {
|
||||||
|
fields = append(fields, zap.String("reason", err.Error()))
|
||||||
aarifullin
commented
1. It's more correct to use the `zap.Error` formatter instead of stringifying the error :)
2. For me, "health has changed" with an `"error"` field seems unclear. It's optional, but I can suggest logging a message like `health was changed but client error occurred` if `err != nil`
fyrchik
commented
ЕМНИП (IIRC) we use `zap.String` because `zap.Error` produces unnecessarily big stack traces.
fyrchik
commented
For `error` I suggest replacing it with `reason`: it is not an error related to the operation being logged (health status changed), it is the reason for the change.
|
|||||||
|
}
|
||||||
|
|
||||||
|
p.log(zap.DebugLevel, "health has changed", fields...)
|
||||||
healthyChanged.Store(true)
|
healthyChanged.Store(true)
|
||||||
}
|
}
|
||||||
}(j, cli)
|
}(j, cli)
|
||||||
|
|
Loading…
Reference in a new issue
What do you think about keeping the signature of this method intact but adding logging of the error (we have both the logger and the error available here)?
Or at least consider dropping
healthy bool
from the return params — we can derive this status from the error. I would prefer the second option, because it matches the tree pool.