[#283] pool: Mark node unhealthy if node is under maintenance

Signed-off-by: Denis Kirillov <d.kirillov@yadro.com>
This commit is contained in:
Denis Kirillov 2024-10-16 15:22:05 +03:00
parent 3ea4741231
commit 79f387317a
2 changed files with 63 additions and 19 deletions

View file

@ -1159,6 +1159,16 @@ func (c *clientStatusMonitor) incErrorRate() {
} }
} }
// incErrorRateToUnhealthy registers an error and immediately switches the
// monitor to the unhealthy state instead of waiting for further errors.
//
// While holding c.mu it bumps overallErrorCount, zeroes currentErrorCount and
// calls setUnhealthy. The warning is emitted only after the mutex has been
// released, so logging never happens under the lock. err is the error that
// caused the transition and is attached to the log entry together with the
// node address.
func (c *clientStatusMonitor) incErrorRateToUnhealthy(err error) {
	c.mu.Lock()
	c.overallErrorCount++
	// The node is forced unhealthy right away, so the running counter restarts.
	c.currentErrorCount = 0
	c.setUnhealthy()
	c.mu.Unlock()

	fields := []zap.Field{
		zap.String("address", c.addr),
		zap.Error(err),
	}
	c.log(zapcore.WarnLevel, "explicitly mark node unhealthy", fields...)
}
func (c *clientStatusMonitor) log(level zapcore.Level, msg string, fields ...zap.Field) { func (c *clientStatusMonitor) log(level zapcore.Level, msg string, fields ...zap.Field) {
if c.logger == nil { if c.logger == nil {
return return
@ -1225,9 +1235,10 @@ func (c *clientStatusMonitor) handleError(ctx context.Context, st apistatus.Stat
switch stErr.(type) { switch stErr.(type) {
case *apistatus.ServerInternal, case *apistatus.ServerInternal,
*apistatus.WrongMagicNumber, *apistatus.WrongMagicNumber,
*apistatus.SignatureVerification, *apistatus.SignatureVerification:
*apistatus.NodeUnderMaintenance:
c.incErrorRate() c.incErrorRate()
case *apistatus.NodeUnderMaintenance:
c.incErrorRateToUnhealthy(stErr)
} }
if err == nil { if err == nil {
@ -1239,8 +1250,12 @@ func (c *clientStatusMonitor) handleError(ctx context.Context, st apistatus.Stat
if err != nil { if err != nil {
if needCountError(ctx, err) { if needCountError(ctx, err) {
if sdkClient.IsErrNodeUnderMaintenance(err) {
c.incErrorRateToUnhealthy(err)
} else {
c.incErrorRate() c.incErrorRate()
} }
}
return err return err
} }

View file

@ -4,7 +4,6 @@ import (
"context" "context"
"crypto/ecdsa" "crypto/ecdsa"
"errors" "errors"
"strconv"
"testing" "testing"
"time" "time"
@ -17,6 +16,7 @@ import (
"github.com/nspcc-dev/neo-go/pkg/crypto/keys" "github.com/nspcc-dev/neo-go/pkg/crypto/keys"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
"go.uber.org/zap" "go.uber.org/zap"
"go.uber.org/zap/zaptest"
) )
func TestBuildPoolClientFailed(t *testing.T) { func TestBuildPoolClientFailed(t *testing.T) {
@ -562,19 +562,22 @@ func TestStatusMonitor(t *testing.T) {
func TestHandleError(t *testing.T) { func TestHandleError(t *testing.T) {
ctx := context.Background() ctx := context.Background()
monitor := newClientStatusMonitor(zap.NewExample(), "", 10) log := zaptest.NewLogger(t)
canceledCtx, cancel := context.WithCancel(context.Background()) canceledCtx, cancel := context.WithCancel(context.Background())
cancel() cancel()
for i, tc := range []struct { for _, tc := range []struct {
name string
ctx context.Context ctx context.Context
status apistatus.Status status apistatus.Status
err error err error
expectedError bool expectedError bool
countError bool countError bool
markedUnhealthy bool
}{ }{
{ {
name: "no error, no status",
ctx: ctx, ctx: ctx,
status: nil, status: nil,
err: nil, err: nil,
@ -582,6 +585,7 @@ func TestHandleError(t *testing.T) {
countError: false, countError: false,
}, },
{ {
name: "no error, success status",
ctx: ctx, ctx: ctx,
status: new(apistatus.SuccessDefaultV2), status: new(apistatus.SuccessDefaultV2),
err: nil, err: nil,
@ -589,6 +593,7 @@ func TestHandleError(t *testing.T) {
countError: false, countError: false,
}, },
{ {
name: "error, success status",
ctx: ctx, ctx: ctx,
status: new(apistatus.SuccessDefaultV2), status: new(apistatus.SuccessDefaultV2),
err: errors.New("error"), err: errors.New("error"),
@ -596,6 +601,7 @@ func TestHandleError(t *testing.T) {
countError: true, countError: true,
}, },
{ {
name: "error, no status",
ctx: ctx, ctx: ctx,
status: nil, status: nil,
err: errors.New("error"), err: errors.New("error"),
@ -603,6 +609,7 @@ func TestHandleError(t *testing.T) {
countError: true, countError: true,
}, },
{ {
name: "no error, object not found status",
ctx: ctx, ctx: ctx,
status: new(apistatus.ObjectNotFound), status: new(apistatus.ObjectNotFound),
err: nil, err: nil,
@ -610,6 +617,7 @@ func TestHandleError(t *testing.T) {
countError: false, countError: false,
}, },
{ {
name: "object not found error, object not found status",
ctx: ctx, ctx: ctx,
status: new(apistatus.ObjectNotFound), status: new(apistatus.ObjectNotFound),
err: &apistatus.ObjectNotFound{}, err: &apistatus.ObjectNotFound{},
@ -617,6 +625,7 @@ func TestHandleError(t *testing.T) {
countError: false, countError: false,
}, },
{ {
name: "eacl not found error, no status",
ctx: ctx, ctx: ctx,
status: nil, status: nil,
err: &apistatus.EACLNotFound{}, err: &apistatus.EACLNotFound{},
@ -627,6 +636,7 @@ func TestHandleError(t *testing.T) {
countError: true, countError: true,
}, },
{ {
name: "no error, internal status",
ctx: ctx, ctx: ctx,
status: new(apistatus.ServerInternal), status: new(apistatus.ServerInternal),
err: nil, err: nil,
@ -634,6 +644,7 @@ func TestHandleError(t *testing.T) {
countError: true, countError: true,
}, },
{ {
name: "no error, wrong magic status",
ctx: ctx, ctx: ctx,
status: new(apistatus.WrongMagicNumber), status: new(apistatus.WrongMagicNumber),
err: nil, err: nil,
@ -641,6 +652,7 @@ func TestHandleError(t *testing.T) {
countError: true, countError: true,
}, },
{ {
name: "no error, signature verification status",
ctx: ctx, ctx: ctx,
status: new(apistatus.SignatureVerification), status: new(apistatus.SignatureVerification),
err: nil, err: nil,
@ -648,13 +660,25 @@ func TestHandleError(t *testing.T) {
countError: true, countError: true,
}, },
{ {
name: "no error, maintenance status",
ctx: ctx, ctx: ctx,
status: new(apistatus.NodeUnderMaintenance), status: new(apistatus.NodeUnderMaintenance),
err: nil, err: nil,
expectedError: true, expectedError: true,
countError: true, countError: true,
markedUnhealthy: true,
}, },
{ {
name: "maintenance error, no status",
ctx: ctx,
status: nil,
err: &apistatus.NodeUnderMaintenance{},
expectedError: true,
countError: true,
markedUnhealthy: true,
},
{
name: "no error, invalid argument status",
ctx: ctx, ctx: ctx,
status: new(apistatus.InvalidArgument), status: new(apistatus.InvalidArgument),
err: nil, err: nil,
@ -662,6 +686,7 @@ func TestHandleError(t *testing.T) {
countError: false, countError: false,
}, },
{ {
name: "context canceled error, no status",
ctx: canceledCtx, ctx: canceledCtx,
status: nil, status: nil,
err: errors.New("error"), err: errors.New("error"),
@ -669,8 +694,9 @@ func TestHandleError(t *testing.T) {
countError: false, countError: false,
}, },
} { } {
t.Run(strconv.Itoa(i), func(t *testing.T) { t.Run(tc.name, func(t *testing.T) {
errCount := monitor.currentErrorRate() monitor := newClientStatusMonitor(log, "", 10)
errCount := monitor.overallErrorRate()
err := monitor.handleError(tc.ctx, tc.status, tc.err) err := monitor.handleError(tc.ctx, tc.status, tc.err)
if tc.expectedError { if tc.expectedError {
require.Error(t, err) require.Error(t, err)
@ -680,7 +706,10 @@ func TestHandleError(t *testing.T) {
if tc.countError { if tc.countError {
errCount++ errCount++
} }
require.Equal(t, errCount, monitor.currentErrorRate()) require.Equal(t, errCount, monitor.overallErrorRate())
if tc.markedUnhealthy {
require.False(t, monitor.isHealthy())
}
}) })
} }
} }