Change shard mode asynchronously in case of errors #1086
2 changed files with 19 additions and 21 deletions
|
@ -133,7 +133,7 @@ func (e *StorageEngine) reportShardErrorBackground(id string, msg string, err er
|
||||||
|
|
||||||
errCount := sh.errorCount.Add(1)
|
errCount := sh.errorCount.Add(1)
|
||||||
sh.Shard.IncErrorCounter()
|
sh.Shard.IncErrorCounter()
|
||||||
e.reportShardErrorWithFlags(sh.Shard, errCount, false, msg, err)
|
e.reportShardErrorWithFlags(sh.Shard, errCount, msg, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// reportShardError checks that the amount of errors doesn't exceed the configured threshold.
|
// reportShardError checks that the amount of errors doesn't exceed the configured threshold.
|
||||||
|
@ -153,13 +153,12 @@ func (e *StorageEngine) reportShardError(
|
||||||
|
|
||||||
errCount := sh.errorCount.Add(1)
|
errCount := sh.errorCount.Add(1)
|
||||||
sh.Shard.IncErrorCounter()
|
sh.Shard.IncErrorCounter()
|
||||||
e.reportShardErrorWithFlags(sh.Shard, errCount, true, msg, err, fields...)
|
e.reportShardErrorWithFlags(sh.Shard, errCount, msg, err, fields...)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (e *StorageEngine) reportShardErrorWithFlags(
|
func (e *StorageEngine) reportShardErrorWithFlags(
|
||||||
sh *shard.Shard,
|
sh *shard.Shard,
|
||||||
errCount uint32,
|
errCount uint32,
|
||||||
block bool,
|
|
||||||
msg string,
|
msg string,
|
||||||
err error,
|
err error,
|
||||||
fields ...zap.Field,
|
fields ...zap.Field,
|
||||||
|
@ -175,23 +174,19 @@ func (e *StorageEngine) reportShardErrorWithFlags(
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
if block {
|
req := setModeRequest{
|
||||||
e.moveToDegraded(sh, errCount)
|
errorCount: errCount,
|
||||||
} else {
|
sh: sh,
|
||||||
req := setModeRequest{
|
}
|
||||||
errorCount: errCount,
|
|
||||||
sh: sh,
|
|
||||||
}
|
|
||||||
|
|
||||||
select {
|
select {
|
||||||
case e.setModeCh <- req:
|
case e.setModeCh <- req:
|
||||||
default:
|
default:
|
||||||
// For background workers we can have a lot of such errors,
|
// For background workers we can have a lot of such errors,
|
||||||
// thus logging is done with DEBUG level.
|
// thus logging is done with DEBUG level.
|
||||||
e.log.Debug(logs.EngineModeChangeIsInProgressIgnoringSetmodeRequest,
|
e.log.Debug(logs.EngineModeChangeIsInProgressIgnoringSetmodeRequest,
|
||||||
zap.Stringer("shard_id", sid),
|
zap.Stringer("shard_id", sid),
|
||||||
zap.Uint32("error_count", errCount))
|
zap.Uint32("error_count", errCount))
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -7,6 +7,7 @@ import (
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strconv"
|
"strconv"
|
||||||
"testing"
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/core/object"
|
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/core/object"
|
||||||
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/blobstor"
|
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/local_object_storage/blobstor"
|
||||||
|
@ -229,6 +230,8 @@ func checkShardState(t *testing.T, e *StorageEngine, id *shard.ID, errCount uint
|
||||||
sh := e.shards[id.String()]
|
sh := e.shards[id.String()]
|
||||||
e.mtx.RUnlock()
|
e.mtx.RUnlock()
|
||||||
|
|
||||||
require.Equal(t, errCount, sh.errorCount.Load())
|
require.Eventually(t, func() bool {
|
||||||
require.Equal(t, mode, sh.GetMode())
|
return errCount == sh.errorCount.Load() &&
|
||||||
|
mode == sh.GetMode()
|
||||||
|
}, 10*time.Second, 10*time.Millisecond, "shard mode doesn't changed to expected state in 10 seconds")
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue