node: React on SIGHUP only when node in READY state #748

Merged
fyrchik merged 1 commit from acid-ant/frostfs-node:bugfix/49-node-stuck-middle into master 2023-10-24 15:55:30 +00:00
6 changed files with 54 additions and 20 deletions

View file

@ -1040,7 +1040,6 @@ func (c *cfg) signalWatcher(ctx context.Context) {
c.reloadConfig(ctx)
case syscall.SIGTERM, syscall.SIGINT:
c.log.Info(logs.FrostFSNodeTerminationSignalHasBeenReceivedStopping)
// TODO (@acid-ant): #49 need to cover case when stuck at the middle(node health UNDEFINED or STARTING)
c.shutdown()
@ -1062,6 +1061,12 @@ func (c *cfg) signalWatcher(ctx context.Context) {
func (c *cfg) reloadConfig(ctx context.Context) {
c.log.Info(logs.FrostFSNodeSIGHUPHasBeenReceivedRereadingConfiguration)
if !c.compareAndSwapHealthStatus(control.HealthStatus_READY, control.HealthStatus_RECONFIGURING) {
c.log.Info(logs.FrostFSNodeSIGHUPSkip)
return
}
defer c.compareAndSwapHealthStatus(control.HealthStatus_RECONFIGURING, control.HealthStatus_READY)
err := c.readConfig(c.appCfg)
if err != nil {
c.log.Error(logs.FrostFSNodeConfigurationReading, zap.Error(err))
@ -1142,7 +1147,14 @@ func (c *cfg) createTombstoneSource() *tombstone.ExpirationChecker {
}
func (c *cfg) shutdown() {
c.setHealthStatus(control.HealthStatus_SHUTTING_DOWN)
old := c.swapHealthStatus(control.HealthStatus_SHUTTING_DOWN)
if old == control.HealthStatus_SHUTTING_DOWN {
c.log.Info(logs.FrostFSNodeShutdownSkip)
return
}
if old == control.HealthStatus_STARTING {
c.log.Warn(logs.FrostFSNodeShutdownWhenNotReady)
}
c.ctxCancel()
c.done <- struct{}{}

View file

@ -83,10 +83,20 @@ func (c *cfg) NetmapStatus() control.NetmapStatus {
func (c *cfg) setHealthStatus(st control.HealthStatus) {
c.healthStatus.Store(int32(st))
if c.metricsCollector != nil {
c.metricsCollector.State().SetHealth(int32(st))
}
func (c *cfg) compareAndSwapHealthStatus(oldSt, newSt control.HealthStatus) (swapped bool) {
if swapped = c.healthStatus.CompareAndSwap(int32(oldSt), int32(newSt)); swapped {
c.metricsCollector.State().SetHealth(int32(newSt))
}
return
}
func (c *cfg) swapHealthStatus(st control.HealthStatus) (old control.HealthStatus) {
old = control.HealthStatus(c.healthStatus.Swap(int32(st)))
c.metricsCollector.State().SetHealth(int32(st))
return
}
func (c *cfg) HealthStatus() control.HealthStatus {

Can metricsCollector be nil?

Can `metricsCollector` be nil?

In theory yes, but with this method calling it is already initialized.

In theory yes, but with this method calling it is already initialized.

We already use it in multiple places without nil check and, as you said, it is always initialized, let's drop the nil check?

We already use it in multiple places without nil check and, as you said, it is always initialized, let's drop the nil check?

Agree, removed.

Agree, removed.

View file

@ -60,13 +60,13 @@ func main() {
var ctx context.Context
ctx, c.ctxCancel = context.WithCancel(context.Background())
initApp(ctx, c)
c.setHealthStatus(control.HealthStatus_STARTING)
initApp(ctx, c)
bootUp(ctx, c)
c.setHealthStatus(control.HealthStatus_READY)
c.compareAndSwapHealthStatus(control.HealthStatus_STARTING, control.HealthStatus_READY)
wait(c)
}

View file

@ -436,6 +436,9 @@ const (
FrostFSNodeInternalApplicationError = "internal application error"
FrostFSNodeInternalErrorProcessingIsComplete = "internal error processing is complete"
FrostFSNodeSIGHUPHasBeenReceivedRereadingConfiguration = "SIGHUP has been received, rereading configuration..."
FrostFSNodeSIGHUPSkip = "node not ready for reconfiguration, skipped SIGHUP"
FrostFSNodeShutdownSkip = "node already is going to shutting down, skipped shutdown"
FrostFSNodeShutdownWhenNotReady = "node is going to shutting down when subsystems still initializing"
FrostFSNodeConfigurationReading = "configuration reading"
FrostFSNodeLoggerConfigurationPreparation = "logger configuration preparation"
FrostFSNodeTracingConfigationUpdated = "tracing configation updated"

View file

@ -89,6 +89,8 @@ const (
HealthStatus_READY HealthStatus = 2
// Storage node application is shutting down.
HealthStatus_SHUTTING_DOWN HealthStatus = 3
// Storage node application is reconfiguring.
HealthStatus_RECONFIGURING HealthStatus = 4
)
// Enum value maps for HealthStatus.
@ -98,12 +100,14 @@ var (
1: "STARTING",
2: "READY",
3: "SHUTTING_DOWN",
4: "RECONFIGURING",
}
HealthStatus_value = map[string]int32{
"HEALTH_STATUS_UNDEFINED": 0,
"STARTING": 1,
"READY": 2,
"SHUTTING_DOWN": 3,
"RECONFIGURING": 4,
}
)
@ -709,24 +713,26 @@ var file_pkg_services_control_types_proto_rawDesc = []byte{
0x0a, 0x10, 0x53, 0x54, 0x41, 0x54, 0x55, 0x53, 0x5f, 0x55, 0x4e, 0x44, 0x45, 0x46, 0x49, 0x4e,
0x45, 0x44, 0x10, 0x00, 0x12, 0x0a, 0x0a, 0x06, 0x4f, 0x4e, 0x4c, 0x49, 0x4e, 0x45, 0x10, 0x01,
0x12, 0x0b, 0x0a, 0x07, 0x4f, 0x46, 0x46, 0x4c, 0x49, 0x4e, 0x45, 0x10, 0x02, 0x12, 0x0f, 0x0a,
0x0b, 0x4d, 0x41, 0x49, 0x4e, 0x54, 0x45, 0x4e, 0x41, 0x4e, 0x43, 0x45, 0x10, 0x03, 0x2a, 0x57,
0x0b, 0x4d, 0x41, 0x49, 0x4e, 0x54, 0x45, 0x4e, 0x41, 0x4e, 0x43, 0x45, 0x10, 0x03, 0x2a, 0x6a,
0x0a, 0x0c, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x1b,
0x0a, 0x17, 0x48, 0x45, 0x41, 0x4c, 0x54, 0x48, 0x5f, 0x53, 0x54, 0x41, 0x54, 0x55, 0x53, 0x5f,
0x55, 0x4e, 0x44, 0x45, 0x46, 0x49, 0x4e, 0x45, 0x44, 0x10, 0x00, 0x12, 0x0c, 0x0a, 0x08, 0x53,
0x54, 0x41, 0x52, 0x54, 0x49, 0x4e, 0x47, 0x10, 0x01, 0x12, 0x09, 0x0a, 0x05, 0x52, 0x45, 0x41,
0x44, 0x59, 0x10, 0x02, 0x12, 0x11, 0x0a, 0x0d, 0x53, 0x48, 0x55, 0x54, 0x54, 0x49, 0x4e, 0x47,
0x5f, 0x44, 0x4f, 0x57, 0x4e, 0x10, 0x03, 0x2a, 0x6a, 0x0a, 0x09, 0x53, 0x68, 0x61, 0x72, 0x64,
0x4d, 0x6f, 0x64, 0x65, 0x12, 0x18, 0x0a, 0x14, 0x53, 0x48, 0x41, 0x52, 0x44, 0x5f, 0x4d, 0x4f,
0x44, 0x45, 0x5f, 0x55, 0x4e, 0x44, 0x45, 0x46, 0x49, 0x4e, 0x45, 0x44, 0x10, 0x00, 0x12, 0x0e,
0x0a, 0x0a, 0x52, 0x45, 0x41, 0x44, 0x5f, 0x57, 0x52, 0x49, 0x54, 0x45, 0x10, 0x01, 0x12, 0x0d,
0x0a, 0x09, 0x52, 0x45, 0x41, 0x44, 0x5f, 0x4f, 0x4e, 0x4c, 0x59, 0x10, 0x02, 0x12, 0x0c, 0x0a,
0x08, 0x44, 0x45, 0x47, 0x52, 0x41, 0x44, 0x45, 0x44, 0x10, 0x03, 0x12, 0x16, 0x0a, 0x12, 0x44,
0x45, 0x47, 0x52, 0x41, 0x44, 0x45, 0x44, 0x5f, 0x52, 0x45, 0x41, 0x44, 0x5f, 0x4f, 0x4e, 0x4c,
0x59, 0x10, 0x04, 0x42, 0x41, 0x5a, 0x3f, 0x67, 0x69, 0x74, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74,
0x66, 0x73, 0x2e, 0x69, 0x6e, 0x66, 0x6f, 0x2f, 0x54, 0x72, 0x75, 0x65, 0x43, 0x6c, 0x6f, 0x75,
0x64, 0x4c, 0x61, 0x62, 0x2f, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x66, 0x73, 0x2d, 0x6e, 0x6f, 0x64,
0x65, 0x2f, 0x70, 0x6b, 0x67, 0x2f, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x73, 0x2f, 0x63,
0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33,
0x5f, 0x44, 0x4f, 0x57, 0x4e, 0x10, 0x03, 0x12, 0x11, 0x0a, 0x0d, 0x52, 0x45, 0x43, 0x4f, 0x4e,
0x46, 0x49, 0x47, 0x55, 0x52, 0x49, 0x4e, 0x47, 0x10, 0x04, 0x2a, 0x6a, 0x0a, 0x09, 0x53, 0x68,
0x61, 0x72, 0x64, 0x4d, 0x6f, 0x64, 0x65, 0x12, 0x18, 0x0a, 0x14, 0x53, 0x48, 0x41, 0x52, 0x44,
0x5f, 0x4d, 0x4f, 0x44, 0x45, 0x5f, 0x55, 0x4e, 0x44, 0x45, 0x46, 0x49, 0x4e, 0x45, 0x44, 0x10,
0x00, 0x12, 0x0e, 0x0a, 0x0a, 0x52, 0x45, 0x41, 0x44, 0x5f, 0x57, 0x52, 0x49, 0x54, 0x45, 0x10,
0x01, 0x12, 0x0d, 0x0a, 0x09, 0x52, 0x45, 0x41, 0x44, 0x5f, 0x4f, 0x4e, 0x4c, 0x59, 0x10, 0x02,
0x12, 0x0c, 0x0a, 0x08, 0x44, 0x45, 0x47, 0x52, 0x41, 0x44, 0x45, 0x44, 0x10, 0x03, 0x12, 0x16,
0x0a, 0x12, 0x44, 0x45, 0x47, 0x52, 0x41, 0x44, 0x45, 0x44, 0x5f, 0x52, 0x45, 0x41, 0x44, 0x5f,
0x4f, 0x4e, 0x4c, 0x59, 0x10, 0x04, 0x42, 0x41, 0x5a, 0x3f, 0x67, 0x69, 0x74, 0x2e, 0x66, 0x72,
0x6f, 0x73, 0x74, 0x66, 0x73, 0x2e, 0x69, 0x6e, 0x66, 0x6f, 0x2f, 0x54, 0x72, 0x75, 0x65, 0x43,
0x6c, 0x6f, 0x75, 0x64, 0x4c, 0x61, 0x62, 0x2f, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x66, 0x73, 0x2d,
0x6e, 0x6f, 0x64, 0x65, 0x2f, 0x70, 0x6b, 0x67, 0x2f, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65,
0x73, 0x2f, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f,
0x33,
}
var (

View file

@ -115,6 +115,9 @@ enum HealthStatus {
// Storage node application is shutting down.
SHUTTING_DOWN = 3;
// Storage node application is reconfiguring.
RECONFIGURING = 4;
}
// Shard description.