diff --git a/cmd/frostfs-node/config.go b/cmd/frostfs-node/config.go index b35200d27..8a7317231 100644 --- a/cmd/frostfs-node/config.go +++ b/cmd/frostfs-node/config.go @@ -1040,7 +1040,6 @@ func (c *cfg) signalWatcher(ctx context.Context) { c.reloadConfig(ctx) case syscall.SIGTERM, syscall.SIGINT: c.log.Info(logs.FrostFSNodeTerminationSignalHasBeenReceivedStopping) - // TODO (@acid-ant): #49 need to cover case when stuck at the middle(node health UNDEFINED or STARTING) c.shutdown() @@ -1062,6 +1061,12 @@ func (c *cfg) signalWatcher(ctx context.Context) { func (c *cfg) reloadConfig(ctx context.Context) { c.log.Info(logs.FrostFSNodeSIGHUPHasBeenReceivedRereadingConfiguration) + if !c.compareAndSwapHealthStatus(control.HealthStatus_READY, control.HealthStatus_RECONFIGURING) { + c.log.Info(logs.FrostFSNodeSIGHUPSkip) + return + } + defer c.compareAndSwapHealthStatus(control.HealthStatus_RECONFIGURING, control.HealthStatus_READY) + err := c.readConfig(c.appCfg) if err != nil { c.log.Error(logs.FrostFSNodeConfigurationReading, zap.Error(err)) @@ -1142,7 +1147,14 @@ func (c *cfg) createTombstoneSource() *tombstone.ExpirationChecker { } func (c *cfg) shutdown() { - c.setHealthStatus(control.HealthStatus_SHUTTING_DOWN) + old := c.swapHealthStatus(control.HealthStatus_SHUTTING_DOWN) + if old == control.HealthStatus_SHUTTING_DOWN { + c.log.Info(logs.FrostFSNodeShutdownSkip) + return + } + if old == control.HealthStatus_STARTING { + c.log.Warn(logs.FrostFSNodeShutdownWhenNotReady) + } c.ctxCancel() c.done <- struct{}{} diff --git a/cmd/frostfs-node/control.go b/cmd/frostfs-node/control.go index 787fe3351..98d893c38 100644 --- a/cmd/frostfs-node/control.go +++ b/cmd/frostfs-node/control.go @@ -83,10 +83,20 @@ func (c *cfg) NetmapStatus() control.NetmapStatus { func (c *cfg) setHealthStatus(st control.HealthStatus) { c.healthStatus.Store(int32(st)) + c.metricsCollector.State().SetHealth(int32(st)) +} - if c.metricsCollector != nil { - 
c.metricsCollector.State().SetHealth(int32(st)) +func (c *cfg) compareAndSwapHealthStatus(oldSt, newSt control.HealthStatus) (swapped bool) { + if swapped = c.healthStatus.CompareAndSwap(int32(oldSt), int32(newSt)); swapped { + c.metricsCollector.State().SetHealth(int32(newSt)) } + return +} + +func (c *cfg) swapHealthStatus(st control.HealthStatus) (old control.HealthStatus) { + old = control.HealthStatus(c.healthStatus.Swap(int32(st))) + c.metricsCollector.State().SetHealth(int32(st)) + return } func (c *cfg) HealthStatus() control.HealthStatus { diff --git a/cmd/frostfs-node/main.go b/cmd/frostfs-node/main.go index 88032ebdb..2791efcef 100644 --- a/cmd/frostfs-node/main.go +++ b/cmd/frostfs-node/main.go @@ -60,13 +60,13 @@ func main() { var ctx context.Context ctx, c.ctxCancel = context.WithCancel(context.Background()) - initApp(ctx, c) - c.setHealthStatus(control.HealthStatus_STARTING) + initApp(ctx, c) + bootUp(ctx, c) - c.setHealthStatus(control.HealthStatus_READY) + c.compareAndSwapHealthStatus(control.HealthStatus_STARTING, control.HealthStatus_READY) wait(c) } diff --git a/internal/logs/logs.go b/internal/logs/logs.go index 56312f83f..f8d6cc9b4 100644 --- a/internal/logs/logs.go +++ b/internal/logs/logs.go @@ -436,6 +436,9 @@ const ( FrostFSNodeInternalApplicationError = "internal application error" FrostFSNodeInternalErrorProcessingIsComplete = "internal error processing is complete" FrostFSNodeSIGHUPHasBeenReceivedRereadingConfiguration = "SIGHUP has been received, rereading configuration..." 
+ FrostFSNodeSIGHUPSkip = "node not ready for reconfiguration, skipped SIGHUP" + FrostFSNodeShutdownSkip = "node is already shutting down, skipped shutdown" + FrostFSNodeShutdownWhenNotReady = "node is shutting down while subsystems are still initializing" FrostFSNodeConfigurationReading = "configuration reading" FrostFSNodeLoggerConfigurationPreparation = "logger configuration preparation" FrostFSNodeTracingConfigationUpdated = "tracing configation updated" diff --git a/pkg/services/control/types.pb.go b/pkg/services/control/types.pb.go index d2ee50770..b385bc674 100644 Binary files a/pkg/services/control/types.pb.go and b/pkg/services/control/types.pb.go differ diff --git a/pkg/services/control/types.proto b/pkg/services/control/types.proto index 5b4844580..b34988c2e 100644 --- a/pkg/services/control/types.proto +++ b/pkg/services/control/types.proto @@ -115,6 +115,9 @@ enum HealthStatus { // Storage node application is shutting down. SHUTTING_DOWN = 3; + + // Storage node application is reconfiguring. + RECONFIGURING = 4; } // Shard description.