From b0cf100427dc9d813ac0fc4facd8412f6399b370 Mon Sep 17 00:00:00 2001 From: Anton Nikiforov Date: Fri, 20 Oct 2023 09:52:25 +0300 Subject: [PATCH] [#49] node: React on SIGHUP only when node in READY state Add more info in logs when node is going to shut down, but initialization process still in progress. Signed-off-by: Anton Nikiforov --- cmd/frostfs-node/config.go | 16 ++++++++++++++-- cmd/frostfs-node/control.go | 14 ++++++++++++-- cmd/frostfs-node/main.go | 6 +++--- internal/logs/logs.go | 3 +++ pkg/services/control/types.pb.go | 32 +++++++++++++++++++------------- pkg/services/control/types.proto | 3 +++ 6 files changed, 54 insertions(+), 20 deletions(-) diff --git a/cmd/frostfs-node/config.go b/cmd/frostfs-node/config.go index b35200d2..8a731723 100644 --- a/cmd/frostfs-node/config.go +++ b/cmd/frostfs-node/config.go @@ -1040,7 +1040,6 @@ func (c *cfg) signalWatcher(ctx context.Context) { c.reloadConfig(ctx) case syscall.SIGTERM, syscall.SIGINT: c.log.Info(logs.FrostFSNodeTerminationSignalHasBeenReceivedStopping) - // TODO (@acid-ant): #49 need to cover case when stuck at the middle(node health UNDEFINED or STARTING) c.shutdown() @@ -1062,6 +1061,12 @@ func (c *cfg) signalWatcher(ctx context.Context) { func (c *cfg) reloadConfig(ctx context.Context) { c.log.Info(logs.FrostFSNodeSIGHUPHasBeenReceivedRereadingConfiguration) + if !c.compareAndSwapHealthStatus(control.HealthStatus_READY, control.HealthStatus_RECONFIGURING) { + c.log.Info(logs.FrostFSNodeSIGHUPSkip) + return + } + defer c.compareAndSwapHealthStatus(control.HealthStatus_RECONFIGURING, control.HealthStatus_READY) + err := c.readConfig(c.appCfg) if err != nil { c.log.Error(logs.FrostFSNodeConfigurationReading, zap.Error(err)) @@ -1142,7 +1147,14 @@ func (c *cfg) createTombstoneSource() *tombstone.ExpirationChecker { } func (c *cfg) shutdown() { - c.setHealthStatus(control.HealthStatus_SHUTTING_DOWN) + old := c.swapHealthStatus(control.HealthStatus_SHUTTING_DOWN) + if old == control.HealthStatus_SHUTTING_DOWN { + c.log.Info(logs.FrostFSNodeShutdownSkip) + return + } + if old == control.HealthStatus_STARTING { + c.log.Warn(logs.FrostFSNodeShutdownWhenNotReady) + } c.ctxCancel() c.done <- struct{}{} diff --git a/cmd/frostfs-node/control.go b/cmd/frostfs-node/control.go index 787fe335..98d893c3 100644 --- a/cmd/frostfs-node/control.go +++ b/cmd/frostfs-node/control.go @@ -83,10 +83,20 @@ func (c *cfg) NetmapStatus() control.NetmapStatus { func (c *cfg) setHealthStatus(st control.HealthStatus) { c.healthStatus.Store(int32(st)) + c.metricsCollector.State().SetHealth(int32(st)) +} - if c.metricsCollector != nil { - c.metricsCollector.State().SetHealth(int32(st)) +func (c *cfg) compareAndSwapHealthStatus(oldSt, newSt control.HealthStatus) (swapped bool) { + if swapped = c.healthStatus.CompareAndSwap(int32(oldSt), int32(newSt)); swapped { + c.metricsCollector.State().SetHealth(int32(newSt)) } + return +} + +func (c *cfg) swapHealthStatus(st control.HealthStatus) (old control.HealthStatus) { + old = control.HealthStatus(c.healthStatus.Swap(int32(st))) + c.metricsCollector.State().SetHealth(int32(st)) + return } func (c *cfg) HealthStatus() control.HealthStatus { diff --git a/cmd/frostfs-node/main.go b/cmd/frostfs-node/main.go index 88032ebd..2791efce 100644 --- a/cmd/frostfs-node/main.go +++ b/cmd/frostfs-node/main.go @@ -60,13 +60,13 @@ func main() { var ctx context.Context ctx, c.ctxCancel = context.WithCancel(context.Background()) - initApp(ctx, c) - c.setHealthStatus(control.HealthStatus_STARTING) + initApp(ctx, c) + bootUp(ctx, c) - c.setHealthStatus(control.HealthStatus_READY) + c.compareAndSwapHealthStatus(control.HealthStatus_STARTING, control.HealthStatus_READY) wait(c) } diff --git a/internal/logs/logs.go b/internal/logs/logs.go index 56312f83..f8d6cc9b 100644 --- a/internal/logs/logs.go +++ b/internal/logs/logs.go @@ -436,6 +436,9 @@ const ( FrostFSNodeInternalApplicationError = "internal application error" FrostFSNodeInternalErrorProcessingIsComplete = "internal error processing is complete" FrostFSNodeSIGHUPHasBeenReceivedRereadingConfiguration = "SIGHUP has been received, rereading configuration..." + FrostFSNodeSIGHUPSkip = "node not ready for reconfiguration, skipped SIGHUP" + FrostFSNodeShutdownSkip = "node already is going to shutting down, skipped shutdown" + FrostFSNodeShutdownWhenNotReady = "node is going to shutting down when subsystems still initializing" FrostFSNodeConfigurationReading = "configuration reading" FrostFSNodeLoggerConfigurationPreparation = "logger configuration preparation" FrostFSNodeTracingConfigationUpdated = "tracing configation updated" diff --git a/pkg/services/control/types.pb.go b/pkg/services/control/types.pb.go index d2ee5077..b385bc67 100644 --- a/pkg/services/control/types.pb.go +++ b/pkg/services/control/types.pb.go @@ -89,6 +89,8 @@ const ( HealthStatus_READY HealthStatus = 2 // Storage node application is shutting down. HealthStatus_SHUTTING_DOWN HealthStatus = 3 + // Storage node application is reconfiguring. + HealthStatus_RECONFIGURING HealthStatus = 4 ) // Enum value maps for HealthStatus. @@ -98,12 +100,14 @@ var ( 1: "STARTING", 2: "READY", 3: "SHUTTING_DOWN", + 4: "RECONFIGURING", } HealthStatus_value = map[string]int32{ "HEALTH_STATUS_UNDEFINED": 0, "STARTING": 1, "READY": 2, "SHUTTING_DOWN": 3, + "RECONFIGURING": 4, } ) @@ -709,24 +713,26 @@ var file_pkg_services_control_types_proto_rawDesc = []byte{ 0x0a, 0x10, 0x53, 0x54, 0x41, 0x54, 0x55, 0x53, 0x5f, 0x55, 0x4e, 0x44, 0x45, 0x46, 0x49, 0x4e, 0x45, 0x44, 0x10, 0x00, 0x12, 0x0a, 0x0a, 0x06, 0x4f, 0x4e, 0x4c, 0x49, 0x4e, 0x45, 0x10, 0x01, 0x12, 0x0b, 0x0a, 0x07, 0x4f, 0x46, 0x46, 0x4c, 0x49, 0x4e, 0x45, 0x10, 0x02, 0x12, 0x0f, 0x0a, - 0x0b, 0x4d, 0x41, 0x49, 0x4e, 0x54, 0x45, 0x4e, 0x41, 0x4e, 0x43, 0x45, 0x10, 0x03, 0x2a, 0x57, + 0x0b, 0x4d, 0x41, 0x49, 0x4e, 0x54, 0x45, 0x4e, 0x41, 0x4e, 0x43, 0x45, 0x10, 0x03, 0x2a, 0x6a, 0x0a, 0x0c, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x1b, 0x0a, 0x17, 0x48, 0x45, 0x41, 0x4c, 0x54, 0x48, 0x5f, 0x53, 0x54, 0x41, 0x54, 0x55, 0x53, 0x5f, 0x55, 0x4e, 0x44, 0x45, 0x46, 0x49, 0x4e, 0x45, 0x44, 0x10, 0x00, 0x12, 0x0c, 0x0a, 0x08, 0x53, 0x54, 0x41, 0x52, 0x54, 0x49, 0x4e, 0x47, 0x10, 0x01, 0x12, 0x09, 0x0a, 0x05, 0x52, 0x45, 0x41, 0x44, 0x59, 0x10, 0x02, 0x12, 0x11, 0x0a, 0x0d, 0x53, 0x48, 0x55, 0x54, 0x54, 0x49, 0x4e, 0x47, - 0x5f, 0x44, 0x4f, 0x57, 0x4e, 0x10, 0x03, 0x2a, 0x6a, 0x0a, 0x09, 0x53, 0x68, 0x61, 0x72, 0x64, - 0x4d, 0x6f, 0x64, 0x65, 0x12, 0x18, 0x0a, 0x14, 0x53, 0x48, 0x41, 0x52, 0x44, 0x5f, 0x4d, 0x4f, - 0x44, 0x45, 0x5f, 0x55, 0x4e, 0x44, 0x45, 0x46, 0x49, 0x4e, 0x45, 0x44, 0x10, 0x00, 0x12, 0x0e, - 0x0a, 0x0a, 0x52, 0x45, 0x41, 0x44, 0x5f, 0x57, 0x52, 0x49, 0x54, 0x45, 0x10, 0x01, 0x12, 0x0d, - 0x0a, 0x09, 0x52, 0x45, 0x41, 0x44, 0x5f, 0x4f, 0x4e, 0x4c, 0x59, 0x10, 0x02, 0x12, 0x0c, 0x0a, - 0x08, 0x44, 0x45, 0x47, 0x52, 0x41, 0x44, 0x45, 0x44, 0x10, 0x03, 0x12, 0x16, 0x0a, 0x12, 0x44, - 0x45, 0x47, 0x52, 0x41, 0x44, 0x45, 0x44, 0x5f, 0x52, 0x45, 0x41, 0x44, 0x5f, 0x4f, 0x4e, 0x4c, - 0x59, 0x10, 0x04, 0x42, 0x41, 0x5a, 0x3f, 0x67, 0x69, 0x74, 0x2e, 0x66, 0x72, 0x6f, 0x73, 0x74, - 0x66, 0x73, 0x2e, 0x69, 0x6e, 0x66, 0x6f, 0x2f, 0x54, 0x72, 0x75, 0x65, 0x43, 0x6c, 0x6f, 0x75, - 0x64, 0x4c, 0x61, 0x62, 0x2f, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x66, 0x73, 0x2d, 0x6e, 0x6f, 0x64, - 0x65, 0x2f, 0x70, 0x6b, 0x67, 0x2f, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x73, 0x2f, 0x63, - 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x5f, 0x44, 0x4f, 0x57, 0x4e, 0x10, 0x03, 0x12, 0x11, 0x0a, 0x0d, 0x52, 0x45, 0x43, 0x4f, 0x4e, + 0x46, 0x49, 0x47, 0x55, 0x52, 0x49, 0x4e, 0x47, 0x10, 0x04, 0x2a, 0x6a, 0x0a, 0x09, 0x53, 0x68, + 0x61, 0x72, 0x64, 0x4d, 0x6f, 0x64, 0x65, 0x12, 0x18, 0x0a, 0x14, 0x53, 0x48, 0x41, 0x52, 0x44, + 0x5f, 0x4d, 0x4f, 0x44, 0x45, 0x5f, 0x55, 0x4e, 0x44, 0x45, 0x46, 0x49, 0x4e, 0x45, 0x44, 0x10, + 0x00, 0x12, 0x0e, 0x0a, 0x0a, 0x52, 0x45, 0x41, 0x44, 0x5f, 0x57, 0x52, 0x49, 0x54, 0x45, 0x10, + 0x01, 0x12, 0x0d, 0x0a, 0x09, 0x52, 0x45, 0x41, 0x44, 0x5f, 0x4f, 0x4e, 0x4c, 0x59, 0x10, 0x02, + 0x12, 0x0c, 0x0a, 0x08, 0x44, 0x45, 0x47, 0x52, 0x41, 0x44, 0x45, 0x44, 0x10, 0x03, 0x12, 0x16, + 0x0a, 0x12, 0x44, 0x45, 0x47, 0x52, 0x41, 0x44, 0x45, 0x44, 0x5f, 0x52, 0x45, 0x41, 0x44, 0x5f, + 0x4f, 0x4e, 0x4c, 0x59, 0x10, 0x04, 0x42, 0x41, 0x5a, 0x3f, 0x67, 0x69, 0x74, 0x2e, 0x66, 0x72, + 0x6f, 0x73, 0x74, 0x66, 0x73, 0x2e, 0x69, 0x6e, 0x66, 0x6f, 0x2f, 0x54, 0x72, 0x75, 0x65, 0x43, + 0x6c, 0x6f, 0x75, 0x64, 0x4c, 0x61, 0x62, 0x2f, 0x66, 0x72, 0x6f, 0x73, 0x74, 0x66, 0x73, 0x2d, + 0x6e, 0x6f, 0x64, 0x65, 0x2f, 0x70, 0x6b, 0x67, 0x2f, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, + 0x73, 0x2f, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, + 0x33, } var ( diff --git a/pkg/services/control/types.proto b/pkg/services/control/types.proto index 5b484458..b34988c2 100644 --- a/pkg/services/control/types.proto +++ b/pkg/services/control/types.proto @@ -115,6 +115,9 @@ enum HealthStatus { // Storage node application is shutting down. SHUTTING_DOWN = 3; + + // Storage node application is reconfiguring. + RECONFIGURING = 4; } // Shard description.