From b0cf100427dc9d813ac0fc4facd8412f6399b370 Mon Sep 17 00:00:00 2001 From: Anton Nikiforov Date: Fri, 20 Oct 2023 09:52:25 +0300 Subject: [PATCH] [#49] node: React to SIGHUP only when node is in READY state Add more info to the logs when the node is going to shut down but the initialization process is still in progress. Signed-off-by: Anton Nikiforov --- cmd/frostfs-node/config.go | 16 ++++++++++++++-- cmd/frostfs-node/control.go | 14 ++++++++++++-- cmd/frostfs-node/main.go | 6 +++--- internal/logs/logs.go | 3 +++ pkg/services/control/types.pb.go | Bin 29301 -> 29563 bytes pkg/services/control/types.proto | 3 +++ 6 files changed, 35 insertions(+), 7 deletions(-) diff --git a/cmd/frostfs-node/config.go b/cmd/frostfs-node/config.go index b35200d27..8a7317231 100644 --- a/cmd/frostfs-node/config.go +++ b/cmd/frostfs-node/config.go @@ -1040,7 +1040,6 @@ func (c *cfg) signalWatcher(ctx context.Context) { c.reloadConfig(ctx) case syscall.SIGTERM, syscall.SIGINT: c.log.Info(logs.FrostFSNodeTerminationSignalHasBeenReceivedStopping) - // TODO (@acid-ant): #49 need to cover case when stuck at the middle(node health UNDEFINED or STARTING) c.shutdown() @@ -1062,6 +1061,12 @@ func (c *cfg) signalWatcher(ctx context.Context) { func (c *cfg) reloadConfig(ctx context.Context) { c.log.Info(logs.FrostFSNodeSIGHUPHasBeenReceivedRereadingConfiguration) + if !c.compareAndSwapHealthStatus(control.HealthStatus_READY, control.HealthStatus_RECONFIGURING) { + c.log.Info(logs.FrostFSNodeSIGHUPSkip) + return + } + defer c.compareAndSwapHealthStatus(control.HealthStatus_RECONFIGURING, control.HealthStatus_READY) + err := c.readConfig(c.appCfg) if err != nil { c.log.Error(logs.FrostFSNodeConfigurationReading, zap.Error(err)) @@ -1142,7 +1147,14 @@ func (c *cfg) createTombstoneSource() *tombstone.ExpirationChecker { } func (c *cfg) shutdown() { - c.setHealthStatus(control.HealthStatus_SHUTTING_DOWN) + old := c.swapHealthStatus(control.HealthStatus_SHUTTING_DOWN) + if old == control.HealthStatus_SHUTTING_DOWN { 
+ c.log.Info(logs.FrostFSNodeShutdownSkip) + return + } + if old == control.HealthStatus_STARTING { + c.log.Warn(logs.FrostFSNodeShutdownWhenNotReady) + } c.ctxCancel() c.done <- struct{}{} diff --git a/cmd/frostfs-node/control.go b/cmd/frostfs-node/control.go index 787fe3351..98d893c38 100644 --- a/cmd/frostfs-node/control.go +++ b/cmd/frostfs-node/control.go @@ -83,10 +83,20 @@ func (c *cfg) NetmapStatus() control.NetmapStatus { func (c *cfg) setHealthStatus(st control.HealthStatus) { c.healthStatus.Store(int32(st)) + c.metricsCollector.State().SetHealth(int32(st)) +} - if c.metricsCollector != nil { - c.metricsCollector.State().SetHealth(int32(st)) +func (c *cfg) compareAndSwapHealthStatus(oldSt, newSt control.HealthStatus) (swapped bool) { + if swapped = c.healthStatus.CompareAndSwap(int32(oldSt), int32(newSt)); swapped { + c.metricsCollector.State().SetHealth(int32(newSt)) } + return +} + +func (c *cfg) swapHealthStatus(st control.HealthStatus) (old control.HealthStatus) { + old = control.HealthStatus(c.healthStatus.Swap(int32(st))) + c.metricsCollector.State().SetHealth(int32(st)) + return } func (c *cfg) HealthStatus() control.HealthStatus { diff --git a/cmd/frostfs-node/main.go b/cmd/frostfs-node/main.go index 88032ebdb..2791efcef 100644 --- a/cmd/frostfs-node/main.go +++ b/cmd/frostfs-node/main.go @@ -60,13 +60,13 @@ func main() { var ctx context.Context ctx, c.ctxCancel = context.WithCancel(context.Background()) - initApp(ctx, c) - c.setHealthStatus(control.HealthStatus_STARTING) + initApp(ctx, c) + bootUp(ctx, c) - c.setHealthStatus(control.HealthStatus_READY) + c.compareAndSwapHealthStatus(control.HealthStatus_STARTING, control.HealthStatus_READY) wait(c) } diff --git a/internal/logs/logs.go b/internal/logs/logs.go index 56312f83f..f8d6cc9b4 100644 --- a/internal/logs/logs.go +++ b/internal/logs/logs.go @@ -436,6 +436,9 @@ const ( FrostFSNodeInternalApplicationError = "internal application error" FrostFSNodeInternalErrorProcessingIsComplete = "internal 
error processing is complete" FrostFSNodeSIGHUPHasBeenReceivedRereadingConfiguration = "SIGHUP has been received, rereading configuration..." + FrostFSNodeSIGHUPSkip = "node not ready for reconfiguration, skipped SIGHUP" + FrostFSNodeShutdownSkip = "node already is going to shutting down, skipped shutdown" + FrostFSNodeShutdownWhenNotReady = "node is going to shutting down when subsystems still initializing" FrostFSNodeConfigurationReading = "configuration reading" FrostFSNodeLoggerConfigurationPreparation = "logger configuration preparation" FrostFSNodeTracingConfigationUpdated = "tracing configation updated" diff --git a/pkg/services/control/types.pb.go b/pkg/services/control/types.pb.go index d2ee5077088af444612040e2a40284323c8415aa..b385bc67413821593fa5b6c67e28963b066d2606 100644 GIT binary patch delta 497 zcmezRgz@(?#tjQOC*S8#ur5kX&d*EBOfN0U%uCnf;`B&O%qhtTE=epYEshUzb@umj z^K=gl^7M08K$cRlRWO;nfOCxyr-_w<5{kOX9$f2XIZ?%wtP~KyWb+p;XMZNM#Lbpb zB24v$Mmh=x6^4c&(jXB`rhrIOu(*jSh;3pFCey%Vsty+?P{<6-u>_OmAkq}96sip( zo{Eq)Gyt&;OhBX&*cvl1IXN;$l!=RTGFzz5L3TJqOHJmBbL9aW1-9MLXfjuv z$>hm#iXi#(v5J$g$Jv3HTjCWbTg2OejWbIIIob^DD084ECwImBvVzPoo4mk7eDddb ch>7OlKr%|3{NGJ)a#4ah(3s7$5^f6u0M}fLM*si- delta 254 zcmezUjPdIe#tjQOCm&~)nk>#Waq>Q{)sug)@NS;QZS2ovYQFh&lnB%0pjgGp;(jz_Y<1DIaX0b!Jxu4O$SJtq=3mZ9WKtvFJgTs?}-#(Nd<8{<9tD? z3=Bc?hDIRLAO$3`3L*g&N(M_Lf+V=(AvzKfI!sJJ5;^g{lP|{!F#(xpfy^BVV5VLI xSh*S41T(N>%uPYc`xAUwfwq~YP8J9ipZp`ibuyc~)#UO-bw-8F3leV&0sy44OoIRb diff --git a/pkg/services/control/types.proto b/pkg/services/control/types.proto index 5b4844580..b34988c2e 100644 --- a/pkg/services/control/types.proto +++ b/pkg/services/control/types.proto @@ -115,6 +115,9 @@ enum HealthStatus { // Storage node application is shutting down. SHUTTING_DOWN = 3; + + // Storage node application is reconfiguring. + RECONFIGURING = 4; } // Shard description.