From 8f02b9c8c118c44d5bab17ce0525207b65b56e42 Mon Sep 17 00:00:00 2001 From: Dmitrii Stepanov Date: Tue, 13 Jun 2023 12:06:04 +0300 Subject: [PATCH 1/3] [#409] node: Log maintenance state on startup Signed-off-by: Dmitrii Stepanov --- cmd/frostfs-node/netmap.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cmd/frostfs-node/netmap.go b/cmd/frostfs-node/netmap.go index c6623a385..9d8ad6a9a 100644 --- a/cmd/frostfs-node/netmap.go +++ b/cmd/frostfs-node/netmap.go @@ -267,6 +267,8 @@ func initNetmapState(c *cfg) { stateWord = "online" case ni.IsOffline(): stateWord = "offline" + case ni.IsMaintenance(): + stateWord = "maintenance" } } -- 2.45.3 From 250602032d74abc8803b22aa84f45631b1dfee2b Mon Sep 17 00:00:00 2001 From: Dmitrii Stepanov Date: Tue, 13 Jun 2023 12:08:56 +0300 Subject: [PATCH 2/3] [#409] node: Fetch last bootstrap info on startup Signed-off-by: Dmitrii Stepanov --- cmd/frostfs-node/netmap.go | 64 ++++++++++++++++++++++++++++++-------- 1 file changed, 51 insertions(+), 13 deletions(-) diff --git a/cmd/frostfs-node/netmap.go b/cmd/frostfs-node/netmap.go index 9d8ad6a9a..c86e3a2e7 100644 --- a/cmd/frostfs-node/netmap.go +++ b/cmd/frostfs-node/netmap.go @@ -256,21 +256,10 @@ func initNetmapState(c *cfg) { epoch, err := c.cfgNetmap.wrapper.Epoch() fatalOnErrDetails("could not initialize current epoch number", err) - ni, err := c.netmapLocalNodeState(epoch) + ni, err := c.netmapInitLocalNodeState(epoch) fatalOnErrDetails("could not init network state", err) - stateWord := "undefined" - - if ni != nil { - switch { - case ni.IsOnline(): - stateWord = "online" - case ni.IsOffline(): - stateWord = "offline" - case ni.IsMaintenance(): - stateWord = "maintenance" - } - } + stateWord := nodeState(ni) c.log.Info(logs.FrostFSNodeInitialNetworkState, zap.Uint64("epoch", epoch), @@ -282,6 +271,55 @@ func initNetmapState(c *cfg) { c.handleLocalNodeInfo(ni) } +func nodeState(ni *netmapSDK.NodeInfo) string { + if ni != nil { + switch { + case ni.IsOnline(): + return 
"online" + case ni.IsOffline(): + return "offline" + case ni.IsMaintenance(): + return "maintenance" + } + } + return "undefined" +} + +func (c *cfg) netmapInitLocalNodeState(epoch uint64) (*netmapSDK.NodeInfo, error) { + nmNodes, err := c.cfgNetmap.wrapper.GetCandidates() + if err != nil { + return nil, err + } + + var candidate *netmapSDK.NodeInfo + for i := range nmNodes { + if bytes.Equal(nmNodes[i].PublicKey(), c.binPublicKey) { + candidate = &nmNodes[i] + break + } + } + + node, err := c.netmapLocalNodeState(epoch) + if err != nil { + return nil, err + } + + if candidate == nil { + return node, nil + } + + nmState := nodeState(node) + candidateState := nodeState(candidate) + if nmState != candidateState { + // This happens when the node was switched to maintenance without epoch tick. + // We expect it to continue staying in maintenance. + c.log.Info("candidate status is different from the netmap status, the former takes priority", + zap.String("netmap", nmState), + zap.String("candidate", candidateState)) + } + return candidate, nil +} + func (c *cfg) netmapLocalNodeState(epoch uint64) (*netmapSDK.NodeInfo, error) { // calculate current network state nm, err := c.cfgNetmap.wrapper.GetNetMapByEpoch(epoch) -- 2.45.3 From daada10590232d545ff54045b64799a1e6b91c4b Mon Sep 17 00:00:00 2001 From: Dmitrii Stepanov Date: Tue, 13 Jun 2023 12:11:59 +0300 Subject: [PATCH 3/3] [#409] node: Do not send initial bootstrap under maintenance Signed-off-by: Dmitrii Stepanov --- cmd/frostfs-node/netmap.go | 8 ++++++++ internal/logs/logs.go | 1 + 2 files changed, 9 insertions(+) diff --git a/cmd/frostfs-node/netmap.go b/cmd/frostfs-node/netmap.go index c86e3a2e7..f1ea8b40e 100644 --- a/cmd/frostfs-node/netmap.go +++ b/cmd/frostfs-node/netmap.go @@ -225,6 +225,10 @@ func addNewEpochNotificationHandlers(c *cfg) { // Must be called after initNetmapService. 
func bootstrapNode(c *cfg) { if c.needBootstrap() { + if c.IsMaintenance() { + c.log.Info(logs.FrostFSNodeNodeIsUnderMaintenanceSkipInitialBootstrap) + return + } err := c.bootstrap() fatalOnErrDetails("bootstrap error", err) } @@ -266,6 +270,10 @@ func initNetmapState(c *cfg) { zap.String("state", stateWord), ) + if ni != nil && ni.IsMaintenance() { + c.isMaintenance.Store(true) + } + c.cfgNetmap.state.setCurrentEpoch(epoch) c.cfgNetmap.startEpoch = epoch c.handleLocalNodeInfo(ni) diff --git a/internal/logs/logs.go b/internal/logs/logs.go index 9b800adfc..dc54f2d2a 100644 --- a/internal/logs/logs.go +++ b/internal/logs/logs.go @@ -491,4 +491,5 @@ const ( EngineShardsEvacuationFailedToMoveObject = "failed to evacuate object to other node" ShardGCFailedToGetExpiredWithLinked = "failed to get expired objects with linked" ShardDeleteCantDeleteFromWriteCache = "can't delete object from write cache" + FrostFSNodeNodeIsUnderMaintenanceSkipInitialBootstrap = "the node is under maintenance, skip initial bootstrap" ) -- 2.45.3