diff --git a/cmd/frostfs-node/netmap.go b/cmd/frostfs-node/netmap.go
index c6623a385..f1ea8b40e 100644
--- a/cmd/frostfs-node/netmap.go
+++ b/cmd/frostfs-node/netmap.go
@@ -225,6 +225,10 @@ func addNewEpochNotificationHandlers(c *cfg) {
 // Must be called after initNetmapService.
 func bootstrapNode(c *cfg) {
 	if c.needBootstrap() {
+		if c.IsMaintenance() {
+			c.log.Info(logs.FrostFSNodeNodeIsUnderMaintenanceSkipInitialBootstrap)
+			return
+		}
 		err := c.bootstrap()
 		fatalOnErrDetails("bootstrap error", err)
 	}
@@ -256,30 +260,81 @@ func initNetmapState(c *cfg) {
 	epoch, err := c.cfgNetmap.wrapper.Epoch()
 	fatalOnErrDetails("could not initialize current epoch number", err)
 
-	ni, err := c.netmapLocalNodeState(epoch)
+	ni, err := c.netmapInitLocalNodeState(epoch)
 	fatalOnErrDetails("could not init network state", err)
 
-	stateWord := "undefined"
-
-	if ni != nil {
-		switch {
-		case ni.IsOnline():
-			stateWord = "online"
-		case ni.IsOffline():
-			stateWord = "offline"
-		}
-	}
+	stateWord := nodeState(ni)
 
 	c.log.Info(logs.FrostFSNodeInitialNetworkState,
 		zap.Uint64("epoch", epoch),
 		zap.String("state", stateWord),
 	)
 
+	if ni != nil && ni.IsMaintenance() {
+		c.isMaintenance.Store(true)
+	}
+
 	c.cfgNetmap.state.setCurrentEpoch(epoch)
 	c.cfgNetmap.startEpoch = epoch
 	c.handleLocalNodeInfo(ni)
 }
 
+// nodeState returns a human-readable name of the state reported by ni:
+// "online", "offline" or "maintenance". It returns "undefined" when ni is nil
+// or reports none of these states.
+func nodeState(ni *netmapSDK.NodeInfo) string {
+	if ni != nil {
+		switch {
+		case ni.IsOnline():
+			return "online"
+		case ni.IsOffline():
+			return "offline"
+		case ni.IsMaintenance():
+			return "maintenance"
+		}
+	}
+	return "undefined"
+}
+
+// netmapInitLocalNodeState returns the node info to start from at the given epoch.
+// The entry from the candidate list whose public key equals c.binPublicKey takes
+// priority over the result of netmapLocalNodeState; the latter is returned only
+// when the local node is not among the candidates.
+func (c *cfg) netmapInitLocalNodeState(epoch uint64) (*netmapSDK.NodeInfo, error) {
+	nmNodes, err := c.cfgNetmap.wrapper.GetCandidates()
+	if err != nil {
+		return nil, err
+	}
+
+	var candidate *netmapSDK.NodeInfo
+	for i := range nmNodes {
+		if bytes.Equal(nmNodes[i].PublicKey(), c.binPublicKey) {
+			candidate = &nmNodes[i]
+			break
+		}
+	}
+
+	node, err := c.netmapLocalNodeState(epoch)
+	if err != nil {
+		return nil, err
+	}
+
+	if candidate == nil {
+		return node, nil
+	}
+
+	nmState := nodeState(node)
+	candidateState := nodeState(candidate)
+	if nmState != candidateState {
+		// This happens when the node was switched to maintenance without an epoch tick.
+		// We expect it to continue staying in maintenance.
+		c.log.Info(logs.FrostFSNodeCandidateStatusPriority,
+			zap.String("netmap", nmState),
+			zap.String("candidate", candidateState))
+	}
+	return candidate, nil
+}
+
 func (c *cfg) netmapLocalNodeState(epoch uint64) (*netmapSDK.NodeInfo, error) {
 	// calculate current network state
 	nm, err := c.cfgNetmap.wrapper.GetNetMapByEpoch(epoch)
diff --git a/internal/logs/logs.go b/internal/logs/logs.go
index 9b800adfc..dc54f2d2a 100644
--- a/internal/logs/logs.go
+++ b/internal/logs/logs.go
@@ -491,4 +491,6 @@ const (
 	EngineShardsEvacuationFailedToMoveObject = "failed to evacuate object to other node"
 	ShardGCFailedToGetExpiredWithLinked = "failed to get expired objects with linked"
 	ShardDeleteCantDeleteFromWriteCache = "can't delete object from write cache"
+	FrostFSNodeNodeIsUnderMaintenanceSkipInitialBootstrap = "the node is under maintenance, skip initial bootstrap"
+	FrostFSNodeCandidateStatusPriority = "candidate status is different from the netmap status, the former takes priority"
 )