Do not bootstrap if node is online candidate #525

Merged
fyrchik merged 1 commit from dstepanov-yadro/frostfs-node:fix/bootstrap-single into master 2023-07-18 10:51:03 +00:00
3 changed files with 16 additions and 7 deletions

View file

@ -338,6 +338,7 @@ type internals struct {
healthStatus *atomic.Int32 healthStatus *atomic.Int32
// is node under maintenance // is node under maintenance
isMaintenance atomic.Bool isMaintenance atomic.Bool
isOnlineCandidate bool
} }
// starts node's maintenance. // starts node's maintenance.

View file

@ -227,6 +227,10 @@ func bootstrapNode(c *cfg) {
c.log.Info(logs.FrostFSNodeNodeIsUnderMaintenanceSkipInitialBootstrap) c.log.Info(logs.FrostFSNodeNodeIsUnderMaintenanceSkipInitialBootstrap)
return return
} }
if c.isOnlineCandidate {
c.log.Info(logs.NetmapNodeAlreadyInCandidateListOnlineSkipInitialBootstrap)
return
}
err := c.bootstrap() err := c.bootstrap()
fatalOnErrDetails("bootstrap error", err) fatalOnErrDetails("bootstrap error", err)
} }
@ -258,7 +262,8 @@ func initNetmapState(c *cfg) {
epoch, err := c.cfgNetmap.wrapper.Epoch() epoch, err := c.cfgNetmap.wrapper.Epoch()
fatalOnErrDetails("could not initialize current epoch number", err) fatalOnErrDetails("could not initialize current epoch number", err)
ni, err := c.netmapInitLocalNodeState(epoch) var ni *netmapSDK.NodeInfo
ni, c.isOnlineCandidate, err = c.netmapInitLocalNodeState(epoch)
fatalOnErrDetails("could not init network state", err) fatalOnErrDetails("could not init network state", err)
stateWord := nodeState(ni) stateWord := nodeState(ni)
@ -291,27 +296,29 @@ func nodeState(ni *netmapSDK.NodeInfo) string {
return "undefined" return "undefined"
} }
func (c *cfg) netmapInitLocalNodeState(epoch uint64) (*netmapSDK.NodeInfo, error) { func (c *cfg) netmapInitLocalNodeState(epoch uint64) (*netmapSDK.NodeInfo, bool, error) {
fyrchik marked this conversation as resolved Outdated

Would it be easy to avoid making the second query (IsMaintenance already does this)?

Would it be easy to avoid making the second query (`IsMaintenance` already does this)?

fixed

fixed
nmNodes, err := c.cfgNetmap.wrapper.GetCandidates() nmNodes, err := c.cfgNetmap.wrapper.GetCandidates()
if err != nil { if err != nil {
return nil, err return nil, false, err
} }
var candidate *netmapSDK.NodeInfo var candidate *netmapSDK.NodeInfo
isOnlineCandidate := false
for i := range nmNodes { for i := range nmNodes {
if bytes.Equal(nmNodes[i].PublicKey(), c.binPublicKey) { if bytes.Equal(nmNodes[i].PublicKey(), c.binPublicKey) {
candidate = &nmNodes[i] candidate = &nmNodes[i]
isOnlineCandidate = candidate.IsOnline()
break break
} }
} }
node, err := c.netmapLocalNodeState(epoch) node, err := c.netmapLocalNodeState(epoch)
if err != nil { if err != nil {
return nil, err return nil, false, err
} }
if candidate == nil { if candidate == nil {
return node, nil return node, false, nil
} }
nmState := nodeState(node) nmState := nodeState(node)
@ -323,7 +330,7 @@ func (c *cfg) netmapInitLocalNodeState(epoch uint64) (*netmapSDK.NodeInfo, error
zap.String("netmap", nmState), zap.String("netmap", nmState),
zap.String("candidate", candidateState)) zap.String("candidate", candidateState))
} }
return candidate, nil return candidate, isOnlineCandidate, nil
} }
func (c *cfg) netmapLocalNodeState(epoch uint64) (*netmapSDK.NodeInfo, error) { func (c *cfg) netmapLocalNodeState(epoch uint64) (*netmapSDK.NodeInfo, error) {

View file

@ -493,4 +493,5 @@ const (
ShardDeleteCantDeleteFromWriteCache = "can't delete object from write cache" ShardDeleteCantDeleteFromWriteCache = "can't delete object from write cache"
FrostFSNodeNodeIsUnderMaintenanceSkipInitialBootstrap = "the node is under maintenance, skip initial bootstrap" FrostFSNodeNodeIsUnderMaintenanceSkipInitialBootstrap = "the node is under maintenance, skip initial bootstrap"
EngineCouldNotChangeShardModeToDisabled = "could not change shard mode to disabled" EngineCouldNotChangeShardModeToDisabled = "could not change shard mode to disabled"
NetmapNodeAlreadyInCandidateListOnlineSkipInitialBootstrap = "the node is already in candidate list with online state, skip initial bootstrap"
fyrchik marked this conversation as resolved Outdated

Maybe replace assume that current node is not in candidate list with bootstrap query will be performed?

Maybe replace `assume that current node is not in candidate list` with `bootstrap query will be performed`?

msg removed

msg removed
) )