Move netmap init changes from SUPPORT branch #441
2 changed files with 60 additions and 11 deletions
|
@ -225,6 +225,10 @@ func addNewEpochNotificationHandlers(c *cfg) {
|
|||
// Must be called after initNetmapService.
|
||||
func bootstrapNode(c *cfg) {
|
||||
if c.needBootstrap() {
|
||||
if c.IsMaintenance() {
|
||||
c.log.Info(logs.FrostFSNodeNodeIsUnderMaintenanceSkipInitialBootstrap)
|
||||
return
|
||||
}
|
||||
err := c.bootstrap()
|
||||
fatalOnErrDetails("bootstrap error", err)
|
||||
}
|
||||
|
@ -256,30 +260,74 @@ func initNetmapState(c *cfg) {
|
|||
epoch, err := c.cfgNetmap.wrapper.Epoch()
|
||||
fatalOnErrDetails("could not initialize current epoch number", err)
|
||||
|
||||
ni, err := c.netmapLocalNodeState(epoch)
|
||||
ni, err := c.netmapInitLocalNodeState(epoch)
|
||||
fatalOnErrDetails("could not init network state", err)
|
||||
|
||||
stateWord := "undefined"
|
||||
|
||||
if ni != nil {
|
||||
switch {
|
||||
case ni.IsOnline():
|
||||
stateWord = "online"
|
||||
case ni.IsOffline():
|
||||
stateWord = "offline"
|
||||
}
|
||||
}
|
||||
stateWord := nodeState(ni)
|
||||
|
||||
c.log.Info(logs.FrostFSNodeInitialNetworkState,
|
||||
zap.Uint64("epoch", epoch),
|
||||
zap.String("state", stateWord),
|
||||
)
|
||||
|
||||
if ni != nil && ni.IsMaintenance() {
|
||||
c.isMaintenance.Store(true)
|
||||
}
|
||||
|
||||
c.cfgNetmap.state.setCurrentEpoch(epoch)
|
||||
c.cfgNetmap.startEpoch = epoch
|
||||
c.handleLocalNodeInfo(ni)
|
||||
}
|
||||
|
||||
func nodeState(ni *netmapSDK.NodeInfo) string {
|
||||
if ni != nil {
|
||||
switch {
|
||||
case ni.IsOnline():
|
||||
return "online"
|
||||
case ni.IsOffline():
|
||||
return "offline"
|
||||
case ni.IsMaintenance():
|
||||
return "maintenance"
|
||||
}
|
||||
}
|
||||
return "undefined"
|
||||
}
|
||||
|
||||
func (c *cfg) netmapInitLocalNodeState(epoch uint64) (*netmapSDK.NodeInfo, error) {
|
||||
nmNodes, err := c.cfgNetmap.wrapper.GetCandidates()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var candidate *netmapSDK.NodeInfo
|
||||
for i := range nmNodes {
|
||||
if bytes.Equal(nmNodes[i].PublicKey(), c.binPublicKey) {
|
||||
candidate = &nmNodes[i]
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
node, err := c.netmapLocalNodeState(epoch)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if candidate == nil {
|
||||
return node, nil
|
||||
}
|
||||
|
||||
nmState := nodeState(node)
|
||||
candidateState := nodeState(candidate)
|
||||
if nmState != candidateState {
|
||||
// This happens when the node was switched to maintenance without epoch tick.
|
||||
// We expect it to continue staying in maintenance.
|
||||
c.log.Info("candidate status is different from the netmap status, the former takes priority",
|
||||
zap.String("netmap", nmState),
|
||||
zap.String("candidate", candidateState))
|
||||
}
|
||||
return candidate, nil
|
||||
}
|
||||
|
||||
func (c *cfg) netmapLocalNodeState(epoch uint64) (*netmapSDK.NodeInfo, error) {
|
||||
// calculate current network state
|
||||
nm, err := c.cfgNetmap.wrapper.GetNetMapByEpoch(epoch)
|
||||
|
|
|
@ -491,4 +491,5 @@ const (
|
|||
EngineShardsEvacuationFailedToMoveObject = "failed to evacuate object to other node"
|
||||
ShardGCFailedToGetExpiredWithLinked = "failed to get expired objects with linked"
|
||||
ShardDeleteCantDeleteFromWriteCache = "can't delete object from write cache"
|
||||
FrostFSNodeNodeIsUnderMaintenanceSkipInitialBootstrap = "the node is under maintenance, skip initial bootstrap"
|
||||
)
|
||||
|
|
Loading…
Reference in a new issue