Move netmap init changes from SUPPORT branch #441

Merged
fyrchik merged 3 commits from dstepanov-yadro/frostfs-node:fix/409 into master 2023-06-13 10:02:38 +00:00
2 changed files with 60 additions and 11 deletions

View file

@ -225,6 +225,10 @@ func addNewEpochNotificationHandlers(c *cfg) {
// Must be called after initNetmapService.
func bootstrapNode(c *cfg) {
if c.needBootstrap() {
if c.IsMaintenance() {
c.log.Info(logs.FrostFSNodeNodeIsUnderMaintenanceSkipInitialBootstrap)
return
}
err := c.bootstrap()
fatalOnErrDetails("bootstrap error", err)
}
@ -256,30 +260,74 @@ func initNetmapState(c *cfg) {
epoch, err := c.cfgNetmap.wrapper.Epoch()
fatalOnErrDetails("could not initialize current epoch number", err)
ni, err := c.netmapLocalNodeState(epoch)
ni, err := c.netmapInitLocalNodeState(epoch)
fatalOnErrDetails("could not init network state", err)
stateWord := "undefined"
if ni != nil {
switch {
case ni.IsOnline():
stateWord = "online"
case ni.IsOffline():
stateWord = "offline"
}
}
stateWord := nodeState(ni)
c.log.Info(logs.FrostFSNodeInitialNetworkState,
zap.Uint64("epoch", epoch),
zap.String("state", stateWord),
)
if ni != nil && ni.IsMaintenance() {
c.isMaintenance.Store(true)
}
c.cfgNetmap.state.setCurrentEpoch(epoch)
c.cfgNetmap.startEpoch = epoch
c.handleLocalNodeInfo(ni)
}
// nodeState returns a human-readable name for the state carried by ni:
// "online", "offline" or "maintenance". A nil ni, or one for which none of
// the three state checks holds, yields "undefined".
func nodeState(ni *netmapSDK.NodeInfo) string {
	const unknown = "undefined"

	if ni == nil {
		return unknown
	}

	// The checks are tried in this order; the first one that holds wins.
	if ni.IsOnline() {
		return "online"
	}
	if ni.IsOffline() {
		return "offline"
	}
	if ni.IsMaintenance() {
		return "maintenance"
	}
	return unknown
}
// netmapInitLocalNodeState selects the node info used to initialise the local
// node state for the given epoch.
//
// It first asks the netmap wrapper for the candidate list and searches it for
// an entry whose public key equals c.binPublicKey, then obtains the local node
// state via netmapLocalNodeState. If no matching candidate exists, the netmap
// result is returned as is. Otherwise the candidate is returned, and an info
// message is logged when its state name differs from the netmap one.
//
// An error from either lookup is returned unchanged together with a nil node.
func (c *cfg) netmapInitLocalNodeState(epoch uint64) (*netmapSDK.NodeInfo, error) {
	candidates, err := c.cfgNetmap.wrapper.GetCandidates()
	if err != nil {
		return nil, err
	}

	// Point into the slice rather than copying the element.
	var own *netmapSDK.NodeInfo
	for idx := range candidates {
		if !bytes.Equal(candidates[idx].PublicKey(), c.binPublicKey) {
			continue
		}
		own = &candidates[idx]
		break
	}

	fromNetmap, err := c.netmapLocalNodeState(epoch)
	if err != nil {
		return nil, err
	}

	if own == nil {
		return fromNetmap, nil
	}

	if netmapWord, candidateWord := nodeState(fromNetmap), nodeState(own); netmapWord != candidateWord {
		// This happens when the node was switched to maintenance without epoch tick.
		// We expect it to continue staying in maintenance.
		c.log.Info("candidate status is different from the netmap status, the former takes priority",
			zap.String("netmap", netmapWord),
			zap.String("candidate", candidateWord))
	}

	return own, nil
}
func (c *cfg) netmapLocalNodeState(epoch uint64) (*netmapSDK.NodeInfo, error) {
// calculate current network state
nm, err := c.cfgNetmap.wrapper.GetNetMapByEpoch(epoch)

View file

@ -491,4 +491,5 @@ const (
EngineShardsEvacuationFailedToMoveObject = "failed to evacuate object to other node"
ShardGCFailedToGetExpiredWithLinked = "failed to get expired objects with linked"
ShardDeleteCantDeleteFromWriteCache = "can't delete object from write cache"
FrostFSNodeNodeIsUnderMaintenanceSkipInitialBootstrap = "the node is under maintenance, skip initial bootstrap"
)