Move netmap init changes from SUPPORT branch #441
2 changed files with 60 additions and 11 deletions
|
@ -225,6 +225,10 @@ func addNewEpochNotificationHandlers(c *cfg) {
|
||||||
// Must be called after initNetmapService.
|
// Must be called after initNetmapService.
|
||||||
func bootstrapNode(c *cfg) {
|
func bootstrapNode(c *cfg) {
|
||||||
if c.needBootstrap() {
|
if c.needBootstrap() {
|
||||||
|
if c.IsMaintenance() {
|
||||||
|
c.log.Info(logs.FrostFSNodeNodeIsUnderMaintenanceSkipInitialBootstrap)
|
||||||
|
return
|
||||||
|
}
|
||||||
err := c.bootstrap()
|
err := c.bootstrap()
|
||||||
fatalOnErrDetails("bootstrap error", err)
|
fatalOnErrDetails("bootstrap error", err)
|
||||||
}
|
}
|
||||||
|
@ -256,30 +260,74 @@ func initNetmapState(c *cfg) {
|
||||||
epoch, err := c.cfgNetmap.wrapper.Epoch()
|
epoch, err := c.cfgNetmap.wrapper.Epoch()
|
||||||
fatalOnErrDetails("could not initialize current epoch number", err)
|
fatalOnErrDetails("could not initialize current epoch number", err)
|
||||||
|
|
||||||
ni, err := c.netmapLocalNodeState(epoch)
|
ni, err := c.netmapInitLocalNodeState(epoch)
|
||||||
fatalOnErrDetails("could not init network state", err)
|
fatalOnErrDetails("could not init network state", err)
|
||||||
|
|
||||||
stateWord := "undefined"
|
stateWord := nodeState(ni)
|
||||||
|
|
||||||
if ni != nil {
|
|
||||||
switch {
|
|
||||||
case ni.IsOnline():
|
|
||||||
stateWord = "online"
|
|
||||||
case ni.IsOffline():
|
|
||||||
stateWord = "offline"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
c.log.Info(logs.FrostFSNodeInitialNetworkState,
|
c.log.Info(logs.FrostFSNodeInitialNetworkState,
|
||||||
zap.Uint64("epoch", epoch),
|
zap.Uint64("epoch", epoch),
|
||||||
zap.String("state", stateWord),
|
zap.String("state", stateWord),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if ni != nil && ni.IsMaintenance() {
|
||||||
|
c.isMaintenance.Store(true)
|
||||||
|
}
|
||||||
|
|
||||||
c.cfgNetmap.state.setCurrentEpoch(epoch)
|
c.cfgNetmap.state.setCurrentEpoch(epoch)
|
||||||
c.cfgNetmap.startEpoch = epoch
|
c.cfgNetmap.startEpoch = epoch
|
||||||
c.handleLocalNodeInfo(ni)
|
c.handleLocalNodeInfo(ni)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func nodeState(ni *netmapSDK.NodeInfo) string {
|
||||||
|
if ni != nil {
|
||||||
|
switch {
|
||||||
|
case ni.IsOnline():
|
||||||
|
return "online"
|
||||||
|
case ni.IsOffline():
|
||||||
|
return "offline"
|
||||||
|
case ni.IsMaintenance():
|
||||||
|
return "maintenance"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return "undefined"
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *cfg) netmapInitLocalNodeState(epoch uint64) (*netmapSDK.NodeInfo, error) {
|
||||||
|
nmNodes, err := c.cfgNetmap.wrapper.GetCandidates()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var candidate *netmapSDK.NodeInfo
|
||||||
|
for i := range nmNodes {
|
||||||
|
if bytes.Equal(nmNodes[i].PublicKey(), c.binPublicKey) {
|
||||||
|
candidate = &nmNodes[i]
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
node, err := c.netmapLocalNodeState(epoch)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if candidate == nil {
|
||||||
|
return node, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
nmState := nodeState(node)
|
||||||
|
candidateState := nodeState(candidate)
|
||||||
|
if nmState != candidateState {
|
||||||
|
// This happens when the node was switched to maintenance without epoch tick.
|
||||||
|
// We expect it to continue staying in maintenance.
|
||||||
|
c.log.Info("candidate status is different from the netmap status, the former takes priority",
|
||||||
|
zap.String("netmap", nmState),
|
||||||
|
zap.String("candidate", candidateState))
|
||||||
|
}
|
||||||
|
return candidate, nil
|
||||||
|
}
|
||||||
|
|
||||||
func (c *cfg) netmapLocalNodeState(epoch uint64) (*netmapSDK.NodeInfo, error) {
|
func (c *cfg) netmapLocalNodeState(epoch uint64) (*netmapSDK.NodeInfo, error) {
|
||||||
// calculate current network state
|
// calculate current network state
|
||||||
nm, err := c.cfgNetmap.wrapper.GetNetMapByEpoch(epoch)
|
nm, err := c.cfgNetmap.wrapper.GetNetMapByEpoch(epoch)
|
||||||
|
|
|
@ -491,4 +491,5 @@ const (
|
||||||
EngineShardsEvacuationFailedToMoveObject = "failed to evacuate object to other node"
|
EngineShardsEvacuationFailedToMoveObject = "failed to evacuate object to other node"
|
||||||
ShardGCFailedToGetExpiredWithLinked = "failed to get expired objects with linked"
|
ShardGCFailedToGetExpiredWithLinked = "failed to get expired objects with linked"
|
||||||
ShardDeleteCantDeleteFromWriteCache = "can't delete object from write cache"
|
ShardDeleteCantDeleteFromWriteCache = "can't delete object from write cache"
|
||||||
|
FrostFSNodeNodeIsUnderMaintenanceSkipInitialBootstrap = "the node is under maintenance, skip initial bootstrap"
|
||||||
)
|
)
|
||||||
|
|
Loading…
Reference in a new issue