frostfs-node/cmd/frostfs-node/netmap.go
Evgenii Stratonikov d3a52ec73a [#516] node: Send bootstrap request if attributes were updated
Consider following situation:
1. Epoch tick.
2. Update node attributes in the config.
3. Restart node.

Because we already sent bootstrap query after (1) and we are still
online, new bootstrap query won't be sent. This leads to the node
attributes being updated after another epoch.

Signed-off-by: Evgenii Stratonikov <e.stratonikov@yadro.com>
2023-08-08 07:40:26 +00:00

502 lines
13 KiB
Go

package main
import (
"bytes"
"context"
"errors"
"fmt"
"sync/atomic"
netmapGRPC "git.frostfs.info/TrueCloudLab/frostfs-api-go/v2/netmap/grpc"
"git.frostfs.info/TrueCloudLab/frostfs-node/internal/logs"
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/core/netmap"
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/metrics"
nmClient "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/morph/client/netmap"
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/morph/event"
netmapEvent "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/morph/event/netmap"
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/network"
netmapTransportGRPC "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/network/transport/netmap/grpc"
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/services/control"
netmapService "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/services/netmap"
netmapSDK "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/netmap"
"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/version"
"go.uber.org/zap"
)
// primary solution of local network state dump.
type networkState struct {
epoch *atomic.Uint64
controlNetStatus atomic.Int32 // control.NetmapStatus
nodeInfo atomic.Value // *netmapSDK.NodeInfo
metrics *metrics.NodeMetrics
}
func newNetworkState() *networkState {
ns := &networkState{
epoch: new(atomic.Uint64),
}
ns.controlNetStatus.Store(int32(control.NetmapStatus_STATUS_UNDEFINED))
return ns
}
func (s *networkState) CurrentEpoch() uint64 {
return s.epoch.Load()
}
func (s *networkState) setCurrentEpoch(v uint64) {
s.epoch.Store(v)
if s.metrics != nil {
s.metrics.SetEpoch(v)
}
}
func (s *networkState) setNodeInfo(ni *netmapSDK.NodeInfo) {
ctrlNetSt := control.NetmapStatus_STATUS_UNDEFINED
if ni != nil {
s.nodeInfo.Store(*ni)
switch {
case ni.IsOnline():
ctrlNetSt = control.NetmapStatus_ONLINE
case ni.IsOffline():
ctrlNetSt = control.NetmapStatus_OFFLINE
case ni.IsMaintenance():
ctrlNetSt = control.NetmapStatus_MAINTENANCE
}
} else {
ctrlNetSt = control.NetmapStatus_OFFLINE
niRaw := s.nodeInfo.Load()
if niRaw != nil {
niOld := niRaw.(netmapSDK.NodeInfo)
// nil ni means that the node is not included
// in the netmap
niOld.SetOffline()
s.nodeInfo.Store(niOld)
}
}
s.setControlNetmapStatus(ctrlNetSt)
}
// sets the current node state to the given value. Subsequent cfg.bootstrap
// calls will process this value to decide what status node should set in the
// network.
func (s *networkState) setControlNetmapStatus(st control.NetmapStatus) {
s.controlNetStatus.Store(int32(st))
}
func (s *networkState) controlNetmapStatus() (res control.NetmapStatus) {
return control.NetmapStatus(s.controlNetStatus.Load())
}
func (s *networkState) getNodeInfo() (res netmapSDK.NodeInfo, ok bool) {
v := s.nodeInfo.Load()
if v != nil {
res, ok = v.(netmapSDK.NodeInfo)
if !ok {
panic(fmt.Sprintf("unexpected value in atomic node info state: %T", v))
}
}
return
}
func nodeKeyFromNetmap(c *cfg) []byte {
ni, ok := c.cfgNetmap.state.getNodeInfo()
if ok {
return ni.PublicKey()
}
return nil
}
func (c *cfg) iterateNetworkAddresses(f func(string) bool) {
ni, ok := c.cfgNetmap.state.getNodeInfo()
if ok {
ni.IterateNetworkEndpoints(f)
}
}
func (c *cfg) addressNum() int {
ni, ok := c.cfgNetmap.state.getNodeInfo()
if ok {
return ni.NumberOfNetworkEndpoints()
}
return 0
}
func initNetmapService(ctx context.Context, c *cfg) {
network.WriteToNodeInfo(c.localAddr, &c.cfgNodeInfo.localInfo)
c.cfgNodeInfo.localInfo.SetPublicKey(c.key.PublicKey().Bytes())
parseAttributes(c)
c.cfgNodeInfo.localInfo.SetOffline()
if c.cfgMorph.client == nil {
initMorphComponents(ctx, c)
}
initNetmapState(c)
server := netmapTransportGRPC.New(
netmapService.NewSignService(
&c.key.PrivateKey,
netmapService.NewExecutionService(
c,
c.apiVersion,
&netInfo{
netState: c.cfgNetmap.state,
magic: c.cfgMorph.client,
morphClientNetMap: c.cfgNetmap.wrapper,
msPerBlockRdr: c.cfgMorph.client.MsPerBlock,
},
c.respSvc,
),
),
)
for _, srv := range c.cfgGRPC.servers {
netmapGRPC.RegisterNetmapServiceServer(srv, server)
}
addNewEpochNotificationHandlers(c)
}
func addNewEpochNotificationHandlers(c *cfg) {
addNewEpochNotificationHandler(c, func(ev event.Event) {
c.cfgNetmap.state.setCurrentEpoch(ev.(netmapEvent.NewEpoch).EpochNumber())
})
addNewEpochAsyncNotificationHandler(c, func(ev event.Event) {
if !c.needBootstrap() || c.cfgNetmap.reBoostrapTurnedOff.Load() { // fixes #470
return
}
n := ev.(netmapEvent.NewEpoch).EpochNumber()
const reBootstrapInterval = 2
if (n-c.cfgNetmap.startEpoch)%reBootstrapInterval == 0 {
err := c.bootstrap()
if err != nil {
c.log.Warn(logs.FrostFSNodeCantSendRebootstrapTx, zap.Error(err))
}
}
})
addNewEpochAsyncNotificationHandler(c, func(ev event.Event) {
e := ev.(netmapEvent.NewEpoch).EpochNumber()
ni, err := c.netmapLocalNodeState(e)
if err != nil {
c.log.Error(logs.FrostFSNodeCouldNotUpdateNodeStateOnNewEpoch,
zap.Uint64("epoch", e),
zap.String("error", err.Error()),
)
return
}
c.handleLocalNodeInfo(ni)
})
if c.cfgMorph.notaryEnabled {
addNewEpochAsyncNotificationHandler(c, func(ev event.Event) {
_, err := makeNotaryDeposit(c)
if err != nil {
c.log.Error(logs.FrostFSNodeCouldNotMakeNotaryDeposit,
zap.String("error", err.Error()),
)
}
})
}
}
// bootstrapNode adds current node to the Network map.
// Must be called after initNetmapService.
func bootstrapNode(c *cfg) {
if c.needBootstrap() {
if c.IsMaintenance() {
c.log.Info(logs.FrostFSNodeNodeIsUnderMaintenanceSkipInitialBootstrap)
return
}
if c.alreadyBootstraped {
c.log.Info(logs.NetmapNodeAlreadyInCandidateListOnlineSkipInitialBootstrap)
return
}
err := c.bootstrap()
fatalOnErrDetails("bootstrap error", err)
}
}
func addNetmapNotificationHandler(c *cfg, sTyp string, h event.Handler) {
typ := event.TypeFromString(sTyp)
if c.cfgNetmap.subscribers == nil {
c.cfgNetmap.subscribers = make(map[event.Type][]event.Handler, 1)
}
c.cfgNetmap.subscribers[typ] = append(c.cfgNetmap.subscribers[typ], h)
}
func setNetmapNotificationParser(c *cfg, sTyp string, p event.NotificationParser) {
typ := event.TypeFromString(sTyp)
if c.cfgNetmap.parsers == nil {
c.cfgNetmap.parsers = make(map[event.Type]event.NotificationParser, 1)
}
c.cfgNetmap.parsers[typ] = p
}
// initNetmapState inits current Network map state.
// Must be called after Morph components initialization.
func initNetmapState(c *cfg) {
epoch, err := c.cfgNetmap.wrapper.Epoch()
fatalOnErrDetails("could not initialize current epoch number", err)
var ni *netmapSDK.NodeInfo
ni, c.alreadyBootstraped, err = c.netmapInitLocalNodeState(epoch)
fatalOnErrDetails("could not init network state", err)
stateWord := nodeState(ni)
c.log.Info(logs.FrostFSNodeInitialNetworkState,
zap.Uint64("epoch", epoch),
zap.String("state", stateWord),
)
if ni != nil && ni.IsMaintenance() {
c.isMaintenance.Store(true)
}
c.cfgNetmap.state.setCurrentEpoch(epoch)
c.cfgNetmap.startEpoch = epoch
c.handleLocalNodeInfo(ni)
}
func sameNodeInfo(a, b *netmapSDK.NodeInfo) bool {
// Suboptimal, but we do this once on the node startup.
rawA := a.Marshal()
rawB := b.Marshal()
return bytes.Equal(rawA, rawB)
}
func nodeState(ni *netmapSDK.NodeInfo) string {
if ni != nil {
switch {
case ni.IsOnline():
return "online"
case ni.IsOffline():
return "offline"
case ni.IsMaintenance():
return "maintenance"
}
}
return "undefined"
}
func (c *cfg) netmapInitLocalNodeState(epoch uint64) (*netmapSDK.NodeInfo, bool, error) {
nmNodes, err := c.cfgNetmap.wrapper.GetCandidates()
if err != nil {
return nil, false, err
}
var candidate *netmapSDK.NodeInfo
alreadyBootstraped := false
for i := range nmNodes {
if bytes.Equal(nmNodes[i].PublicKey(), c.binPublicKey) {
candidate = &nmNodes[i]
alreadyBootstraped = candidate.IsOnline() && sameNodeInfo(&c.cfgNodeInfo.localInfo, candidate)
break
}
}
node, err := c.netmapLocalNodeState(epoch)
if err != nil {
return nil, false, err
}
if candidate == nil {
return node, false, nil
}
nmState := nodeState(node)
candidateState := nodeState(candidate)
if nmState != candidateState {
// This happens when the node was switched to maintenance without epoch tick.
// We expect it to continue staying in maintenance.
c.log.Info("candidate status is different from the netmap status, the former takes priority",
zap.String("netmap", nmState),
zap.String("candidate", candidateState))
}
return candidate, alreadyBootstraped, nil
}
func (c *cfg) netmapLocalNodeState(epoch uint64) (*netmapSDK.NodeInfo, error) {
// calculate current network state
nm, err := c.cfgNetmap.wrapper.GetNetMapByEpoch(epoch)
if err != nil {
return nil, err
}
c.netMap.Store(*nm)
nmNodes := nm.Nodes()
for i := range nmNodes {
if bytes.Equal(nmNodes[i].PublicKey(), c.binPublicKey) {
return &nmNodes[i], nil
}
}
return nil, nil
}
// addNewEpochNotificationHandler adds handler that will be executed synchronously.
func addNewEpochNotificationHandler(c *cfg, h event.Handler) {
addNetmapNotificationHandler(c, newEpochNotification, h)
}
// addNewEpochAsyncNotificationHandler adds handler that will be executed asynchronously via netmap workerPool.
func addNewEpochAsyncNotificationHandler(c *cfg, h event.Handler) {
addNetmapNotificationHandler(
c,
newEpochNotification,
event.WorkerPoolHandler(
c.cfgNetmap.workerPool,
h,
c.log,
),
)
}
var errRelayBootstrap = errors.New("setting netmap status is forbidden in relay mode")
func (c *cfg) SetNetmapStatus(st control.NetmapStatus) error {
switch st {
default:
return fmt.Errorf("unsupported status %v", st)
case control.NetmapStatus_MAINTENANCE:
return c.setMaintenanceStatus(false)
case control.NetmapStatus_ONLINE, control.NetmapStatus_OFFLINE:
}
c.stopMaintenance()
if !c.needBootstrap() {
return errRelayBootstrap
}
if st == control.NetmapStatus_ONLINE {
c.cfgNetmap.reBoostrapTurnedOff.Store(false)
return bootstrapOnline(c)
}
c.cfgNetmap.reBoostrapTurnedOff.Store(true)
return c.updateNetMapState(func(*nmClient.UpdatePeerPrm) {})
}
func (c *cfg) ForceMaintenance() error {
return c.setMaintenanceStatus(true)
}
func (c *cfg) setMaintenanceStatus(force bool) error {
netSettings, err := c.cfgNetmap.wrapper.ReadNetworkConfiguration()
if err != nil {
err = fmt.Errorf("read network settings to check maintenance allowance: %w", err)
} else if !netSettings.MaintenanceModeAllowed {
err = errors.New("maintenance mode is not allowed by the network")
}
if err == nil || force {
c.startMaintenance()
if err == nil {
err = c.updateNetMapState((*nmClient.UpdatePeerPrm).SetMaintenance)
}
if err != nil {
return fmt.Errorf("local maintenance is started, but state is not updated in the network: %w", err)
}
}
return err
}
// calls UpdatePeerState operation of Netmap contract's client for the local node.
// State setter is used to specify node state to switch to.
func (c *cfg) updateNetMapState(stateSetter func(*nmClient.UpdatePeerPrm)) error {
var prm nmClient.UpdatePeerPrm
prm.SetKey(c.key.PublicKey().Bytes())
stateSetter(&prm)
return c.cfgNetmap.wrapper.UpdatePeerState(prm)
}
type netInfo struct {
netState netmap.State
magic interface {
MagicNumber() (uint64, error)
}
morphClientNetMap *nmClient.Client
msPerBlockRdr func() (int64, error)
}
func (n *netInfo) Dump(ver version.Version) (*netmapSDK.NetworkInfo, error) {
magic, err := n.magic.MagicNumber()
if err != nil {
return nil, err
}
var ni netmapSDK.NetworkInfo
ni.SetCurrentEpoch(n.netState.CurrentEpoch())
ni.SetMagicNumber(magic)
netInfoMorph, err := n.morphClientNetMap.ReadNetworkConfiguration()
if err != nil {
return nil, fmt.Errorf("read network configuration using netmap contract client: %w", err)
}
if mjr := ver.Major(); mjr > 2 || mjr == 2 && ver.Minor() > 9 {
msPerBlock, err := n.msPerBlockRdr()
if err != nil {
return nil, fmt.Errorf("ms per block: %w", err)
}
ni.SetMsPerBlock(msPerBlock)
ni.SetMaxObjectSize(netInfoMorph.MaxObjectSize)
ni.SetStoragePrice(netInfoMorph.StoragePrice)
ni.SetAuditFee(netInfoMorph.AuditFee)
ni.SetEpochDuration(netInfoMorph.EpochDuration)
ni.SetContainerFee(netInfoMorph.ContainerFee)
ni.SetNamedContainerFee(netInfoMorph.ContainerAliasFee)
ni.SetIRCandidateFee(netInfoMorph.IRCandidateFee)
ni.SetWithdrawalFee(netInfoMorph.WithdrawalFee)
if netInfoMorph.HomomorphicHashingDisabled {
ni.DisableHomomorphicHashing()
}
if netInfoMorph.MaintenanceModeAllowed {
ni.AllowMaintenanceMode()
}
for i := range netInfoMorph.Raw {
ni.SetRawNetworkParameter(netInfoMorph.Raw[i].Name, netInfoMorph.Raw[i].Value)
}
}
return &ni, nil
}