neoneo-go/pkg/network/server.go

1486 lines
42 KiB
Go
Raw Normal View History

2018-01-26 18:04:13 +00:00
package network
import (
"crypto/rand"
"encoding/binary"
"errors"
2018-01-26 18:04:13 +00:00
"fmt"
"math/big"
mrand "math/rand"
"net"
"sort"
"strconv"
"sync"
2018-01-28 10:12:05 +00:00
"time"
2018-01-27 15:00:28 +00:00
"github.com/nspcc-dev/neo-go/pkg/config"
"github.com/nspcc-dev/neo-go/pkg/core/block"
2020-11-27 10:55:48 +00:00
"github.com/nspcc-dev/neo-go/pkg/core/mempool"
"github.com/nspcc-dev/neo-go/pkg/core/mempoolevent"
"github.com/nspcc-dev/neo-go/pkg/core/mpt"
"github.com/nspcc-dev/neo-go/pkg/core/transaction"
"github.com/nspcc-dev/neo-go/pkg/io"
"github.com/nspcc-dev/neo-go/pkg/network/capability"
"github.com/nspcc-dev/neo-go/pkg/network/extpool"
"github.com/nspcc-dev/neo-go/pkg/network/payload"
"github.com/nspcc-dev/neo-go/pkg/util"
2019-11-15 10:32:40 +00:00
"go.uber.org/atomic"
2019-12-30 07:43:05 +00:00
"go.uber.org/zap"
2018-01-26 18:04:13 +00:00
)
const (
2019-10-22 14:56:03 +00:00
// peer numbers are arbitrary at the moment.
defaultMinPeers = 5
defaultAttemptConnPeers = 20
defaultMaxPeers = 100
defaultExtensiblePoolSize = 20
maxBlockBatch = 200
minPoolCount = 30
2018-01-26 18:04:13 +00:00
)
var (
errAlreadyConnected = errors.New("already connected")
errIdenticalID = errors.New("identical node id")
errInvalidNetwork = errors.New("invalid network")
errMaxPeers = errors.New("max peers reached")
errServerShutdown = errors.New("server shutdown")
errInvalidInvType = errors.New("invalid inventory type")
)
type (
// Ledger is everything Server needs from the blockchain.
Ledger interface {
extpool.Ledger
mempool.Feer
Blockqueuer
GetBlock(hash util.Uint256) (*block.Block, error)
GetConfig() config.ProtocolConfiguration
GetHeader(hash util.Uint256) (*block.Header, error)
GetHeaderHash(int) util.Uint256
GetMaxVerificationGAS() int64
GetMemPool() *mempool.Pool
GetNotaryBalance(acc util.Uint160) *big.Int
GetNotaryContractScriptHash() util.Uint160
GetNotaryDepositExpiration(acc util.Uint160) uint32
GetTransaction(util.Uint256) (*transaction.Transaction, uint32, error)
HasBlock(util.Uint256) bool
HeaderHeight() uint32
P2PSigExtensionsEnabled() bool
PoolTx(t *transaction.Transaction, pools ...*mempool.Pool) error
PoolTxWithData(t *transaction.Transaction, data interface{}, mp *mempool.Pool, feer mempool.Feer, verificationFunction func(t *transaction.Transaction, data interface{}) error) error
RegisterPostBlock(f func(func(*transaction.Transaction, *mempool.Pool, bool) bool, *mempool.Pool, *block.Block))
SubscribeForBlocks(ch chan<- *block.Block)
UnsubscribeFromBlocks(ch chan<- *block.Block)
}
// Service is a service abstraction (oracle, state root, consensus, etc).
Service interface {
Name() string
Start()
Shutdown()
}
// Server represents the local Node in the network. Its transport could
// be of any kind.
Server struct {
// ServerConfig holds the Server configuration.
ServerConfig
// id also known as the nonce of the server.
id uint32
2018-01-26 18:04:13 +00:00
// A copy of the Ledger's config.
config config.ProtocolConfiguration
2020-11-27 10:55:48 +00:00
transport Transporter
discovery Discoverer
chain Ledger
2020-11-27 10:55:48 +00:00
bQueue *blockQueue
bSyncQueue *blockQueue
network: only ask mempool for intersections with received Inv Most of the time on healthy network we see new transactions appearing that are not present in the mempool. Once they get into mempool we don't ask for them again when some other peer sends an Inv with them. Then these transactions are usually added into block, removed from mempool and no one actually sends them again to us. Some stale nodes can do that, but it's not very likely to happen. At the receiving end at the same time it's quite expensive to do full chain HasTransaction() query, so if we can avoid doing that it's always good. Here it technically allows resending old transaction that will be re-requested and an attempt to add it to mempool will be made. But it'll inevitably fail because the same HasTransaction() check is done there too. One can try to maliciously flood the node with stale transactions but it doesn't differ from flooding it with any other invalid transactions, so there is no new attack vector added. Baseline, 4 nodes with 10 workers: RPS 6902.296 6465.662 6856.044 6785.515 6157.024 ≈ 6633 ± 4.26% TPS 6468.431 6218.867 6610.565 6288.596 5790.556 ≈ 6275 ± 4.44% CPU % 50.231 42.925 49.481 48.396 42.662 ≈ 46.7 ± 7.01% Mem MB 2856.841 2684.103 2756.195 2733.485 2422.787 ≈ 2691 ± 5.40% Patched: RPS 7176.784 7014.511 6139.663 7191.280 7080.852 ≈ 6921 ± 5.72% ↑ 4.34% TPS 6945.409 6562.756 5927.050 6681.187 6821.794 ≈ 6588 ± 5.38% ↑ 4.99% CPU % 44.400 43.842 40.418 49.211 49.370 ≈ 45.4 ± 7.53% ↓ 2.78% Mem MB 2693.414 2640.602 2472.007 2731.482 2707.879 ≈ 2649 ± 3.53% ↓ 1.56%
2021-08-03 19:28:16 +00:00
mempool *mempool.Pool
2020-11-27 10:55:48 +00:00
notaryRequestPool *mempool.Pool
extensiblePool *extpool.Pool
2020-12-30 08:01:13 +00:00
notaryFeer NotaryFeer
services map[string]Service
extensHandlers map[string]func(*payload.Extensible) error
2022-01-12 20:04:07 +00:00
extensHighPrio string
txCallback func(*transaction.Transaction)
2018-01-26 18:04:13 +00:00
network: add fail-fast route for tx double processing When transaction spreads through the network many nodes are likely to get it in roughly the same time. They will rebroadcast it also in roughly the same time. As we have a number of peers it's quite likely that we'd get an Inv with the same transaction from multiple peers simultaneously. We will ask them for this transaction (independently!) and again we're likely to get it in roughly the same time. So we can easily end up with multiple threads processing the same transaction. Only one will succeed, but we can actually easily avoid doing it in the first place saving some CPU cycles for other things. Notice that we can't do it _before_ receiving a transaction because nothing guarantees that the peer will respond to our transaction request, so communication overhead is unavoidable at the moment, but saving on processing already gives quite interesting results. Baseline, four nodes with 10 workers: RPS 7176.784 7014.511 6139.663 7191.280 7080.852 ≈ 6921 ± 5.72% TPS 6945.409 6562.756 5927.050 6681.187 6821.794 ≈ 6588 ± 5.38% CPU % 44.400 43.842 40.418 49.211 49.370 ≈ 45.4 ± 7.53% Mem MB 2693.414 2640.602 2472.007 2731.482 2707.879 ≈ 2649 ± 3.53% Patched: RPS ≈ 7791.675 7996.559 7834.504 7746.705 7891.614 ≈ 7852 ± 1.10% ↑ 13.45% TPS ≈ 7241.497 7711.765 7520.211 7425.890 7334.443 ≈ 7447 ± 2.17% ↑ 13.04% CPU % 29.853 39.936 39.945 36.371 39.999 ≈ 37.2 ± 10.57% ↓ 18.06% Mem MB 2749.635 2791.609 2828.610 2910.431 2863.344 ≈ 2829 ± 1.97% ↑ 6.80%
2021-08-03 19:43:31 +00:00
txInLock sync.Mutex
txInMap map[util.Uint256]struct{}
lock sync.RWMutex
peers map[Peer]bool
2018-01-26 20:39:34 +00:00
// lastRequestedBlock contains a height of the last requested block.
lastRequestedBlock atomic.Uint32
// lastRequestedHeader contains a height of the last requested header.
lastRequestedHeader atomic.Uint32
register chan Peer
unregister chan peerDrop
quit chan struct{}
2019-11-15 10:32:40 +00:00
transactions chan *transaction.Transaction
syncReached *atomic.Bool
2019-12-30 07:43:05 +00:00
stateSync StateSync
2019-12-30 07:43:05 +00:00
log *zap.Logger
}
peerDrop struct {
peer Peer
reason error
}
)
func randomID() uint32 {
buf := make([]byte, 4)
_, _ = rand.Read(buf)
return binary.BigEndian.Uint32(buf)
}
// NewServer returns a new Server, initialized with the given configuration.
func NewServer(config ServerConfig, chain Ledger, stSync StateSync, log *zap.Logger) (*Server, error) {
return newServerFromConstructors(config, chain, stSync, log, func(s *Server) Transporter {
return NewTCPTransport(s, net.JoinHostPort(s.ServerConfig.Address, strconv.Itoa(int(s.ServerConfig.Port))), s.log)
2022-01-12 20:04:07 +00:00
}, newDefaultDiscovery)
}
func newServerFromConstructors(config ServerConfig, chain Ledger, stSync StateSync, log *zap.Logger,
newTransport func(*Server) Transporter,
newDiscovery func([]string, time.Duration, Transporter) Discoverer,
) (*Server, error) {
2019-12-30 07:43:05 +00:00
if log == nil {
return nil, errors.New("logger is a required parameter")
2019-12-30 07:43:05 +00:00
}
if config.ExtensiblePoolSize <= 0 {
config.ExtensiblePoolSize = defaultExtensiblePoolSize
log.Info("ExtensiblePoolSize is not set or wrong, using default value",
zap.Int("ExtensiblePoolSize", config.ExtensiblePoolSize))
}
s := &Server{
ServerConfig: config,
chain: chain,
id: randomID(),
config: chain.GetConfig(),
quit: make(chan struct{}),
register: make(chan Peer),
unregister: make(chan peerDrop),
txInMap: make(map[util.Uint256]struct{}),
peers: make(map[Peer]bool),
syncReached: atomic.NewBool(false),
mempool: chain.GetMemPool(),
extensiblePool: extpool.New(chain, config.ExtensiblePoolSize),
log: log,
transactions: make(chan *transaction.Transaction, 64),
services: make(map[string]Service),
extensHandlers: make(map[string]func(*payload.Extensible) error),
stateSync: stSync,
2018-01-26 18:04:13 +00:00
}
2020-11-27 10:55:48 +00:00
if chain.P2PSigExtensionsEnabled() {
2020-12-30 08:01:13 +00:00
s.notaryFeer = NewNotaryFeer(chain)
s.notaryRequestPool = mempool.New(s.config.P2PNotaryRequestPayloadPoolSize, 1, true)
chain.RegisterPostBlock(func(isRelevant func(*transaction.Transaction, *mempool.Pool, bool) bool, txpool *mempool.Pool, _ *block.Block) {
2020-11-27 10:55:48 +00:00
s.notaryRequestPool.RemoveStale(func(t *transaction.Transaction) bool {
return isRelevant(t, txpool, true)
2020-12-30 08:01:13 +00:00
}, s.notaryFeer)
2020-11-27 10:55:48 +00:00
})
}
s.bQueue = newBlockQueue(maxBlockBatch, chain, log, func(b *block.Block) {
s.tryStartServices()
})
2018-01-26 18:04:13 +00:00
s.bSyncQueue = newBlockQueue(maxBlockBatch, s.stateSync, log, nil)
if s.MinPeers < 0 {
2019-12-30 07:43:05 +00:00
s.log.Info("bad MinPeers configured, using the default value",
zap.Int("configured", s.MinPeers),
zap.Int("actual", defaultMinPeers))
s.MinPeers = defaultMinPeers
}
if s.MaxPeers <= 0 {
2019-12-30 07:43:05 +00:00
s.log.Info("bad MaxPeers configured, using the default value",
zap.Int("configured", s.MaxPeers),
zap.Int("actual", defaultMaxPeers))
s.MaxPeers = defaultMaxPeers
}
if s.AttemptConnPeers <= 0 {
2019-12-30 07:43:05 +00:00
s.log.Info("bad AttemptConnPeers configured, using the default value",
zap.Int("configured", s.AttemptConnPeers),
zap.Int("actual", defaultAttemptConnPeers))
s.AttemptConnPeers = defaultAttemptConnPeers
}
s.transport = newTransport(s)
s.discovery = newDiscovery(
s.Seeds,
s.DialTimeout,
s.transport,
)
2018-01-26 18:04:13 +00:00
return s, nil
2018-01-30 10:56:36 +00:00
}
// ID returns the servers ID.
func (s *Server) ID() uint32 {
return s.id
}
// Start will start the server and its underlying transport.
func (s *Server) Start(errChan chan error) {
2019-12-30 07:43:05 +00:00
s.log.Info("node started",
zap.Uint32("blockHeight", s.chain.BlockHeight()),
zap.Uint32("headerHeight", s.chain.HeaderHeight()))
s.tryStartServices()
2020-11-27 10:55:48 +00:00
s.initStaleMemPools()
go s.broadcastTxLoop()
go s.relayBlocksLoop()
go s.bQueue.run()
go s.bSyncQueue.run()
go s.transport.Accept()
setServerAndNodeVersions(s.UserAgent, strconv.FormatUint(uint64(s.id), 10))
s.run()
2018-01-31 19:11:08 +00:00
}
2018-01-30 10:56:36 +00:00
// Shutdown disconnects all peers and stops listening.
func (s *Server) Shutdown() {
2019-12-30 07:43:05 +00:00
s.log.Info("shutting down server", zap.Int("peers", s.PeerCount()))
s.transport.Close()
s.discovery.Close()
for _, p := range s.getPeers(nil) {
p.Disconnect(errServerShutdown)
}
s.bQueue.discard()
s.bSyncQueue.discard()
for _, svc := range s.services {
svc.Shutdown()
}
if s.chain.P2PSigExtensionsEnabled() {
2021-01-15 12:40:15 +00:00
s.notaryRequestPool.StopSubscriptions()
}
close(s.quit)
}
// AddService allows to add a service to be started/stopped by Server.
func (s *Server) AddService(svc Service) {
s.services[svc.Name()] = svc
}
// AddExtensibleService register a service that handles extensible payload of some kind.
func (s *Server) AddExtensibleService(svc Service, category string, handler func(*payload.Extensible) error) {
s.extensHandlers[category] = handler
s.AddService(svc)
}
2022-01-12 20:04:07 +00:00
// AddExtensibleHPService registers a high-priority service that handles extensible payload of some kind.
func (s *Server) AddExtensibleHPService(svc Service, category string, handler func(*payload.Extensible) error, txCallback func(*transaction.Transaction)) {
s.txCallback = txCallback
s.extensHighPrio = category
s.AddExtensibleService(svc, category, handler)
}
// GetNotaryPool allows to retrieve notary pool, if it's configured.
func (s *Server) GetNotaryPool() *mempool.Pool {
return s.notaryRequestPool
}
// UnconnectedPeers returns a list of peers that are in the discovery peer list
// but are not connected to the server.
func (s *Server) UnconnectedPeers() []string {
return s.discovery.UnconnectedPeers()
}
// BadPeers returns a list of peers the are flagged as "bad" peers.
func (s *Server) BadPeers() []string {
return s.discovery.BadPeers()
}
// ConnectedPeers returns a list of currently connected peers.
func (s *Server) ConnectedPeers() []string {
s.lock.RLock()
defer s.lock.RUnlock()
peers := make([]string, 0, len(s.peers))
for k := range s.peers {
peers = append(peers, k.PeerAddr().String())
}
return peers
}
// run is a goroutine that starts another goroutine to manage protocol specifics
// while itself dealing with peers management (handling connects/disconnects).
func (s *Server) run() {
go s.runProto()
for {
if s.PeerCount() < s.MinPeers {
s.discovery.RequestRemote(s.AttemptConnPeers)
}
if s.discovery.PoolCount() < minPoolCount {
s.broadcastHPMessage(NewMessage(CMDGetAddr, payload.NewNullPayload()))
}
select {
case <-s.quit:
return
case p := <-s.register:
s.lock.Lock()
s.peers[p] = true
s.lock.Unlock()
peerCount := s.PeerCount()
s.log.Info("new peer connected", zap.Stringer("addr", p.RemoteAddr()), zap.Int("peerCount", peerCount))
if peerCount > s.MaxPeers {
s.lock.RLock()
// Pick a random peer and drop connection to it.
for peer := range s.peers {
// It will send us unregister signal.
go peer.Disconnect(errMaxPeers)
break
}
s.lock.RUnlock()
}
updatePeersConnectedMetric(s.PeerCount())
case drop := <-s.unregister:
s.lock.Lock()
if s.peers[drop.peer] {
delete(s.peers, drop.peer)
s.lock.Unlock()
2019-12-30 07:43:05 +00:00
s.log.Warn("peer disconnected",
zap.Stringer("addr", drop.peer.RemoteAddr()),
zap.Error(drop.reason),
2019-12-30 07:43:05 +00:00
zap.Int("peerCount", s.PeerCount()))
addr := drop.peer.PeerAddr().String()
if drop.reason == errIdenticalID {
s.discovery.RegisterBadAddr(addr)
} else if drop.reason == errAlreadyConnected {
// There is a race condition when peer can be disconnected twice for the this reason
// which can lead to no connections to peer at all. Here we check for such a possibility.
stillConnected := false
s.lock.RLock()
verDrop := drop.peer.Version()
addr := drop.peer.PeerAddr().String()
if verDrop != nil {
for peer := range s.peers {
ver := peer.Version()
// Already connected, drop this connection.
if ver != nil && ver.Nonce == verDrop.Nonce && peer.PeerAddr().String() == addr {
stillConnected = true
}
}
}
s.lock.RUnlock()
if !stillConnected {
s.discovery.UnregisterConnectedAddr(addr)
s.discovery.BackFill(addr)
}
} else {
s.discovery.UnregisterConnectedAddr(addr)
s.discovery.BackFill(addr)
}
updatePeersConnectedMetric(s.PeerCount())
} else {
// else the peer is already gone, which can happen
// because we have two goroutines sending signals here
s.lock.Unlock()
}
}
2018-01-31 19:11:08 +00:00
}
2018-01-27 12:39:07 +00:00
}
// runProto is a goroutine that manages server-wide protocol events.
func (s *Server) runProto() {
pingTimer := time.NewTimer(s.PingInterval)
for {
prevHeight := s.chain.BlockHeight()
select {
case <-s.quit:
return
case <-pingTimer.C:
if s.chain.BlockHeight() == prevHeight {
// Get a copy of s.peers to avoid holding a lock while sending.
for _, peer := range s.getPeers(nil) {
_ = peer.SendPing(NewMessage(CMDPing, payload.NewPing(s.chain.BlockHeight(), s.id)))
}
}
pingTimer.Reset(s.PingInterval)
}
}
}
func (s *Server) tryStartServices() {
if s.syncReached.Load() {
2019-11-15 10:32:40 +00:00
return
}
if s.IsInSync() && s.syncReached.CAS(false, true) {
s.log.Info("node reached synchronized state, starting services")
if s.chain.P2PSigExtensionsEnabled() {
s.notaryRequestPool.RunSubscriptions() // WSClient is also a subscriber.
}
for _, svc := range s.services {
svc.Start()
}
2019-11-15 10:32:40 +00:00
}
}
// SubscribeForNotaryRequests adds given channel to a notary request event
// broadcasting, so when a new P2PNotaryRequest is received or an existing
// P2PNotaryRequest is removed from pool you'll receive it via this channel.
// Make sure it's read from regularly as not reading these events might affect
// other Server functions.
// Ensure that P2PSigExtensions are enabled before calling this method.
func (s *Server) SubscribeForNotaryRequests(ch chan<- mempoolevent.Event) {
if !s.chain.P2PSigExtensionsEnabled() {
panic("P2PSigExtensions are disabled")
}
s.notaryRequestPool.SubscribeForTransactions(ch)
}
// UnsubscribeFromNotaryRequests unsubscribes given channel from notary request
// notifications, you can close it afterwards. Passing non-subscribed channel
// is a no-op.
// Ensure that P2PSigExtensions are enabled before calling this method.
func (s *Server) UnsubscribeFromNotaryRequests(ch chan<- mempoolevent.Event) {
if !s.chain.P2PSigExtensionsEnabled() {
panic("P2PSigExtensions are disabled")
}
s.notaryRequestPool.UnsubscribeFromTransactions(ch)
}
// getPeers returns current list of peers connected to the server filtered by
// isOK function if it's given.
func (s *Server) getPeers(isOK func(Peer) bool) []Peer {
2019-11-15 10:32:40 +00:00
s.lock.RLock()
defer s.lock.RUnlock()
peers := make([]Peer, 0, len(s.peers))
for k := range s.peers {
if isOK != nil && !isOK(k) {
continue
}
peers = append(peers, k)
2019-11-15 10:32:40 +00:00
}
return peers
}
// PeerCount returns the number of current connected peers.
func (s *Server) PeerCount() int {
s.lock.RLock()
defer s.lock.RUnlock()
return len(s.peers)
}
// HandshakedPeersCount returns the number of connected peers
// which have already performed handshake.
func (s *Server) HandshakedPeersCount() int {
s.lock.RLock()
defer s.lock.RUnlock()
var count int
for p := range s.peers {
if p.Handshaked() {
count++
}
}
return count
}
// getVersionMsg returns current version message.
func (s *Server) getVersionMsg() (*Message, error) {
port, err := s.Port()
if err != nil {
return nil, err
}
capabilities := []capability.Capability{
{
Type: capability.TCPServer,
Data: &capability.Server{
Port: port,
},
},
}
if s.Relay {
capabilities = append(capabilities, capability.Capability{
Type: capability.FullNode,
Data: &capability.Node{
StartHeight: s.chain.BlockHeight(),
},
})
}
payload := payload.NewVersion(
s.Net,
s.id,
s.UserAgent,
capabilities,
)
return NewMessage(CMDVersion, payload), nil
}
// IsInSync answers the question of whether the server is in sync with the
// network or not (at least how the server itself sees it). The server operates
// with the data that it has, the number of peers (that has to be more than
// minimum number) and height of these peers (our chain has to be not lower
// than 2/3 of our peers have). Ideally we would check for the highest of the
// peers, but the problem is that they can lie to us and send whatever height
// they want to.
func (s *Server) IsInSync() bool {
var peersNumber int
var notHigher int
if s.stateSync.IsActive() {
return false
}
if s.MinPeers == 0 {
return true
}
ourLastBlock := s.chain.BlockHeight()
s.lock.RLock()
for p := range s.peers {
if p.Handshaked() {
peersNumber++
if ourLastBlock >= p.LastBlockIndex() {
notHigher++
}
}
}
s.lock.RUnlock()
// Checking bQueue would also be nice, but it can be filled with garbage
// easily at the moment.
return peersNumber >= s.MinPeers && (3*notHigher > 2*peersNumber) // && s.bQueue.length() == 0
}
// When a peer sends out his version we reply with verack after validating
// the version.
func (s *Server) handleVersionCmd(p Peer, version *payload.Version) error {
err := p.HandleVersion(version)
if err != nil {
return err
}
if s.id == version.Nonce {
return errIdenticalID
2018-01-28 13:59:32 +00:00
}
// Make sure both server and peer are operating on
// the same network.
if s.Net != version.Magic {
return errInvalidNetwork
}
peerAddr := p.PeerAddr().String()
s.discovery.RegisterConnectedAddr(peerAddr)
s.lock.RLock()
for peer := range s.peers {
if p == peer {
continue
}
ver := peer.Version()
// Already connected, drop this connection.
if ver != nil && ver.Nonce == version.Nonce && peer.PeerAddr().String() == peerAddr {
s.lock.RUnlock()
return errAlreadyConnected
}
}
s.lock.RUnlock()
return p.SendVersionAck(NewMessage(CMDVerack, payload.NewNullPayload()))
2018-01-28 13:59:32 +00:00
}
// handleBlockCmd processes the received block received from its peer.
func (s *Server) handleBlockCmd(p Peer, block *block.Block) error {
if s.stateSync.IsActive() {
return s.bSyncQueue.putBlock(block)
}
return s.bQueue.putBlock(block)
}
// handlePing processes ping request.
func (s *Server) handlePing(p Peer, ping *payload.Ping) error {
err := p.HandlePing(ping)
if err != nil {
return err
}
err = s.requestBlocksOrHeaders(p)
if err != nil {
return err
}
return p.EnqueueP2PMessage(NewMessage(CMDPong, payload.NewPing(s.chain.BlockHeight(), s.id)))
}
func (s *Server) requestBlocksOrHeaders(p Peer) error {
if s.stateSync.NeedHeaders() {
if s.chain.HeaderHeight() < p.LastBlockIndex() {
return s.requestHeaders(p)
}
return nil
}
var (
bq Blockqueuer = s.chain
requestMPTNodes bool
)
if s.stateSync.IsActive() {
bq = s.stateSync
requestMPTNodes = s.stateSync.NeedMPTNodes()
}
if bq.BlockHeight() >= p.LastBlockIndex() {
return nil
}
err := s.requestBlocks(bq, p)
if err != nil {
return err
}
if requestMPTNodes {
return s.requestMPTNodes(p, s.stateSync.GetUnknownMPTNodesBatch(payload.MaxMPTHashesCount))
}
return nil
}
// requestHeaders sends a CMDGetHeaders message to the peer to sync up in headers.
func (s *Server) requestHeaders(p Peer) error {
pl := getRequestBlocksPayload(p, s.chain.HeaderHeight(), &s.lastRequestedHeader)
return p.EnqueueP2PMessage(NewMessage(CMDGetHeaders, pl))
}
// handlePing processes pong request.
func (s *Server) handlePong(p Peer, pong *payload.Ping) error {
err := p.HandlePong(pong)
if err != nil {
return err
}
return s.requestBlocksOrHeaders(p)
}
2019-10-22 14:56:03 +00:00
// handleInvCmd processes the received inventory.
func (s *Server) handleInvCmd(p Peer, inv *payload.Inventory) error {
reqHashes := make([]util.Uint256, 0)
var typExists = map[payload.InventoryType]func(util.Uint256) bool{
network: only ask mempool for intersections with received Inv Most of the time on healthy network we see new transactions appearing that are not present in the mempool. Once they get into mempool we don't ask for them again when some other peer sends an Inv with them. Then these transactions are usually added into block, removed from mempool and no one actually sends them again to us. Some stale nodes can do that, but it's not very likely to happen. At the receiving end at the same time it's quite expensive to do full chain HasTransaction() query, so if we can avoid doing that it's always good. Here it technically allows resending old transaction that will be re-requested and an attempt to add it to mempool will be made. But it'll inevitably fail because the same HasTransaction() check is done there too. One can try to maliciously flood the node with stale transactions but it doesn't differ from flooding it with any other invalid transactions, so there is no new attack vector added. Baseline, 4 nodes with 10 workers: RPS 6902.296 6465.662 6856.044 6785.515 6157.024 ≈ 6633 ± 4.26% TPS 6468.431 6218.867 6610.565 6288.596 5790.556 ≈ 6275 ± 4.44% CPU % 50.231 42.925 49.481 48.396 42.662 ≈ 46.7 ± 7.01% Mem MB 2856.841 2684.103 2756.195 2733.485 2422.787 ≈ 2691 ± 5.40% Patched: RPS 7176.784 7014.511 6139.663 7191.280 7080.852 ≈ 6921 ± 5.72% ↑ 4.34% TPS 6945.409 6562.756 5927.050 6681.187 6821.794 ≈ 6588 ± 5.38% ↑ 4.99% CPU % 44.400 43.842 40.418 49.211 49.370 ≈ 45.4 ± 7.53% ↓ 2.78% Mem MB 2693.414 2640.602 2472.007 2731.482 2707.879 ≈ 2649 ± 3.53% ↓ 1.56%
2021-08-03 19:28:16 +00:00
payload.TXType: s.mempool.ContainsKey,
payload.BlockType: s.chain.HasBlock,
payload.ExtensibleType: func(h util.Uint256) bool {
cp := s.extensiblePool.Get(h)
return cp != nil
},
2020-11-27 10:55:48 +00:00
payload.P2PNotaryRequestType: func(h util.Uint256) bool {
return s.notaryRequestPool.ContainsKey(h)
},
}
if exists := typExists[inv.Type]; exists != nil {
for _, hash := range inv.Hashes {
if !exists(hash) {
reqHashes = append(reqHashes, hash)
}
}
}
if len(reqHashes) > 0 {
msg := NewMessage(CMDGetData, payload.NewInventory(inv.Type, reqHashes))
pkt, err := msg.Bytes()
if err != nil {
return err
}
if inv.Type == payload.ExtensibleType {
return p.EnqueueHPPacket(true, pkt)
}
return p.EnqueueP2PPacket(pkt)
}
return nil
}
// handleMempoolCmd handles getmempool command.
func (s *Server) handleMempoolCmd(p Peer) error {
network: only ask mempool for intersections with received Inv Most of the time on healthy network we see new transactions appearing that are not present in the mempool. Once they get into mempool we don't ask for them again when some other peer sends an Inv with them. Then these transactions are usually added into block, removed from mempool and no one actually sends them again to us. Some stale nodes can do that, but it's not very likely to happen. At the receiving end at the same time it's quite expensive to do full chain HasTransaction() query, so if we can avoid doing that it's always good. Here it technically allows resending old transaction that will be re-requested and an attempt to add it to mempool will be made. But it'll inevitably fail because the same HasTransaction() check is done there too. One can try to maliciously flood the node with stale transactions but it doesn't differ from flooding it with any other invalid transactions, so there is no new attack vector added. Baseline, 4 nodes with 10 workers: RPS 6902.296 6465.662 6856.044 6785.515 6157.024 ≈ 6633 ± 4.26% TPS 6468.431 6218.867 6610.565 6288.596 5790.556 ≈ 6275 ± 4.44% CPU % 50.231 42.925 49.481 48.396 42.662 ≈ 46.7 ± 7.01% Mem MB 2856.841 2684.103 2756.195 2733.485 2422.787 ≈ 2691 ± 5.40% Patched: RPS 7176.784 7014.511 6139.663 7191.280 7080.852 ≈ 6921 ± 5.72% ↑ 4.34% TPS 6945.409 6562.756 5927.050 6681.187 6821.794 ≈ 6588 ± 5.38% ↑ 4.99% CPU % 44.400 43.842 40.418 49.211 49.370 ≈ 45.4 ± 7.53% ↓ 2.78% Mem MB 2693.414 2640.602 2472.007 2731.482 2707.879 ≈ 2649 ± 3.53% ↓ 1.56%
2021-08-03 19:28:16 +00:00
txs := s.mempool.GetVerifiedTransactions()
hs := make([]util.Uint256, 0, payload.MaxHashesCount)
for i := range txs {
hs = append(hs, txs[i].Hash())
if len(hs) < payload.MaxHashesCount && i != len(txs)-1 {
continue
}
msg := NewMessage(CMDInv, payload.NewInventory(payload.TXType, hs))
err := p.EnqueueP2PMessage(msg)
if err != nil {
return err
}
hs = hs[:0]
}
return nil
}
// handleInvCmd processes the received inventory.
func (s *Server) handleGetDataCmd(p Peer, inv *payload.Inventory) error {
var notFound []util.Uint256
for _, hash := range inv.Hashes {
var msg *Message
switch inv.Type {
case payload.TXType:
tx, _, err := s.chain.GetTransaction(hash)
if err == nil {
msg = NewMessage(CMDTX, tx)
} else {
notFound = append(notFound, hash)
}
case payload.BlockType:
b, err := s.chain.GetBlock(hash)
if err == nil {
msg = NewMessage(CMDBlock, b)
} else {
notFound = append(notFound, hash)
}
case payload.ExtensibleType:
if cp := s.extensiblePool.Get(hash); cp != nil {
msg = NewMessage(CMDExtensible, cp)
}
2020-11-27 10:55:48 +00:00
case payload.P2PNotaryRequestType:
if nrp, ok := s.notaryRequestPool.TryGetData(hash); ok { // already have checked P2PSigExtEnabled
msg = NewMessage(CMDP2PNotaryRequest, nrp.(*payload.P2PNotaryRequest))
} else {
notFound = append(notFound, hash)
}
}
if msg != nil {
pkt, err := msg.Bytes()
if err == nil {
if inv.Type == payload.ExtensibleType {
err = p.EnqueueHPPacket(true, pkt)
} else {
err = p.EnqueueP2PPacket(pkt)
}
}
if err != nil {
return err
}
2019-11-08 15:40:21 +00:00
}
}
if len(notFound) != 0 {
return p.EnqueueP2PMessage(NewMessage(CMDNotFound, payload.NewInventory(inv.Type, notFound)))
}
return nil
}
// handleGetMPTDataCmd processes the received MPT inventory.
func (s *Server) handleGetMPTDataCmd(p Peer, inv *payload.MPTInventory) error {
if !s.config.P2PStateExchangeExtensions {
return errors.New("GetMPTDataCMD was received, but P2PStateExchangeExtensions are disabled")
}
if s.config.KeepOnlyLatestState {
// TODO: implement keeping MPT states for P1 and P2 height (#2095, #2152 related)
return errors.New("GetMPTDataCMD was received, but only latest MPT state is supported")
}
resp := payload.MPTData{}
capLeft := payload.MaxSize - 8 // max(io.GetVarSize(len(resp.Nodes)))
added := make(map[util.Uint256]struct{})
for _, h := range inv.Hashes {
if capLeft <= 2 { // at least 1 byte for len(nodeBytes) and 1 byte for node type
break
}
err := s.stateSync.Traverse(h,
func(n mpt.Node, node []byte) bool {
if _, ok := added[n.Hash()]; ok {
return false
}
l := len(node)
size := l + io.GetVarSize(l)
if size > capLeft {
return true
}
resp.Nodes = append(resp.Nodes, node)
added[n.Hash()] = struct{}{}
capLeft -= size
return false
})
if err != nil {
return fmt.Errorf("failed to traverse MPT starting from %s: %w", h.StringBE(), err)
}
}
if len(resp.Nodes) > 0 {
msg := NewMessage(CMDMPTData, &resp)
return p.EnqueueP2PMessage(msg)
}
return nil
}
func (s *Server) handleMPTDataCmd(p Peer, data *payload.MPTData) error {
if !s.config.P2PStateExchangeExtensions {
return errors.New("MPTDataCMD was received, but P2PStateExchangeExtensions are disabled")
}
return s.stateSync.AddMPTNodes(data.Nodes)
}
// requestMPTNodes requests specified MPT nodes from the peer or broadcasts
// request if peer is not specified.
func (s *Server) requestMPTNodes(p Peer, itms []util.Uint256) error {
if len(itms) == 0 {
return nil
}
if len(itms) > payload.MaxMPTHashesCount {
itms = itms[:payload.MaxMPTHashesCount]
}
pl := payload.NewMPTInventory(itms)
msg := NewMessage(CMDGetMPTData, pl)
return p.EnqueueP2PMessage(msg)
}
// handleGetBlocksCmd processes the getblocks request.
func (s *Server) handleGetBlocksCmd(p Peer, gb *payload.GetBlocks) error {
count := gb.Count
if gb.Count < 0 || gb.Count > payload.MaxHashesCount {
count = payload.MaxHashesCount
}
start, err := s.chain.GetHeader(gb.HashStart)
if err != nil {
return err
}
blockHashes := make([]util.Uint256, 0)
for i := start.Index + 1; i <= start.Index+uint32(count); i++ {
hash := s.chain.GetHeaderHash(int(i))
if hash.Equals(util.Uint256{}) {
break
}
blockHashes = append(blockHashes, hash)
}
if len(blockHashes) == 0 {
return nil
}
payload := payload.NewInventory(payload.BlockType, blockHashes)
msg := NewMessage(CMDInv, payload)
return p.EnqueueP2PMessage(msg)
}
// handleGetBlockByIndexCmd processes the getblockbyindex request.
func (s *Server) handleGetBlockByIndexCmd(p Peer, gbd *payload.GetBlockByIndex) error {
count := gbd.Count
if gbd.Count < 0 || gbd.Count > payload.MaxHashesCount {
count = payload.MaxHashesCount
}
for i := gbd.IndexStart; i < gbd.IndexStart+uint32(count); i++ {
hash := s.chain.GetHeaderHash(int(i))
if hash.Equals(util.Uint256{}) {
break
}
b, err := s.chain.GetBlock(hash)
if err != nil {
break
}
msg := NewMessage(CMDBlock, b)
if err = p.EnqueueP2PMessage(msg); err != nil {
return err
}
}
return nil
}
// handleGetHeadersCmd processes the getheaders request.
func (s *Server) handleGetHeadersCmd(p Peer, gh *payload.GetBlockByIndex) error {
if gh.IndexStart > s.chain.HeaderHeight() {
return nil
}
count := gh.Count
if gh.Count < 0 || gh.Count > payload.MaxHeadersAllowed {
count = payload.MaxHeadersAllowed
}
resp := payload.Headers{}
resp.Hdrs = make([]*block.Header, 0, count)
for i := gh.IndexStart; i < gh.IndexStart+uint32(count); i++ {
hash := s.chain.GetHeaderHash(int(i))
if hash.Equals(util.Uint256{}) {
break
}
header, err := s.chain.GetHeader(hash)
if err != nil {
break
}
resp.Hdrs = append(resp.Hdrs, header)
}
if len(resp.Hdrs) == 0 {
return nil
}
msg := NewMessage(CMDHeaders, &resp)
return p.EnqueueP2PMessage(msg)
}
// handleHeadersCmd processes headers payload.
func (s *Server) handleHeadersCmd(p Peer, h *payload.Headers) error {
return s.stateSync.AddHeaders(h.Hdrs...)
}
// handleExtensibleCmd processes received extensible payload.
func (s *Server) handleExtensibleCmd(e *payload.Extensible) error {
if !s.syncReached.Load() {
return nil
}
ok, err := s.extensiblePool.Add(e)
if err != nil {
return err
}
if !ok { // payload is already in cache
return nil
}
handler := s.extensHandlers[e.Category]
if handler != nil {
err = handler(e)
if err != nil {
return err
}
}
s.advertiseExtensible(e)
return nil
}
func (s *Server) advertiseExtensible(e *payload.Extensible) {
msg := NewMessage(CMDInv, payload.NewInventory(payload.ExtensibleType, []util.Uint256{e.Hash()}))
2022-01-12 20:04:07 +00:00
if e.Category == s.extensHighPrio {
// It's high priority because it directly affects consensus process,
// even though it's just an inv.
s.broadcastHPMessage(msg)
} else {
s.broadcastMessage(msg)
}
2019-11-08 15:40:21 +00:00
}
2019-11-15 10:32:40 +00:00
// handleTxCmd processes received transaction.
// It never returns an error.
func (s *Server) handleTxCmd(tx *transaction.Transaction) error {
// It's OK for it to fail for various reasons like tx already existing
// in the pool.
network: add fail-fast route for tx double processing When transaction spreads through the network many nodes are likely to get it in roughly the same time. They will rebroadcast it also in roughly the same time. As we have a number of peers it's quite likely that we'd get an Inv with the same transaction from multiple peers simultaneously. We will ask them for this transaction (independently!) and again we're likely to get it in roughly the same time. So we can easily end up with multiple threads processing the same transaction. Only one will succeed, but we can actually easily avoid doing it in the first place saving some CPU cycles for other things. Notice that we can't do it _before_ receiving a transaction because nothing guarantees that the peer will respond to our transaction request, so communication overhead is unavoidable at the moment, but saving on processing already gives quite interesting results. Baseline, four nodes with 10 workers: RPS 7176.784 7014.511 6139.663 7191.280 7080.852 ≈ 6921 ± 5.72% TPS 6945.409 6562.756 5927.050 6681.187 6821.794 ≈ 6588 ± 5.38% CPU % 44.400 43.842 40.418 49.211 49.370 ≈ 45.4 ± 7.53% Mem MB 2693.414 2640.602 2472.007 2731.482 2707.879 ≈ 2649 ± 3.53% Patched: RPS ≈ 7791.675 7996.559 7834.504 7746.705 7891.614 ≈ 7852 ± 1.10% ↑ 13.45% TPS ≈ 7241.497 7711.765 7520.211 7425.890 7334.443 ≈ 7447 ± 2.17% ↑ 13.04% CPU % 29.853 39.936 39.945 36.371 39.999 ≈ 37.2 ± 10.57% ↓ 18.06% Mem MB 2749.635 2791.609 2828.610 2910.431 2863.344 ≈ 2829 ± 1.97% ↑ 6.80%
2021-08-03 19:43:31 +00:00
s.txInLock.Lock()
_, ok := s.txInMap[tx.Hash()]
if ok || s.mempool.ContainsKey(tx.Hash()) {
s.txInLock.Unlock()
return nil
}
s.txInMap[tx.Hash()] = struct{}{}
s.txInLock.Unlock()
if s.txCallback != nil {
s.txCallback(tx)
}
if s.verifyAndPoolTX(tx) == nil {
2020-11-27 10:55:48 +00:00
s.broadcastTX(tx, nil)
}
network: add fail-fast route for tx double processing When transaction spreads through the network many nodes are likely to get it in roughly the same time. They will rebroadcast it also in roughly the same time. As we have a number of peers it's quite likely that we'd get an Inv with the same transaction from multiple peers simultaneously. We will ask them for this transaction (independently!) and again we're likely to get it in roughly the same time. So we can easily end up with multiple threads processing the same transaction. Only one will succeed, but we can actually easily avoid doing it in the first place saving some CPU cycles for other things. Notice that we can't do it _before_ receiving a transaction because nothing guarantees that the peer will respond to our transaction request, so communication overhead is unavoidable at the moment, but saving on processing already gives quite interesting results. Baseline, four nodes with 10 workers: RPS 7176.784 7014.511 6139.663 7191.280 7080.852 ≈ 6921 ± 5.72% TPS 6945.409 6562.756 5927.050 6681.187 6821.794 ≈ 6588 ± 5.38% CPU % 44.400 43.842 40.418 49.211 49.370 ≈ 45.4 ± 7.53% Mem MB 2693.414 2640.602 2472.007 2731.482 2707.879 ≈ 2649 ± 3.53% Patched: RPS ≈ 7791.675 7996.559 7834.504 7746.705 7891.614 ≈ 7852 ± 1.10% ↑ 13.45% TPS ≈ 7241.497 7711.765 7520.211 7425.890 7334.443 ≈ 7447 ± 2.17% ↑ 13.04% CPU % 29.853 39.936 39.945 36.371 39.999 ≈ 37.2 ± 10.57% ↓ 18.06% Mem MB 2749.635 2791.609 2828.610 2910.431 2863.344 ≈ 2829 ± 1.97% ↑ 6.80%
2021-08-03 19:43:31 +00:00
s.txInLock.Lock()
delete(s.txInMap, tx.Hash())
s.txInLock.Unlock()
2020-11-27 10:55:48 +00:00
return nil
}
// handleP2PNotaryRequestCmd process received P2PNotaryRequest payload.
func (s *Server) handleP2PNotaryRequestCmd(r *payload.P2PNotaryRequest) error {
if !s.chain.P2PSigExtensionsEnabled() {
return errors.New("P2PNotaryRequestCMD was received, but P2PSignatureExtensions are disabled")
}
// It's OK for it to fail for various reasons like request already existing
// in the pool.
2021-05-12 17:14:35 +00:00
_ = s.RelayP2PNotaryRequest(r)
return nil
}
// RelayP2PNotaryRequest adds given request to the pool and relays. It does not check
// P2PSigExtensions enabled.
func (s *Server) RelayP2PNotaryRequest(r *payload.P2PNotaryRequest) error {
err := s.verifyAndPoolNotaryRequest(r)
if err == nil {
2020-11-27 10:55:48 +00:00
s.broadcastP2PNotaryRequestPayload(nil, r)
}
return err
2020-11-27 10:55:48 +00:00
}
// verifyAndPoolNotaryRequest verifies NotaryRequest payload and adds it to the payload mempool.
func (s *Server) verifyAndPoolNotaryRequest(r *payload.P2PNotaryRequest) error {
return s.chain.PoolTxWithData(r.FallbackTransaction, r, s.notaryRequestPool, s.notaryFeer, s.verifyNotaryRequest)
2020-11-27 10:55:48 +00:00
}
// verifyNotaryRequest is a function for state-dependant P2PNotaryRequest payload verification which is executed before ordinary blockchain's verification.
func (s *Server) verifyNotaryRequest(_ *transaction.Transaction, data interface{}) error {
2020-11-27 10:55:48 +00:00
r := data.(*payload.P2PNotaryRequest)
payer := r.FallbackTransaction.Signers[1].Account
if _, err := s.chain.VerifyWitness(payer, r, &r.Witness, s.chain.GetMaxVerificationGAS()); err != nil {
2020-11-27 10:55:48 +00:00
return fmt.Errorf("bad P2PNotaryRequest payload witness: %w", err)
}
notaryHash := s.chain.GetNotaryContractScriptHash()
2020-12-30 08:01:13 +00:00
if r.FallbackTransaction.Sender() != notaryHash {
2020-11-27 10:55:48 +00:00
return errors.New("P2PNotary contract should be a sender of the fallback transaction")
}
depositExpiration := s.chain.GetNotaryDepositExpiration(payer)
2020-11-27 10:55:48 +00:00
if r.FallbackTransaction.ValidUntilBlock >= depositExpiration {
return fmt.Errorf("fallback transaction is valid after deposit is unlocked: ValidUntilBlock is %d, deposit lock expires at %d", r.FallbackTransaction.ValidUntilBlock, depositExpiration)
}
2019-11-15 10:32:40 +00:00
return nil
}
2020-11-27 10:55:48 +00:00
func (s *Server) broadcastP2PNotaryRequestPayload(_ *transaction.Transaction, data interface{}) {
r := data.(*payload.P2PNotaryRequest) // we can guarantee that cast is successful
2020-11-27 10:55:48 +00:00
msg := NewMessage(CMDInv, payload.NewInventory(payload.P2PNotaryRequestType, []util.Uint256{r.FallbackTransaction.Hash()}))
s.broadcastMessage(msg)
}
// handleAddrCmd will process received addresses.
func (s *Server) handleAddrCmd(p Peer, addrs *payload.AddressList) error {
if !p.CanProcessAddr() {
return errors.New("unexpected addr received")
}
dups := make(map[string]bool)
for _, a := range addrs.Addrs {
addr, err := a.GetTCPAddress()
if err == nil && !dups[addr] {
dups[addr] = true
s.discovery.BackFill(addr)
}
}
return nil
}
// handleGetAddrCmd sends to the peer some good addresses that we know of.
func (s *Server) handleGetAddrCmd(p Peer) error {
addrs := s.discovery.GoodPeers()
if len(addrs) > payload.MaxAddrsCount {
addrs = addrs[:payload.MaxAddrsCount]
}
alist := payload.NewAddressList(len(addrs))
ts := time.Now()
for i, addr := range addrs {
// we know it's a good address, so it can't fail
netaddr, _ := net.ResolveTCPAddr("tcp", addr.Address)
alist.Addrs[i] = payload.NewAddressAndTime(netaddr, ts, addr.Capabilities)
}
return p.EnqueueP2PMessage(NewMessage(CMDAddr, alist))
}
// requestBlocks sends a CMDGetBlockByIndex message to the peer
// to sync up in blocks. A maximum of maxBlockBatch will
// send at once. Two things we need to take care of:
// 1. If possible, blocks should be fetched in parallel.
// height..+500 to one peer, height+500..+1000 to another etc.
// 2. Every block must eventually be fetched even if peer sends no answer.
// Thus the following algorithm is used:
// 1. Block range is divided into chunks of payload.MaxHashesCount.
// 2. Send requests for chunk in increasing order.
// 3. After all requests were sent, request random height.
func (s *Server) requestBlocks(bq Blockqueuer, p Peer) error {
h := bq.BlockHeight()
pl := getRequestBlocksPayload(p, h, &s.lastRequestedBlock)
lq := s.bQueue.lastQueued()
if lq > pl.IndexStart {
c := int16(h + blockCacheSize - lq)
if c < payload.MaxHashesCount {
pl.Count = c
}
pl.IndexStart = lq + 1
}
return p.EnqueueP2PMessage(NewMessage(CMDGetBlockByIndex, pl))
}
func getRequestBlocksPayload(p Peer, currHeight uint32, lastRequestedHeight *atomic.Uint32) *payload.GetBlockByIndex {
var peerHeight = p.LastBlockIndex()
var needHeight uint32
// lastRequestedBlock can only be increased.
for {
old := lastRequestedHeight.Load()
if old <= currHeight {
needHeight = currHeight + 1
if !lastRequestedHeight.CAS(old, needHeight) {
continue
}
} else if old < currHeight+(blockCacheSize-payload.MaxHashesCount) {
needHeight = currHeight + 1
if peerHeight > old+payload.MaxHashesCount {
needHeight = old + payload.MaxHashesCount
if !lastRequestedHeight.CAS(old, needHeight) {
continue
}
}
} else {
index := mrand.Intn(blockCacheSize / payload.MaxHashesCount)
needHeight = currHeight + 1 + uint32(index*payload.MaxHashesCount)
}
break
}
return payload.NewGetBlockByIndex(needHeight, -1)
}
2019-10-22 14:56:03 +00:00
// handleMessage processes the given message.
func (s *Server) handleMessage(peer Peer, msg *Message) error {
2020-01-28 13:40:38 +00:00
s.log.Debug("got msg",
zap.Stringer("addr", peer.RemoteAddr()),
zap.String("type", msg.Command.String()))
2020-01-28 13:40:38 +00:00
if peer.Handshaked() {
if inv, ok := msg.Payload.(*payload.Inventory); ok {
2020-11-27 10:55:48 +00:00
if !inv.Type.Valid(s.chain.P2PSigExtensionsEnabled()) || len(inv.Hashes) == 0 {
return errInvalidInvType
}
}
switch msg.Command {
case CMDAddr:
addrs := msg.Payload.(*payload.AddressList)
return s.handleAddrCmd(peer, addrs)
case CMDGetAddr:
// it has no payload
return s.handleGetAddrCmd(peer)
case CMDGetBlocks:
gb := msg.Payload.(*payload.GetBlocks)
return s.handleGetBlocksCmd(peer, gb)
case CMDGetBlockByIndex:
gbd := msg.Payload.(*payload.GetBlockByIndex)
return s.handleGetBlockByIndexCmd(peer, gbd)
case CMDGetData:
inv := msg.Payload.(*payload.Inventory)
return s.handleGetDataCmd(peer, inv)
case CMDGetMPTData:
inv := msg.Payload.(*payload.MPTInventory)
return s.handleGetMPTDataCmd(peer, inv)
case CMDMPTData:
inv := msg.Payload.(*payload.MPTData)
return s.handleMPTDataCmd(peer, inv)
case CMDGetHeaders:
gh := msg.Payload.(*payload.GetBlockByIndex)
return s.handleGetHeadersCmd(peer, gh)
case CMDHeaders:
h := msg.Payload.(*payload.Headers)
return s.handleHeadersCmd(peer, h)
case CMDInv:
inventory := msg.Payload.(*payload.Inventory)
return s.handleInvCmd(peer, inventory)
case CMDMempool:
// no payload
return s.handleMempoolCmd(peer)
case CMDBlock:
block := msg.Payload.(*block.Block)
return s.handleBlockCmd(peer, block)
case CMDExtensible:
cp := msg.Payload.(*payload.Extensible)
return s.handleExtensibleCmd(cp)
2019-11-15 10:32:40 +00:00
case CMDTX:
tx := msg.Payload.(*transaction.Transaction)
return s.handleTxCmd(tx)
2020-11-27 10:55:48 +00:00
case CMDP2PNotaryRequest:
r := msg.Payload.(*payload.P2PNotaryRequest)
return s.handleP2PNotaryRequestCmd(r)
case CMDPing:
ping := msg.Payload.(*payload.Ping)
return s.handlePing(peer, ping)
case CMDPong:
pong := msg.Payload.(*payload.Ping)
return s.handlePong(peer, pong)
case CMDVersion, CMDVerack:
return fmt.Errorf("received '%s' after the handshake", msg.Command.String())
}
} else {
switch msg.Command {
case CMDVersion:
version := msg.Payload.(*payload.Version)
return s.handleVersionCmd(peer, version)
case CMDVerack:
err := peer.HandleVersionAck()
if err != nil {
return err
}
go peer.StartProtocol()
2019-11-15 10:32:40 +00:00
s.tryInitStateSync()
s.tryStartServices()
default:
return fmt.Errorf("received '%s' during handshake", msg.Command.String())
}
}
return nil
2018-01-26 18:04:13 +00:00
}
func (s *Server) tryInitStateSync() {
if !s.stateSync.IsActive() {
s.bSyncQueue.discard()
return
}
if s.stateSync.IsInitialized() {
return
}
var peersNumber int
s.lock.RLock()
heights := make([]uint32, 0)
for p := range s.peers {
if p.Handshaked() {
peersNumber++
peerLastBlock := p.LastBlockIndex()
i := sort.Search(len(heights), func(i int) bool {
return heights[i] >= peerLastBlock
})
heights = append(heights, peerLastBlock)
if i != len(heights)-1 {
copy(heights[i+1:], heights[i:])
heights[i] = peerLastBlock
}
}
}
s.lock.RUnlock()
if peersNumber >= s.MinPeers && len(heights) > 0 {
// choose the height of the median peer as current chain's height
h := heights[len(heights)/2]
err := s.stateSync.Init(h)
if err != nil {
s.log.Fatal("failed to init state sync module",
zap.Uint32("evaluated chain's blockHeight", h),
zap.Uint32("blockHeight", s.chain.BlockHeight()),
zap.Uint32("headerHeight", s.chain.HeaderHeight()),
zap.Error(err))
}
// module can be inactive after init (i.e. full state is collected and ordinary block processing is needed)
if !s.stateSync.IsActive() {
s.bSyncQueue.discard()
}
}
}
// BroadcastExtensible add locally-generated Extensible payload to the pool
// and advertises it to peers.
func (s *Server) BroadcastExtensible(p *payload.Extensible) {
_, err := s.extensiblePool.Add(p)
if err != nil {
s.log.Error("created payload is not valid", zap.Error(err))
return
}
s.advertiseExtensible(p)
2019-11-15 10:32:40 +00:00
}
2022-01-12 20:04:07 +00:00
// RequestTx asks for given transactions from Server peers using GetData message.
func (s *Server) RequestTx(hashes ...util.Uint256) {
2019-11-15 10:32:40 +00:00
if len(hashes) == 0 {
return
}
for i := 0; i <= len(hashes)/payload.MaxHashesCount; i++ {
start := i * payload.MaxHashesCount
stop := (i + 1) * payload.MaxHashesCount
if stop > len(hashes) {
stop = len(hashes)
}
if start == stop {
break
}
msg := NewMessage(CMDGetData, payload.NewInventory(payload.TXType, hashes[start:stop]))
// It's high priority because it directly affects consensus process,
// even though it's getdata.
s.broadcastHPMessage(msg)
}
2019-11-15 10:32:40 +00:00
}
// iteratePeersWithSendMsg sends given message to all peers using two functions
// passed, one is to send the message and the other is to filtrate peers (the
// peer is considered invalid if it returns false).
func (s *Server) iteratePeersWithSendMsg(msg *Message, send func(Peer, bool, []byte) error, peerOK func(Peer) bool) {
var deadN, peerN, sentN int
// Get a copy of s.peers to avoid holding a lock while sending.
peers := s.getPeers(peerOK)
peerN = len(peers)
if peerN == 0 {
return
}
mrand.Shuffle(peerN, func(i, j int) {
peers[i], peers[j] = peers[j], peers[i]
})
pkt, err := msg.Bytes()
if err != nil {
return
}
// If true, this node isn't counted any more, either it's dead or we
// have already sent an Inv to it.
finished := make([]bool, peerN)
network: merge two loops in iteratePeersWithSendMsg, send to 2/3 Refactor code and be fine with sending to just 2/3 of proper peers. Previously it was an edge case, but it can be a normal thing to do also as broadcasting to everyone is obviously too expensive and excessive (hi, #608). Baseline (four node, 10 workers): RPS 8180.760 8137.822 7858.358 7820.011 8051.076 ≈ 8010 ± 2.04% TPS 7819.831 7521.172 7519.023 7242.965 7426.000 ≈ 7506 ± 2.78% CPU % 41.983 38.775 40.606 39.375 35.537 ≈ 39.3 ± 6.15% Mem MB 2947.189 2743.658 2896.688 2813.276 2863.108 ≈ 2853 ± 2.74% Patched: RPS 9714.567 9676.102 9358.609 9371.408 9301.372 ≈ 9484 ± 2.05% ↑ 18.40% TPS 8809.796 8796.854 8534.754 8661.158 8426.162 ≈ 8646 ± 1.92% ↑ 15.19% CPU % 44.980 45.018 33.640 29.645 43.830 ≈ 39.4 ± 18.41% ↑ 0.25% Mem MB 2989.078 2976.577 2306.185 2351.929 2910.479 ≈ 2707 ± 12.80% ↓ 5.12% There is a nuance with this patch however. While typically it works the way outlined above, sometimes it works like this: RPS ≈ 6734.368 TPS ≈ 6299.332 CPU ≈ 25.552% Mem ≈ 2706.046MB And that's because the log looks like this: DeltaTime, TransactionsCount, TPS 5014, 44212, 8817.710 5163, 49690, 9624.249 5166, 49523, 9586.334 5189, 49693, 9576.604 5198, 49339, 9491.920 5147, 49559, 9628.716 5192, 49680, 9568.567 5163, 49750, 9635.871 5183, 49189, 9490.450 5159, 49653, 9624.540 5167, 47945, 9279.079 5179, 2051, 396.022 5015, 4, 0.798 5004, 0, 0.000 5003, 0, 0.000 5003, 0, 0.000 5003, 0, 0.000 5003, 0, 0.000 5004, 0, 0.000 5003, 2925, 584.649 5040, 49099, 9741.865 5161, 49718, 9633.404 5170, 49228, 9521.857 5179, 49773, 9610.543 5167, 47253, 9145.152 5202, 49788, 9570.934 5177, 47704, 9214.603 5209, 46610, 8947.975 5249, 49156, 9364.831 5163, 18284, 3541.352 5072, 174, 34.306 On a network with 4 CNs and 1 RPC node there is 1/256 probability that a block won't be broadcasted to RPC node, so it won't see it until ping timeout kicks in. While it doesn't see a block it can't accept new incoming transactions so the bench gets stuck basically. To me that's an acceptable trade-off because normal networks are much larger than that and the effect of this patch is way more important there, but still that's what we have and we need to take into account.
2021-08-06 12:36:46 +00:00
// Try non-blocking sends first and only block if have to.
for _, blocking := range []bool{false, true} {
for i, peer := range peers {
// Send to 2/3 of good peers.
if 3*sentN >= 2*(peerN-deadN) {
return
}
network: merge two loops in iteratePeersWithSendMsg, send to 2/3 Refactor code and be fine with sending to just 2/3 of proper peers. Previously it was an edge case, but it can be a normal thing to do also as broadcasting to everyone is obviously too expensive and excessive (hi, #608). Baseline (four node, 10 workers): RPS 8180.760 8137.822 7858.358 7820.011 8051.076 ≈ 8010 ± 2.04% TPS 7819.831 7521.172 7519.023 7242.965 7426.000 ≈ 7506 ± 2.78% CPU % 41.983 38.775 40.606 39.375 35.537 ≈ 39.3 ± 6.15% Mem MB 2947.189 2743.658 2896.688 2813.276 2863.108 ≈ 2853 ± 2.74% Patched: RPS 9714.567 9676.102 9358.609 9371.408 9301.372 ≈ 9484 ± 2.05% ↑ 18.40% TPS 8809.796 8796.854 8534.754 8661.158 8426.162 ≈ 8646 ± 1.92% ↑ 15.19% CPU % 44.980 45.018 33.640 29.645 43.830 ≈ 39.4 ± 18.41% ↑ 0.25% Mem MB 2989.078 2976.577 2306.185 2351.929 2910.479 ≈ 2707 ± 12.80% ↓ 5.12% There is a nuance with this patch however. While typically it works the way outlined above, sometimes it works like this: RPS ≈ 6734.368 TPS ≈ 6299.332 CPU ≈ 25.552% Mem ≈ 2706.046MB And that's because the log looks like this: DeltaTime, TransactionsCount, TPS 5014, 44212, 8817.710 5163, 49690, 9624.249 5166, 49523, 9586.334 5189, 49693, 9576.604 5198, 49339, 9491.920 5147, 49559, 9628.716 5192, 49680, 9568.567 5163, 49750, 9635.871 5183, 49189, 9490.450 5159, 49653, 9624.540 5167, 47945, 9279.079 5179, 2051, 396.022 5015, 4, 0.798 5004, 0, 0.000 5003, 0, 0.000 5003, 0, 0.000 5003, 0, 0.000 5003, 0, 0.000 5004, 0, 0.000 5003, 2925, 584.649 5040, 49099, 9741.865 5161, 49718, 9633.404 5170, 49228, 9521.857 5179, 49773, 9610.543 5167, 47253, 9145.152 5202, 49788, 9570.934 5177, 47704, 9214.603 5209, 46610, 8947.975 5249, 49156, 9364.831 5163, 18284, 3541.352 5072, 174, 34.306 On a network with 4 CNs and 1 RPC node there is 1/256 probability that a block won't be broadcasted to RPC node, so it won't see it until ping timeout kicks in. While it doesn't see a block it can't accept new incoming transactions so the bench gets stuck basically. To me that's an acceptable trade-off because normal networks are much larger than that and the effect of this patch is way more important there, but still that's what we have and we need to take into account.
2021-08-06 12:36:46 +00:00
if finished[i] {
continue
}
err := send(peer, blocking, pkt)
switch err {
case nil:
if msg.Command == CMDGetAddr {
peer.AddGetAddrSent()
}
sentN++
case errBusy: // Can be retried.
continue
default:
deadN++
}
network: merge two loops in iteratePeersWithSendMsg, send to 2/3 Refactor code and be fine with sending to just 2/3 of proper peers. Previously it was an edge case, but it can be a normal thing to do also as broadcasting to everyone is obviously too expensive and excessive (hi, #608). Baseline (four node, 10 workers): RPS 8180.760 8137.822 7858.358 7820.011 8051.076 ≈ 8010 ± 2.04% TPS 7819.831 7521.172 7519.023 7242.965 7426.000 ≈ 7506 ± 2.78% CPU % 41.983 38.775 40.606 39.375 35.537 ≈ 39.3 ± 6.15% Mem MB 2947.189 2743.658 2896.688 2813.276 2863.108 ≈ 2853 ± 2.74% Patched: RPS 9714.567 9676.102 9358.609 9371.408 9301.372 ≈ 9484 ± 2.05% ↑ 18.40% TPS 8809.796 8796.854 8534.754 8661.158 8426.162 ≈ 8646 ± 1.92% ↑ 15.19% CPU % 44.980 45.018 33.640 29.645 43.830 ≈ 39.4 ± 18.41% ↑ 0.25% Mem MB 2989.078 2976.577 2306.185 2351.929 2910.479 ≈ 2707 ± 12.80% ↓ 5.12% There is a nuance with this patch however. While typically it works the way outlined above, sometimes it works like this: RPS ≈ 6734.368 TPS ≈ 6299.332 CPU ≈ 25.552% Mem ≈ 2706.046MB And that's because the log looks like this: DeltaTime, TransactionsCount, TPS 5014, 44212, 8817.710 5163, 49690, 9624.249 5166, 49523, 9586.334 5189, 49693, 9576.604 5198, 49339, 9491.920 5147, 49559, 9628.716 5192, 49680, 9568.567 5163, 49750, 9635.871 5183, 49189, 9490.450 5159, 49653, 9624.540 5167, 47945, 9279.079 5179, 2051, 396.022 5015, 4, 0.798 5004, 0, 0.000 5003, 0, 0.000 5003, 0, 0.000 5003, 0, 0.000 5003, 0, 0.000 5004, 0, 0.000 5003, 2925, 584.649 5040, 49099, 9741.865 5161, 49718, 9633.404 5170, 49228, 9521.857 5179, 49773, 9610.543 5167, 47253, 9145.152 5202, 49788, 9570.934 5177, 47704, 9214.603 5209, 46610, 8947.975 5249, 49156, 9364.831 5163, 18284, 3541.352 5072, 174, 34.306 On a network with 4 CNs and 1 RPC node there is 1/256 probability that a block won't be broadcasted to RPC node, so it won't see it until ping timeout kicks in. While it doesn't see a block it can't accept new incoming transactions so the bench gets stuck basically. To me that's an acceptable trade-off because normal networks are much larger than that and the effect of this patch is way more important there, but still that's what we have and we need to take into account.
2021-08-06 12:36:46 +00:00
finished[i] = true
}
2019-11-15 10:32:40 +00:00
}
}
// broadcastMessage sends the message to all available peers.
func (s *Server) broadcastMessage(msg *Message) {
s.iteratePeersWithSendMsg(msg, Peer.EnqueuePacket, nil)
}
// broadcastHPMessage sends the high-priority message to all available peers.
func (s *Server) broadcastHPMessage(msg *Message) {
s.iteratePeersWithSendMsg(msg, Peer.EnqueueHPPacket, nil)
}
// relayBlocksLoop subscribes to new blocks in the ledger and broadcasts them
// to the network. Intended to be run as a separate goroutine.
func (s *Server) relayBlocksLoop() {
ch := make(chan *block.Block, 2) // Some buffering to smooth out possible egressing delays.
s.chain.SubscribeForBlocks(ch)
for {
select {
case <-s.quit:
s.chain.UnsubscribeFromBlocks(ch)
return
case b := <-ch:
msg := NewMessage(CMDInv, payload.NewInventory(payload.BlockType, []util.Uint256{b.Hash()}))
// Filter out nodes that are more current (avoid spamming the network
// during initial sync).
s.iteratePeersWithSendMsg(msg, Peer.EnqueuePacket, func(p Peer) bool {
return p.Handshaked() && p.LastBlockIndex() < b.Index
})
s.extensiblePool.RemoveStale(b.Index)
}
}
}
// verifyAndPoolTX verifies the TX and adds it to the local mempool.
func (s *Server) verifyAndPoolTX(t *transaction.Transaction) error {
return s.chain.PoolTx(t)
}
// RelayTxn a new transaction to the local node and the connected peers.
// Reference: the method OnRelay in C#: https://github.com/neo-project/neo/blob/master/neo/Network/P2P/LocalNode.cs#L159
func (s *Server) RelayTxn(t *transaction.Transaction) error {
err := s.verifyAndPoolTX(t)
if err == nil {
2020-11-27 10:55:48 +00:00
s.broadcastTX(t, nil)
}
return err
}
// broadcastTX broadcasts an inventory message about new transaction.
2020-11-27 10:55:48 +00:00
func (s *Server) broadcastTX(t *transaction.Transaction, _ interface{}) {
select {
case s.transactions <- t:
case <-s.quit:
}
}
func (s *Server) broadcastTxHashes(hs []util.Uint256) {
msg := NewMessage(CMDInv, payload.NewInventory(payload.TXType, hs))
// We need to filter out non-relaying nodes, so plain broadcast
// functions don't fit here.
s.iteratePeersWithSendMsg(msg, Peer.EnqueuePacket, Peer.IsFullNode)
}
2020-11-27 10:55:48 +00:00
// initStaleMemPools initializes mempools for stale tx/payload processing.
func (s *Server) initStaleMemPools() {
2020-11-11 12:49:51 +00:00
threshold := 5
// Not perfect, can change over time, but should be sufficient.
numOfCNs := s.config.GetNumOfCNs(s.chain.BlockHeight())
if numOfCNs*2 > threshold {
threshold = numOfCNs * 2
2020-11-11 12:49:51 +00:00
}
network: only ask mempool for intersections with received Inv Most of the time on healthy network we see new transactions appearing that are not present in the mempool. Once they get into mempool we don't ask for them again when some other peer sends an Inv with them. Then these transactions are usually added into block, removed from mempool and no one actually sends them again to us. Some stale nodes can do that, but it's not very likely to happen. At the receiving end at the same time it's quite expensive to do full chain HasTransaction() query, so if we can avoid doing that it's always good. Here it technically allows resending old transaction that will be re-requested and an attempt to add it to mempool will be made. But it'll inevitably fail because the same HasTransaction() check is done there too. One can try to maliciously flood the node with stale transactions but it doesn't differ from flooding it with any other invalid transactions, so there is no new attack vector added. Baseline, 4 nodes with 10 workers: RPS 6902.296 6465.662 6856.044 6785.515 6157.024 ≈ 6633 ± 4.26% TPS 6468.431 6218.867 6610.565 6288.596 5790.556 ≈ 6275 ± 4.44% CPU % 50.231 42.925 49.481 48.396 42.662 ≈ 46.7 ± 7.01% Mem MB 2856.841 2684.103 2756.195 2733.485 2422.787 ≈ 2691 ± 5.40% Patched: RPS 7176.784 7014.511 6139.663 7191.280 7080.852 ≈ 6921 ± 5.72% ↑ 4.34% TPS 6945.409 6562.756 5927.050 6681.187 6821.794 ≈ 6588 ± 5.38% ↑ 4.99% CPU % 44.400 43.842 40.418 49.211 49.370 ≈ 45.4 ± 7.53% ↓ 2.78% Mem MB 2693.414 2640.602 2472.007 2731.482 2707.879 ≈ 2649 ± 3.53% ↓ 1.56%
2021-08-03 19:28:16 +00:00
s.mempool.SetResendThreshold(uint32(threshold), s.broadcastTX)
2020-11-27 10:55:48 +00:00
if s.chain.P2PSigExtensionsEnabled() {
s.notaryRequestPool.SetResendThreshold(uint32(threshold), s.broadcastP2PNotaryRequestPayload)
}
2020-11-11 12:49:51 +00:00
}
// broadcastTxLoop is a loop for batching and sending
// transactions hashes in an INV payload.
func (s *Server) broadcastTxLoop() {
const (
batchTime = time.Millisecond * 50
batchSize = 32
)
txs := make([]util.Uint256, 0, batchSize)
var timer *time.Timer
timerCh := func() <-chan time.Time {
if timer == nil {
return nil
}
return timer.C
}
broadcast := func() {
s.broadcastTxHashes(txs)
txs = txs[:0]
if timer != nil {
timer.Stop()
}
}
for {
select {
case <-s.quit:
loop:
for {
select {
case <-s.transactions:
default:
break loop
}
}
return
case <-timerCh():
if len(txs) > 0 {
broadcast()
}
case tx := <-s.transactions:
if len(txs) == 0 {
timer = time.NewTimer(batchTime)
}
txs = append(txs, tx.Hash())
if len(txs) == batchSize {
broadcast()
}
}
}
}
// Port returns a server port that should be used in P2P version exchange. In
// case if `AnnouncedPort` is set in the server.Config, the announced node port
// will be returned (e.g. consider the node running behind NAT). If `AnnouncedPort`
// isn't set, the port returned may still differs from that of server.Config.
func (s *Server) Port() (uint16, error) {
if s.AnnouncedPort != 0 {
return s.ServerConfig.AnnouncedPort, nil
}
var port uint16
_, portStr, err := net.SplitHostPort(s.transport.Address())
if err != nil {
port = s.ServerConfig.Port
} else {
p, err := strconv.ParseUint(portStr, 10, 16)
if err != nil {
return 0, err
}
port = uint16(p)
}
return port, nil
}