From 0a5049658fc3a77d01e22e26af12ae47d2f594a2 Mon Sep 17 00:00:00 2001 From: Evgenii Stratonikov Date: Tue, 22 Dec 2020 15:55:55 +0300 Subject: [PATCH 1/4] network: support non-blocking broadcast Right now a single slow peer can slow down whole network. Do broadcast in 2 parts: 1. Perform non-blocking send to all peers if possible. 2. Perform blocking sends until message is sent to 2/3 of good peers. --- pkg/network/helper_test.go | 10 +++++----- pkg/network/peer.go | 4 ++-- pkg/network/server.go | 41 +++++++++++++++++++++++++++++++++----- pkg/network/tcp_peer.go | 33 ++++++++++++++++++++---------- 4 files changed, 65 insertions(+), 23 deletions(-) diff --git a/pkg/network/helper_test.go b/pkg/network/helper_test.go index 9b52a2d91..716ba2afa 100644 --- a/pkg/network/helper_test.go +++ b/pkg/network/helper_test.go @@ -415,18 +415,18 @@ func (p *localPeer) EnqueueMessage(msg *Message) error { if err != nil { return err } - return p.EnqueuePacket(b) + return p.EnqueuePacket(true, b) } -func (p *localPeer) EnqueuePacket(m []byte) error { - return p.EnqueueHPPacket(m) +func (p *localPeer) EnqueuePacket(block bool, m []byte) error { + return p.EnqueueHPPacket(block, m) } func (p *localPeer) EnqueueP2PMessage(msg *Message) error { return p.EnqueueMessage(msg) } func (p *localPeer) EnqueueP2PPacket(m []byte) error { - return p.EnqueueHPPacket(m) + return p.EnqueueHPPacket(true, m) } -func (p *localPeer) EnqueueHPPacket(m []byte) error { +func (p *localPeer) EnqueueHPPacket(_ bool, m []byte) error { msg := &Message{Network: netmode.UnitTestNet} r := io.NewBinReaderFromBuf(m) err := msg.Decode(r) diff --git a/pkg/network/peer.go b/pkg/network/peer.go index fb06c59d2..b2df6ff69 100644 --- a/pkg/network/peer.go +++ b/pkg/network/peer.go @@ -28,7 +28,7 @@ type Peer interface { // can be shared with other queues (so that message marshalling can be // done once for all peers). Does nothing is the peer is not yet // completed handshaking. - EnqueuePacket([]byte) error + EnqueuePacket(bool, []byte) error // EnqueueP2PMessage is a temporary wrapper that sends a message via // EnqueueP2PPacket if there is no error in serializing it. @@ -47,7 +47,7 @@ type Peer interface { // EnqueueHPPacket is a blocking high priority packet enqueuer, it // doesn't return until it puts given packet into the high-priority // queue. - EnqueueHPPacket([]byte) error + EnqueueHPPacket(bool, []byte) error Version() *payload.Version LastBlockIndex() uint32 Handshaked() bool diff --git a/pkg/network/server.go b/pkg/network/server.go index 8beeea9d2..e4721f1e0 100644 --- a/pkg/network/server.go +++ b/pkg/network/server.go @@ -537,7 +537,7 @@ func (s *Server) handleInvCmd(p Peer, inv *payload.Inventory) error { return err } if inv.Type == payload.ConsensusType { - return p.EnqueueHPPacket(pkt) + return p.EnqueueHPPacket(true, pkt) } return p.EnqueueP2PPacket(pkt) } @@ -599,7 +599,7 @@ func (s *Server) handleGetDataCmd(p Peer, inv *payload.Inventory) error { pkt, err := msg.Bytes() if err == nil { if inv.Type == payload.ConsensusType { - err = p.EnqueueHPPacket(pkt) + err = p.EnqueueHPPacket(true, pkt) } else { err = p.EnqueueP2PPacket(pkt) } @@ -943,7 +943,7 @@ func (s *Server) requestTx(hashes ...util.Uint256) { // iteratePeersWithSendMsg sends given message to all peers using two functions // passed, one is to send the message and the other is to filtrate peers (the // peer is considered invalid if it returns false). -func (s *Server) iteratePeersWithSendMsg(msg *Message, send func(Peer, []byte) error, peerOK func(Peer) bool) { +func (s *Server) iteratePeersWithSendMsg(msg *Message, send func(Peer, bool, []byte) error, peerOK func(Peer) bool) { // Get a copy of s.peers to avoid holding a lock while sending. peers := s.Peers() if len(peers) == 0 { @@ -953,15 +953,46 @@ func (s *Server) iteratePeersWithSendMsg(msg *Message, send func(Peer, []byte) e if err != nil { return } + + success := make(map[Peer]bool, len(peers)) + okCount := 0 + sentCount := 0 for peer := range peers { if peerOK != nil && !peerOK(peer) { + success[peer] = false + continue + } + okCount++ + if err := send(peer, false, pkt); err != nil { continue } if msg.Command == CMDGetAddr { peer.AddGetAddrSent() } - // Who cares about these messages anyway? - _ = send(peer, pkt) + success[peer] = true + sentCount++ + } + + // Send to at least 2/3 of good peers. + if 3*sentCount >= 2*okCount { + return + } + + // Perform blocking send now. + for peer := range peers { + if _, ok := success[peer]; ok || peerOK != nil && !peerOK(peer) { + continue + } + if err := send(peer, true, pkt); err != nil { + continue + } + if msg.Command == CMDGetAddr { + peer.AddGetAddrSent() + } + sentCount++ + if 3*sentCount >= 2*okCount { + return + } } } diff --git a/pkg/network/tcp_peer.go b/pkg/network/tcp_peer.go index 67329c161..f55a3954b 100644 --- a/pkg/network/tcp_peer.go +++ b/pkg/network/tcp_peer.go @@ -30,6 +30,7 @@ const ( var ( errGone = errors.New("the peer is gone already") + errBusy = errors.New("peer is busy") errStateMismatch = errors.New("tried to send protocol message before handshake completed") errPingPong = errors.New("ping/pong timeout") errUnexpectedPong = errors.New("pong message wasn't expected") @@ -81,21 +82,31 @@ func NewTCPPeer(conn net.Conn, s *Server) *TCPPeer { // putPacketIntoQueue puts given message into the given queue if the peer has // done handshaking. -func (p *TCPPeer) putPacketIntoQueue(queue chan<- []byte, msg []byte) error { +func (p *TCPPeer) putPacketIntoQueue(queue chan<- []byte, block bool, msg []byte) error { if !p.Handshaked() { return errStateMismatch } - select { - case queue <- msg: - case <-p.done: - return errGone + if block { + select { + case queue <- msg: + case <-p.done: + return errGone + } + } else { + select { + case queue <- msg: + case <-p.done: + return errGone + default: + return errBusy + } } return nil } // EnqueuePacket implements the Peer interface. -func (p *TCPPeer) EnqueuePacket(msg []byte) error { - return p.putPacketIntoQueue(p.sendQ, msg) +func (p *TCPPeer) EnqueuePacket(block bool, msg []byte) error { + return p.putPacketIntoQueue(p.sendQ, block, msg) } // putMessageIntoQueue serializes given Message and puts it into given queue if @@ -105,7 +116,7 @@ func (p *TCPPeer) putMsgIntoQueue(queue chan<- []byte, msg *Message) error { if err != nil { return err } - return p.putPacketIntoQueue(queue, b) + return p.putPacketIntoQueue(queue, true, b) } // EnqueueMessage is a temporary wrapper that sends a message via @@ -116,7 +127,7 @@ func (p *TCPPeer) EnqueueMessage(msg *Message) error { // EnqueueP2PPacket implements the Peer interface. func (p *TCPPeer) EnqueueP2PPacket(msg []byte) error { - return p.putPacketIntoQueue(p.p2pSendQ, msg) + return p.putPacketIntoQueue(p.p2pSendQ, true, msg) } // EnqueueP2PMessage implements the Peer interface. @@ -126,8 +137,8 @@ func (p *TCPPeer) EnqueueP2PMessage(msg *Message) error { // EnqueueHPPacket implements the Peer interface. It the peer is not yet // handshaked it's a noop. -func (p *TCPPeer) EnqueueHPPacket(msg []byte) error { - return p.putPacketIntoQueue(p.hpSendQ, msg) +func (p *TCPPeer) EnqueueHPPacket(block bool, msg []byte) error { + return p.putPacketIntoQueue(p.hpSendQ, block, msg) } func (p *TCPPeer) writeMsg(msg *Message) error { From 2cb536a6a158c1a426ef5297b140f942aa29732c Mon Sep 17 00:00:00 2001 From: Evgenii Stratonikov Date: Wed, 23 Dec 2020 15:32:16 +0300 Subject: [PATCH 2/4] network: provide NullPayload where necessary --- pkg/network/message.go | 2 +- pkg/network/server.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/network/message.go b/pkg/network/message.go index 3b73a5422..c8bf606ae 100644 --- a/pkg/network/message.go +++ b/pkg/network/message.go @@ -216,7 +216,7 @@ func (m *Message) tryCompressPayload() error { compressedPayload := buf.Bytes() if m.Flags&Compressed == 0 { switch m.Payload.(type) { - case *payload.Headers, *payload.MerkleBlock, *payload.NullPayload, + case *payload.Headers, *payload.MerkleBlock, payload.NullPayload, *payload.Inventory: break default: diff --git a/pkg/network/server.go b/pkg/network/server.go index e4721f1e0..caf464b56 100644 --- a/pkg/network/server.go +++ b/pkg/network/server.go @@ -474,7 +474,7 @@ func (s *Server) handleVersionCmd(p Peer, version *payload.Version) error { } } s.lock.RUnlock() - return p.SendVersionAck(NewMessage(CMDVerack, nil)) + return p.SendVersionAck(NewMessage(CMDVerack, payload.NewNullPayload())) } // handleBlockCmd processes the received block received from its peer. From 5bd6c1e5cc3e2f1b769e2583110313d7cb974f04 Mon Sep 17 00:00:00 2001 From: Evgenii Stratonikov Date: Wed, 23 Dec 2020 16:13:57 +0300 Subject: [PATCH 3/4] network: fix a bug in discovery with a peer connected twice It could be the case that checks are performed simultaneosly and peers connections goes down from 2 to 0. We must take such case into account and register address as good in discovery. --- pkg/network/server.go | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/pkg/network/server.go b/pkg/network/server.go index caf464b56..b8befac11 100644 --- a/pkg/network/server.go +++ b/pkg/network/server.go @@ -293,7 +293,28 @@ func (s *Server) run() { addr := drop.peer.PeerAddr().String() if drop.reason == errIdenticalID { s.discovery.RegisterBadAddr(addr) - } else if drop.reason != errAlreadyConnected { + } else if drop.reason == errAlreadyConnected { + // There is a race condition when peer can be disconnected twice for the this reason + // which can lead to no connections to peer at all. Here we check for such a possibility. + stillConnected := false + s.lock.RLock() + verDrop := drop.peer.Version() + addr := drop.peer.PeerAddr().String() + if verDrop != nil { + for peer := range s.peers { + ver := peer.Version() + // Already connected, drop this connection. + if ver != nil && ver.Nonce == verDrop.Nonce && peer.PeerAddr().String() == addr { + stillConnected = true + } + } + } + s.lock.RUnlock() + if !stillConnected { + s.discovery.UnregisterConnectedAddr(addr) + s.discovery.BackFill(addr) + } + } else { s.discovery.UnregisterConnectedAddr(addr) s.discovery.BackFill(addr) } From 84a3474fc5bf4e82c2a2dade59d7de826905bff4 Mon Sep 17 00:00:00 2001 From: Evgenii Stratonikov Date: Thu, 24 Dec 2020 14:38:58 +0300 Subject: [PATCH 4/4] network: set timeout on write Fix a bug occuring under high load when node hangs during this write. --- pkg/network/tcp_peer.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pkg/network/tcp_peer.go b/pkg/network/tcp_peer.go index f55a3954b..f8c29d61d 100644 --- a/pkg/network/tcp_peer.go +++ b/pkg/network/tcp_peer.go @@ -195,6 +195,7 @@ func (p *TCPPeer) handleQueues() { var p2pSkipCounter uint32 const p2pSkipDivisor = 4 + var writeTimeout = time.Duration(p.server.chain.GetConfig().SecondsPerBlock) * time.Second for { var msg []byte @@ -228,6 +229,10 @@ func (p *TCPPeer) handleQueues() { case msg = <-p.sendQ: } } + err = p.conn.SetWriteDeadline(time.Now().Add(writeTimeout)) + if err != nil { + break + } _, err = p.conn.Write(msg) if err != nil { break