From 7ee8f9c5d8109a470adf2a4877cdabcdf99f6108 Mon Sep 17 00:00:00 2001 From: Roman Khimov Date: Mon, 10 Feb 2020 17:21:56 +0300 Subject: [PATCH] network: fix networking stalls caused by stale peers We can leak sending goroutines and stall broadcasts because of already gone peers that happened to be cached by some s.Peers() user (more than 800 of these can be seen in nodoka log along with (*Server).run blocking on CMDGetAddr send): Feb 10 16:35:15 nodoka neo-go[1563]: goroutine 41 [chan send, 3320 minutes]: Feb 10 16:35:15 nodoka neo-go[1563]: github.com/CityOfZion/neo-go/pkg/network.(*TCPPeer).putPacketIntoQueue(...) Feb 10 16:35:15 nodoka neo-go[1563]: /go/src/github.com/CityOfZion/neo-go/pkg/network/tcp_peer.go:81 Feb 10 16:35:15 nodoka neo-go[1563]: github.com/CityOfZion/neo-go/pkg/network.(*TCPPeer).EnqueueHPPacket(0xc0083d57a0, 0xc017206100, 0x18, 0x40, 0x136a240, 0xc018ef9720) Feb 10 16:35:15 nodoka neo-go[1563]: /go/src/github.com/CityOfZion/neo-go/pkg/network/tcp_peer.go:119 +0x98 Feb 10 16:35:15 nodoka neo-go[1563]: github.com/CityOfZion/neo-go/pkg/network.(*Server).iteratePeersWithSendMsg(0xc0000ca000, 0xc0001848a0, 0xcb4550, 0x0) Feb 10 16:35:15 nodoka neo-go[1563]: /go/src/github.com/CityOfZion/neo-go/pkg/network/server.go:720 +0x12a Feb 10 16:35:15 nodoka neo-go[1563]: github.com/CityOfZion/neo-go/pkg/network.(*Server).broadcastHPMessage(...) Feb 10 16:35:15 nodoka neo-go[1563]: /go/src/github.com/CityOfZion/neo-go/pkg/network/server.go:731 Feb 10 16:35:15 nodoka neo-go[1563]: github.com/CityOfZion/neo-go/pkg/network.(*Server).run(0xc0000ca000) Feb 10 16:35:15 nodoka neo-go[1563]: /go/src/github.com/CityOfZion/neo-go/pkg/network/server.go:203 +0xee4 Feb 10 16:35:15 nodoka neo-go[1563]: github.com/CityOfZion/neo-go/pkg/network.(*Server).Start(0xc0000ca000, 0xc000072c60) Feb 10 16:35:15 nodoka neo-go[1563]: /go/src/github.com/CityOfZion/neo-go/pkg/network/server.go:173 +0x2ec Feb 10 16:35:15 nodoka neo-go[1563]: created by github.com/CityOfZion/neo-go/cli/server.startServer Feb 10 16:35:15 nodoka neo-go[1563]: /go/src/github.com/CityOfZion/neo-go/cli/server/server.go:331 +0x476 --- pkg/network/tcp_peer.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pkg/network/tcp_peer.go b/pkg/network/tcp_peer.go index 51d9bfe40..9c4fff52d 100644 --- a/pkg/network/tcp_peer.go +++ b/pkg/network/tcp_peer.go @@ -27,6 +27,7 @@ const ( ) var ( + errGone = errors.New("the peer is gone already") errStateMismatch = errors.New("tried to send protocol message before handshake completed") errPingPong = errors.New("ping/pong timeout") errUnexpectedPong = errors.New("pong message wasn't expected") @@ -78,7 +79,11 @@ func (p *TCPPeer) putPacketIntoQueue(queue chan<- []byte, msg []byte) error { if !p.Handshaked() { return errStateMismatch } - queue <- msg + select { + case queue <- msg: + case <-p.done: + return errGone + } return nil }