network: fix networking stalls caused by stale peers

We can leak sending goroutines and stall broadcasts because of already gone
peers that happened to be cached by some s.Peers() user (more than 800 of
these can be seen in nodoka log along with (*Server).run blocking on
CMDGetAddr send):

Feb 10 16:35:15 nodoka neo-go[1563]: goroutine 41 [chan send, 3320 minutes]:
Feb 10 16:35:15 nodoka neo-go[1563]: github.com/CityOfZion/neo-go/pkg/network.(*TCPPeer).putPacketIntoQueue(...)
Feb 10 16:35:15 nodoka neo-go[1563]:         /go/src/github.com/CityOfZion/neo-go/pkg/network/tcp_peer.go:81
Feb 10 16:35:15 nodoka neo-go[1563]: github.com/CityOfZion/neo-go/pkg/network.(*TCPPeer).EnqueueHPPacket(0xc0083d57a0, 0xc017206100, 0x18, 0x40, 0x136a240, 0xc018ef9720)
Feb 10 16:35:15 nodoka neo-go[1563]:         /go/src/github.com/CityOfZion/neo-go/pkg/network/tcp_peer.go:119 +0x98
Feb 10 16:35:15 nodoka neo-go[1563]: github.com/CityOfZion/neo-go/pkg/network.(*Server).iteratePeersWithSendMsg(0xc0000ca000, 0xc0001848a0, 0xcb4550, 0x0)
Feb 10 16:35:15 nodoka neo-go[1563]:         /go/src/github.com/CityOfZion/neo-go/pkg/network/server.go:720 +0x12a
Feb 10 16:35:15 nodoka neo-go[1563]: github.com/CityOfZion/neo-go/pkg/network.(*Server).broadcastHPMessage(...)
Feb 10 16:35:15 nodoka neo-go[1563]:         /go/src/github.com/CityOfZion/neo-go/pkg/network/server.go:731
Feb 10 16:35:15 nodoka neo-go[1563]: github.com/CityOfZion/neo-go/pkg/network.(*Server).run(0xc0000ca000)
Feb 10 16:35:15 nodoka neo-go[1563]:         /go/src/github.com/CityOfZion/neo-go/pkg/network/server.go:203 +0xee4
Feb 10 16:35:15 nodoka neo-go[1563]: github.com/CityOfZion/neo-go/pkg/network.(*Server).Start(0xc0000ca000, 0xc000072c60)
Feb 10 16:35:15 nodoka neo-go[1563]:         /go/src/github.com/CityOfZion/neo-go/pkg/network/server.go:173 +0x2ec
Feb 10 16:35:15 nodoka neo-go[1563]: created by github.com/CityOfZion/neo-go/cli/server.startServer
Feb 10 16:35:15 nodoka neo-go[1563]:         /go/src/github.com/CityOfZion/neo-go/cli/server/server.go:331 +0x476
This commit is contained in:
Roman Khimov 2020-02-10 17:21:56 +03:00
parent f148798291
commit 7ee8f9c5d8

View file

@ -27,6 +27,7 @@ const (
)
var (
errGone = errors.New("the peer is gone already")
errStateMismatch = errors.New("tried to send protocol message before handshake completed")
errPingPong = errors.New("ping/pong timeout")
errUnexpectedPong = errors.New("pong message wasn't expected")
@ -78,7 +79,11 @@ func (p *TCPPeer) putPacketIntoQueue(queue chan<- []byte, msg []byte) error {
if !p.Handshaked() {
return errStateMismatch
}
queue <- msg
select {
case queue <- msg:
case <-p.done:
return errGone
}
return nil
}