From d1a2296939e23f67e1e73c56725416f640023ba1 Mon Sep 17 00:00:00 2001 From: Roman Khimov Date: Thu, 13 Feb 2020 16:24:46 +0300 Subject: [PATCH] network: change the disconnect procedure We can still lock the (*Server).run with dead peers: Feb 13 16:14:50 neo-go-node-2 neo-go[9448]: goroutine 40 [select, 871 minutes]: Feb 13 16:14:50 neo-go-node-2 neo-go[9448]: github.com/CityOfZion/neo-go/pkg/network.(*TCPPeer).putPacketIntoQueue(0xc030ab5320, 0xc02f251f20, 0xc00af0dcc0, 0x18, 0x40, 0x100000000000000, 0xffffffffffffffff) Feb 13 16:14:50 neo-go-node-2 neo-go[9448]: #011/go/src/github.com/CityOfZion/neo-go/pkg/network/tcp_peer.go:82 +0xf4 Feb 13 16:14:50 neo-go-node-2 neo-go[9448]: github.com/CityOfZion/neo-go/pkg/network.(*TCPPeer).EnqueueHPPacket(0xc030ab5320, 0xc00af0dcc0, 0x18, 0x40, 0x1367240, 0xc03090ef98) Feb 13 16:14:50 neo-go-node-2 neo-go[9448]: #011/go/src/github.com/CityOfZion/neo-go/pkg/network/tcp_peer.go:124 +0x52 Feb 13 16:14:50 neo-go-node-2 neo-go[9448]: github.com/CityOfZion/neo-go/pkg/network.(*Server).iteratePeersWithSendMsg(0xc0000ca000, 0xc00af35800, 0xcb2a58, 0x0) Feb 13 16:14:50 neo-go-node-2 neo-go[9448]: #011/go/src/github.com/CityOfZion/neo-go/pkg/network/server.go:720 +0x12a Feb 13 16:14:50 neo-go-node-2 neo-go[9448]: github.com/CityOfZion/neo-go/pkg/network.(*Server).broadcastHPMessage(...) Feb 13 16:14:50 neo-go-node-2 neo-go[9448]: #011/go/src/github.com/CityOfZion/neo-go/pkg/network/server.go:731 Feb 13 16:14:50 neo-go-node-2 neo-go[9448]: github.com/CityOfZion/neo-go/pkg/network.(*Server).run(0xc0000ca000) Feb 13 16:14:50 neo-go-node-2 neo-go[9448]: #011/go/src/github.com/CityOfZion/neo-go/pkg/network/server.go:203 +0xee4 Feb 13 16:14:50 neo-go-node-2 neo-go[9448]: github.com/CityOfZion/neo-go/pkg/network.(*Server).Start(0xc0000ca000, 0xc000072ba0) Feb 13 16:14:50 neo-go-node-2 neo-go[9448]: #011/go/src/github.com/CityOfZion/neo-go/pkg/network/server.go:173 +0x2ec Feb 13 16:14:50 neo-go-node-2 neo-go[9448]: created by github.com/CityOfZion/neo-go/cli/server.startServer Feb 13 16:14:50 neo-go-node-2 neo-go[9448]: #011/go/src/github.com/CityOfZion/neo-go/cli/server/server.go:331 +0x476 ... Feb 13 16:14:50 neo-go-node-2 neo-go[9448]: goroutine 2199 [chan send, 870 minutes]: Feb 13 16:14:50 neo-go-node-2 neo-go[9448]: github.com/CityOfZion/neo-go/pkg/network.(*TCPPeer).Disconnect.func1() Feb 13 16:14:50 neo-go-node-2 neo-go[9448]: #011/go/src/github.com/CityOfZion/neo-go/pkg/network/tcp_peer.go:366 +0x85 Feb 13 16:14:50 neo-go-node-2 neo-go[9448]: sync.(*Once).Do(0xc030ab403c, 0xc02f262788) Feb 13 16:14:50 neo-go-node-2 neo-go[9448]: #011/usr/local/go/src/sync/once.go:44 +0xb3 Feb 13 16:14:50 neo-go-node-2 neo-go[9448]: github.com/CityOfZion/neo-go/pkg/network.(*TCPPeer).Disconnect(0xc030ab4000, 0xd92440, 0xc000065a00) Feb 13 16:14:50 neo-go-node-2 neo-go[9448]: #011/go/src/github.com/CityOfZion/neo-go/pkg/network/tcp_peer.go:365 +0x6d Feb 13 16:14:50 neo-go-node-2 neo-go[9448]: github.com/CityOfZion/neo-go/pkg/network.(*TCPPeer).SendPing.func1() Feb 13 16:14:50 neo-go-node-2 neo-go[9448]: #011/go/src/github.com/CityOfZion/neo-go/pkg/network/tcp_peer.go:394 +0x42 Feb 13 16:14:50 neo-go-node-2 neo-go[9448]: created by time.goFunc Feb 13 16:14:50 neo-go-node-2 neo-go[9448]: #011/usr/local/go/src/time/sleep.go:169 +0x44 ... Feb 13 16:14:50 neo-go-node-2 neo-go[9448]: goroutine 3448 [chan send, 854 minutes]: Feb 13 16:14:50 neo-go-node-2 neo-go[9448]: github.com/CityOfZion/neo-go/pkg/network.(*TCPPeer).handleConn(0xc01ed203f0) Feb 13 16:14:50 neo-go-node-2 neo-go[9448]: #011/go/src/github.com/CityOfZion/neo-go/pkg/network/tcp_peer.go:143 +0x6c Feb 13 16:14:50 neo-go-node-2 neo-go[9448]: created by github.com/CityOfZion/neo-go/pkg/network.(*TCPTransport).Accept Feb 13 16:14:50 neo-go-node-2 neo-go[9448]: #011/go/src/github.com/CityOfZion/neo-go/pkg/network/tcp_transport.go:62 +0x44c ... The problem is that the select in putPacketIntoQueue() only works the way it was intended to after the `close(p.done)`, but that happens only after successful unregistration request send. Thus, do disconnects the other way around, first unblock queueing and exit goroutines, then destroy the connection (if it wasn't previously destroyed) and only after that signal to the Server. --- pkg/network/tcp_peer.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/network/tcp_peer.go b/pkg/network/tcp_peer.go index 9c4fff52d..62fd33b4c 100644 --- a/pkg/network/tcp_peer.go +++ b/pkg/network/tcp_peer.go @@ -363,9 +363,9 @@ func (p *TCPPeer) PeerAddr() net.Addr { // Disconnect will fill the peer's done channel with the given error. func (p *TCPPeer) Disconnect(err error) { p.finale.Do(func() { - p.server.unregister <- peerDrop{p, err} - p.conn.Close() close(p.done) + p.conn.Close() + p.server.unregister <- peerDrop{p, err} }) }