network: add random slight delay to connection attempts

Small (especially dockerized/virtualized) networks often start all nodes at
once and then we see a lot of connection flapping in the log. This happens
because nodes try to connect to each other simultaneously, establish two
connections, then each one finds a duplicate and drops it, but the two sides
can each drop a different one of the duplicates, so they retry and the cycle
repeats for some time. Eventually everything settles, but we have a lot of
garbage in the log and a lot of useless attempts.

This random waiting timeout doesn't change the logic much, adds a minimal
delay, but increases chances for both nodes to establish a proper single
connection on both sides to only then see another one and drop it on both
sides as well. It leads to almost no flapping in small networks and doesn't
affect bigger ones much. The delay is close to unnoticeable, especially if
there is something in the DB for the node to process during startup.
This commit is contained in:
Roman Khimov 2022-11-17 18:42:43 +03:00
parent 075a54192c
commit b8c09f509f
2 changed files with 10 additions and 0 deletions

View file

@ -2,6 +2,7 @@ package network
import ( import (
"math" "math"
"math/rand"
"sync" "sync"
"sync/atomic" "sync/atomic"
"time" "time"
@ -14,6 +15,11 @@ const (
connRetries = 3 connRetries = 3
) )
// tryMaxWait is the exclusive upper bound of the random pause taken before
// each connection attempt. It is a variable rather than a constant so that
// tests can shrink it.
var tryMaxWait = 500 * time.Millisecond
// Discoverer is an interface that is responsible for maintaining // Discoverer is an interface that is responsible for maintaining
// a healthy connection pool. // a healthy connection pool.
type Discoverer interface { type Discoverer interface {
@ -294,6 +300,8 @@ func (d *DefaultDiscovery) updateNetSize() {
} }
func (d *DefaultDiscovery) tryAddress(addr string) { func (d *DefaultDiscovery) tryAddress(addr string) {
var tout = rand.Int63n(int64(tryMaxWait))
time.Sleep(time.Duration(tout)) // Have a sleep before working hard.
p, err := d.transport.Dial(addr, d.dialTimeout) p, err := d.transport.Dial(addr, d.dialTimeout)
atomic.AddInt32(&d.outstanding, -1) atomic.AddInt32(&d.outstanding, -1)
d.lock.Lock() d.lock.Lock()

View file

@ -82,6 +82,7 @@ func TestDefaultDiscoverer(t *testing.T) {
ts.dialCh = make(chan string) ts.dialCh = make(chan string)
d := NewDefaultDiscovery(nil, time.Second/16, ts) d := NewDefaultDiscovery(nil, time.Second/16, ts)
tryMaxWait = 1 // Don't waste time.
var set1 = []string{"1.1.1.1:10333", "2.2.2.2:10333"} var set1 = []string{"1.1.1.1:10333", "2.2.2.2:10333"}
sort.Strings(set1) sort.Strings(set1)
@ -211,6 +212,7 @@ func TestSeedDiscovery(t *testing.T) {
sort.Strings(seeds) sort.Strings(seeds)
d := NewDefaultDiscovery(seeds, time.Second/10, ts) d := NewDefaultDiscovery(seeds, time.Second/10, ts)
tryMaxWait = 1 // Don't waste time.
d.RequestRemote(len(seeds)) d.RequestRemote(len(seeds))
for i := 0; i < connRetries*2; i++ { for i := 0; i < connRetries*2; i++ {