network: add random slight delay to connection attempts
Small (especially dockerized/virtualized) networks often start all nodes at once, and then we see a lot of connection flapping in the log. This happens because nodes try to connect to each other simultaneously and establish two connections; then each node finds a duplicate and drops it, but the two sides may pick different connections as the duplicate to drop, so they retry, and this repeats for some time. Eventually everything settles, but we are left with a lot of garbage in the log and a lot of useless attempts. This random waiting timeout doesn't change the logic much and adds only a minimal delay, but it increases the chances for both nodes to establish a proper single connection on both sides, and only then to see another one and drop it on both sides as well. It leads to almost no flapping in small networks and doesn't much affect bigger ones. The delay is close to unnoticeable, especially if there is something in the DB for the node to process during startup.
This commit is contained in:
parent
075a54192c
commit
b8c09f509f
2 changed files with 10 additions and 0 deletions
|
@ -2,6 +2,7 @@ package network
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"math"
|
"math"
|
||||||
|
"math/rand"
|
||||||
"sync"
|
"sync"
|
||||||
"sync/atomic"
|
"sync/atomic"
|
||||||
"time"
|
"time"
|
||||||
|
@ -14,6 +15,11 @@ const (
|
||||||
connRetries = 3
|
connRetries = 3
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
// Upper bound of the random delay applied before each connection attempt (a variable so that tests can shrink it).
|
||||||
|
tryMaxWait = time.Second / 2
|
||||||
|
)
|
||||||
|
|
||||||
// Discoverer is an interface that is responsible for maintaining
|
// Discoverer is an interface that is responsible for maintaining
|
||||||
// a healthy connection pool.
|
// a healthy connection pool.
|
||||||
type Discoverer interface {
|
type Discoverer interface {
|
||||||
|
@ -294,6 +300,8 @@ func (d *DefaultDiscovery) updateNetSize() {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (d *DefaultDiscovery) tryAddress(addr string) {
|
func (d *DefaultDiscovery) tryAddress(addr string) {
|
||||||
|
var tout = rand.Int63n(int64(tryMaxWait))
|
||||||
|
time.Sleep(time.Duration(tout)) // Have a sleep before working hard.
|
||||||
p, err := d.transport.Dial(addr, d.dialTimeout)
|
p, err := d.transport.Dial(addr, d.dialTimeout)
|
||||||
atomic.AddInt32(&d.outstanding, -1)
|
atomic.AddInt32(&d.outstanding, -1)
|
||||||
d.lock.Lock()
|
d.lock.Lock()
|
||||||
|
|
|
@ -82,6 +82,7 @@ func TestDefaultDiscoverer(t *testing.T) {
|
||||||
ts.dialCh = make(chan string)
|
ts.dialCh = make(chan string)
|
||||||
d := NewDefaultDiscovery(nil, time.Second/16, ts)
|
d := NewDefaultDiscovery(nil, time.Second/16, ts)
|
||||||
|
|
||||||
|
tryMaxWait = 1 // Don't waste time.
|
||||||
var set1 = []string{"1.1.1.1:10333", "2.2.2.2:10333"}
|
var set1 = []string{"1.1.1.1:10333", "2.2.2.2:10333"}
|
||||||
sort.Strings(set1)
|
sort.Strings(set1)
|
||||||
|
|
||||||
|
@ -211,6 +212,7 @@ func TestSeedDiscovery(t *testing.T) {
|
||||||
sort.Strings(seeds)
|
sort.Strings(seeds)
|
||||||
|
|
||||||
d := NewDefaultDiscovery(seeds, time.Second/10, ts)
|
d := NewDefaultDiscovery(seeds, time.Second/10, ts)
|
||||||
|
tryMaxWait = 1 // Don't waste time.
|
||||||
|
|
||||||
d.RequestRemote(len(seeds))
|
d.RequestRemote(len(seeds))
|
||||||
for i := 0; i < connRetries*2; i++ {
|
for i := 0; i < connRetries*2; i++ {
|
||||||
|
|
Loading…
Reference in a new issue