Add exponential backoff to healthcheck (#3643)

Move exponential backoff initialization to Start()

Signed-off-by: RickyRajinder <singh.sangh@gmail.com>

Move comment

Increase max interval and update README

Remove trailing whitespace

Change Start() param name back to interval
This commit is contained in:
Ricky S 2020-02-04 05:19:48 -08:00 committed by GitHub
parent 22cd28a798
commit efbe4ac5e8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 29 additions and 21 deletions

1
go.mod
View file

@ -12,6 +12,7 @@ require (
github.com/apache/thrift v0.13.0 // indirect
github.com/aws/aws-sdk-go v1.28.9
github.com/caddyserver/caddy v1.0.4
github.com/cenkalti/backoff/v4 v4.0.0
github.com/coredns/federation v0.0.0-20190818181423-e032b096babe
github.com/coreos/go-systemd v0.0.0-20190212144455-93d5ec2c7f76 // indirect
github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f // indirect

2
go.sum
View file

@ -78,6 +78,8 @@ github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kB
github.com/caddyserver/caddy v1.0.4 h1:wwuGSkUHo6RZ3oMpeTt7J09WBB87X5o+IZN4dKehcQE=
github.com/caddyserver/caddy v1.0.4/go.mod h1:uruyfVsyMcDb3IOzSKsi1x0wOjy1my/PxOSTcD+24jM=
github.com/cenkalti/backoff/v3 v3.0.0/go.mod h1:cIeZDE3IrqwwJl6VUwCN6trj1oXrTS4rc0ij+ULvLYs=
github.com/cenkalti/backoff/v4 v4.0.0 h1:6VeaLF9aI+MAUQ95106HwWzYZgJJpZ4stumjj6RFYAU=
github.com/cenkalti/backoff/v4 v4.0.0/go.mod h1:eEew/i+1Q6OrCDZh3WiXYv3+nJwBASZ8Bog/87DQnVg=
github.com/census-instrumentation/opencensus-proto v0.2.0/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
github.com/cespare/xxhash/v2 v2.1.1 h1:6MnRN8NT7+YBpUIWxHtefFZOKTAPgGjpQSxqLNn0+qY=

View file

@ -9,8 +9,10 @@
The *forward* plugin re-uses already opened sockets to the upstreams. It supports UDP, TCP and
DNS-over-TLS and uses in band health checking.
When it detects an error a health check is performed. This check runs in a loop, every *0.5s*, for
as long as the upstream reports unhealthy. Once healthy we stop health checking (until the next
When it detects an error a health check is performed. This check runs in a loop,
starting with a *0.5s* interval and exponentially backing off with randomized intervals
up to *60s* for as long as the upstream reports unhealthy. The exponential backoff
will reset to *0.5s* after 15 minutes. Once healthy we stop health checking (until the next
error). The health checks use a recursive DNS query (`. IN NS`) to get upstream health. Any response
that is not a network error (REFUSED, NOTIMPL, SERVFAIL, etc) is taken as a healthy upstream. The
health check uses the same protocol as specified in **TO**. If `max_fails` is set to 0, no checking

View file

@ -5,6 +5,8 @@ package up
import (
"sync"
"time"
"github.com/cenkalti/backoff/v4"
)
// Probe is used to run a single Func until it returns true (indicating a target is healthy). If a Func
@ -13,8 +15,7 @@ import (
type Probe struct {
// Mutex guards the fields below.
sync.Mutex
// inprogress is the probe state: one of idle, active or stop.
inprogress int
// interval is the pause between successive probe attempts; double() grows it up to max.
interval time.Duration
// max is the upper bound on interval, set in Start to interval * multiplier.
max time.Duration
// expBackoff yields the pause between probe attempts; it is configured in Start.
expBackoff backoff.ExponentialBackOff
}
// Func is used to determine if a target is alive. If so this function must return nil.
@ -31,7 +32,13 @@ func (p *Probe) Do(f Func) {
return
}
p.inprogress = active
interval := p.interval
interval := p.expBackoff.NextBackOff()
// If exponential backoff has reached the maximum elapsed time (15 minutes),
// reset it and try again
if interval == -1 {
p.expBackoff.Reset()
interval = p.expBackoff.NextBackOff()
}
p.Unlock()
// Passed the lock. Now run f for as long as it returns false. If a true is returned
// we return from the goroutine and we can accept another Func to run.
@ -42,9 +49,6 @@ func (p *Probe) Do(f Func) {
break
}
time.Sleep(interval)
if i%2 == 0 && i < 4 { // 4 is 2 doubles, so no need to increase anymore - this is *also* checked in double()
p.double()
}
p.Lock()
if p.inprogress == stop {
p.Unlock()
@ -60,15 +64,6 @@ func (p *Probe) Do(f Func) {
}()
}
// double doubles the probe interval while holding the lock, capping it at p.max.
func (p *Probe) double() {
p.Lock()
p.interval *= 2
if p.interval > p.max {
p.interval = p.max
}
p.Unlock()
}
// Stop stops the probing.
func (p *Probe) Stop() {
p.Lock()
@ -77,10 +72,20 @@ func (p *Probe) Stop() {
}
// Start will initialize the probe manager, after which probes can be initiated with Do.
// It configures the exponential backoff with the given interval as its initial interval;
// all other backoff settings use the backoff package defaults.
func (p *Probe) Start(interval time.Duration) {
p.Lock()
p.interval = interval
p.max = interval * multiplier
// Only InitialInterval is caller-supplied; the remaining fields are package defaults.
eB := &backoff.ExponentialBackOff{
InitialInterval: interval,
RandomizationFactor: backoff.DefaultRandomizationFactor,
Multiplier: backoff.DefaultMultiplier,
MaxInterval: backoff.DefaultMaxInterval,
MaxElapsedTime: backoff.DefaultMaxElapsedTime,
Stop: backoff.Stop,
Clock: backoff.SystemClock,
}
p.expBackoff = *eB
// Reset is called before first use; presumably required because the struct is built
// from a literal rather than a constructor — confirm against the backoff package docs.
p.expBackoff.Reset()
p.Unlock()
}
@ -88,6 +93,4 @@ const (
idle = iota
active
stop
multiplier = 4
)