Add exponential backoff to healthcheck (#3643)
Move exponential backoff initialization to Start(). Signed-off-by: RickyRajinder <singh.sangh@gmail.com>. Move comment. Increase max interval and update README. Remove trailing whitespace. Change Start() param name back to interval.
This commit is contained in:
parent
22cd28a798
commit
efbe4ac5e8
4 changed files with 29 additions and 21 deletions
1
go.mod
1
go.mod
|
@ -12,6 +12,7 @@ require (
|
|||
github.com/apache/thrift v0.13.0 // indirect
|
||||
github.com/aws/aws-sdk-go v1.28.9
|
||||
github.com/caddyserver/caddy v1.0.4
|
||||
github.com/cenkalti/backoff/v4 v4.0.0
|
||||
github.com/coredns/federation v0.0.0-20190818181423-e032b096babe
|
||||
github.com/coreos/go-systemd v0.0.0-20190212144455-93d5ec2c7f76 // indirect
|
||||
github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f // indirect
|
||||
|
|
2
go.sum
2
go.sum
|
@ -78,6 +78,8 @@ github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kB
|
|||
github.com/caddyserver/caddy v1.0.4 h1:wwuGSkUHo6RZ3oMpeTt7J09WBB87X5o+IZN4dKehcQE=
|
||||
github.com/caddyserver/caddy v1.0.4/go.mod h1:uruyfVsyMcDb3IOzSKsi1x0wOjy1my/PxOSTcD+24jM=
|
||||
github.com/cenkalti/backoff/v3 v3.0.0/go.mod h1:cIeZDE3IrqwwJl6VUwCN6trj1oXrTS4rc0ij+ULvLYs=
|
||||
github.com/cenkalti/backoff/v4 v4.0.0 h1:6VeaLF9aI+MAUQ95106HwWzYZgJJpZ4stumjj6RFYAU=
|
||||
github.com/cenkalti/backoff/v4 v4.0.0/go.mod h1:eEew/i+1Q6OrCDZh3WiXYv3+nJwBASZ8Bog/87DQnVg=
|
||||
github.com/census-instrumentation/opencensus-proto v0.2.0/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
|
||||
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
|
||||
github.com/cespare/xxhash/v2 v2.1.1 h1:6MnRN8NT7+YBpUIWxHtefFZOKTAPgGjpQSxqLNn0+qY=
|
||||
|
|
|
@ -9,8 +9,10 @@
|
|||
The *forward* plugin re-uses already opened sockets to the upstreams. It supports UDP, TCP and
|
||||
DNS-over-TLS and uses in band health checking.
|
||||
|
||||
When it detects an error, a health check is performed. This check runs in a loop, every *0.5s*, for
|
||||
as long as the upstream reports unhealthy. Once healthy we stop health checking (until the next
|
||||
When it detects an error, a health check is performed. This check runs in a loop,
|
||||
starting with a *0.5s* interval and exponentially backing off with randomized intervals
|
||||
up to *60s* for as long as the upstream reports unhealthy. The exponential backoff
|
||||
will reset to *0.5s* after 15 minutes. Once healthy we stop health checking (until the next
|
||||
error). The health checks use a recursive DNS query (`. IN NS`) to get upstream health. Any response
|
||||
that is not a network error (REFUSED, NOTIMPL, SERVFAIL, etc) is taken as a healthy upstream. The
|
||||
health check uses the same protocol as specified in **TO**. If `max_fails` is set to 0, no checking
|
||||
|
|
|
@ -5,6 +5,8 @@ package up
|
|||
import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/cenkalti/backoff/v4"
|
||||
)
|
||||
|
||||
// Probe is used to run a single Func until it returns true (indicating a target is healthy). If a Func
|
||||
|
@ -13,8 +15,7 @@ import (
|
|||
type Probe struct {
|
||||
sync.Mutex
|
||||
inprogress int
|
||||
interval time.Duration
|
||||
max time.Duration
|
||||
expBackoff backoff.ExponentialBackOff
|
||||
}
|
||||
|
||||
// Func is used to determine if a target is alive. If so this function must return nil.
|
||||
|
@ -31,7 +32,13 @@ func (p *Probe) Do(f Func) {
|
|||
return
|
||||
}
|
||||
p.inprogress = active
|
||||
interval := p.interval
|
||||
interval := p.expBackoff.NextBackOff()
|
||||
// If exponential backoff has reached the maximum elapsed time (15 minutes),
|
||||
// reset it and try again
|
||||
if interval == -1 {
|
||||
p.expBackoff.Reset()
|
||||
interval = p.expBackoff.NextBackOff()
|
||||
}
|
||||
p.Unlock()
|
||||
// Passed the lock. Now run f for as long as it returns false. If a true is returned
|
||||
// we return from the goroutine and we can accept another Func to run.
|
||||
|
@ -42,9 +49,6 @@ func (p *Probe) Do(f Func) {
|
|||
break
|
||||
}
|
||||
time.Sleep(interval)
|
||||
if i%2 == 0 && i < 4 { // 4 is 2 doubles, so no need to increase anymore - this is *also* checked in double()
|
||||
p.double()
|
||||
}
|
||||
p.Lock()
|
||||
if p.inprogress == stop {
|
||||
p.Unlock()
|
||||
|
@ -60,15 +64,6 @@ func (p *Probe) Do(f Func) {
|
|||
}()
|
||||
}
|
||||
|
||||
func (p *Probe) double() {
|
||||
p.Lock()
|
||||
p.interval *= 2
|
||||
if p.interval > p.max {
|
||||
p.interval = p.max
|
||||
}
|
||||
p.Unlock()
|
||||
}
|
||||
|
||||
// Stop stops the probing.
|
||||
func (p *Probe) Stop() {
|
||||
p.Lock()
|
||||
|
@ -77,10 +72,20 @@ func (p *Probe) Stop() {
|
|||
}
|
||||
|
||||
// Start will initialize the probe manager, after which probes can be initiated with Do.
|
||||
// It also initializes the exponential backoff, using the given interval as its initial interval.
|
||||
func (p *Probe) Start(interval time.Duration) {
|
||||
p.Lock()
|
||||
p.interval = interval
|
||||
p.max = interval * multiplier
|
||||
eB := &backoff.ExponentialBackOff{
|
||||
InitialInterval: interval,
|
||||
RandomizationFactor: backoff.DefaultRandomizationFactor,
|
||||
Multiplier: backoff.DefaultMultiplier,
|
||||
MaxInterval: backoff.DefaultMaxInterval,
|
||||
MaxElapsedTime: backoff.DefaultMaxElapsedTime,
|
||||
Stop: backoff.Stop,
|
||||
Clock: backoff.SystemClock,
|
||||
}
|
||||
p.expBackoff = *eB
|
||||
p.expBackoff.Reset()
|
||||
p.Unlock()
|
||||
}
|
||||
|
||||
|
@ -88,6 +93,4 @@ const (
|
|||
idle = iota
|
||||
active
|
||||
stop
|
||||
|
||||
multiplier = 4
|
||||
)
|
||||
|
|
Loading…
Add table
Reference in a new issue