plugin/proxy: kick off HC on every 3rd failure (#1110)

* healthchecks: check on every 3rd failure

Check on every third failure and some cleanups to make this possible. A
failed healthcheck will never increase Fails; a successful healthcheck
will reset Fails to 0. There is a chance this counter now drops below 0,
making the upstream super? healthy.

This removes the okUntil smartness and condenses everything back to 1
metric: Fails; so it's simpler in that regard.

Timeout errors are *not* attributed to the local upstream, and don't get
counted into the Fails anymore. Meaning the 'dig any isc.org' won't kill
your upstream.

Added an extra test to see if the Fails counter gets reset after 3 failed
connections.

There is still a disconnect between the HTTP healthcheck working and the
proxy (or lookup) not being able to connect to the upstream.

* Fix tests
This commit is contained in:
Miek Gieben 2017-10-15 19:38:39 +02:00 committed by GitHub
parent c7ff44fb3a
commit e34e2c251f
13 changed files with 180 additions and 190 deletions

View file

@ -127,14 +127,19 @@ func (p Proxy) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (
timeout := host.FailTimeout
if timeout == 0 {
timeout = 2 * time.Second
timeout = defaultFailTimeout
}
atomic.AddInt32(&host.Fails, 1)
fails := atomic.LoadInt32(&host.Fails)
go func(host *healthcheck.UpstreamHost, timeout time.Duration) {
time.Sleep(timeout)
// we may go negative here, should be rectified by the HC.
atomic.AddInt32(&host.Fails, -1)
if fails%failureCheck == 0 { // Kick off healthcheck on eveyry third failure.
host.HealthCheckURL()
}
}(host, timeout)
}
@ -167,9 +172,6 @@ func (p Proxy) match(state request.Request) (u Upstream) {
// Name implements the Handler interface. It returns the fixed plugin
// name "proxy".
func (p Proxy) Name() string { return "proxy" }
// defaultTimeout is the default networking timeout for DNS requests.
const defaultTimeout = 5 * time.Second
func toDnstap(ctx context.Context, host string, ex Exchanger, state request.Request, reply *dns.Msg, queryEpoch, respEpoch uint64) (err error) {
if tapper := dnstap.TapperFromContext(ctx); tapper != nil {
// Query
@ -206,3 +208,9 @@ func toDnstap(ctx context.Context, host string, ex Exchanger, state request.Requ
}
return
}
// Default timeouts and the healthcheck trigger interval used by the proxy.
const (
// defaultFailTimeout is the fallback used when a host's FailTimeout is 0.
defaultFailTimeout = 2 * time.Second
// defaultTimeout is the default networking timeout for DNS requests.
defaultTimeout = 5 * time.Second
// failureCheck is the failure-count interval at which a healthcheck is
// kicked off (i.e. on every 3rd failure).
failureCheck = 3
)