plugin/proxy: kick off HC on every 3rd failure (#1110)
* healthchecks: check on every 3rd failure. Check on every third failure, plus some cleanups to make this possible. A failed healthcheck will never increase Fails; a successful healthcheck will reset Fails to 0. There is a chance this counter now drops below 0, making the upstream super? healthy. This removes the okUntil smartness and condenses everything back to one metric: Fails; so it's simpler in that regard. Timeout errors are *not* attributed to the local upstream, and don't get counted into Fails anymore, meaning that 'dig any isc.org' won't kill your upstream. Added an extra test to see if the Fails counter gets reset after 3 failed connections. There is still a disconnect between the HTTP healthcheck working and the proxy (or lookup) not being able to connect to the upstream. * Fix tests
This commit is contained in:
parent
c7ff44fb3a
commit
e34e2c251f
13 changed files with 180 additions and 190 deletions
|
@ -127,14 +127,19 @@ func (p Proxy) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (
|
|||
|
||||
timeout := host.FailTimeout
|
||||
if timeout == 0 {
|
||||
timeout = 2 * time.Second
|
||||
timeout = defaultFailTimeout
|
||||
}
|
||||
|
||||
atomic.AddInt32(&host.Fails, 1)
|
||||
fails := atomic.LoadInt32(&host.Fails)
|
||||
|
||||
go func(host *healthcheck.UpstreamHost, timeout time.Duration) {
|
||||
time.Sleep(timeout)
|
||||
// we may go negative here, should be rectified by the HC.
|
||||
atomic.AddInt32(&host.Fails, -1)
|
||||
if fails%failureCheck == 0 { // Kick off healthcheck on eveyry third failure.
|
||||
host.HealthCheckURL()
|
||||
}
|
||||
}(host, timeout)
|
||||
}
|
||||
|
||||
|
@ -167,9 +172,6 @@ func (p Proxy) match(state request.Request) (u Upstream) {
|
|||
// Name implements the Handler interface.
|
||||
// Name reports the plugin's registered name, "proxy", satisfying the
// Handler interface.
func (p Proxy) Name() string {
	const pluginName = "proxy"
	return pluginName
}
|
||||
|
||||
// defaultTimeout is the default networking timeout for DNS requests.
|
||||
const defaultTimeout = 5 * time.Second
|
||||
|
||||
func toDnstap(ctx context.Context, host string, ex Exchanger, state request.Request, reply *dns.Msg, queryEpoch, respEpoch uint64) (err error) {
|
||||
if tapper := dnstap.TapperFromContext(ctx); tapper != nil {
|
||||
// Query
|
||||
|
@ -206,3 +208,9 @@ func toDnstap(ctx context.Context, host string, ex Exchanger, state request.Requ
|
|||
}
|
||||
return
|
||||
}
|
||||
|
||||
const (
|
||||
defaultFailTimeout = 2 * time.Second
|
||||
defaultTimeout = 5 * time.Second
|
||||
failureCheck = 3
|
||||
)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue