From 037e4920c29534ba4fd53806ad92bb6446d4b6ed Mon Sep 17 00:00:00 2001 From: Chris O'Haver Date: Fri, 17 Jun 2022 15:49:53 -0400 Subject: [PATCH] plugin/health: Bypass proxy in self health check (#5401) * add detail to docs; bypass proxy in self health check Signed-off-by: Chris O'Haver --- plugin/health/README.md | 10 +++++----- plugin/health/overloaded.go | 16 +++++++++++++++- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/plugin/health/README.md b/plugin/health/README.md index c8fda61a4..b18d2ec3c 100644 --- a/plugin/health/README.md +++ b/plugin/health/README.md @@ -48,13 +48,13 @@ Doing this is supported but both endpoints ":8080" and ":8081" will export the e ## Metrics -If monitoring is enabled (via the *prometheus* plugin) then the following metric is exported: +If monitoring is enabled (via the *prometheus* plugin) then the following metrics are exported: - * `coredns_health_request_duration_seconds{}` - duration to process a HTTP query to the local - `/health` endpoint. As this a local operation it should be fast. A (large) increase in this + * `coredns_health_request_duration_seconds{}` - The *health* plugin performs a self health check + once per second on the `/health` endpoint. This metric is the duration to process that request. + As this is a local operation it should be fast. A (large) increase in this duration indicates the CoreDNS process is having trouble keeping up with its query load. - * `coredns_health_request_failures_total{}` - The number of times the internal health check loop - failed to query `/health`. + * `coredns_health_request_failures_total{}` - The number of times the self health check failed. Note that these metrics *do not* have a `server` label, because being overloaded is a symptom of the running process, *not* a specific server. diff --git a/plugin/health/overloaded.go b/plugin/health/overloaded.go index 482b8a286..57b9ca2d0 100644 --- a/plugin/health/overloaded.go +++ b/plugin/health/overloaded.go @@ -2,6 +2,7 @@ package health import ( "context" + "net" "net/http" "time" @@ -13,9 +14,22 @@ import ( // overloaded queries the health end point and updates a metrics showing how long it took. func (h *health) overloaded(ctx context.Context) { + bypassProxy := &http.Transport{ + Proxy: nil, + DialContext: (&net.Dialer{ + Timeout: 30 * time.Second, + KeepAlive: 30 * time.Second, + }).DialContext, + ForceAttemptHTTP2: true, + MaxIdleConns: 100, + IdleConnTimeout: 90 * time.Second, + TLSHandshakeTimeout: 10 * time.Second, + ExpectContinueTimeout: 1 * time.Second, + } timeout := 3 * time.Second client := http.Client{ - Timeout: timeout, + Timeout: timeout, + Transport: bypassProxy, } url := "http://" + h.Addr + "/health"