coredns/plugin/health/overloaded.go
Ben Kochie 0d6e113f90
Enable Prometheus native histograms (#6524)
Add a NativeHistogramBucketFactor parameter to the use of
`NewHistogramVec` in order to enable use of Prometheus Native
Histograms.

This will store automatically computed sparse buckets in CoreDNS.
If a compatible Prometeus requests native histograms this data will
returned instead of the static buckets.

The default factor of 1.05 should provide high quality resolution data.

Signed-off-by: SuperQ <superq@gmail.com>
2024-03-11 16:09:09 -04:00

84 lines
2.5 KiB
Go

package health
import (
"context"
"net"
"net/http"
"time"
"github.com/coredns/coredns/plugin"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
)
// overloaded queries the health end point and updates a metrics showing how long it took.
func (h *health) overloaded(ctx context.Context) {
bypassProxy := &http.Transport{
Proxy: nil,
DialContext: (&net.Dialer{
Timeout: 30 * time.Second,
KeepAlive: 30 * time.Second,
}).DialContext,
ForceAttemptHTTP2: true,
MaxIdleConns: 100,
IdleConnTimeout: 90 * time.Second,
TLSHandshakeTimeout: 10 * time.Second,
ExpectContinueTimeout: 1 * time.Second,
}
timeout := 3 * time.Second
client := http.Client{
Timeout: timeout,
Transport: bypassProxy,
}
req, _ := http.NewRequestWithContext(ctx, http.MethodGet, h.healthURI.String(), nil)
tick := time.NewTicker(1 * time.Second)
defer tick.Stop()
for {
select {
case <-tick.C:
start := time.Now()
resp, err := client.Do(req)
if err != nil && ctx.Err() == context.Canceled {
// request was cancelled by parent goroutine
return
}
if err != nil {
HealthDuration.Observe(time.Since(start).Seconds())
HealthFailures.Inc()
log.Warningf("Local health request to %q failed: %s", req.URL.String(), err)
continue
}
resp.Body.Close()
elapsed := time.Since(start)
HealthDuration.Observe(elapsed.Seconds())
if elapsed > time.Second { // 1s is pretty random, but a *local* scrape taking that long isn't good
log.Warningf("Local health request to %q took more than 1s: %s", req.URL.String(), elapsed)
}
case <-ctx.Done():
return
}
}
}
var (
// HealthDuration is the metric used for exporting how fast we can retrieve the /health endpoint.
HealthDuration = promauto.NewHistogram(prometheus.HistogramOpts{
Namespace: plugin.Namespace,
Subsystem: "health",
Name: "request_duration_seconds",
Buckets: plugin.SlimTimeBuckets,
NativeHistogramBucketFactor: plugin.NativeHistogramBucketFactor,
Help: "Histogram of the time (in seconds) each request took.",
})
// HealthFailures is the metric used to count how many times the health request failed
HealthFailures = promauto.NewCounter(prometheus.CounterOpts{
Namespace: plugin.Namespace,
Subsystem: "health",
Name: "request_failures_total",
Help: "The number of times the health check failed.",
})
)