Enable Prometheus native histograms (#6524)

Add a NativeHistogramBucketFactor parameter to the use of
`NewHistogramVec` in order to enable use of Prometheus Native
Histograms.

This will store automatically computed sparse buckets in CoreDNS.
If a compatible Prometheus requests native histograms, this data will
be returned instead of the static buckets.

The default factor of 1.05 should provide high quality resolution data.

Signed-off-by: SuperQ <superq@gmail.com>
This commit is contained in:
Ben Kochie 2024-03-11 21:09:09 +01:00 committed by GitHub
parent a4cbd95795
commit 0d6e113f90
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 55 additions and 42 deletions

View file

@ -22,10 +22,11 @@ var (
Help: "Counter of requests made per upstream.", Help: "Counter of requests made per upstream.",
}, []string{"rcode", "to"}) }, []string{"rcode", "to"})
RequestDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{ RequestDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{
Namespace: plugin.Namespace, Namespace: plugin.Namespace,
Subsystem: "grpc", Subsystem: "grpc",
Name: "request_duration_seconds", Name: "request_duration_seconds",
Buckets: plugin.TimeBuckets, Buckets: plugin.TimeBuckets,
Help: "Histogram of the time each request took.", NativeHistogramBucketFactor: plugin.NativeHistogramBucketFactor,
Help: "Histogram of the time each request took.",
}, []string{"to"}) }, []string{"to"})
) )

View file

@ -67,11 +67,12 @@ func (h *health) overloaded(ctx context.Context) {
var ( var (
// HealthDuration is the metric used for exporting how fast we can retrieve the /health endpoint. // HealthDuration is the metric used for exporting how fast we can retrieve the /health endpoint.
HealthDuration = promauto.NewHistogram(prometheus.HistogramOpts{ HealthDuration = promauto.NewHistogram(prometheus.HistogramOpts{
Namespace: plugin.Namespace, Namespace: plugin.Namespace,
Subsystem: "health", Subsystem: "health",
Name: "request_duration_seconds", Name: "request_duration_seconds",
Buckets: plugin.SlimTimeBuckets, Buckets: plugin.SlimTimeBuckets,
Help: "Histogram of the time (in seconds) each request took.", NativeHistogramBucketFactor: plugin.NativeHistogramBucketFactor,
Help: "Histogram of the time (in seconds) each request took.",
}) })
// HealthFailures is the metric used to count how many times the health request failed // HealthFailures is the metric used to count how many times the health request failed
HealthFailures = promauto.NewCounter(prometheus.CounterOpts{ HealthFailures = promauto.NewCounter(prometheus.CounterOpts{

View file

@ -16,11 +16,12 @@ var (
// requestLatency measures K8s rest client requests latency grouped by verb and host. // requestLatency measures K8s rest client requests latency grouped by verb and host.
requestLatency = promauto.NewHistogramVec( requestLatency = promauto.NewHistogramVec(
prometheus.HistogramOpts{ prometheus.HistogramOpts{
Namespace: plugin.Namespace, Namespace: plugin.Namespace,
Subsystem: "kubernetes", Subsystem: "kubernetes",
Name: "rest_client_request_duration_seconds", Name: "rest_client_request_duration_seconds",
Help: "Request latency in seconds. Broken down by verb and host.", Help: "Request latency in seconds. Broken down by verb and host.",
Buckets: prometheus.DefBuckets, Buckets: prometheus.DefBuckets,
NativeHistogramBucketFactor: plugin.NativeHistogramBucketFactor,
}, },
[]string{"verb", "host"}, []string{"verb", "host"},
) )
@ -28,11 +29,12 @@ var (
// rateLimiterLatency measures K8s rest client rate limiter latency grouped by verb and host. // rateLimiterLatency measures K8s rest client rate limiter latency grouped by verb and host.
rateLimiterLatency = promauto.NewHistogramVec( rateLimiterLatency = promauto.NewHistogramVec(
prometheus.HistogramOpts{ prometheus.HistogramOpts{
Namespace: plugin.Namespace, Namespace: plugin.Namespace,
Subsystem: "kubernetes", Subsystem: "kubernetes",
Name: "rest_client_rate_limiter_duration_seconds", Name: "rest_client_rate_limiter_duration_seconds",
Help: "Client side rate limiter latency in seconds. Broken down by verb and host.", Help: "Client side rate limiter latency in seconds. Broken down by verb and host.",
Buckets: prometheus.DefBuckets, Buckets: prometheus.DefBuckets,
NativeHistogramBucketFactor: plugin.NativeHistogramBucketFactor,
}, },
[]string{"verb", "host"}, []string{"verb", "host"},
) )

View file

@ -29,8 +29,9 @@ var (
Subsystem: "kubernetes", Subsystem: "kubernetes",
Name: "dns_programming_duration_seconds", Name: "dns_programming_duration_seconds",
// From 1 millisecond to ~17 minutes. // From 1 millisecond to ~17 minutes.
Buckets: prometheus.ExponentialBuckets(0.001, 2, 20), Buckets: prometheus.ExponentialBuckets(0.001, 2, 20),
Help: "Histogram of the time (in seconds) it took to program a dns instance.", NativeHistogramBucketFactor: plugin.NativeHistogramBucketFactor,
Help: "Histogram of the time (in seconds) it took to program a dns instance.",
}, []string{"service_kind"}) }, []string{"service_kind"})
// DurationSinceFunc returns the duration elapsed since the given time. // DurationSinceFunc returns the duration elapsed since the given time.

View file

@ -17,19 +17,21 @@ var (
}, []string{"server", "zone", "view", "proto", "family", "type"}) }, []string{"server", "zone", "view", "proto", "family", "type"})
RequestDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{ RequestDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{
Namespace: plugin.Namespace, Namespace: plugin.Namespace,
Subsystem: subsystem, Subsystem: subsystem,
Name: "request_duration_seconds", Name: "request_duration_seconds",
Buckets: plugin.TimeBuckets, Buckets: plugin.TimeBuckets,
Help: "Histogram of the time (in seconds) each request took per zone.", NativeHistogramBucketFactor: plugin.NativeHistogramBucketFactor,
Help: "Histogram of the time (in seconds) each request took per zone.",
}, []string{"server", "zone", "view"}) }, []string{"server", "zone", "view"})
RequestSize = promauto.NewHistogramVec(prometheus.HistogramOpts{ RequestSize = promauto.NewHistogramVec(prometheus.HistogramOpts{
Namespace: plugin.Namespace, Namespace: plugin.Namespace,
Subsystem: subsystem, Subsystem: subsystem,
Name: "request_size_bytes", Name: "request_size_bytes",
Help: "Size of the EDNS0 UDP buffer in bytes (64K for TCP) per zone and protocol.", Help: "Size of the EDNS0 UDP buffer in bytes (64K for TCP) per zone and protocol.",
Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3}, Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3},
NativeHistogramBucketFactor: plugin.NativeHistogramBucketFactor,
}, []string{"server", "zone", "view", "proto"}) }, []string{"server", "zone", "view", "proto"})
RequestDo = promauto.NewCounterVec(prometheus.CounterOpts{ RequestDo = promauto.NewCounterVec(prometheus.CounterOpts{
@ -40,11 +42,12 @@ var (
}, []string{"server", "zone", "view"}) }, []string{"server", "zone", "view"})
ResponseSize = promauto.NewHistogramVec(prometheus.HistogramOpts{ ResponseSize = promauto.NewHistogramVec(prometheus.HistogramOpts{
Namespace: plugin.Namespace, Namespace: plugin.Namespace,
Subsystem: subsystem, Subsystem: subsystem,
Name: "response_size_bytes", Name: "response_size_bytes",
Help: "Size of the returned response in bytes.", Help: "Size of the returned response in bytes.",
Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3}, Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3},
NativeHistogramBucketFactor: plugin.NativeHistogramBucketFactor,
}, []string{"server", "zone", "view", "proto"}) }, []string{"server", "zone", "view", "proto"})
ResponseRcode = promauto.NewCounterVec(prometheus.CounterOpts{ ResponseRcode = promauto.NewCounterVec(prometheus.CounterOpts{

View file

@ -10,11 +10,12 @@ import (
// Variables declared for monitoring. // Variables declared for monitoring.
var ( var (
requestDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{ requestDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{
Namespace: plugin.Namespace, Namespace: plugin.Namespace,
Subsystem: "proxy", Subsystem: "proxy",
Name: "request_duration_seconds", Name: "request_duration_seconds",
Buckets: plugin.TimeBuckets, Buckets: plugin.TimeBuckets,
Help: "Histogram of the time each request took.", NativeHistogramBucketFactor: plugin.NativeHistogramBucketFactor,
Help: "Histogram of the time each request took.",
}, []string{"proxy_name", "to", "rcode"}) }, []string{"proxy_name", "to", "rcode"})
healthcheckFailureCount = promauto.NewCounterVec(prometheus.CounterOpts{ healthcheckFailureCount = promauto.NewCounterVec(prometheus.CounterOpts{

View file

@ -108,5 +108,9 @@ var TimeBuckets = prometheus.ExponentialBuckets(0.00025, 2, 16) // from 0.25ms t
// SlimTimeBuckets is low cardinality set of duration buckets. // SlimTimeBuckets is low cardinality set of duration buckets.
var SlimTimeBuckets = prometheus.ExponentialBuckets(0.00025, 10, 5) // from 0.25ms to 2.5 seconds var SlimTimeBuckets = prometheus.ExponentialBuckets(0.00025, 10, 5) // from 0.25ms to 2.5 seconds
// NativeHistogramBucketFactor controls the resolution of Prometheus native histogram buckets.
// See: https://pkg.go.dev/github.com/prometheus/client_golang@v1.19.0/prometheus#section-readme
var NativeHistogramBucketFactor = 1.05
// ErrOnce is returned when a plugin doesn't support multiple setups per server. // ErrOnce is returned when a plugin doesn't support multiple setups per server.
var ErrOnce = errors.New("this plugin can only be used once per Server Block") var ErrOnce = errors.New("this plugin can only be used once per Server Block")