plugin/proxy: fix metrics (#1137)

Add Counter metrics and fix duration to use upstream name (and only use
it when we have one).

Fix the documentation to reflect this.

Fixes #1134
This commit is contained in:
Miek Gieben 2017-10-08 04:30:44 -07:00 committed by GitHub
parent c1b9f74f98
commit 7c6ba3fcbd
3 changed files with 29 additions and 10 deletions

View file

@ -98,10 +98,13 @@ payload over HTTPS). Note that with `https_google` the entire transport is encry
If monitoring is enabled (via the *prometheus* directive) then the following metric is exported: If monitoring is enabled (via the *prometheus* directive) then the following metric is exported:
* coredns_proxy_request_count_total{proto, proxy_proto, from} * `coredns_proxy_request_duration_milliseconds{proto, proxy_proto, family, to}` - duration per upstream
interaction.
* `coredns_proxy_request_count_total{proto, proxy_proto, family, to}` - query count per upstream.
Where `proxy_proto` is the protocol used (`dns`, `grpc`, or `https_google`) and `from` is **FROM** Where `proxy_proto` is the protocol used (`dns`, `grpc`, or `https_google`) and `to` is **TO**
specified in the config, `proto` is the protocol used by the incoming query ("tcp" or "udp"). specified in the config, `proto` is the protocol used by the incoming query ("tcp" or "udp").
`family` is the transport family ("1" for IPv4, and "2" for IPv6).
## Examples ## Examples

View file

@ -10,21 +10,40 @@ import (
// Metrics the proxy plugin exports. // Metrics the proxy plugin exports.
var ( var (
RequestCount = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: plugin.Namespace,
Subsystem: "proxy",
Name: "request_count_total",
Help: "Counter of requests made per protocol, proxy protocol, family and upstream.",
}, []string{"proto", "proxy_proto", "family", "to"})
RequestDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{ RequestDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: plugin.Namespace, Namespace: plugin.Namespace,
Subsystem: "proxy", Subsystem: "proxy",
Name: "request_duration_milliseconds", Name: "request_duration_milliseconds",
Buckets: append(prometheus.DefBuckets, []float64{50, 100, 200, 500, 1000, 2000, 3000, 4000, 5000, 10000}...), Buckets: append(prometheus.DefBuckets, []float64{15, 20, 25, 30, 40, 50, 100, 200, 500, 1000, 2000, 3000, 4000, 5000, 10000}...),
Help: "Histogram of the time (in milliseconds) each request took.", Help: "Histogram of the time (in milliseconds) each request took.",
}, []string{"proto", "proxy_proto", "from"}) }, []string{"proto", "proxy_proto", "family", "to"})
) )
// OnStartupMetrics sets up the metrics on startup. This is done for all proxy protocols. // OnStartupMetrics sets up the metrics on startup. This is done for all proxy protocols.
func OnStartupMetrics() error { func OnStartupMetrics() error {
metricsOnce.Do(func() { metricsOnce.Do(func() {
prometheus.MustRegister(RequestCount)
prometheus.MustRegister(RequestDuration) prometheus.MustRegister(RequestDuration)
}) })
return nil return nil
} }
// familyToString converts a transport family number into the string used
// as the "family" metric label: 1 becomes "1" and 2 becomes "2". Any other
// value is not a known family and yields the empty string.
func familyToString(f int) string {
	switch f {
	case 1:
		return "1"
	case 2:
		return "2"
	default:
		return ""
	}
}
var metricsOnce sync.Once var metricsOnce sync.Once

View file

@ -80,9 +80,6 @@ func (p Proxy) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (
for time.Since(start) < tryDuration { for time.Since(start) < tryDuration {
host := upstream.Select() host := upstream.Select()
if host == nil { if host == nil {
RequestDuration.WithLabelValues(state.Proto(), upstream.Exchanger().Protocol(), upstream.From()).Observe(float64(time.Since(start) / time.Millisecond))
return dns.RcodeServerFailure, fmt.Errorf("%s: %s", errUnreachable, "no upstream host") return dns.RcodeServerFailure, fmt.Errorf("%s: %s", errUnreachable, "no upstream host")
} }
@ -94,6 +91,8 @@ func (p Proxy) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (
atomic.AddInt64(&host.Conns, 1) atomic.AddInt64(&host.Conns, 1)
queryEpoch := msg.Epoch() queryEpoch := msg.Epoch()
RequestCount.WithLabelValues(state.Proto(), upstream.Exchanger().Protocol(), familyToString(state.Family()), host.Name).Add(1)
reply, backendErr = upstream.Exchanger().Exchange(ctx, host.Name, state) reply, backendErr = upstream.Exchanger().Exchange(ctx, host.Name, state)
respEpoch := msg.Epoch() respEpoch := msg.Epoch()
@ -108,7 +107,7 @@ func (p Proxy) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (
if backendErr == nil { if backendErr == nil {
w.WriteMsg(reply) w.WriteMsg(reply)
RequestDuration.WithLabelValues(state.Proto(), upstream.Exchanger().Protocol(), upstream.From()).Observe(float64(time.Since(start) / time.Millisecond)) RequestDuration.WithLabelValues(state.Proto(), upstream.Exchanger().Protocol(), familyToString(state.Family()), host.Name).Observe(float64(time.Since(start) / time.Millisecond))
return 0, taperr return 0, taperr
} }
@ -139,8 +138,6 @@ func (p Proxy) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (
}(host, timeout) }(host, timeout)
} }
RequestDuration.WithLabelValues(state.Proto(), upstream.Exchanger().Protocol(), upstream.From()).Observe(float64(time.Since(start) / time.Millisecond))
return dns.RcodeServerFailure, fmt.Errorf("%s: %s", errUnreachable, backendErr) return dns.RcodeServerFailure, fmt.Errorf("%s: %s", errUnreachable, backendErr)
} }
} }