diff --git a/plugin/proxy/README.md b/plugin/proxy/README.md index c23df8c42..a064a3418 100644 --- a/plugin/proxy/README.md +++ b/plugin/proxy/README.md @@ -98,10 +98,13 @@ payload over HTTPS). Note that with `https_google` the entire transport is encry If monitoring is enabled (via the *prometheus* directive) then the following metric is exported: -* coredns_proxy_request_count_total{proto, proxy_proto, from} +* `coredns_proxy_request_duration_milliseconds{proto, proxy_proto, family, to}` - duration per upstream + interaction. +* `coredns_proxy_request_count_total{proto, proxy_proto, family, to}` - query count per upstream. -Where `proxy_proto` is the protocol used (`dns`, `grpc`, or `https_google`) and `from` is **FROM** +Where `proxy_proto` is the protocol used (`dns`, `grpc`, or `https_google`) and `to` is **TO** specified in the config, `proto` is the protocol used by the incoming query ("tcp" or "udp"). +`family` is the transport family ("1" for IPv4, and "2" for IPv6). ## Examples diff --git a/plugin/proxy/metrics.go b/plugin/proxy/metrics.go index 893c26d6b..d6dc0ea30 100644 --- a/plugin/proxy/metrics.go +++ b/plugin/proxy/metrics.go @@ -10,21 +10,40 @@ import ( // Metrics the proxy plugin exports. 
var ( + RequestCount = prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: plugin.Namespace, + Subsystem: "proxy", + Name: "request_count_total", + Help: "Counter of requests made per protocol, proxy protocol, family and upstream.", + }, []string{"proto", "proxy_proto", "family", "to"}) RequestDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{ Namespace: plugin.Namespace, Subsystem: "proxy", Name: "request_duration_milliseconds", - Buckets: append(prometheus.DefBuckets, []float64{50, 100, 200, 500, 1000, 2000, 3000, 4000, 5000, 10000}...), + Buckets: append(prometheus.DefBuckets, []float64{15, 20, 25, 30, 40, 50, 100, 200, 500, 1000, 2000, 3000, 4000, 5000, 10000}...), Help: "Histogram of the time (in milliseconds) each request took.", - }, []string{"proto", "proxy_proto", "from"}) + }, []string{"proto", "proxy_proto", "family", "to"}) ) // OnStartupMetrics sets up the metrics on startup. This is done for all proxy protocols. func OnStartupMetrics() error { metricsOnce.Do(func() { + prometheus.MustRegister(RequestCount) prometheus.MustRegister(RequestDuration) }) return nil } +// familyToString returns the string form of either 1, or 2. 
Returns +// an empty string if f is not a known family. +func familyToString(f int) string { + if f == 1 { + return "1" + } + if f == 2 { + return "2" + } + return "" +} + var metricsOnce sync.Once diff --git a/plugin/proxy/proxy.go b/plugin/proxy/proxy.go index b2c713a53..f0e6eadad 100644 --- a/plugin/proxy/proxy.go +++ b/plugin/proxy/proxy.go @@ -80,9 +80,6 @@ func (p Proxy) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) ( for time.Since(start) < tryDuration { host := upstream.Select() if host == nil { - - RequestDuration.WithLabelValues(state.Proto(), upstream.Exchanger().Protocol(), upstream.From()).Observe(float64(time.Since(start) / time.Millisecond)) - return dns.RcodeServerFailure, fmt.Errorf("%s: %s", errUnreachable, "no upstream host") } @@ -94,6 +91,8 @@ func (p Proxy) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) ( atomic.AddInt64(&host.Conns, 1) queryEpoch := msg.Epoch() + RequestCount.WithLabelValues(state.Proto(), upstream.Exchanger().Protocol(), familyToString(state.Family()), host.Name).Add(1) + reply, backendErr = upstream.Exchanger().Exchange(ctx, host.Name, state) respEpoch := msg.Epoch() @@ -108,7 +107,7 @@ func (p Proxy) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) ( if backendErr == nil { w.WriteMsg(reply) - RequestDuration.WithLabelValues(state.Proto(), upstream.Exchanger().Protocol(), upstream.From()).Observe(float64(time.Since(start) / time.Millisecond)) + RequestDuration.WithLabelValues(state.Proto(), upstream.Exchanger().Protocol(), familyToString(state.Family()), host.Name).Observe(float64(time.Since(start) / time.Millisecond)) return 0, taperr } @@ -139,8 +138,6 @@ func (p Proxy) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) ( }(host, timeout) } - RequestDuration.WithLabelValues(state.Proto(), upstream.Exchanger().Protocol(), upstream.From()).Observe(float64(time.Since(start) / time.Millisecond)) - return dns.RcodeServerFailure, fmt.Errorf("%s: %s", errUnreachable, 
backendErr) } }