From 54964653d10aa8b2e34ac21e007c701ceb456f7d Mon Sep 17 00:00:00 2001 From: Miek Gieben Date: Fri, 28 Oct 2016 12:54:49 +0100 Subject: [PATCH] middleware/proxy: add request duration monitoring (#362) Add a separate request duration metric specifically for proxying requests upstream. Fixes #259 --- middleware/proxy/README.md | 9 +++++++++ middleware/proxy/metrics.go | 32 ++++++++++++++++++++++++++++++++ middleware/proxy/proxy.go | 9 +++++++++ middleware/proxy/setup.go | 2 ++ test/cache_test.go | 2 -- 5 files changed, 52 insertions(+), 2 deletions(-) create mode 100644 middleware/proxy/metrics.go diff --git a/middleware/proxy/README.md b/middleware/proxy/README.md index ab530a0a9..83ca573d1 100644 --- a/middleware/proxy/README.md +++ b/middleware/proxy/README.md @@ -48,6 +48,15 @@ There are three load-balancing policies available: All polices implement randomly spraying packets to backend hosts when *no healthy* hosts are available. This is to preeempt the case where the healthchecking (as a mechanism) fails. +## Metrics + +If monitoring is enabled (via the *prometheus* directive) then the following metric is exported: + +* coredns_proxy_request_duration_milliseconds{zone} + +This has some overlap with `coredns_dns_request_count_total{zone, proto, family}`, but allows for +specifics on upstream query resolving. See the *prometheus* documentation for more details. + ## Examples Proxy all requests within example.org. to a backend system: diff --git a/middleware/proxy/metrics.go b/middleware/proxy/metrics.go new file mode 100644 index 000000000..f3112dd76 --- /dev/null +++ b/middleware/proxy/metrics.go @@ -0,0 +1,32 @@ +package proxy + +import ( + "sync" + + "github.com/miekg/coredns/middleware" + + "github.com/prometheus/client_golang/prometheus" +) + +// Metrics the proxy middleware exports.
+var ( + RequestDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: middleware.Namespace, + Subsystem: subsystem, + Name: "request_duration_milliseconds", + Buckets: append(prometheus.DefBuckets, []float64{50, 100, 200, 500, 1000, 2000, 3000, 4000, 5000, 10000}...), + Help: "Histogram of the time (in milliseconds) each request took.", + }, []string{"zone"}) +) + +// OnStartup registers RequestDuration via prometheus.MustRegister; metricsOnce guards the call so it runs at most once. It always returns nil. +func OnStartup() error { + metricsOnce.Do(func() { + prometheus.MustRegister(RequestDuration) + }) + return nil +} + +var metricsOnce sync.Once + +const subsystem = "proxy" diff --git a/middleware/proxy/proxy.go b/middleware/proxy/proxy.go index aa58f4dbe..666430c9f 100644 --- a/middleware/proxy/proxy.go +++ b/middleware/proxy/proxy.go @@ -74,6 +74,9 @@ func (p Proxy) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) ( for time.Now().Sub(start) < tryDuration { host := upstream.Select() if host == nil { + + RequestDuration.WithLabelValues(upstream.From()).Observe(float64(time.Since(start) / time.Millisecond)) + return dns.RcodeServerFailure, errUnreachable } @@ -85,6 +88,9 @@ func (p Proxy) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) ( if backendErr == nil { w.WriteMsg(reply) + + RequestDuration.WithLabelValues(upstream.From()).Observe(float64(time.Since(start) / time.Millisecond)) + return 0, nil } timeout := host.FailTimeout @@ -97,6 +103,9 @@ func (p Proxy) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) ( atomic.AddInt32(&host.Fails, -1) }(host, timeout) } + + RequestDuration.WithLabelValues(upstream.From()).Observe(float64(time.Since(start) / time.Millisecond)) + return dns.RcodeServerFailure, errUnreachable } return p.Next.ServeDNS(ctx, w, r) diff --git a/middleware/proxy/setup.go b/middleware/proxy/setup.go index 26f01c11e..4946706ce 100644 --- a/middleware/proxy/setup.go +++ b/middleware/proxy/setup.go @@ -23,5 +23,7 @@ func setup(c *caddy.Controller) error { return Proxy{Next: 
next, Client: newClient(), Upstreams: upstreams} }) + c.OnStartup(OnStartup) + return nil } diff --git a/test/cache_test.go b/test/cache_test.go index ba24962ec..5156e4d13 100644 --- a/test/cache_test.go +++ b/test/cache_test.go @@ -13,8 +13,6 @@ import ( "github.com/miekg/dns" ) -// This tests uses the exampleOrg zone as defined in proxy_test.go - func TestLookupCache(t *testing.T) { // Start auth. CoreDNS holding the auth zone. name, rm, err := test.TempFile(".", exampleOrg)