From ef1a8604d13b08beea36152c80ac5e061ea05b04 Mon Sep 17 00:00:00 2001 From: Miek Gieben Date: Thu, 23 Jun 2016 11:21:12 +0100 Subject: [PATCH] Add more metrics (#176) --- middleware/log/log.go | 2 +- middleware/metrics/README.md | 9 ++++++--- middleware/metrics/handler.go | 22 +++++++++++++++++----- middleware/metrics/metrics.go | 28 ++++++++++++++++++++++++---- server/server.go | 7 ++++--- 5 files changed, 52 insertions(+), 16 deletions(-) diff --git a/middleware/log/log.go b/middleware/log/log.go index d949ea7b0..32d40632a 100644 --- a/middleware/log/log.go +++ b/middleware/log/log.go @@ -38,7 +38,7 @@ func (l Logger) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) answer.SetRcode(r, rcode) state.SizeAndDo(answer) - metrics.Report(metrics.Dropped, state.Proto(), rc, answer.Len(), time.Now()) + metrics.Report(state, metrics.Dropped, rc, answer.Len(), time.Now()) w.WriteMsg(answer) } rcode = 0 diff --git a/middleware/metrics/README.md b/middleware/metrics/README.md index 354ab921e..973d257e1 100644 --- a/middleware/metrics/README.md +++ b/middleware/metrics/README.md @@ -7,13 +7,16 @@ The following metrics are exported: * coredns_dns_request_count_total * coredns_dns_request_duration_seconds +* coredns_dns_request_buffer_size_bytes +* coredns_dns_request_do_count_total * coredns_dns_response_size_bytes * coredns_dns_response_rcode_count_total Each counter has a label `zone` which is the zonename used for the request/response. and a label -`qtype` which old the query type. The `dns_request_count_total` has an extra label `proto` which -holds the transport of the response ("udp" or "tcp"). The `response_rcode_count_total` has an extra -label `rcode` which holds the rcode of the response. +`qtype` which holds the query type. The `dns_request_count_total` has extra labels: `proto` which +holds the transport of the response ("udp" or "tcp") and `family` which holds the address family of the transport (1 += IP (IP version 4), 2 = IP6 (IP version 6)). 
+The `response_rcode_count_total` has an extra label `rcode` which holds the rcode of the response. If monitoring is enabled queries that do not enter the middleware chain are exported under the fake domain "dropped" (without a closing dot). diff --git a/middleware/metrics/handler.go b/middleware/metrics/handler.go index 1a61f3e11..168f0d67a 100644 --- a/middleware/metrics/handler.go +++ b/middleware/metrics/handler.go @@ -11,8 +11,8 @@ import ( func (m Metrics) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (int, error) { state := middleware.State{W: w, Req: r} - qname := state.Name() - net := state.Proto() + + qname := state.QName() zone := middleware.Zones(m.ZoneNames).Matches(qname) if zone == "" { zone = "." @@ -22,21 +22,33 @@ func (m Metrics) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) rw := middleware.NewResponseRecorder(w) status, err := m.Next.ServeDNS(ctx, rw, r) - Report(zone, net, rw.Rcode(), rw.Size(), rw.Start()) + Report(state, zone, rw.Rcode(), rw.Size(), rw.Start()) return status, err } // Report is a plain reporting function that the server can use for REFUSED and other // queries that are turned down because they don't match any middleware. 
-func Report(zone, net, rcode string, size int, start time.Time) { +func Report(state middleware.State, zone, rcode string, size int, start time.Time) { if requestCount == nil { // no metrics are enabled return } - requestCount.WithLabelValues(zone, net).Inc() + // Proto and Family + net := state.Proto() + fam := "1" + if state.Family() == 2 { + fam = "2" + } + + requestCount.WithLabelValues(zone, net, fam).Inc() - requestDuration.WithLabelValues(zone).Observe(float64(time.Since(start) / time.Second)) + requestDuration.WithLabelValues(zone).Observe(time.Since(start).Seconds()) // Seconds keeps the fractional part; Duration / time.Second truncated to whole seconds + requestSize.WithLabelValues(zone).Observe(float64(state.Size())) + if state.Do() { + requestDo.WithLabelValues(zone).Inc() + } + responseSize.WithLabelValues(zone).Observe(float64(size)) responseRcode.WithLabelValues(zone, rcode).Inc() } diff --git a/middleware/metrics/metrics.go b/middleware/metrics/metrics.go index 42f1439eb..21d6938b5 100644 --- a/middleware/metrics/metrics.go +++ b/middleware/metrics/metrics.go @@ -14,8 +14,11 @@ import ( var ( requestCount *prometheus.CounterVec requestDuration *prometheus.HistogramVec - responseSize *prometheus.HistogramVec - responseRcode *prometheus.CounterVec + requestSize *prometheus.HistogramVec + requestDo *prometheus.CounterVec + + responseSize *prometheus.HistogramVec + responseRcode *prometheus.CounterVec ) // Metrics holds the prometheus configuration. 
The metrics' path is fixed to be /metrics @@ -42,6 +45,8 @@ func (m *Metrics) Start() error { prometheus.MustRegister(requestCount) prometheus.MustRegister(requestDuration) + prometheus.MustRegister(requestSize) + prometheus.MustRegister(requestDo) prometheus.MustRegister(responseSize) prometheus.MustRegister(responseRcode) @@ -66,8 +71,8 @@ func define() { Namespace: middleware.Namespace, Subsystem: subsystem, Name: "request_count_total", - Help: "Counter of DNS requests made per zone and protocol.", - }, []string{"zone", "proto"}) + Help: "Counter of DNS requests made per zone, protocol and family.", + }, []string{"zone", "proto", "family"}) requestDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{ Namespace: middleware.Namespace, @@ -77,6 +82,21 @@ func define() { Help: "Histogram of the time (in seconds) each request took.", }, []string{"zone"}) + requestSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: middleware.Namespace, + Subsystem: subsystem, + Name: "request_buffer_size_bytes", + Help: "Size of the EDNS0 UDP buffer in bytes (64K for TCP).", + Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3}, + }, []string{"zone"}) + + requestDo = prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: middleware.Namespace, + Subsystem: subsystem, + Name: "request_do_count_total", + Help: "Counter of DNS requests with DO bit set per zone.", + }, []string{"zone"}) + responseSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{ Namespace: middleware.Namespace, Subsystem: subsystem, diff --git a/server/server.go b/server/server.go index af5dd35e1..ab9ce884f 100644 --- a/server/server.go +++ b/server/server.go @@ -329,8 +329,9 @@ func (s *Server) ServeDNS(w dns.ResponseWriter, r *dns.Msg) { if m, err := middleware.Edns0Version(r); err != nil { // Wrong EDNS version, return at once. rc := middleware.RcodeToString(dns.RcodeBadVers) - // TODO(miek): hardcoded "udp" here. 
- metrics.Report(metrics.Dropped, "udp", rc, m.Len(), time.Now()) + state := middleware.State{W: w, Req: r} + + metrics.Report(state, metrics.Dropped, rc, m.Len(), time.Now()) w.WriteMsg(m) return } @@ -393,7 +394,7 @@ func DefaultErrorFunc(w dns.ResponseWriter, r *dns.Msg, rcode int) { answer.SetRcode(r, rcode) state.SizeAndDo(answer) - metrics.Report(metrics.Dropped, state.Proto(), rc, answer.Len(), time.Now()) + metrics.Report(state, metrics.Dropped, rc, answer.Len(), time.Now()) w.WriteMsg(answer) }