Add more metrics (#176)

This commit is contained in:
Miek Gieben 2016-06-23 11:21:12 +01:00 committed by GitHub
parent 2fe42067fa
commit ef1a8604d1
5 changed files with 52 additions and 16 deletions

View file

@ -38,7 +38,7 @@ func (l Logger) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg)
answer.SetRcode(r, rcode) answer.SetRcode(r, rcode)
state.SizeAndDo(answer) state.SizeAndDo(answer)
metrics.Report(metrics.Dropped, state.Proto(), rc, answer.Len(), time.Now()) metrics.Report(state, metrics.Dropped, rc, answer.Len(), time.Now())
w.WriteMsg(answer) w.WriteMsg(answer)
} }
rcode = 0 rcode = 0

View file

@ -7,13 +7,16 @@ The following metrics are exported:
* coredns_dns_request_count_total * coredns_dns_request_count_total
* coredns_dns_request_duration_seconds * coredns_dns_request_duration_seconds
* coredns_dns_request_buffer_size_bytes
* coredns_dns_request_do_count_total
* coredns_dns_response_size_bytes * coredns_dns_response_size_bytes
* coredns_dns_response_rcode_count_total * coredns_dns_response_rcode_count_total
Each counter has a label `zone` which is the zonename used for the request/response, and a label Each counter has a label `zone` which is the zonename used for the request/response, and a label
`qtype` which holds the query type. The `dns_request_count_total` has an extra label `proto` which `qtype` which holds the query type. The `dns_request_count_total` has extra labels: `proto` which
holds the transport of the response ("udp" or "tcp"). The `response_rcode_count_total` has an extra holds the transport of the response ("udp" or "tcp") and `family` which holds the address family of the transport (1
label `rcode` which holds the rcode of the response. = IP (IP version 4), 2 = IP6 (IP version 6)).
The `response_rcode_count_total` has an extra label `rcode` which holds the rcode of the response.
If monitoring is enabled queries that do not enter the middleware chain are exported under the fake If monitoring is enabled queries that do not enter the middleware chain are exported under the fake
domain "dropped" (without a closing dot). domain "dropped" (without a closing dot).

View file

@ -11,8 +11,8 @@ import (
func (m Metrics) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (int, error) { func (m Metrics) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (int, error) {
state := middleware.State{W: w, Req: r} state := middleware.State{W: w, Req: r}
qname := state.Name()
net := state.Proto() qname := state.QName()
zone := middleware.Zones(m.ZoneNames).Matches(qname) zone := middleware.Zones(m.ZoneNames).Matches(qname)
if zone == "" { if zone == "" {
zone = "." zone = "."
@ -22,21 +22,33 @@ func (m Metrics) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg)
rw := middleware.NewResponseRecorder(w) rw := middleware.NewResponseRecorder(w)
status, err := m.Next.ServeDNS(ctx, rw, r) status, err := m.Next.ServeDNS(ctx, rw, r)
Report(zone, net, rw.Rcode(), rw.Size(), rw.Start()) Report(state, zone, rw.Rcode(), rw.Size(), rw.Start())
return status, err return status, err
} }
// Report is a plain reporting function that the server can use for REFUSED and other // Report is a plain reporting function that the server can use for REFUSED and other
// queries that are turned down because they don't match any middleware. // queries that are turned down because they don't match any middleware.
func Report(zone, net, rcode string, size int, start time.Time) { func Report(state middleware.State, zone, rcode string, size int, start time.Time) {
if requestCount == nil { if requestCount == nil {
// no metrics are enabled // no metrics are enabled
return return
} }
requestCount.WithLabelValues(zone, net).Inc() // Proto and Family
net := state.Proto()
fam := "1"
if state.Family() == 2 {
fam = "2"
}
requestCount.WithLabelValues(zone, net, fam).Inc()
requestDuration.WithLabelValues(zone).Observe(float64(time.Since(start) / time.Second)) requestDuration.WithLabelValues(zone).Observe(float64(time.Since(start) / time.Second))
requestSize.WithLabelValues(zone).Observe(float64(state.Size()))
if state.Do() {
requestDo.WithLabelValues(zone).Inc()
}
responseSize.WithLabelValues(zone).Observe(float64(size)) responseSize.WithLabelValues(zone).Observe(float64(size))
responseRcode.WithLabelValues(zone, rcode).Inc() responseRcode.WithLabelValues(zone, rcode).Inc()
} }

View file

@ -14,8 +14,11 @@ import (
var ( var (
requestCount *prometheus.CounterVec requestCount *prometheus.CounterVec
requestDuration *prometheus.HistogramVec requestDuration *prometheus.HistogramVec
responseSize *prometheus.HistogramVec requestSize *prometheus.HistogramVec
responseRcode *prometheus.CounterVec requestDo *prometheus.CounterVec
responseSize *prometheus.HistogramVec
responseRcode *prometheus.CounterVec
) )
// Metrics holds the prometheus configuration. The metrics' path is fixed to be /metrics // Metrics holds the prometheus configuration. The metrics' path is fixed to be /metrics
@ -42,6 +45,8 @@ func (m *Metrics) Start() error {
prometheus.MustRegister(requestCount) prometheus.MustRegister(requestCount)
prometheus.MustRegister(requestDuration) prometheus.MustRegister(requestDuration)
prometheus.MustRegister(requestSize)
prometheus.MustRegister(requestDo)
prometheus.MustRegister(responseSize) prometheus.MustRegister(responseSize)
prometheus.MustRegister(responseRcode) prometheus.MustRegister(responseRcode)
@ -66,8 +71,8 @@ func define() {
Namespace: middleware.Namespace, Namespace: middleware.Namespace,
Subsystem: subsystem, Subsystem: subsystem,
Name: "request_count_total", Name: "request_count_total",
Help: "Counter of DNS requests made per zone and protocol.", Help: "Counter of DNS requests made per zone, protocol and family.",
}, []string{"zone", "proto"}) }, []string{"zone", "proto", "family"})
requestDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{ requestDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: middleware.Namespace, Namespace: middleware.Namespace,
@ -77,6 +82,21 @@ func define() {
Help: "Histogram of the time (in seconds) each request took.", Help: "Histogram of the time (in seconds) each request took.",
}, []string{"zone"}) }, []string{"zone"})
requestSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: middleware.Namespace,
Subsystem: subsystem,
Name: "request_buffer_size_bytes",
Help: "Size of the EDNS0 UDP buffer in bytes (64K for TCP).",
Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3},
}, []string{"zone"})
requestDo = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: middleware.Namespace,
Subsystem: subsystem,
Name: "request_do_count_total",
Help: "Counter of DNS requests with DO bit set per zone.",
}, []string{"zone"})
responseSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{ responseSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: middleware.Namespace, Namespace: middleware.Namespace,
Subsystem: subsystem, Subsystem: subsystem,

View file

@ -329,8 +329,9 @@ func (s *Server) ServeDNS(w dns.ResponseWriter, r *dns.Msg) {
if m, err := middleware.Edns0Version(r); err != nil { // Wrong EDNS version, return at once. if m, err := middleware.Edns0Version(r); err != nil { // Wrong EDNS version, return at once.
rc := middleware.RcodeToString(dns.RcodeBadVers) rc := middleware.RcodeToString(dns.RcodeBadVers)
// TODO(miek): hardcoded "udp" here. state := middleware.State{W: w, Req: r}
metrics.Report(metrics.Dropped, "udp", rc, m.Len(), time.Now())
metrics.Report(state, metrics.Dropped, rc, m.Len(), time.Now())
w.WriteMsg(m) w.WriteMsg(m)
return return
} }
@ -393,7 +394,7 @@ func DefaultErrorFunc(w dns.ResponseWriter, r *dns.Msg, rcode int) {
answer.SetRcode(r, rcode) answer.SetRcode(r, rcode)
state.SizeAndDo(answer) state.SizeAndDo(answer)
metrics.Report(metrics.Dropped, state.Proto(), rc, answer.Len(), time.Now()) metrics.Report(state, metrics.Dropped, rc, answer.Len(), time.Now())
w.WriteMsg(answer) w.WriteMsg(answer)
} }