middleware/monitoring: add more monitoring (#182)

* Split response size into normal responses and AXFR|IXFR responses.
* Split out incoming sizes as well in normal and AXFR|IXFRs.
* Add meta qtype to the monitored qtypes.
* Make duration to be exported in milliseconds instead of seconds.
This commit is contained in:
Miek Gieben 2016-07-04 06:08:45 -04:00 committed by GitHub
parent d277f21d39
commit d563c62571
3 changed files with 66 additions and 30 deletions

View file

@ -5,23 +5,26 @@ This module enables prometheus metrics for CoreDNS. The default location for the
The following metrics are exported: The following metrics are exported:
* coredns_dns_request_count_total * coredns_dns_request_count_total{zone, proto, family}
* coredns_dns_request_duration_seconds * coredns_dns_request_duration_milliseconds{zone}
* coredns_dns_request_size_bytes * coredns_dns_request_size_bytes{zone, proto}
* coredns_dns_request_do_count_total * coredns_dns_request_transfer_size_bytes{zone, proto}
* coredns_dns_request_type_count_total * coredns_dns_request_do_count_total{zone}
* coredns_dns_response_size_bytes * coredns_dns_request_type_count_total{zone, type}
* coredns_dns_response_rcode_count_total * coredns_dns_response_size_bytes{zone, proto}
* coredns_dns_response_transfer_size_bytes{zone, proto}
* coredns_dns_response_rcode_count_total{zone, rcode}
Each counter has a label `zone` which is the zonename used for the request/response. and a label Each counter has a label `zone` which is the zonename used for the request/response.
`qtype` which old the query type. The `dns_request_count_total` has extra labels: `proto` which
holds the transport of the response ("udp" or "tcp") and the address family of the transport (1
= IP (IP version 4), 2 = IP6 (IP version 6)). And `type_count_total` hold a per RR type counter, it
holds the most common ones (A, AAAA, MX, SOA, CNAME, PTR, TXT, NS, SRV, DS, DNSKEY, RRSIG, NSEC,
NSEC3) and "other" which lumps together all other types.
The `response_rcode_count_total` has an extra label `rcode` which holds the rcode of the response. Extra labels used are:
The `*_size_bytes` counters also hold the protocol in the `proto` label ("udp" or "tcp").
* `proto` which holds the transport of the response ("udp" or "tcp")
* The address family (`family`) of the transport (1 = IP (IP version 4), 2 = IP6 (IP version 6)).
* `type` which holds the query type; it covers the most common ones (A, AAAA, MX, SOA, CNAME, PTR, TXT,
NS, SRV, DS, DNSKEY, RRSIG, NSEC, NSEC3, IXFR, AXFR and ANY) and "other" which lumps together all
other types.
* The `response_rcode_count_total` has an extra label `rcode` which holds the rcode of the response.
If monitoring is enabled queries that do not enter the middleware chain are exported under the fake If monitoring is enabled queries that do not enter the middleware chain are exported under the fake
domain "dropped" (without a closing dot). domain "dropped" (without a closing dot).

View file

@ -42,20 +42,29 @@ func Report(state middleware.State, zone, rcode string, size int, start time.Tim
fam = "2" fam = "2"
} }
typ := state.QType()
requestCount.WithLabelValues(zone, net, fam).Inc() requestCount.WithLabelValues(zone, net, fam).Inc()
requestDuration.WithLabelValues(zone).Observe(float64(time.Since(start) / time.Second)) requestDuration.WithLabelValues(zone).Observe(float64(time.Since(start) / time.Millisecond))
requestSize.WithLabelValues(zone, net).Observe(float64(state.Size()))
if state.Do() { if state.Do() {
requestDo.WithLabelValues(zone).Inc() requestDo.WithLabelValues(zone).Inc()
} }
typ := state.QType()
if _, known := monitorType[typ]; known { if _, known := monitorType[typ]; known {
requestType.WithLabelValues(zone, dns.Type(typ).String()).Inc() requestType.WithLabelValues(zone, dns.Type(typ).String()).Inc()
} else { } else {
requestType.WithLabelValues(zone, other).Inc() requestType.WithLabelValues(zone, other).Inc()
} }
responseSize.WithLabelValues(zone, net).Observe(float64(size)) if typ == dns.TypeIXFR || typ == dns.TypeAXFR {
responseTransferSize.WithLabelValues(zone, net).Observe(float64(size))
requestTransferSize.WithLabelValues(zone, net).Observe(float64(size))
} else {
responseSize.WithLabelValues(zone, net).Observe(float64(size))
requestSize.WithLabelValues(zone, net).Observe(float64(state.Size()))
}
responseRcode.WithLabelValues(zone, rcode).Inc() responseRcode.WithLabelValues(zone, rcode).Inc()
} }
@ -74,6 +83,10 @@ var monitorType = map[uint16]bool{
dns.TypeSOA: true, dns.TypeSOA: true,
dns.TypeSRV: true, dns.TypeSRV: true,
dns.TypeTXT: true, dns.TypeTXT: true,
// Meta Qtypes
dns.TypeIXFR: true,
dns.TypeAXFR: true,
dns.TypeANY: true,
} }
const other = "other" const other = "other"

View file

@ -12,14 +12,16 @@ import (
) )
var ( var (
requestCount *prometheus.CounterVec requestCount *prometheus.CounterVec
requestDuration *prometheus.HistogramVec requestDuration *prometheus.HistogramVec
requestSize *prometheus.HistogramVec requestSize *prometheus.HistogramVec
requestDo *prometheus.CounterVec requestTransferSize *prometheus.HistogramVec
requestType *prometheus.CounterVec requestDo *prometheus.CounterVec
requestType *prometheus.CounterVec
responseSize *prometheus.HistogramVec responseSize *prometheus.HistogramVec
responseRcode *prometheus.CounterVec responseTransferSize *prometheus.HistogramVec
responseRcode *prometheus.CounterVec
) )
// Metrics holds the prometheus configuration. The metrics' path is fixed to be /metrics // Metrics holds the prometheus configuration. The metrics' path is fixed to be /metrics
@ -47,10 +49,12 @@ func (m *Metrics) Start() error {
prometheus.MustRegister(requestCount) prometheus.MustRegister(requestCount)
prometheus.MustRegister(requestDuration) prometheus.MustRegister(requestDuration)
prometheus.MustRegister(requestSize) prometheus.MustRegister(requestSize)
prometheus.MustRegister(requestTransferSize)
prometheus.MustRegister(requestDo) prometheus.MustRegister(requestDo)
prometheus.MustRegister(requestType) prometheus.MustRegister(requestType)
prometheus.MustRegister(responseSize) prometheus.MustRegister(responseSize)
prometheus.MustRegister(responseTransferSize)
prometheus.MustRegister(responseRcode) prometheus.MustRegister(responseRcode)
m.mux.Handle(path, prometheus.Handler()) m.mux.Handle(path, prometheus.Handler())
@ -80,9 +84,9 @@ func define() {
requestDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{ requestDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: middleware.Namespace, Namespace: middleware.Namespace,
Subsystem: subsystem, Subsystem: subsystem,
Name: "request_duration_seconds", Name: "request_duration_milliseconds",
Buckets: append([]float64{.0001, .0005, .001, .0025}, prometheus.DefBuckets...), Buckets: append(prometheus.DefBuckets, []float64{50, 100, 200, 500, 1000, 2000, 3000, 4000, 5000}...),
Help: "Histogram of the time (in seconds) each request took.", Help: "Histogram of the time (in milliseconds) each request took.",
}, []string{"zone"}) }, []string{"zone"})
requestSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{ requestSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{
@ -93,6 +97,14 @@ func define() {
Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3}, Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3},
}, []string{"zone", "proto"}) }, []string{"zone", "proto"})
requestTransferSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: middleware.Namespace,
Subsystem: subsystem,
Name: "request_transfer_size_bytes",
Help: "Size of the incoming zone transfer in bytes.",
Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3},
}, []string{"zone", "proto"})
requestDo = prometheus.NewCounterVec(prometheus.CounterOpts{ requestDo = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: middleware.Namespace, Namespace: middleware.Namespace,
Subsystem: subsystem, Subsystem: subsystem,
@ -111,7 +123,15 @@ func define() {
Namespace: middleware.Namespace, Namespace: middleware.Namespace,
Subsystem: subsystem, Subsystem: subsystem,
Name: "response_size_bytes", Name: "response_size_bytes",
Help: "Size of the returns response in bytes.", Help: "Size of the returned response in bytes.",
Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3},
}, []string{"zone", "proto"})
responseTransferSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: middleware.Namespace,
Subsystem: subsystem,
Name: "response_transfer_size_bytes",
Help: "Size of the returned zone transfer in bytes.",
Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3}, Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3},
}, []string{"zone", "proto"}) }, []string{"zone", "proto"})