middleware/monitoring: add more monitoring (#182)

* Split response size into normal responses and AXFR|IXFR responses.
* Split out incoming sizes as well in normal and AXFR|IXFRs.
* Add meta qtype to the monitored qtypes.
* Make duration to be exported in milliseconds instead of seconds.
This commit is contained in:
Miek Gieben 2016-07-04 06:08:45 -04:00 committed by GitHub
parent d277f21d39
commit d563c62571
3 changed files with 66 additions and 30 deletions

View file

@ -5,23 +5,26 @@ This module enables prometheus metrics for CoreDNS. The default location for the
The following metrics are exported:
* coredns_dns_request_count_total
* coredns_dns_request_duration_seconds
* coredns_dns_request_size_bytes
* coredns_dns_request_do_count_total
* coredns_dns_request_type_count_total
* coredns_dns_response_size_bytes
* coredns_dns_response_rcode_count_total
* coredns_dns_request_count_total{zone, proto, family}
* coredns_dns_request_duration_milliseconds{zone}
* coredns_dns_request_size_bytes{zone, proto}
* coredns_dns_request_transfer_size_bytes{zone, proto}
* coredns_dns_request_do_count_total{zone}
* coredns_dns_request_type_count_total{zone, type}
* coredns_dns_response_size_bytes{zone, proto}
* coredns_dns_response_transfer_size_bytes{zone, proto}
* coredns_dns_response_rcode_count_total{zone, rcode}
Each counter has a label `zone` which is the zonename used for the request/response, and a label
`qtype` which holds the query type. The `dns_request_count_total` has extra labels: `proto` which
holds the transport of the response ("udp" or "tcp") and the address family of the transport (1
= IP (IP version 4), 2 = IP6 (IP version 6)). And `type_count_total` holds a per RR type counter, it
holds the most common ones (A, AAAA, MX, SOA, CNAME, PTR, TXT, NS, SRV, DS, DNSKEY, RRSIG, NSEC,
NSEC3) and "other" which lumps together all other types.
Each counter has a label `zone` which is the zonename used for the request/response.
The `response_rcode_count_total` has an extra label `rcode` which holds the rcode of the response.
The `*_size_bytes` counters also hold the protocol in the `proto` label ("udp" or "tcp").
Extra labels used are:
* `proto` which holds the transport of the response ("udp" or "tcp")
* The address family (`family`) of the transport (1 = IP (IP version 4), 2 = IP6 (IP version 6)).
* `type` which holds the query type; it holds the most common ones (A, AAAA, MX, SOA, CNAME, PTR, TXT,
NS, SRV, DS, DNSKEY, RRSIG, NSEC, NSEC3, IXFR, AXFR and ANY) and "other" which lumps together all
other types.
* The `response_rcode_count_total` has an extra label `rcode` which holds the rcode of the response.
If monitoring is enabled, queries that do not enter the middleware chain are exported under the fake
domain "dropped" (without a closing dot).

View file

@ -42,20 +42,29 @@ func Report(state middleware.State, zone, rcode string, size int, start time.Tim
fam = "2"
}
typ := state.QType()
requestCount.WithLabelValues(zone, net, fam).Inc()
requestDuration.WithLabelValues(zone).Observe(float64(time.Since(start) / time.Second))
requestSize.WithLabelValues(zone, net).Observe(float64(state.Size()))
requestDuration.WithLabelValues(zone).Observe(float64(time.Since(start) / time.Millisecond))
if state.Do() {
requestDo.WithLabelValues(zone).Inc()
}
typ := state.QType()
if _, known := monitorType[typ]; known {
requestType.WithLabelValues(zone, dns.Type(typ).String()).Inc()
} else {
requestType.WithLabelValues(zone, other).Inc()
}
if typ == dns.TypeIXFR || typ == dns.TypeAXFR {
responseTransferSize.WithLabelValues(zone, net).Observe(float64(size))
requestTransferSize.WithLabelValues(zone, net).Observe(float64(size))
} else {
responseSize.WithLabelValues(zone, net).Observe(float64(size))
requestSize.WithLabelValues(zone, net).Observe(float64(state.Size()))
}
responseRcode.WithLabelValues(zone, rcode).Inc()
}
@ -74,6 +83,10 @@ var monitorType = map[uint16]bool{
dns.TypeSOA: true,
dns.TypeSRV: true,
dns.TypeTXT: true,
// Meta Qtypes
dns.TypeIXFR: true,
dns.TypeAXFR: true,
dns.TypeANY: true,
}
const other = "other"

View file

@ -15,10 +15,12 @@ var (
requestCount *prometheus.CounterVec
requestDuration *prometheus.HistogramVec
requestSize *prometheus.HistogramVec
requestTransferSize *prometheus.HistogramVec
requestDo *prometheus.CounterVec
requestType *prometheus.CounterVec
responseSize *prometheus.HistogramVec
responseTransferSize *prometheus.HistogramVec
responseRcode *prometheus.CounterVec
)
@ -47,10 +49,12 @@ func (m *Metrics) Start() error {
prometheus.MustRegister(requestCount)
prometheus.MustRegister(requestDuration)
prometheus.MustRegister(requestSize)
prometheus.MustRegister(requestTransferSize)
prometheus.MustRegister(requestDo)
prometheus.MustRegister(requestType)
prometheus.MustRegister(responseSize)
prometheus.MustRegister(responseTransferSize)
prometheus.MustRegister(responseRcode)
m.mux.Handle(path, prometheus.Handler())
@ -80,9 +84,9 @@ func define() {
requestDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: middleware.Namespace,
Subsystem: subsystem,
Name: "request_duration_seconds",
Buckets: append([]float64{.0001, .0005, .001, .0025}, prometheus.DefBuckets...),
Help: "Histogram of the time (in seconds) each request took.",
Name: "request_duration_milliseconds",
Buckets: append(prometheus.DefBuckets, []float64{50, 100, 200, 500, 1000, 2000, 3000, 4000, 5000}...),
Help: "Histogram of the time (in milliseconds) each request took.",
}, []string{"zone"})
requestSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{
@ -93,6 +97,14 @@ func define() {
Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3},
}, []string{"zone", "proto"})
requestTransferSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: middleware.Namespace,
Subsystem: subsystem,
Name: "request_transfer_size_bytes",
Help: "Size of the incoming zone transfer in bytes.",
Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3},
}, []string{"zone", "proto"})
requestDo = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: middleware.Namespace,
Subsystem: subsystem,
@ -111,7 +123,15 @@ func define() {
Namespace: middleware.Namespace,
Subsystem: subsystem,
Name: "response_size_bytes",
Help: "Size of the returns response in bytes.",
Help: "Size of the returned response in bytes.",
Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3},
}, []string{"zone", "proto"})
responseTransferSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: middleware.Namespace,
Subsystem: subsystem,
Name: "response_transfer_size_bytes",
Help: "Size of the returned zone transfer in bytes.",
Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3},
}, []string{"zone", "proto"})