diff --git a/middleware/metrics/README.md b/middleware/metrics/README.md index e9ea63f43..825632b65 100644 --- a/middleware/metrics/README.md +++ b/middleware/metrics/README.md @@ -5,23 +5,26 @@ This module enables prometheus metrics for CoreDNS. The default location for the The following metrics are exported: -* coredns_dns_request_count_total -* coredns_dns_request_duration_seconds -* coredns_dns_request_size_bytes -* coredns_dns_request_do_count_total -* coredns_dns_request_type_count_total -* coredns_dns_response_size_bytes -* coredns_dns_response_rcode_count_total +* coredns_dns_request_count_total{zone, proto, family} +* coredns_dns_request_duration_milliseconds{zone} +* coredns_dns_request_size_bytes{zone, proto} +* coredns_dns_request_transfer_size_bytes{zone, proto} +* coredns_dns_request_do_count_total{zone} +* coredns_dns_request_type_count_total{zone, type} +* coredns_dns_response_size_bytes{zone, proto} +* coredns_dns_response_transfer_size_bytes{zone, proto} +* coredns_dns_response_rcode_count_total{zone, rcode} -Each counter has a label `zone` which is the zonename used for the request/response. and a label -`qtype` which old the query type. The `dns_request_count_total` has extra labels: `proto` which -holds the transport of the response ("udp" or "tcp") and the address family of the transport (1 -= IP (IP version 4), 2 = IP6 (IP version 6)). And `type_count_total` hold a per RR type counter, it -holds the most common ones (A, AAAA, MX, SOA, CNAME, PTR, TXT, NS, SRV, DS, DNSKEY, RRSIG, NSEC, -NSEC3) and "other" which lumps together all other types. +Each counter has a label `zone` which is the zonename used for the request/response. -The `response_rcode_count_total` has an extra label `rcode` which holds the rcode of the response. -The `*_size_bytes` counters also hold the protocol in the `proto` label ("udp" or "tcp"). 
+Extra labels used are: + +* `proto` which holds the transport of the response ("udp" or "tcp") +* The address family (`family`) of the transport (1 = IP (IP version 4), 2 = IP6 (IP version 6)). +* `type` which holds the query type, it holds the most common ones (A, AAAA, MX, SOA, CNAME, PTR, TXT, + NS, SRV, DS, DNSKEY, RRSIG, NSEC, NSEC3, IXFR, AXFR and ANY) and "other" which lumps together all + other types. +* The `response_rcode_count_total` has an extra label `rcode` which holds the rcode of the response. If monitoring is enabled queries that do not enter the middleware chain are exported under the fake domain "dropped" (without a closing dot). diff --git a/middleware/metrics/handler.go b/middleware/metrics/handler.go index 1a0acbe14..93d2654ab 100644 --- a/middleware/metrics/handler.go +++ b/middleware/metrics/handler.go @@ -42,20 +42,29 @@ func Report(state middleware.State, zone, rcode string, size int, start time.Tim fam = "2" } + typ := state.QType() + requestCount.WithLabelValues(zone, net, fam).Inc() - requestDuration.WithLabelValues(zone).Observe(float64(time.Since(start) / time.Second)) - requestSize.WithLabelValues(zone, net).Observe(float64(state.Size())) + requestDuration.WithLabelValues(zone).Observe(float64(time.Since(start) / time.Millisecond)) + if state.Do() { requestDo.WithLabelValues(zone).Inc() } - typ := state.QType() + if _, known := monitorType[typ]; known { requestType.WithLabelValues(zone, dns.Type(typ).String()).Inc() } else { requestType.WithLabelValues(zone, other).Inc() } - responseSize.WithLabelValues(zone, net).Observe(float64(size)) + if typ == dns.TypeIXFR || typ == dns.TypeAXFR { + responseTransferSize.WithLabelValues(zone, net).Observe(float64(size)) + requestTransferSize.WithLabelValues(zone, net).Observe(float64(size)) + } else { + responseSize.WithLabelValues(zone, net).Observe(float64(size)) + requestSize.WithLabelValues(zone, net).Observe(float64(state.Size())) + } + responseRcode.WithLabelValues(zone, rcode).Inc() } @@ 
-74,6 +83,10 @@ var monitorType = map[uint16]bool{ dns.TypeSOA: true, dns.TypeSRV: true, dns.TypeTXT: true, + // Meta Qtypes + dns.TypeIXFR: true, + dns.TypeAXFR: true, + dns.TypeANY: true, } const other = "other" diff --git a/middleware/metrics/metrics.go b/middleware/metrics/metrics.go index 50aeaa52c..1c7db29d2 100644 --- a/middleware/metrics/metrics.go +++ b/middleware/metrics/metrics.go @@ -12,14 +12,16 @@ import ( ) var ( - requestCount *prometheus.CounterVec - requestDuration *prometheus.HistogramVec - requestSize *prometheus.HistogramVec - requestDo *prometheus.CounterVec - requestType *prometheus.CounterVec + requestCount *prometheus.CounterVec + requestDuration *prometheus.HistogramVec + requestSize *prometheus.HistogramVec + requestTransferSize *prometheus.HistogramVec + requestDo *prometheus.CounterVec + requestType *prometheus.CounterVec - responseSize *prometheus.HistogramVec - responseRcode *prometheus.CounterVec + responseSize *prometheus.HistogramVec + responseTransferSize *prometheus.HistogramVec + responseRcode *prometheus.CounterVec ) // Metrics holds the prometheus configuration. 
The metrics' path is fixed to be /metrics @@ -47,10 +49,12 @@ func (m *Metrics) Start() error { prometheus.MustRegister(requestCount) prometheus.MustRegister(requestDuration) prometheus.MustRegister(requestSize) + prometheus.MustRegister(requestTransferSize) prometheus.MustRegister(requestDo) prometheus.MustRegister(requestType) prometheus.MustRegister(responseSize) + prometheus.MustRegister(responseTransferSize) prometheus.MustRegister(responseRcode) m.mux.Handle(path, prometheus.Handler()) @@ -80,9 +84,9 @@ func define() { requestDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{ Namespace: middleware.Namespace, Subsystem: subsystem, - Name: "request_duration_seconds", - Buckets: append([]float64{.0001, .0005, .001, .0025}, prometheus.DefBuckets...), - Help: "Histogram of the time (in seconds) each request took.", + Name: "request_duration_milliseconds", + Buckets: append(prometheus.DefBuckets, []float64{50, 100, 200, 500, 1000, 2000, 3000, 4000, 5000}...), + Help: "Histogram of the time (in milliseconds) each request took.", }, []string{"zone"}) requestSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{ @@ -93,6 +97,14 @@ func define() { Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3}, }, []string{"zone", "proto"}) + requestTransferSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: middleware.Namespace, + Subsystem: subsystem, + Name: "request_transfer_size_bytes", + Help: "Size of the incoming zone transfer in bytes.", + Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3}, + }, []string{"zone", "proto"}) + requestDo = prometheus.NewCounterVec(prometheus.CounterOpts{ Namespace: middleware.Namespace, Subsystem: subsystem, @@ -111,7 +123,15 @@ func define() { Namespace: middleware.Namespace, Subsystem: subsystem, Name: "response_size_bytes", - Help: "Size of the returns response in bytes.", + Help: "Size of the returned response in 
bytes.", + Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3}, + }, []string{"zone", "proto"}) + + responseTransferSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: middleware.Namespace, + Subsystem: subsystem, + Name: "response_transfer_size_bytes", + Help: "Size of the returned zone transfer in bytes.", Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3}, }, []string{"zone", "proto"})