middleware/monitoring: add more monitoring (#182)
* Split response size into normal responses and AXFR|IXFR responses. * Split out incoming sizes as well in normal and AXFR|IXFRs. * Add meta qtype to the monitored qtypes. * Make duration to be exported in milliseconds instead of seconds.
This commit is contained in:
parent
d277f21d39
commit
d563c62571
3 changed files with 66 additions and 30 deletions
|
@ -5,23 +5,26 @@ This module enables prometheus metrics for CoreDNS. The default location for the
|
|||
|
||||
The following metrics are exported:
|
||||
|
||||
* coredns_dns_request_count_total
|
||||
* coredns_dns_request_duration_seconds
|
||||
* coredns_dns_request_size_bytes
|
||||
* coredns_dns_request_do_count_total
|
||||
* coredns_dns_request_type_count_total
|
||||
* coredns_dns_response_size_bytes
|
||||
* coredns_dns_response_rcode_count_total
|
||||
* coredns_dns_request_count_total{zone, proto, family}
|
||||
* coredns_dns_request_duration_milliseconds{zone}
|
||||
* coredns_dns_request_size_bytes{zone, proto}
|
||||
* coredns_dns_request_transfer_size_bytes{zone, proto}
|
||||
* coredns_dns_request_do_count_total{zone}
|
||||
* coredns_dns_request_type_count_total{zone, type}
|
||||
* coredns_dns_response_size_bytes{zone, proto}
|
||||
* coredns_dns_response_transfer_size_bytes{zone, proto}
|
||||
* coredns_dns_response_rcode_count_total{zone, rcode}
|
||||
|
||||
Each counter has a label `zone` which is the zonename used for the request/response, and a label
|
||||
`qtype` which holds the query type. The `dns_request_count_total` has extra labels: `proto` which
|
||||
holds the transport of the response ("udp" or "tcp") and the address family of the transport (1
|
||||
= IP (IP version 4), 2 = IP6 (IP version 6)). And `type_count_total` holds a per RR type counter; it
|
||||
holds the most common ones (A, AAAA, MX, SOA, CNAME, PTR, TXT, NS, SRV, DS, DNSKEY, RRSIG, NSEC,
|
||||
NSEC3) and "other" which lumps together all other types.
|
||||
Each counter has a label `zone` which is the zonename used for the request/response.
|
||||
|
||||
The `response_rcode_count_total` has an extra label `rcode` which holds the rcode of the response.
|
||||
The `*_size_bytes` counters also hold the protocol in the `proto` label ("udp" or "tcp").
|
||||
Extra labels used are:
|
||||
|
||||
* `proto` which holds the transport of the response ("udp" or "tcp")
|
||||
* The address family (`family`) of the transport (1 = IP (IP version 4), 2 = IP6 (IP version 6)).
|
||||
* `type` which holds the query type; it covers the most common ones (A, AAAA, MX, SOA, CNAME, PTR, TXT,
|
||||
NS, SRV, DS, DNSKEY, RRSIG, NSEC, NSEC3, IXFR, AXFR and ANY) and "other" which lumps together all
|
||||
other types.
|
||||
* The `response_rcode_count_total` has an extra label `rcode` which holds the rcode of the response.
|
||||
|
||||
If monitoring is enabled, queries that do not enter the middleware chain are exported under the fake
|
||||
domain "dropped" (without a closing dot).
|
||||
|
|
|
@ -42,20 +42,29 @@ func Report(state middleware.State, zone, rcode string, size int, start time.Tim
|
|||
fam = "2"
|
||||
}
|
||||
|
||||
typ := state.QType()
|
||||
|
||||
requestCount.WithLabelValues(zone, net, fam).Inc()
|
||||
requestDuration.WithLabelValues(zone).Observe(float64(time.Since(start) / time.Second))
|
||||
requestSize.WithLabelValues(zone, net).Observe(float64(state.Size()))
|
||||
requestDuration.WithLabelValues(zone).Observe(float64(time.Since(start) / time.Millisecond))
|
||||
|
||||
if state.Do() {
|
||||
requestDo.WithLabelValues(zone).Inc()
|
||||
}
|
||||
typ := state.QType()
|
||||
|
||||
if _, known := monitorType[typ]; known {
|
||||
requestType.WithLabelValues(zone, dns.Type(typ).String()).Inc()
|
||||
} else {
|
||||
requestType.WithLabelValues(zone, other).Inc()
|
||||
}
|
||||
|
||||
if typ == dns.TypeIXFR || typ == dns.TypeAXFR {
|
||||
responseTransferSize.WithLabelValues(zone, net).Observe(float64(size))
|
||||
requestTransferSize.WithLabelValues(zone, net).Observe(float64(size))
|
||||
} else {
|
||||
responseSize.WithLabelValues(zone, net).Observe(float64(size))
|
||||
requestSize.WithLabelValues(zone, net).Observe(float64(state.Size()))
|
||||
}
|
||||
|
||||
responseRcode.WithLabelValues(zone, rcode).Inc()
|
||||
}
|
||||
|
||||
|
@ -74,6 +83,10 @@ var monitorType = map[uint16]bool{
|
|||
dns.TypeSOA: true,
|
||||
dns.TypeSRV: true,
|
||||
dns.TypeTXT: true,
|
||||
// Meta Qtypes
|
||||
dns.TypeIXFR: true,
|
||||
dns.TypeAXFR: true,
|
||||
dns.TypeANY: true,
|
||||
}
|
||||
|
||||
const other = "other"
|
||||
|
|
|
@ -15,10 +15,12 @@ var (
|
|||
requestCount *prometheus.CounterVec
|
||||
requestDuration *prometheus.HistogramVec
|
||||
requestSize *prometheus.HistogramVec
|
||||
requestTransferSize *prometheus.HistogramVec
|
||||
requestDo *prometheus.CounterVec
|
||||
requestType *prometheus.CounterVec
|
||||
|
||||
responseSize *prometheus.HistogramVec
|
||||
responseTransferSize *prometheus.HistogramVec
|
||||
responseRcode *prometheus.CounterVec
|
||||
)
|
||||
|
||||
|
@ -47,10 +49,12 @@ func (m *Metrics) Start() error {
|
|||
prometheus.MustRegister(requestCount)
|
||||
prometheus.MustRegister(requestDuration)
|
||||
prometheus.MustRegister(requestSize)
|
||||
prometheus.MustRegister(requestTransferSize)
|
||||
prometheus.MustRegister(requestDo)
|
||||
prometheus.MustRegister(requestType)
|
||||
|
||||
prometheus.MustRegister(responseSize)
|
||||
prometheus.MustRegister(responseTransferSize)
|
||||
prometheus.MustRegister(responseRcode)
|
||||
|
||||
m.mux.Handle(path, prometheus.Handler())
|
||||
|
@ -80,9 +84,9 @@ func define() {
|
|||
requestDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
||||
Namespace: middleware.Namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "request_duration_seconds",
|
||||
Buckets: append([]float64{.0001, .0005, .001, .0025}, prometheus.DefBuckets...),
|
||||
Help: "Histogram of the time (in seconds) each request took.",
|
||||
Name: "request_duration_milliseconds",
|
||||
Buckets: append(prometheus.DefBuckets, []float64{50, 100, 200, 500, 1000, 2000, 3000, 4000, 5000}...),
|
||||
Help: "Histogram of the time (in milliseconds) each request took.",
|
||||
}, []string{"zone"})
|
||||
|
||||
requestSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
||||
|
@ -93,6 +97,14 @@ func define() {
|
|||
Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3},
|
||||
}, []string{"zone", "proto"})
|
||||
|
||||
requestTransferSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
||||
Namespace: middleware.Namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "request_transfer_size_bytes",
|
||||
Help: "Size of the incoming zone transfer in bytes.",
|
||||
Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3},
|
||||
}, []string{"zone", "proto"})
|
||||
|
||||
requestDo = prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
Namespace: middleware.Namespace,
|
||||
Subsystem: subsystem,
|
||||
|
@ -111,7 +123,15 @@ func define() {
|
|||
Namespace: middleware.Namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "response_size_bytes",
|
||||
Help: "Size of the returns response in bytes.",
|
||||
Help: "Size of the returned response in bytes.",
|
||||
Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3},
|
||||
}, []string{"zone", "proto"})
|
||||
|
||||
responseTransferSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
||||
Namespace: middleware.Namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "response_transfer_size_bytes",
|
||||
Help: "Size of the returned zone transfer in bytes.",
|
||||
Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3},
|
||||
}, []string{"zone", "proto"})
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue