plugin/metrics: add 'server' label (#1682)

* plugin/metrics: add 'server' label

This uses the new WithServer(ctx) to get the current server from the
context.

First in a larger refactor to make all plugins do this.

* compile

* compile

* lala test

* compile and test

* typos

* Don't duplicate the code
This commit is contained in:
Miek Gieben 2018-04-18 09:42:20 +01:00 committed by GitHub
parent 573ad62b77
commit 08443a9f00
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 68 additions and 56 deletions

View file

@ -197,7 +197,7 @@ func (s *Server) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg)
// The default dns.Mux checks the question section size, but we have our // The default dns.Mux checks the question section size, but we have our
// own mux here. Check if we have a question section. If not drop them here. // own mux here. Check if we have a question section. If not drop them here.
if r == nil || len(r.Question) == 0 { if r == nil || len(r.Question) == 0 {
DefaultErrorFunc(w, r, dns.RcodeServerFailure) DefaultErrorFunc(ctx, w, r, dns.RcodeServerFailure)
return return
} }
@ -206,13 +206,13 @@ func (s *Server) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg)
// In case the user doesn't enable error plugin, we still // In case the user doesn't enable error plugin, we still
// need to make sure that we stay alive up here // need to make sure that we stay alive up here
if rec := recover(); rec != nil { if rec := recover(); rec != nil {
DefaultErrorFunc(w, r, dns.RcodeServerFailure) DefaultErrorFunc(ctx, w, r, dns.RcodeServerFailure)
} }
}() }()
} }
if !s.classChaos && r.Question[0].Qclass != dns.ClassINET { if !s.classChaos && r.Question[0].Qclass != dns.ClassINET {
DefaultErrorFunc(w, r, dns.RcodeRefused) DefaultErrorFunc(ctx, w, r, dns.RcodeRefused)
return return
} }
@ -223,7 +223,7 @@ func (s *Server) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg)
ctx, err := incrementDepthAndCheck(ctx) ctx, err := incrementDepthAndCheck(ctx)
if err != nil { if err != nil {
DefaultErrorFunc(w, r, dns.RcodeServerFailure) DefaultErrorFunc(ctx, w, r, dns.RcodeServerFailure)
return return
} }
@ -254,7 +254,7 @@ func (s *Server) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg)
if h.FilterFunc == nil { if h.FilterFunc == nil {
rcode, _ := h.pluginChain.ServeDNS(ctx, w, r) rcode, _ := h.pluginChain.ServeDNS(ctx, w, r)
if !plugin.ClientWrite(rcode) { if !plugin.ClientWrite(rcode) {
DefaultErrorFunc(w, r, rcode) DefaultErrorFunc(ctx, w, r, rcode)
} }
return return
} }
@ -263,7 +263,7 @@ func (s *Server) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg)
if h.FilterFunc(q) { if h.FilterFunc(q) {
rcode, _ := h.pluginChain.ServeDNS(ctx, w, r) rcode, _ := h.pluginChain.ServeDNS(ctx, w, r)
if !plugin.ClientWrite(rcode) { if !plugin.ClientWrite(rcode) {
DefaultErrorFunc(w, r, rcode) DefaultErrorFunc(ctx, w, r, rcode)
} }
return return
} }
@ -285,7 +285,7 @@ func (s *Server) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg)
// DS request, and we found a zone, use the handler for the query. // DS request, and we found a zone, use the handler for the query.
rcode, _ := dshandler.pluginChain.ServeDNS(ctx, w, r) rcode, _ := dshandler.pluginChain.ServeDNS(ctx, w, r)
if !plugin.ClientWrite(rcode) { if !plugin.ClientWrite(rcode) {
DefaultErrorFunc(w, r, rcode) DefaultErrorFunc(ctx, w, r, rcode)
} }
return return
} }
@ -298,13 +298,13 @@ func (s *Server) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg)
rcode, _ := h.pluginChain.ServeDNS(ctx, w, r) rcode, _ := h.pluginChain.ServeDNS(ctx, w, r)
if !plugin.ClientWrite(rcode) { if !plugin.ClientWrite(rcode) {
DefaultErrorFunc(w, r, rcode) DefaultErrorFunc(ctx, w, r, rcode)
} }
return return
} }
// Still here? Error out with REFUSED. // Still here? Error out with REFUSED.
DefaultErrorFunc(w, r, dns.RcodeRefused) DefaultErrorFunc(ctx, w, r, dns.RcodeRefused)
} }
// OnStartupComplete lists the sites served by this server // OnStartupComplete lists the sites served by this server
@ -331,7 +331,7 @@ func (s *Server) Tracer() ot.Tracer {
} }
// DefaultErrorFunc responds to a DNS request with an error. // DefaultErrorFunc responds to a DNS request with an error.
func DefaultErrorFunc(w dns.ResponseWriter, r *dns.Msg, rc int) { func DefaultErrorFunc(ctx context.Context, w dns.ResponseWriter, r *dns.Msg, rc int) {
state := request.Request{W: w, Req: r} state := request.Request{W: w, Req: r}
answer := new(dns.Msg) answer := new(dns.Msg)
@ -339,7 +339,7 @@ func DefaultErrorFunc(w dns.ResponseWriter, r *dns.Msg, rc int) {
state.SizeAndDo(answer) state.SizeAndDo(answer)
vars.Report(state, vars.Dropped, rcode.ToString(rc), answer.Len(), time.Now()) vars.Report(ctx, state, vars.Dropped, rcode.ToString(rc), answer.Len(), time.Now())
w.WriteMsg(answer) w.WriteMsg(answer)
} }

View file

@ -21,7 +21,7 @@ import (
type Logger struct { type Logger struct {
Next plugin.Handler Next plugin.Handler
Rules []Rule Rules []Rule
ErrorFunc func(dns.ResponseWriter, *dns.Msg, int) // failover error handler ErrorFunc func(context.Context, dns.ResponseWriter, *dns.Msg, int) // failover error handler
} }
// ServeDNS implements the plugin.Handler interface. // ServeDNS implements the plugin.Handler interface.
@ -39,13 +39,13 @@ func (l Logger) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg)
// There was an error up the chain, but no response has been written yet. // There was an error up the chain, but no response has been written yet.
// The error must be handled here so the log entry will record the response size. // The error must be handled here so the log entry will record the response size.
if l.ErrorFunc != nil { if l.ErrorFunc != nil {
l.ErrorFunc(rrw, r, rc) l.ErrorFunc(ctx, rrw, r, rc)
} else { } else {
answer := new(dns.Msg) answer := new(dns.Msg)
answer.SetRcode(r, rc) answer.SetRcode(r, rc)
state.SizeAndDo(answer) state.SizeAndDo(answer)
vars.Report(state, vars.Dropped, rcode.ToString(rc), answer.Len(), time.Now()) vars.Report(ctx, state, vars.Dropped, rcode.ToString(rc), answer.Len(), time.Now())
w.WriteMsg(answer) w.WriteMsg(answer)
} }

View file

@ -11,18 +11,21 @@ The default location for the metrics is `localhost:9153`. The metrics path is fi
The following metrics are exported: The following metrics are exported:
* `coredns_build_info{version, revision, goversion}` - info about CoreDNS itself. * `coredns_build_info{version, revision, goversion}` - info about CoreDNS itself.
* `coredns_dns_request_count_total{zone, proto, family}` - total query count. * `coredns_dns_request_count_total{server, zone, proto, family}` - total query count.
* `coredns_dns_request_duration_seconds{zone}` - duration to process each query. * `coredns_dns_request_duration_seconds{server, zone}` - duration to process each query.
* `coredns_dns_request_size_bytes{zone, proto}` - size of the request in bytes. * `coredns_dns_request_size_bytes{server, zone, proto}` - size of the request in bytes.
* `coredns_dns_request_do_count_total{zone}` - queries that have the DO bit set * `coredns_dns_request_do_count_total{server, zone}` - queries that have the DO bit set
* `coredns_dns_request_type_count_total{zone, type}` - counter of queries per zone and type. * `coredns_dns_request_type_count_total{server, zone, type}` - counter of queries per zone and type.
* `coredns_dns_response_size_bytes{zone, proto}` - response size in bytes. * `coredns_dns_response_size_bytes{server, zone, proto}` - response size in bytes.
* `coredns_dns_response_rcode_count_total{zone, rcode}` - response per zone and rcode. * `coredns_dns_response_rcode_count_total{server, zone, rcode}` - response per zone and rcode.
Each counter has a label `zone` which is the zonename used for the request/response. Each counter has a label `zone` which is the zonename used for the request/response.
Extra labels used are: Extra labels used are:
* `server` identifies the server responsible for the request. This is a string formatted
as the server's listening address: `<scheme>://[<bind>]:<port>`. For example, for a "normal" DNS server
this is `dns://:53`. If you are using the *bind* plugin an IP address is included, e.g.: `dns://127.0.0.53:53`.
* `proto` which holds the transport of the response ("udp" or "tcp") * `proto` which holds the transport of the response ("udp" or "tcp")
* The address family (`family`) of the transport (1 = IP (IP version 4), 2 = IP6 (IP version 6)). * The address family (`family`) of the transport (1 = IP (IP version 4), 2 = IP6 (IP version 6)).
* `type` which holds the query type. It holds most common types (A, AAAA, MX, SOA, CNAME, PTR, TXT, * `type` which holds the query type. It holds most common types (A, AAAA, MX, SOA, CNAME, PTR, TXT,

View file

@ -1,7 +1,7 @@
package metrics package metrics
import ( import (
"github.com/coredns/coredns/plugin" "github.com/coredns/coredns/plugin/metrics/vars"
"golang.org/x/net/context" "golang.org/x/net/context"
) )
@ -15,10 +15,4 @@ import (
// Basic usage with a metric: // Basic usage with a metric:
// //
// <metric>.WithLabelValues(metrics.WithServer(ctx), labels..).Add(1) // <metric>.WithLabelValues(metrics.WithServer(ctx), labels..).Add(1)
func WithServer(ctx context.Context) string { func WithServer(ctx context.Context) string { return vars.WithServer(ctx) }
srv := ctx.Value(plugin.ServerCtx{})
if srv == nil {
return ""
}
return srv.(string)
}

View file

@ -25,7 +25,7 @@ func (m *Metrics) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg
rw := dnstest.NewRecorder(w) rw := dnstest.NewRecorder(w)
status, err := plugin.NextOrFailure(m.Name(), m.Next, ctx, rw, r) status, err := plugin.NextOrFailure(m.Name(), m.Next, ctx, rw, r)
vars.Report(state, zone, rcode.ToString(rw.Rcode), rw.Len, rw.Start) vars.Report(ctx, state, zone, rcode.ToString(rw.Rcode), rw.Len, rw.Start)
return status, err return status, err
} }

View file

@ -3,13 +3,15 @@ package vars
import ( import (
"time" "time"
"github.com/coredns/coredns/plugin"
"github.com/coredns/coredns/request" "github.com/coredns/coredns/request"
"github.com/miekg/dns" "github.com/miekg/dns"
"golang.org/x/net/context"
) )
// Report reports the metrics data associated with request. // Report reports the metrics data associated with request.
func Report(req request.Request, zone, rcode string, size int, start time.Time) { func Report(ctx context.Context, req request.Request, zone, rcode string, size int, start time.Time) {
// Proto and Family. // Proto and Family.
net := req.Proto() net := req.Proto()
fam := "1" fam := "1"
@ -17,25 +19,35 @@ func Report(req request.Request, zone, rcode string, size int, start time.Time)
fam = "2" fam = "2"
} }
typ := req.QType() server := WithServer(ctx)
RequestCount.WithLabelValues(zone, net, fam).Inc() typ := req.QType()
RequestDuration.WithLabelValues(zone).Observe(time.Since(start).Seconds()) RequestCount.WithLabelValues(server, zone, net, fam).Inc()
RequestDuration.WithLabelValues(server, zone).Observe(time.Since(start).Seconds())
if req.Do() { if req.Do() {
RequestDo.WithLabelValues(zone).Inc() RequestDo.WithLabelValues(server, zone).Inc()
} }
if _, known := monitorType[typ]; known { if _, known := monitorType[typ]; known {
RequestType.WithLabelValues(zone, dns.Type(typ).String()).Inc() RequestType.WithLabelValues(server, zone, dns.Type(typ).String()).Inc()
} else { } else {
RequestType.WithLabelValues(zone, other).Inc() RequestType.WithLabelValues(server, zone, other).Inc()
} }
ResponseSize.WithLabelValues(zone, net).Observe(float64(size)) ResponseSize.WithLabelValues(server, zone, net).Observe(float64(size))
RequestSize.WithLabelValues(zone, net).Observe(float64(req.Len())) RequestSize.WithLabelValues(server, zone, net).Observe(float64(req.Len()))
ResponseRcode.WithLabelValues(zone, rcode).Inc() ResponseRcode.WithLabelValues(server, zone, rcode).Inc()
}
// WithServer returns the current server handling the request.
func WithServer(ctx context.Context) string {
srv := ctx.Value(plugin.ServerCtx{})
if srv == nil {
return ""
}
return srv.(string)
} }
var monitorType = map[uint16]bool{ var monitorType = map[uint16]bool{

View file

@ -13,7 +13,7 @@ var (
Subsystem: subsystem, Subsystem: subsystem,
Name: "request_count_total", Name: "request_count_total",
Help: "Counter of DNS requests made per zone, protocol and family.", Help: "Counter of DNS requests made per zone, protocol and family.",
}, []string{"zone", "proto", "family"}) }, []string{"server", "zone", "proto", "family"})
RequestDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{ RequestDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: plugin.Namespace, Namespace: plugin.Namespace,
@ -21,7 +21,7 @@ var (
Name: "request_duration_seconds", Name: "request_duration_seconds",
Buckets: plugin.TimeBuckets, Buckets: plugin.TimeBuckets,
Help: "Histogram of the time (in seconds) each request took.", Help: "Histogram of the time (in seconds) each request took.",
}, []string{"zone"}) }, []string{"server", "zone"})
RequestSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{ RequestSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: plugin.Namespace, Namespace: plugin.Namespace,
@ -29,21 +29,21 @@ var (
Name: "request_size_bytes", Name: "request_size_bytes",
Help: "Size of the EDNS0 UDP buffer in bytes (64K for TCP).", Help: "Size of the EDNS0 UDP buffer in bytes (64K for TCP).",
Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3}, Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3},
}, []string{"zone", "proto"}) }, []string{"server", "zone", "proto"})
RequestDo = prometheus.NewCounterVec(prometheus.CounterOpts{ RequestDo = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: plugin.Namespace, Namespace: plugin.Namespace,
Subsystem: subsystem, Subsystem: subsystem,
Name: "request_do_count_total", Name: "request_do_count_total",
Help: "Counter of DNS requests with DO bit set per zone.", Help: "Counter of DNS requests with DO bit set per zone.",
}, []string{"zone"}) }, []string{"server", "zone"})
RequestType = prometheus.NewCounterVec(prometheus.CounterOpts{ RequestType = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: plugin.Namespace, Namespace: plugin.Namespace,
Subsystem: subsystem, Subsystem: subsystem,
Name: "request_type_count_total", Name: "request_type_count_total",
Help: "Counter of DNS requests per type, per zone.", Help: "Counter of DNS requests per type, per zone.",
}, []string{"zone", "type"}) }, []string{"server", "zone", "type"})
ResponseSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{ ResponseSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: plugin.Namespace, Namespace: plugin.Namespace,
@ -51,14 +51,14 @@ var (
Name: "response_size_bytes", Name: "response_size_bytes",
Help: "Size of the returned response in bytes.", Help: "Size of the returned response in bytes.",
Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3}, Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3},
}, []string{"zone", "proto"}) }, []string{"server", "zone", "proto"})
ResponseRcode = prometheus.NewCounterVec(prometheus.CounterOpts{ ResponseRcode = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: plugin.Namespace, Namespace: plugin.Namespace,
Subsystem: subsystem, Subsystem: subsystem,
Name: "response_rcode_count_total", Name: "response_rcode_count_total",
Help: "Counter of response status codes.", Help: "Counter of response status codes.",
}, []string{"zone", "rcode"}) }, []string{"server", "zone", "rcode"})
) )
const ( const (

View file

@ -102,13 +102,15 @@ payload over HTTPS). Note that with `https_google` the entire transport is encry
If monitoring is enabled (via the *prometheus* directive) then the following metric is exported: If monitoring is enabled (via the *prometheus* directive) then the following metric is exported:
* `coredns_proxy_request_duration_seconds{proto, proxy_proto, family, to}` - duration per upstream * `coredns_proxy_request_duration_seconds{server, proto, proxy_proto, family, to}` - duration per
interaction. upstream interaction.
* `coredns_proxy_request_count_total{proto, proxy_proto, family, to}` - query count per upstream. * `coredns_proxy_request_count_total{server, proto, proxy_proto, family, to}` - query count per
upstream.
Where `proxy_proto` is the protocol used (`dns`, `grpc`, or `https_google`) and `to` is **TO** Where `proxy_proto` is the protocol used (`dns`, `grpc`, or `https_google`) and `to` is **TO**
specified in the config, `proto` is the protocol used by the incoming query ("tcp" or "udp"). specified in the config, `proto` is the protocol used by the incoming query ("tcp" or "udp"), family
and family the transport family ("1" for IPv4, and "2" for IPv6). the transport family ("1" for IPv4, and "2" for IPv6). `server` is the server responsible for the
request (and metric). See the documentation in the metrics plugin.
## Examples ## Examples

View file

@ -15,14 +15,14 @@ var (
Subsystem: "proxy", Subsystem: "proxy",
Name: "request_count_total", Name: "request_count_total",
Help: "Counter of requests made per protocol, proxy protocol, family and upstream.", Help: "Counter of requests made per protocol, proxy protocol, family and upstream.",
}, []string{"proto", "proxy_proto", "family", "to"}) }, []string{"server", "proto", "proxy_proto", "family", "to"})
RequestDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{ RequestDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: plugin.Namespace, Namespace: plugin.Namespace,
Subsystem: "proxy", Subsystem: "proxy",
Name: "request_duration_seconds", Name: "request_duration_seconds",
Buckets: plugin.TimeBuckets, Buckets: plugin.TimeBuckets,
Help: "Histogram of the time (in seconds) each request took.", Help: "Histogram of the time (in seconds) each request took.",
}, []string{"proto", "proxy_proto", "family", "to"}) }, []string{"server", "proto", "proxy_proto", "family", "to"})
) )
// familyToString returns the string form of either 1, or 2. Returns // familyToString returns the string form of either 1, or 2. Returns

View file

@ -9,6 +9,7 @@ import (
"time" "time"
"github.com/coredns/coredns/plugin" "github.com/coredns/coredns/plugin"
"github.com/coredns/coredns/plugin/metrics"
"github.com/coredns/coredns/plugin/pkg/healthcheck" "github.com/coredns/coredns/plugin/pkg/healthcheck"
"github.com/coredns/coredns/request" "github.com/coredns/coredns/request"
@ -87,7 +88,7 @@ func (p Proxy) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (
atomic.AddInt64(&host.Conns, 1) atomic.AddInt64(&host.Conns, 1)
RequestCount.WithLabelValues(state.Proto(), upstream.Exchanger().Protocol(), familyToString(state.Family()), host.Name).Add(1) RequestCount.WithLabelValues(metrics.WithServer(ctx), state.Proto(), upstream.Exchanger().Protocol(), familyToString(state.Family()), host.Name).Add(1)
reply, backendErr = upstream.Exchanger().Exchange(ctx, host.Name, state) reply, backendErr = upstream.Exchanger().Exchange(ctx, host.Name, state)
@ -110,7 +111,7 @@ func (p Proxy) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (
w.WriteMsg(reply) w.WriteMsg(reply)
RequestDuration.WithLabelValues(state.Proto(), upstream.Exchanger().Protocol(), familyToString(state.Family()), host.Name).Observe(time.Since(start).Seconds()) RequestDuration.WithLabelValues(metrics.WithServer(ctx), state.Proto(), upstream.Exchanger().Protocol(), familyToString(state.Family()), host.Name).Observe(time.Since(start).Seconds())
return 0, taperr return 0, taperr
} }