diff --git a/core/dnsserver/server.go b/core/dnsserver/server.go index dbc7bd5ce..2bbd8f8d3 100644 --- a/core/dnsserver/server.go +++ b/core/dnsserver/server.go @@ -197,7 +197,7 @@ func (s *Server) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) // The default dns.Mux checks the question section size, but we have our // own mux here. Check if we have a question section. If not drop them here. if r == nil || len(r.Question) == 0 { - DefaultErrorFunc(w, r, dns.RcodeServerFailure) + DefaultErrorFunc(ctx, w, r, dns.RcodeServerFailure) return } @@ -206,13 +206,13 @@ func (s *Server) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) // In case the user doesn't enable error plugin, we still // need to make sure that we stay alive up here if rec := recover(); rec != nil { - DefaultErrorFunc(w, r, dns.RcodeServerFailure) + DefaultErrorFunc(ctx, w, r, dns.RcodeServerFailure) } }() } if !s.classChaos && r.Question[0].Qclass != dns.ClassINET { - DefaultErrorFunc(w, r, dns.RcodeRefused) + DefaultErrorFunc(ctx, w, r, dns.RcodeRefused) return } @@ -223,7 +223,7 @@ func (s *Server) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) ctx, err := incrementDepthAndCheck(ctx) if err != nil { - DefaultErrorFunc(w, r, dns.RcodeServerFailure) + DefaultErrorFunc(ctx, w, r, dns.RcodeServerFailure) return } @@ -254,7 +254,7 @@ func (s *Server) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) if h.FilterFunc == nil { rcode, _ := h.pluginChain.ServeDNS(ctx, w, r) if !plugin.ClientWrite(rcode) { - DefaultErrorFunc(w, r, rcode) + DefaultErrorFunc(ctx, w, r, rcode) } return } @@ -263,7 +263,7 @@ func (s *Server) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) if h.FilterFunc(q) { rcode, _ := h.pluginChain.ServeDNS(ctx, w, r) if !plugin.ClientWrite(rcode) { - DefaultErrorFunc(w, r, rcode) + DefaultErrorFunc(ctx, w, r, rcode) } return } @@ -285,7 +285,7 @@ func (s *Server) ServeDNS(ctx context.Context, w dns.ResponseWriter, r 
*dns.Msg) // DS request, and we found a zone, use the handler for the query. rcode, _ := dshandler.pluginChain.ServeDNS(ctx, w, r) if !plugin.ClientWrite(rcode) { - DefaultErrorFunc(w, r, rcode) + DefaultErrorFunc(ctx, w, r, rcode) } return } @@ -298,13 +298,13 @@ func (s *Server) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) rcode, _ := h.pluginChain.ServeDNS(ctx, w, r) if !plugin.ClientWrite(rcode) { - DefaultErrorFunc(w, r, rcode) + DefaultErrorFunc(ctx, w, r, rcode) } return } // Still here? Error out with REFUSED. - DefaultErrorFunc(w, r, dns.RcodeRefused) + DefaultErrorFunc(ctx, w, r, dns.RcodeRefused) } // OnStartupComplete lists the sites served by this server @@ -331,7 +331,7 @@ func (s *Server) Tracer() ot.Tracer { } // DefaultErrorFunc responds to an DNS request with an error. -func DefaultErrorFunc(w dns.ResponseWriter, r *dns.Msg, rc int) { +func DefaultErrorFunc(ctx context.Context, w dns.ResponseWriter, r *dns.Msg, rc int) { state := request.Request{W: w, Req: r} answer := new(dns.Msg) @@ -339,7 +339,7 @@ func DefaultErrorFunc(w dns.ResponseWriter, r *dns.Msg, rc int) { state.SizeAndDo(answer) - vars.Report(state, vars.Dropped, rcode.ToString(rc), answer.Len(), time.Now()) + vars.Report(ctx, state, vars.Dropped, rcode.ToString(rc), answer.Len(), time.Now()) w.WriteMsg(answer) } diff --git a/plugin/log/log.go b/plugin/log/log.go index 6b8d0c385..a527ccd90 100644 --- a/plugin/log/log.go +++ b/plugin/log/log.go @@ -21,7 +21,7 @@ import ( type Logger struct { Next plugin.Handler Rules []Rule - ErrorFunc func(dns.ResponseWriter, *dns.Msg, int) // failover error handler + ErrorFunc func(context.Context, dns.ResponseWriter, *dns.Msg, int) // failover error handler } // ServeDNS implements the plugin.Handler interface. @@ -39,13 +39,13 @@ func (l Logger) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) // There was an error up the chain, but no response has been written yet. 
// The error must be handled here so the log entry will record the response size. if l.ErrorFunc != nil { - l.ErrorFunc(rrw, r, rc) + l.ErrorFunc(ctx, rrw, r, rc) } else { answer := new(dns.Msg) answer.SetRcode(r, rc) state.SizeAndDo(answer) - vars.Report(state, vars.Dropped, rcode.ToString(rc), answer.Len(), time.Now()) + vars.Report(ctx, state, vars.Dropped, rcode.ToString(rc), answer.Len(), time.Now()) w.WriteMsg(answer) } diff --git a/plugin/metrics/README.md b/plugin/metrics/README.md index b0443a4e6..882e1fcb2 100644 --- a/plugin/metrics/README.md +++ b/plugin/metrics/README.md @@ -11,18 +11,21 @@ The default location for the metrics is `localhost:9153`. The metrics path is fi The following metrics are exported: * `coredns_build_info{version, revision, goversion}` - info about CoreDNS itself. -* `coredns_dns_request_count_total{zone, proto, family}` - total query count. -* `coredns_dns_request_duration_seconds{zone}` - duration to process each query. -* `coredns_dns_request_size_bytes{zone, proto}` - size of the request in bytes. -* `coredns_dns_request_do_count_total{zone}` - queries that have the DO bit set -* `coredns_dns_request_type_count_total{zone, type}` - counter of queries per zone and type. -* `coredns_dns_response_size_bytes{zone, proto}` - response size in bytes. -* `coredns_dns_response_rcode_count_total{zone, rcode}` - response per zone and rcode. +* `coredns_dns_request_count_total{server, zone, proto, family}` - total query count. +* `coredns_dns_request_duration_seconds{server, zone}` - duration to process each query. +* `coredns_dns_request_size_bytes{server, zone, proto}` - size of the request in bytes. +* `coredns_dns_request_do_count_total{server, zone}` - queries that have the DO bit set +* `coredns_dns_request_type_count_total{server, zone, type}` - counter of queries per zone and type. +* `coredns_dns_response_size_bytes{server, zone, proto}` - response size in bytes. 
+* `coredns_dns_response_rcode_count_total{server, zone, rcode}` - response per zone and rcode. Each counter has a label `zone` which is the zonename used for the request/response. Extra labels used are: +* `server` is identifying the server responsible for the request. This is a string formatted + as the server's listening address: `<scheme>://[<bind>]:<port>`. I.e. for a "normal" DNS server + this is `dns://:53`. If you are using the *bind* plugin an IP address is included, e.g.: `dns://127.0.0.53:53`. * `proto` which holds the transport of the response ("udp" or "tcp") * The address family (`family`) of the transport (1 = IP (IP version 4), 2 = IP6 (IP version 6)). * `type` which holds the query type. It holds most common types (A, AAAA, MX, SOA, CNAME, PTR, TXT, diff --git a/plugin/metrics/context.go b/plugin/metrics/context.go index 4cce871f5..ac3010c71 100644 --- a/plugin/metrics/context.go +++ b/plugin/metrics/context.go @@ -1,7 +1,7 @@ package metrics import ( - "github.com/coredns/coredns/plugin" + "github.com/coredns/coredns/plugin/metrics/vars" "golang.org/x/net/context" ) @@ -15,10 +15,4 @@ import ( // Basic usage with a metric: // // .WithLabelValues(metrics.WithServer(ctx), labels..).Add(1) -func WithServer(ctx context.Context) string { - srv := ctx.Value(plugin.ServerCtx{}) - if srv == nil { - return "" - } - return srv.(string) -} +func WithServer(ctx context.Context) string { return vars.WithServer(ctx) } diff --git a/plugin/metrics/handler.go b/plugin/metrics/handler.go index 88bc1b634..1c9517bac 100644 --- a/plugin/metrics/handler.go +++ b/plugin/metrics/handler.go @@ -25,7 +25,7 @@ func (m *Metrics) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg rw := dnstest.NewRecorder(w) status, err := plugin.NextOrFailure(m.Name(), m.Next, ctx, rw, r) - vars.Report(state, zone, rcode.ToString(rw.Rcode), rw.Len, rw.Start) + vars.Report(ctx, state, zone, rcode.ToString(rw.Rcode), rw.Len, rw.Start) return status, err } diff --git a/plugin/metrics/vars/report.go 
b/plugin/metrics/vars/report.go index 568a291e3..dc597c610 100644 --- a/plugin/metrics/vars/report.go +++ b/plugin/metrics/vars/report.go @@ -3,13 +3,15 @@ package vars import ( "time" + "github.com/coredns/coredns/plugin" "github.com/coredns/coredns/request" "github.com/miekg/dns" + "golang.org/x/net/context" ) // Report reports the metrics data associcated with request. -func Report(req request.Request, zone, rcode string, size int, start time.Time) { +func Report(ctx context.Context, req request.Request, zone, rcode string, size int, start time.Time) { // Proto and Family. net := req.Proto() fam := "1" @@ -17,25 +19,35 @@ func Report(req request.Request, zone, rcode string, size int, start time.Time) fam = "2" } - typ := req.QType() + server := WithServer(ctx) - RequestCount.WithLabelValues(zone, net, fam).Inc() - RequestDuration.WithLabelValues(zone).Observe(time.Since(start).Seconds()) + typ := req.QType() + RequestCount.WithLabelValues(server, zone, net, fam).Inc() + RequestDuration.WithLabelValues(server, zone).Observe(time.Since(start).Seconds()) if req.Do() { - RequestDo.WithLabelValues(zone).Inc() + RequestDo.WithLabelValues(server, zone).Inc() } if _, known := monitorType[typ]; known { - RequestType.WithLabelValues(zone, dns.Type(typ).String()).Inc() + RequestType.WithLabelValues(server, zone, dns.Type(typ).String()).Inc() } else { - RequestType.WithLabelValues(zone, other).Inc() + RequestType.WithLabelValues(server, zone, other).Inc() } - ResponseSize.WithLabelValues(zone, net).Observe(float64(size)) - RequestSize.WithLabelValues(zone, net).Observe(float64(req.Len())) + ResponseSize.WithLabelValues(server, zone, net).Observe(float64(size)) + RequestSize.WithLabelValues(server, zone, net).Observe(float64(req.Len())) - ResponseRcode.WithLabelValues(zone, rcode).Inc() + ResponseRcode.WithLabelValues(server, zone, rcode).Inc() +} + +// WithServer returns the current server handling the request. 
+func WithServer(ctx context.Context) string { + srv := ctx.Value(plugin.ServerCtx{}) + if srv == nil { + return "" + } + return srv.(string) } var monitorType = map[uint16]bool{ diff --git a/plugin/metrics/vars/vars.go b/plugin/metrics/vars/vars.go index e5a0d7c43..3b3c083c8 100644 --- a/plugin/metrics/vars/vars.go +++ b/plugin/metrics/vars/vars.go @@ -13,7 +13,7 @@ var ( Subsystem: subsystem, Name: "request_count_total", Help: "Counter of DNS requests made per zone, protocol and family.", - }, []string{"zone", "proto", "family"}) + }, []string{"server", "zone", "proto", "family"}) RequestDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{ Namespace: plugin.Namespace, @@ -21,7 +21,7 @@ var ( Name: "request_duration_seconds", Buckets: plugin.TimeBuckets, Help: "Histogram of the time (in seconds) each request took.", - }, []string{"zone"}) + }, []string{"server", "zone"}) RequestSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{ Namespace: plugin.Namespace, @@ -29,21 +29,21 @@ var ( Name: "request_size_bytes", Help: "Size of the EDNS0 UDP buffer in bytes (64K for TCP).", Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3}, - }, []string{"zone", "proto"}) + }, []string{"server", "zone", "proto"}) RequestDo = prometheus.NewCounterVec(prometheus.CounterOpts{ Namespace: plugin.Namespace, Subsystem: subsystem, Name: "request_do_count_total", Help: "Counter of DNS requests with DO bit set per zone.", - }, []string{"zone"}) + }, []string{"server", "zone"}) RequestType = prometheus.NewCounterVec(prometheus.CounterOpts{ Namespace: plugin.Namespace, Subsystem: subsystem, Name: "request_type_count_total", Help: "Counter of DNS requests per type, per zone.", - }, []string{"zone", "type"}) + }, []string{"server", "zone", "type"}) ResponseSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{ Namespace: plugin.Namespace, @@ -51,14 +51,14 @@ var ( Name: "response_size_bytes", Help: "Size of the returned response 
in bytes.", Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3}, - }, []string{"zone", "proto"}) + }, []string{"server", "zone", "proto"}) ResponseRcode = prometheus.NewCounterVec(prometheus.CounterOpts{ Namespace: plugin.Namespace, Subsystem: subsystem, Name: "response_rcode_count_total", Help: "Counter of response status codes.", - }, []string{"zone", "rcode"}) + }, []string{"server", "zone", "rcode"}) ) const ( diff --git a/plugin/proxy/README.md b/plugin/proxy/README.md index d81a2ebc1..1da0ef4b7 100644 --- a/plugin/proxy/README.md +++ b/plugin/proxy/README.md @@ -102,13 +102,15 @@ payload over HTTPS). Note that with `https_google` the entire transport is encry If monitoring is enabled (via the *prometheus* directive) then the following metric is exported: -* `coredns_proxy_request_duration_seconds{proto, proto_proxy, family, to}` - duration per upstream - interaction. -* `coredns_proxy_request_count_total{proto, proto_proxy, family, to}` - query count per upstream. +* `coredns_proxy_request_duration_seconds{server, proto, proxy_proto, family, to}` - duration per + upstream interaction. +* `coredns_proxy_request_count_total{server, proto, proxy_proto, family, to}` - query count per + upstream. Where `proxy_proto` is the protocol used (`dns`, `grpc`, or `https_google`) and `to` is **TO** -specified in the config, `proto` is the protocol used by the incoming query ("tcp" or "udp"). -and family the transport family ("1" for IPv4, and "2" for IPv6). +specified in the config, `proto` is the protocol used by the incoming query ("tcp" or "udp"), family +the transport family ("1" for IPv4, and "2" for IPv6). `server` is the server responsible for the +request (and metric). See the documentation in the metrics plugin. 
## Examples diff --git a/plugin/proxy/metrics.go b/plugin/proxy/metrics.go index e0dd3fe98..e5d6139b4 100644 --- a/plugin/proxy/metrics.go +++ b/plugin/proxy/metrics.go @@ -15,14 +15,14 @@ var ( Subsystem: "proxy", Name: "request_count_total", Help: "Counter of requests made per protocol, proxy protocol, family and upstream.", - }, []string{"proto", "proxy_proto", "family", "to"}) + }, []string{"server", "proto", "proxy_proto", "family", "to"}) RequestDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{ Namespace: plugin.Namespace, Subsystem: "proxy", Name: "request_duration_seconds", Buckets: plugin.TimeBuckets, Help: "Histogram of the time (in seconds) each request took.", - }, []string{"proto", "proxy_proto", "family", "to"}) + }, []string{"server", "proto", "proxy_proto", "family", "to"}) ) // familyToString returns the string form of either 1, or 2. Returns diff --git a/plugin/proxy/proxy.go b/plugin/proxy/proxy.go index af61f424f..eae9953df 100644 --- a/plugin/proxy/proxy.go +++ b/plugin/proxy/proxy.go @@ -9,6 +9,7 @@ import ( "time" "github.com/coredns/coredns/plugin" + "github.com/coredns/coredns/plugin/metrics" "github.com/coredns/coredns/plugin/pkg/healthcheck" "github.com/coredns/coredns/request" @@ -87,7 +88,7 @@ func (p Proxy) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) ( atomic.AddInt64(&host.Conns, 1) - RequestCount.WithLabelValues(state.Proto(), upstream.Exchanger().Protocol(), familyToString(state.Family()), host.Name).Add(1) + RequestCount.WithLabelValues(metrics.WithServer(ctx), state.Proto(), upstream.Exchanger().Protocol(), familyToString(state.Family()), host.Name).Add(1) reply, backendErr = upstream.Exchanger().Exchange(ctx, host.Name, state) @@ -110,7 +111,7 @@ func (p Proxy) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) ( w.WriteMsg(reply) - RequestDuration.WithLabelValues(state.Proto(), upstream.Exchanger().Protocol(), familyToString(state.Family()), 
host.Name).Observe(time.Since(start).Seconds()) + RequestDuration.WithLabelValues(metrics.WithServer(ctx), state.Proto(), upstream.Exchanger().Protocol(), familyToString(state.Family()), host.Name).Observe(time.Since(start).Seconds()) return 0, taperr }