plugin/metrics: add 'server' label (#1682)

* plugin/metrics: add 'server' label

This uses the new WithServer(ctx) to get the current server from the
context.

First in a larger refactor to make all plugins do this.

* compile

* compile

* lala test

* compile and test

* typos

* Don't duplicate the code
This commit is contained in:
Miek Gieben 2018-04-18 09:42:20 +01:00 committed by GitHub
parent 573ad62b77
commit 08443a9f00
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 68 additions and 56 deletions

View file

@ -197,7 +197,7 @@ func (s *Server) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg)
// The default dns.Mux checks the question section size, but we have our
// own mux here. Check if we have a question section. If not drop them here.
if r == nil || len(r.Question) == 0 {
DefaultErrorFunc(w, r, dns.RcodeServerFailure)
DefaultErrorFunc(ctx, w, r, dns.RcodeServerFailure)
return
}
@ -206,13 +206,13 @@ func (s *Server) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg)
// In case the user doesn't enable error plugin, we still
// need to make sure that we stay alive up here
if rec := recover(); rec != nil {
DefaultErrorFunc(w, r, dns.RcodeServerFailure)
DefaultErrorFunc(ctx, w, r, dns.RcodeServerFailure)
}
}()
}
if !s.classChaos && r.Question[0].Qclass != dns.ClassINET {
DefaultErrorFunc(w, r, dns.RcodeRefused)
DefaultErrorFunc(ctx, w, r, dns.RcodeRefused)
return
}
@ -223,7 +223,7 @@ func (s *Server) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg)
ctx, err := incrementDepthAndCheck(ctx)
if err != nil {
DefaultErrorFunc(w, r, dns.RcodeServerFailure)
DefaultErrorFunc(ctx, w, r, dns.RcodeServerFailure)
return
}
@ -254,7 +254,7 @@ func (s *Server) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg)
if h.FilterFunc == nil {
rcode, _ := h.pluginChain.ServeDNS(ctx, w, r)
if !plugin.ClientWrite(rcode) {
DefaultErrorFunc(w, r, rcode)
DefaultErrorFunc(ctx, w, r, rcode)
}
return
}
@ -263,7 +263,7 @@ func (s *Server) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg)
if h.FilterFunc(q) {
rcode, _ := h.pluginChain.ServeDNS(ctx, w, r)
if !plugin.ClientWrite(rcode) {
DefaultErrorFunc(w, r, rcode)
DefaultErrorFunc(ctx, w, r, rcode)
}
return
}
@ -285,7 +285,7 @@ func (s *Server) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg)
// DS request, and we found a zone, use the handler for the query.
rcode, _ := dshandler.pluginChain.ServeDNS(ctx, w, r)
if !plugin.ClientWrite(rcode) {
DefaultErrorFunc(w, r, rcode)
DefaultErrorFunc(ctx, w, r, rcode)
}
return
}
@ -298,13 +298,13 @@ func (s *Server) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg)
rcode, _ := h.pluginChain.ServeDNS(ctx, w, r)
if !plugin.ClientWrite(rcode) {
DefaultErrorFunc(w, r, rcode)
DefaultErrorFunc(ctx, w, r, rcode)
}
return
}
// Still here? Error out with REFUSED.
DefaultErrorFunc(w, r, dns.RcodeRefused)
DefaultErrorFunc(ctx, w, r, dns.RcodeRefused)
}
// OnStartupComplete lists the sites served by this server
@ -331,7 +331,7 @@ func (s *Server) Tracer() ot.Tracer {
}
// DefaultErrorFunc responds to a DNS request with an error.
func DefaultErrorFunc(w dns.ResponseWriter, r *dns.Msg, rc int) {
func DefaultErrorFunc(ctx context.Context, w dns.ResponseWriter, r *dns.Msg, rc int) {
state := request.Request{W: w, Req: r}
answer := new(dns.Msg)
@ -339,7 +339,7 @@ func DefaultErrorFunc(w dns.ResponseWriter, r *dns.Msg, rc int) {
state.SizeAndDo(answer)
vars.Report(state, vars.Dropped, rcode.ToString(rc), answer.Len(), time.Now())
vars.Report(ctx, state, vars.Dropped, rcode.ToString(rc), answer.Len(), time.Now())
w.WriteMsg(answer)
}

View file

@ -21,7 +21,7 @@ import (
type Logger struct {
Next plugin.Handler
Rules []Rule
ErrorFunc func(dns.ResponseWriter, *dns.Msg, int) // failover error handler
ErrorFunc func(context.Context, dns.ResponseWriter, *dns.Msg, int) // failover error handler
}
// ServeDNS implements the plugin.Handler interface.
@ -39,13 +39,13 @@ func (l Logger) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg)
// There was an error up the chain, but no response has been written yet.
// The error must be handled here so the log entry will record the response size.
if l.ErrorFunc != nil {
l.ErrorFunc(rrw, r, rc)
l.ErrorFunc(ctx, rrw, r, rc)
} else {
answer := new(dns.Msg)
answer.SetRcode(r, rc)
state.SizeAndDo(answer)
vars.Report(state, vars.Dropped, rcode.ToString(rc), answer.Len(), time.Now())
vars.Report(ctx, state, vars.Dropped, rcode.ToString(rc), answer.Len(), time.Now())
w.WriteMsg(answer)
}

View file

@ -11,18 +11,21 @@ The default location for the metrics is `localhost:9153`. The metrics path is fi
The following metrics are exported:
* `coredns_build_info{version, revision, goversion}` - info about CoreDNS itself.
* `coredns_dns_request_count_total{zone, proto, family}` - total query count.
* `coredns_dns_request_duration_seconds{zone}` - duration to process each query.
* `coredns_dns_request_size_bytes{zone, proto}` - size of the request in bytes.
* `coredns_dns_request_do_count_total{zone}` - queries that have the DO bit set
* `coredns_dns_request_type_count_total{zone, type}` - counter of queries per zone and type.
* `coredns_dns_response_size_bytes{zone, proto}` - response size in bytes.
* `coredns_dns_response_rcode_count_total{zone, rcode}` - response per zone and rcode.
* `coredns_dns_request_count_total{server, zone, proto, family}` - total query count.
* `coredns_dns_request_duration_seconds{server, zone}` - duration to process each query.
* `coredns_dns_request_size_bytes{server, zone, proto}` - size of the request in bytes.
* `coredns_dns_request_do_count_total{server, zone}` - queries that have the DO bit set
* `coredns_dns_request_type_count_total{server, zone, type}` - counter of queries per zone and type.
* `coredns_dns_response_size_bytes{server, zone, proto}` - response size in bytes.
* `coredns_dns_response_rcode_count_total{server, zone, rcode}` - response per zone and rcode.
Each counter has a label `zone` which is the zone name used for the request/response.
Extra labels used are:
* `server` identifies the server responsible for the request. This is a string formatted
as the server's listening address: `<scheme>://[<bind>]:<port>`. For example, for a "normal" DNS server
this is `dns://:53`. If you are using the *bind* plugin an IP address is included, e.g.: `dns://127.0.0.53:53`.
* `proto` which holds the transport of the response ("udp" or "tcp")
* The address family (`family`) of the transport (1 = IP (IP version 4), 2 = IP6 (IP version 6)).
* `type` which holds the query type. It holds most common types (A, AAAA, MX, SOA, CNAME, PTR, TXT,

View file

@ -1,7 +1,7 @@
package metrics
import (
"github.com/coredns/coredns/plugin"
"github.com/coredns/coredns/plugin/metrics/vars"
"golang.org/x/net/context"
)
@ -15,10 +15,4 @@ import (
// Basic usage with a metric:
//
// <metric>.WithLabelValues(metrics.WithServer(ctx), labels..).Add(1)
func WithServer(ctx context.Context) string {
srv := ctx.Value(plugin.ServerCtx{})
if srv == nil {
return ""
}
return srv.(string)
}
func WithServer(ctx context.Context) string { return vars.WithServer(ctx) }

View file

@ -25,7 +25,7 @@ func (m *Metrics) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg
rw := dnstest.NewRecorder(w)
status, err := plugin.NextOrFailure(m.Name(), m.Next, ctx, rw, r)
vars.Report(state, zone, rcode.ToString(rw.Rcode), rw.Len, rw.Start)
vars.Report(ctx, state, zone, rcode.ToString(rw.Rcode), rw.Len, rw.Start)
return status, err
}

View file

@ -3,13 +3,15 @@ package vars
import (
"time"
"github.com/coredns/coredns/plugin"
"github.com/coredns/coredns/request"
"github.com/miekg/dns"
"golang.org/x/net/context"
)
// Report reports the metrics data associated with request.
func Report(req request.Request, zone, rcode string, size int, start time.Time) {
func Report(ctx context.Context, req request.Request, zone, rcode string, size int, start time.Time) {
// Proto and Family.
net := req.Proto()
fam := "1"
@ -17,25 +19,35 @@ func Report(req request.Request, zone, rcode string, size int, start time.Time)
fam = "2"
}
typ := req.QType()
server := WithServer(ctx)
RequestCount.WithLabelValues(zone, net, fam).Inc()
RequestDuration.WithLabelValues(zone).Observe(time.Since(start).Seconds())
typ := req.QType()
RequestCount.WithLabelValues(server, zone, net, fam).Inc()
RequestDuration.WithLabelValues(server, zone).Observe(time.Since(start).Seconds())
if req.Do() {
RequestDo.WithLabelValues(zone).Inc()
RequestDo.WithLabelValues(server, zone).Inc()
}
if _, known := monitorType[typ]; known {
RequestType.WithLabelValues(zone, dns.Type(typ).String()).Inc()
RequestType.WithLabelValues(server, zone, dns.Type(typ).String()).Inc()
} else {
RequestType.WithLabelValues(zone, other).Inc()
RequestType.WithLabelValues(server, zone, other).Inc()
}
ResponseSize.WithLabelValues(zone, net).Observe(float64(size))
RequestSize.WithLabelValues(zone, net).Observe(float64(req.Len()))
ResponseSize.WithLabelValues(server, zone, net).Observe(float64(size))
RequestSize.WithLabelValues(server, zone, net).Observe(float64(req.Len()))
ResponseRcode.WithLabelValues(zone, rcode).Inc()
ResponseRcode.WithLabelValues(server, zone, rcode).Inc()
}
// WithServer returns the server handling the request, as stored in ctx under
// the plugin.ServerCtx{} key. It returns the empty string when no value is
// stored under that key or when the stored value is not a string.
func WithServer(ctx context.Context) string {
	// The comma-ok assertion covers both the missing (nil) value and an
	// unexpected value type, so a bad context can never cause a panic here.
	srv, ok := ctx.Value(plugin.ServerCtx{}).(string)
	if !ok {
		return ""
	}
	return srv
}
var monitorType = map[uint16]bool{

View file

@ -13,7 +13,7 @@ var (
Subsystem: subsystem,
Name: "request_count_total",
Help: "Counter of DNS requests made per zone, protocol and family.",
}, []string{"zone", "proto", "family"})
}, []string{"server", "zone", "proto", "family"})
RequestDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: plugin.Namespace,
@ -21,7 +21,7 @@ var (
Name: "request_duration_seconds",
Buckets: plugin.TimeBuckets,
Help: "Histogram of the time (in seconds) each request took.",
}, []string{"zone"})
}, []string{"server", "zone"})
RequestSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: plugin.Namespace,
@ -29,21 +29,21 @@ var (
Name: "request_size_bytes",
Help: "Size of the EDNS0 UDP buffer in bytes (64K for TCP).",
Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3},
}, []string{"zone", "proto"})
}, []string{"server", "zone", "proto"})
RequestDo = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: plugin.Namespace,
Subsystem: subsystem,
Name: "request_do_count_total",
Help: "Counter of DNS requests with DO bit set per zone.",
}, []string{"zone"})
}, []string{"server", "zone"})
RequestType = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: plugin.Namespace,
Subsystem: subsystem,
Name: "request_type_count_total",
Help: "Counter of DNS requests per type, per zone.",
}, []string{"zone", "type"})
}, []string{"server", "zone", "type"})
ResponseSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: plugin.Namespace,
@ -51,14 +51,14 @@ var (
Name: "response_size_bytes",
Help: "Size of the returned response in bytes.",
Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3},
}, []string{"zone", "proto"})
}, []string{"server", "zone", "proto"})
ResponseRcode = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: plugin.Namespace,
Subsystem: subsystem,
Name: "response_rcode_count_total",
Help: "Counter of response status codes.",
}, []string{"zone", "rcode"})
}, []string{"server", "zone", "rcode"})
)
const (

View file

@ -102,13 +102,15 @@ payload over HTTPS). Note that with `https_google` the entire transport is encry
If monitoring is enabled (via the *prometheus* directive) then the following metric is exported:
* `coredns_proxy_request_duration_seconds{proto, proto_proxy, family, to}` - duration per upstream
interaction.
* `coredns_proxy_request_count_total{proto, proto_proxy, family, to}` - query count per upstream.
* `coredns_proxy_request_duration_seconds{server, proto, proxy_proto, family, to}` - duration per
upstream interaction.
* `coredns_proxy_request_count_total{server, proto, proxy_proto, family, to}` - query count per
upstream.
Where `proxy_proto` is the protocol used (`dns`, `grpc`, or `https_google`) and `to` is **TO**
specified in the config, `proto` is the protocol used by the incoming query ("tcp" or "udp").
and family the transport family ("1" for IPv4, and "2" for IPv6).
specified in the config, `proto` is the protocol used by the incoming query ("tcp" or "udp"), and
`family` is the transport family ("1" for IPv4, and "2" for IPv6). `server` is the server responsible
for the request (and metric). See the documentation in the metrics plugin.
## Examples

View file

@ -15,14 +15,14 @@ var (
Subsystem: "proxy",
Name: "request_count_total",
Help: "Counter of requests made per protocol, proxy protocol, family and upstream.",
}, []string{"proto", "proxy_proto", "family", "to"})
}, []string{"server", "proto", "proxy_proto", "family", "to"})
RequestDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: plugin.Namespace,
Subsystem: "proxy",
Name: "request_duration_seconds",
Buckets: plugin.TimeBuckets,
Help: "Histogram of the time (in seconds) each request took.",
}, []string{"proto", "proxy_proto", "family", "to"})
}, []string{"server", "proto", "proxy_proto", "family", "to"})
)
// familyToString returns the string form of either 1, or 2. Returns

View file

@ -9,6 +9,7 @@ import (
"time"
"github.com/coredns/coredns/plugin"
"github.com/coredns/coredns/plugin/metrics"
"github.com/coredns/coredns/plugin/pkg/healthcheck"
"github.com/coredns/coredns/request"
@ -87,7 +88,7 @@ func (p Proxy) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (
atomic.AddInt64(&host.Conns, 1)
RequestCount.WithLabelValues(state.Proto(), upstream.Exchanger().Protocol(), familyToString(state.Family()), host.Name).Add(1)
RequestCount.WithLabelValues(metrics.WithServer(ctx), state.Proto(), upstream.Exchanger().Protocol(), familyToString(state.Family()), host.Name).Add(1)
reply, backendErr = upstream.Exchanger().Exchange(ctx, host.Name, state)
@ -110,7 +111,7 @@ func (p Proxy) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (
w.WriteMsg(reply)
RequestDuration.WithLabelValues(state.Proto(), upstream.Exchanger().Protocol(), familyToString(state.Family()), host.Name).Observe(time.Since(start).Seconds())
RequestDuration.WithLabelValues(metrics.WithServer(ctx), state.Proto(), upstream.Exchanger().Protocol(), familyToString(state.Family()), host.Name).Observe(time.Since(start).Seconds())
return 0, taperr
}