plugin/metrics: add 'server' label (#1682)
* plugin/metrics: add 'server' label This uses the new WithServer(ctx) to get the current server from the context. First in a larger refactor to make all plugins do this. * compile * compile * lala test * compile and test * typos * Don't duplicate the code
This commit is contained in:
parent
573ad62b77
commit
08443a9f00
10 changed files with 68 additions and 56 deletions
|
@ -197,7 +197,7 @@ func (s *Server) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg)
|
|||
// The default dns.Mux checks the question section size, but we have our
|
||||
// own mux here. Check if we have a question section. If not drop them here.
|
||||
if r == nil || len(r.Question) == 0 {
|
||||
DefaultErrorFunc(w, r, dns.RcodeServerFailure)
|
||||
DefaultErrorFunc(ctx, w, r, dns.RcodeServerFailure)
|
||||
return
|
||||
}
|
||||
|
||||
|
@ -206,13 +206,13 @@ func (s *Server) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg)
|
|||
// In case the user doesn't enable error plugin, we still
|
||||
// need to make sure that we stay alive up here
|
||||
if rec := recover(); rec != nil {
|
||||
DefaultErrorFunc(w, r, dns.RcodeServerFailure)
|
||||
DefaultErrorFunc(ctx, w, r, dns.RcodeServerFailure)
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
if !s.classChaos && r.Question[0].Qclass != dns.ClassINET {
|
||||
DefaultErrorFunc(w, r, dns.RcodeRefused)
|
||||
DefaultErrorFunc(ctx, w, r, dns.RcodeRefused)
|
||||
return
|
||||
}
|
||||
|
||||
|
@ -223,7 +223,7 @@ func (s *Server) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg)
|
|||
|
||||
ctx, err := incrementDepthAndCheck(ctx)
|
||||
if err != nil {
|
||||
DefaultErrorFunc(w, r, dns.RcodeServerFailure)
|
||||
DefaultErrorFunc(ctx, w, r, dns.RcodeServerFailure)
|
||||
return
|
||||
}
|
||||
|
||||
|
@ -254,7 +254,7 @@ func (s *Server) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg)
|
|||
if h.FilterFunc == nil {
|
||||
rcode, _ := h.pluginChain.ServeDNS(ctx, w, r)
|
||||
if !plugin.ClientWrite(rcode) {
|
||||
DefaultErrorFunc(w, r, rcode)
|
||||
DefaultErrorFunc(ctx, w, r, rcode)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
@ -263,7 +263,7 @@ func (s *Server) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg)
|
|||
if h.FilterFunc(q) {
|
||||
rcode, _ := h.pluginChain.ServeDNS(ctx, w, r)
|
||||
if !plugin.ClientWrite(rcode) {
|
||||
DefaultErrorFunc(w, r, rcode)
|
||||
DefaultErrorFunc(ctx, w, r, rcode)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
@ -285,7 +285,7 @@ func (s *Server) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg)
|
|||
// DS request, and we found a zone, use the handler for the query.
|
||||
rcode, _ := dshandler.pluginChain.ServeDNS(ctx, w, r)
|
||||
if !plugin.ClientWrite(rcode) {
|
||||
DefaultErrorFunc(w, r, rcode)
|
||||
DefaultErrorFunc(ctx, w, r, rcode)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
@ -298,13 +298,13 @@ func (s *Server) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg)
|
|||
|
||||
rcode, _ := h.pluginChain.ServeDNS(ctx, w, r)
|
||||
if !plugin.ClientWrite(rcode) {
|
||||
DefaultErrorFunc(w, r, rcode)
|
||||
DefaultErrorFunc(ctx, w, r, rcode)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Still here? Error out with REFUSED.
|
||||
DefaultErrorFunc(w, r, dns.RcodeRefused)
|
||||
DefaultErrorFunc(ctx, w, r, dns.RcodeRefused)
|
||||
}
|
||||
|
||||
// OnStartupComplete lists the sites served by this server
|
||||
|
@ -331,7 +331,7 @@ func (s *Server) Tracer() ot.Tracer {
|
|||
}
|
||||
|
||||
// DefaultErrorFunc responds to a DNS request with an error.
|
||||
func DefaultErrorFunc(w dns.ResponseWriter, r *dns.Msg, rc int) {
|
||||
func DefaultErrorFunc(ctx context.Context, w dns.ResponseWriter, r *dns.Msg, rc int) {
|
||||
state := request.Request{W: w, Req: r}
|
||||
|
||||
answer := new(dns.Msg)
|
||||
|
@ -339,7 +339,7 @@ func DefaultErrorFunc(w dns.ResponseWriter, r *dns.Msg, rc int) {
|
|||
|
||||
state.SizeAndDo(answer)
|
||||
|
||||
vars.Report(state, vars.Dropped, rcode.ToString(rc), answer.Len(), time.Now())
|
||||
vars.Report(ctx, state, vars.Dropped, rcode.ToString(rc), answer.Len(), time.Now())
|
||||
|
||||
w.WriteMsg(answer)
|
||||
}
|
||||
|
|
|
@ -21,7 +21,7 @@ import (
|
|||
type Logger struct {
|
||||
Next plugin.Handler
|
||||
Rules []Rule
|
||||
ErrorFunc func(dns.ResponseWriter, *dns.Msg, int) // failover error handler
|
||||
ErrorFunc func(context.Context, dns.ResponseWriter, *dns.Msg, int) // failover error handler
|
||||
}
|
||||
|
||||
// ServeDNS implements the plugin.Handler interface.
|
||||
|
@ -39,13 +39,13 @@ func (l Logger) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg)
|
|||
// There was an error up the chain, but no response has been written yet.
|
||||
// The error must be handled here so the log entry will record the response size.
|
||||
if l.ErrorFunc != nil {
|
||||
l.ErrorFunc(rrw, r, rc)
|
||||
l.ErrorFunc(ctx, rrw, r, rc)
|
||||
} else {
|
||||
answer := new(dns.Msg)
|
||||
answer.SetRcode(r, rc)
|
||||
state.SizeAndDo(answer)
|
||||
|
||||
vars.Report(state, vars.Dropped, rcode.ToString(rc), answer.Len(), time.Now())
|
||||
vars.Report(ctx, state, vars.Dropped, rcode.ToString(rc), answer.Len(), time.Now())
|
||||
|
||||
w.WriteMsg(answer)
|
||||
}
|
||||
|
|
|
@ -11,18 +11,21 @@ The default location for the metrics is `localhost:9153`. The metrics path is fi
|
|||
The following metrics are exported:
|
||||
|
||||
* `coredns_build_info{version, revision, goversion}` - info about CoreDNS itself.
|
||||
* `coredns_dns_request_count_total{zone, proto, family}` - total query count.
|
||||
* `coredns_dns_request_duration_seconds{zone}` - duration to process each query.
|
||||
* `coredns_dns_request_size_bytes{zone, proto}` - size of the request in bytes.
|
||||
* `coredns_dns_request_do_count_total{zone}` - queries that have the DO bit set
|
||||
* `coredns_dns_request_type_count_total{zone, type}` - counter of queries per zone and type.
|
||||
* `coredns_dns_response_size_bytes{zone, proto}` - response size in bytes.
|
||||
* `coredns_dns_response_rcode_count_total{zone, rcode}` - response per zone and rcode.
|
||||
* `coredns_dns_request_count_total{server, zone, proto, family}` - total query count.
|
||||
* `coredns_dns_request_duration_seconds{server, zone}` - duration to process each query.
|
||||
* `coredns_dns_request_size_bytes{server, zone, proto}` - size of the request in bytes.
|
||||
* `coredns_dns_request_do_count_total{server, zone}` - queries that have the DO bit set
|
||||
* `coredns_dns_request_type_count_total{server, zone, type}` - counter of queries per zone and type.
|
||||
* `coredns_dns_response_size_bytes{server, zone, proto}` - response size in bytes.
|
||||
* `coredns_dns_response_rcode_count_total{server, zone, rcode}` - response per zone and rcode.
|
||||
|
||||
Each counter has a label `zone` which is the zonename used for the request/response.
|
||||
|
||||
Extra labels used are:
|
||||
|
||||
* `server` identifies the server responsible for the request. This is a string formatted
|
||||
as the server's listening address: `<scheme>://[<bind>]:<port>`. E.g. for a "normal" DNS server
|
||||
this is `dns://:53`. If you are using the *bind* plugin an IP address is included, e.g.: `dns://127.0.0.53:53`.
|
||||
* `proto` which holds the transport of the response ("udp" or "tcp")
|
||||
* The address family (`family`) of the transport (1 = IP (IP version 4), 2 = IP6 (IP version 6)).
|
||||
* `type` which holds the query type. It holds most common types (A, AAAA, MX, SOA, CNAME, PTR, TXT,
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
package metrics
|
||||
|
||||
import (
|
||||
"github.com/coredns/coredns/plugin"
|
||||
"github.com/coredns/coredns/plugin/metrics/vars"
|
||||
|
||||
"golang.org/x/net/context"
|
||||
)
|
||||
|
@ -15,10 +15,4 @@ import (
|
|||
// Basic usage with a metric:
|
||||
//
|
||||
// <metric>.WithLabelValues(metrics.WithServer(ctx), labels..).Add(1)
|
||||
func WithServer(ctx context.Context) string {
|
||||
srv := ctx.Value(plugin.ServerCtx{})
|
||||
if srv == nil {
|
||||
return ""
|
||||
}
|
||||
return srv.(string)
|
||||
}
|
||||
func WithServer(ctx context.Context) string { return vars.WithServer(ctx) }
|
||||
|
|
|
@ -25,7 +25,7 @@ func (m *Metrics) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg
|
|||
rw := dnstest.NewRecorder(w)
|
||||
status, err := plugin.NextOrFailure(m.Name(), m.Next, ctx, rw, r)
|
||||
|
||||
vars.Report(state, zone, rcode.ToString(rw.Rcode), rw.Len, rw.Start)
|
||||
vars.Report(ctx, state, zone, rcode.ToString(rw.Rcode), rw.Len, rw.Start)
|
||||
|
||||
return status, err
|
||||
}
|
||||
|
|
|
@ -3,13 +3,15 @@ package vars
|
|||
import (
|
||||
"time"
|
||||
|
||||
"github.com/coredns/coredns/plugin"
|
||||
"github.com/coredns/coredns/request"
|
||||
|
||||
"github.com/miekg/dns"
|
||||
"golang.org/x/net/context"
|
||||
)
|
||||
|
||||
// Report reports the metrics data associated with the request.
|
||||
func Report(req request.Request, zone, rcode string, size int, start time.Time) {
|
||||
func Report(ctx context.Context, req request.Request, zone, rcode string, size int, start time.Time) {
|
||||
// Proto and Family.
|
||||
net := req.Proto()
|
||||
fam := "1"
|
||||
|
@ -17,25 +19,35 @@ func Report(req request.Request, zone, rcode string, size int, start time.Time)
|
|||
fam = "2"
|
||||
}
|
||||
|
||||
typ := req.QType()
|
||||
server := WithServer(ctx)
|
||||
|
||||
RequestCount.WithLabelValues(zone, net, fam).Inc()
|
||||
RequestDuration.WithLabelValues(zone).Observe(time.Since(start).Seconds())
|
||||
typ := req.QType()
|
||||
RequestCount.WithLabelValues(server, zone, net, fam).Inc()
|
||||
RequestDuration.WithLabelValues(server, zone).Observe(time.Since(start).Seconds())
|
||||
|
||||
if req.Do() {
|
||||
RequestDo.WithLabelValues(zone).Inc()
|
||||
RequestDo.WithLabelValues(server, zone).Inc()
|
||||
}
|
||||
|
||||
if _, known := monitorType[typ]; known {
|
||||
RequestType.WithLabelValues(zone, dns.Type(typ).String()).Inc()
|
||||
RequestType.WithLabelValues(server, zone, dns.Type(typ).String()).Inc()
|
||||
} else {
|
||||
RequestType.WithLabelValues(zone, other).Inc()
|
||||
RequestType.WithLabelValues(server, zone, other).Inc()
|
||||
}
|
||||
|
||||
ResponseSize.WithLabelValues(zone, net).Observe(float64(size))
|
||||
RequestSize.WithLabelValues(zone, net).Observe(float64(req.Len()))
|
||||
ResponseSize.WithLabelValues(server, zone, net).Observe(float64(size))
|
||||
RequestSize.WithLabelValues(server, zone, net).Observe(float64(req.Len()))
|
||||
|
||||
ResponseRcode.WithLabelValues(zone, rcode).Inc()
|
||||
ResponseRcode.WithLabelValues(server, zone, rcode).Inc()
|
||||
}
|
||||
|
||||
// WithServer returns the current server handling the request.
|
||||
func WithServer(ctx context.Context) string {
|
||||
srv := ctx.Value(plugin.ServerCtx{})
|
||||
if srv == nil {
|
||||
return ""
|
||||
}
|
||||
return srv.(string)
|
||||
}
|
||||
|
||||
var monitorType = map[uint16]bool{
|
||||
|
|
|
@ -13,7 +13,7 @@ var (
|
|||
Subsystem: subsystem,
|
||||
Name: "request_count_total",
|
||||
Help: "Counter of DNS requests made per zone, protocol and family.",
|
||||
}, []string{"zone", "proto", "family"})
|
||||
}, []string{"server", "zone", "proto", "family"})
|
||||
|
||||
RequestDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
||||
Namespace: plugin.Namespace,
|
||||
|
@ -21,7 +21,7 @@ var (
|
|||
Name: "request_duration_seconds",
|
||||
Buckets: plugin.TimeBuckets,
|
||||
Help: "Histogram of the time (in seconds) each request took.",
|
||||
}, []string{"zone"})
|
||||
}, []string{"server", "zone"})
|
||||
|
||||
RequestSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
||||
Namespace: plugin.Namespace,
|
||||
|
@ -29,21 +29,21 @@ var (
|
|||
Name: "request_size_bytes",
|
||||
Help: "Size of the EDNS0 UDP buffer in bytes (64K for TCP).",
|
||||
Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3},
|
||||
}, []string{"zone", "proto"})
|
||||
}, []string{"server", "zone", "proto"})
|
||||
|
||||
RequestDo = prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
Namespace: plugin.Namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "request_do_count_total",
|
||||
Help: "Counter of DNS requests with DO bit set per zone.",
|
||||
}, []string{"zone"})
|
||||
}, []string{"server", "zone"})
|
||||
|
||||
RequestType = prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
Namespace: plugin.Namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "request_type_count_total",
|
||||
Help: "Counter of DNS requests per type, per zone.",
|
||||
}, []string{"zone", "type"})
|
||||
}, []string{"server", "zone", "type"})
|
||||
|
||||
ResponseSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
||||
Namespace: plugin.Namespace,
|
||||
|
@ -51,14 +51,14 @@ var (
|
|||
Name: "response_size_bytes",
|
||||
Help: "Size of the returned response in bytes.",
|
||||
Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3},
|
||||
}, []string{"zone", "proto"})
|
||||
}, []string{"server", "zone", "proto"})
|
||||
|
||||
ResponseRcode = prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
Namespace: plugin.Namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "response_rcode_count_total",
|
||||
Help: "Counter of response status codes.",
|
||||
}, []string{"zone", "rcode"})
|
||||
}, []string{"server", "zone", "rcode"})
|
||||
)
|
||||
|
||||
const (
|
||||
|
|
|
@ -102,13 +102,15 @@ payload over HTTPS). Note that with `https_google` the entire transport is encry
|
|||
|
||||
If monitoring is enabled (via the *prometheus* directive) then the following metric is exported:
|
||||
|
||||
* `coredns_proxy_request_duration_seconds{proto, proto_proxy, family, to}` - duration per upstream
|
||||
interaction.
|
||||
* `coredns_proxy_request_count_total{proto, proto_proxy, family, to}` - query count per upstream.
|
||||
* `coredns_proxy_request_duration_seconds{server, proto, proxy_proto, family, to}` - duration per
|
||||
upstream interaction.
|
||||
* `coredns_proxy_request_count_total{server, proto, proxy_proto, family, to}` - query count per
|
||||
upstream.
|
||||
|
||||
Where `proxy_proto` is the protocol used (`dns`, `grpc`, or `https_google`) and `to` is **TO**
|
||||
specified in the config, `proto` is the protocol used by the incoming query ("tcp" or "udp").
|
||||
and family the transport family ("1" for IPv4, and "2" for IPv6).
|
||||
specified in the config, `proto` is the protocol used by the incoming query ("tcp" or "udp"), and `family` is
|
||||
the transport family ("1" for IPv4, and "2" for IPv6). `server` is the server responsible for the
|
||||
request (and metric). See the documentation in the metrics plugin.
|
||||
|
||||
## Examples
|
||||
|
||||
|
|
|
@ -15,14 +15,14 @@ var (
|
|||
Subsystem: "proxy",
|
||||
Name: "request_count_total",
|
||||
Help: "Counter of requests made per protocol, proxy protocol, family and upstream.",
|
||||
}, []string{"proto", "proxy_proto", "family", "to"})
|
||||
}, []string{"server", "proto", "proxy_proto", "family", "to"})
|
||||
RequestDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
||||
Namespace: plugin.Namespace,
|
||||
Subsystem: "proxy",
|
||||
Name: "request_duration_seconds",
|
||||
Buckets: plugin.TimeBuckets,
|
||||
Help: "Histogram of the time (in seconds) each request took.",
|
||||
}, []string{"proto", "proxy_proto", "family", "to"})
|
||||
}, []string{"server", "proto", "proxy_proto", "family", "to"})
|
||||
)
|
||||
|
||||
// familyToString returns the string form of either 1, or 2. Returns
|
||||
|
|
|
@ -9,6 +9,7 @@ import (
|
|||
"time"
|
||||
|
||||
"github.com/coredns/coredns/plugin"
|
||||
"github.com/coredns/coredns/plugin/metrics"
|
||||
"github.com/coredns/coredns/plugin/pkg/healthcheck"
|
||||
"github.com/coredns/coredns/request"
|
||||
|
||||
|
@ -87,7 +88,7 @@ func (p Proxy) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (
|
|||
|
||||
atomic.AddInt64(&host.Conns, 1)
|
||||
|
||||
RequestCount.WithLabelValues(state.Proto(), upstream.Exchanger().Protocol(), familyToString(state.Family()), host.Name).Add(1)
|
||||
RequestCount.WithLabelValues(metrics.WithServer(ctx), state.Proto(), upstream.Exchanger().Protocol(), familyToString(state.Family()), host.Name).Add(1)
|
||||
|
||||
reply, backendErr = upstream.Exchanger().Exchange(ctx, host.Name, state)
|
||||
|
||||
|
@ -110,7 +111,7 @@ func (p Proxy) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (
|
|||
|
||||
w.WriteMsg(reply)
|
||||
|
||||
RequestDuration.WithLabelValues(state.Proto(), upstream.Exchanger().Protocol(), familyToString(state.Family()), host.Name).Observe(time.Since(start).Seconds())
|
||||
RequestDuration.WithLabelValues(metrics.WithServer(ctx), state.Proto(), upstream.Exchanger().Protocol(), familyToString(state.Family()), host.Name).Observe(time.Since(start).Seconds())
|
||||
|
||||
return 0, taperr
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue