Cache metrics server (#1746)

* plugin/cache: per server metrics

Use per-server metrics in the cache plugin as well. This required
some plumbing changes. Also make more use of request.Request.

* fix cherry-pick
This commit is contained in:
Miek Gieben 2018-04-27 19:37:49 +01:00 committed by GitHub
parent 85f549b529
commit bfc647d4ed
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 44 additions and 59 deletions

View file

@ -62,13 +62,13 @@ Eviction is done per shard - i.e. when a shard reaches capacity, items are evict
If monitoring is enabled (via the *prometheus* directive) then the following metrics are exported: If monitoring is enabled (via the *prometheus* directive) then the following metrics are exported:
* `coredns_cache_size{type}` - Total elements in the cache by cache type. * `coredns_cache_size{server, type}` - Total elements in the cache by cache type.
* `coredns_cache_capacity{type}` - Total capacity of the cache by cache type. * `coredns_cache_hits_total{server, type}` - Counter of cache hits by cache type.
* `coredns_cache_hits_total{type}` - Counter of cache hits by cache type. * `coredns_cache_misses_total{server}` - Counter of cache misses.
* `coredns_cache_misses_total{}` - Counter of cache misses. * `coredns_cache_drops_total{server}` - Counter of dropped messages.
* `coredns_cache_drops_total{}` - Counter of dropped messages.
Cache types are either "denial" or "success". Cache types are either "denial" or "success". The `server` label is the server handling the request; see the
metrics plugin for documentation.
## Examples ## Examples

View file

@ -102,7 +102,8 @@ func hash(qname string, qtype uint16, do bool) uint32 {
type ResponseWriter struct { type ResponseWriter struct {
dns.ResponseWriter dns.ResponseWriter
*Cache *Cache
state request.Request state request.Request
server string // Server handling the request.
prefetch bool // When true write nothing back to the client. prefetch bool // When true write nothing back to the client.
} }
@ -132,11 +133,11 @@ func (w *ResponseWriter) WriteMsg(res *dns.Msg) error {
if w.state.Match(res) { if w.state.Match(res) {
w.set(res, key, mt, duration) w.set(res, key, mt, duration)
cacheSize.WithLabelValues(Success).Set(float64(w.pcache.Len())) cacheSize.WithLabelValues(w.server, Success).Set(float64(w.pcache.Len()))
cacheSize.WithLabelValues(Denial).Set(float64(w.ncache.Len())) cacheSize.WithLabelValues(w.server, Denial).Set(float64(w.ncache.Len()))
} else { } else {
// Don't log it, but increment counter // Don't log it, but increment counter
cacheDrops.Inc() cacheDrops.WithLabelValues(w.server).Inc()
} }
} }

View file

@ -8,6 +8,7 @@ import (
"github.com/coredns/coredns/plugin" "github.com/coredns/coredns/plugin"
"github.com/coredns/coredns/plugin/pkg/response" "github.com/coredns/coredns/plugin/pkg/response"
"github.com/coredns/coredns/plugin/test" "github.com/coredns/coredns/plugin/test"
"github.com/coredns/coredns/request"
"github.com/miekg/dns" "github.com/miekg/dns"
) )
@ -162,21 +163,19 @@ func TestCache(t *testing.T) {
for _, tc := range cacheTestCases { for _, tc := range cacheTestCases {
m := tc.in.Msg() m := tc.in.Msg()
m = cacheMsg(m, tc) m = cacheMsg(m, tc)
do := tc.in.Do
state := request.Request{W: nil, Req: m}
mt, _ := response.Typify(m, utc) mt, _ := response.Typify(m, utc)
k := key(m, mt, do) k := key(m, mt, state.Do())
crr.set(m, k, mt, c.pttl) crr.set(m, k, mt, c.pttl)
name := plugin.Name(m.Question[0].Name).Normalize() i, _ := c.get(time.Now().UTC(), state, "dns://:53")
qtype := m.Question[0].Qtype
i, _ := c.get(time.Now().UTC(), name, qtype, do)
ok := i != nil ok := i != nil
if ok != tc.shouldCache { if ok != tc.shouldCache {
t.Errorf("cached message that should not have been cached: %s", name) t.Errorf("cached message that should not have been cached: %s", state.Name())
continue continue
} }

View file

@ -7,6 +7,7 @@ import (
"time" "time"
"github.com/coredns/coredns/plugin" "github.com/coredns/coredns/plugin"
"github.com/coredns/coredns/plugin/metrics"
"github.com/coredns/coredns/request" "github.com/coredns/coredns/request"
"github.com/miekg/dns" "github.com/miekg/dns"
@ -17,18 +18,16 @@ import (
func (c *Cache) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (int, error) { func (c *Cache) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (int, error) {
state := request.Request{W: w, Req: r} state := request.Request{W: w, Req: r}
qname := state.Name() zone := plugin.Zones(c.Zones).Matches(state.Name())
qtype := state.QType()
zone := plugin.Zones(c.Zones).Matches(qname)
if zone == "" { if zone == "" {
return plugin.NextOrFailure(c.Name(), c.Next, ctx, w, r) return plugin.NextOrFailure(c.Name(), c.Next, ctx, w, r)
} }
do := state.Do() // TODO(): might need more from OPT record? Like the actual bufsize?
now := c.now().UTC() now := c.now().UTC()
i, ttl := c.get(now, qname, qtype, do) server := metrics.WithServer(ctx)
i, ttl := c.get(now, state, server)
if i != nil && ttl > 0 { if i != nil && ttl > 0 {
resp := i.toMsg(r, now) resp := i.toMsg(r, now)
@ -42,14 +41,16 @@ func (c *Cache) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg)
threshold := int(math.Ceil(float64(c.percentage) / 100 * float64(i.origTTL))) threshold := int(math.Ceil(float64(c.percentage) / 100 * float64(i.origTTL)))
if i.Freq.Hits() >= c.prefetch && ttl <= threshold { if i.Freq.Hits() >= c.prefetch && ttl <= threshold {
go func() { go func() {
cachePrefetches.Inc() cachePrefetches.WithLabelValues(server).Inc()
// When prefetching we lose the item i, and with it the frequency // When prefetching we lose the item i, and with it the frequency
// that we've gathered so far. So we copy the frequency info back // that we've gathered so far. So we copy the frequency info back
// into the new item that was stored in the cache. // into the new item that was stored in the cache.
prr := &ResponseWriter{ResponseWriter: w, Cache: c, prefetch: true, state: state} prr := &ResponseWriter{ResponseWriter: w, Cache: c,
prefetch: true, state: state,
server: server}
plugin.NextOrFailure(c.Name(), c.Next, ctx, prr, r) plugin.NextOrFailure(c.Name(), c.Next, ctx, prr, r)
if i1 := c.exists(qname, qtype, do); i1 != nil { if i1 := c.exists(state); i1 != nil {
i1.Freq.Reset(now, i.Freq.Hits()) i1.Freq.Reset(now, i.Freq.Hits())
} }
}() }()
@ -58,31 +59,31 @@ func (c *Cache) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg)
return dns.RcodeSuccess, nil return dns.RcodeSuccess, nil
} }
crr := &ResponseWriter{ResponseWriter: w, Cache: c, state: state} crr := &ResponseWriter{ResponseWriter: w, Cache: c, state: state, server: server}
return plugin.NextOrFailure(c.Name(), c.Next, ctx, crr, r) return plugin.NextOrFailure(c.Name(), c.Next, ctx, crr, r)
} }
// Name implements the Handler interface. // Name implements the Handler interface.
func (c *Cache) Name() string { return "cache" } func (c *Cache) Name() string { return "cache" }
func (c *Cache) get(now time.Time, qname string, qtype uint16, do bool) (*item, int) { func (c *Cache) get(now time.Time, state request.Request, server string) (*item, int) {
k := hash(qname, qtype, do) k := hash(state.Name(), state.QType(), state.Do())
if i, ok := c.ncache.Get(k); ok { if i, ok := c.ncache.Get(k); ok {
cacheHits.WithLabelValues(Denial).Inc() cacheHits.WithLabelValues(server, Denial).Inc()
return i.(*item), i.(*item).ttl(now) return i.(*item), i.(*item).ttl(now)
} }
if i, ok := c.pcache.Get(k); ok { if i, ok := c.pcache.Get(k); ok {
cacheHits.WithLabelValues(Success).Inc() cacheHits.WithLabelValues(server, Success).Inc()
return i.(*item), i.(*item).ttl(now) return i.(*item), i.(*item).ttl(now)
} }
cacheMisses.Inc() cacheMisses.WithLabelValues(server).Inc()
return nil, 0 return nil, 0
} }
func (c *Cache) exists(qname string, qtype uint16, do bool) *item { func (c *Cache) exists(state request.Request) *item {
k := hash(qname, qtype, do) k := hash(state.Name(), state.QType(), state.Do())
if i, ok := c.ncache.Get(k); ok { if i, ok := c.ncache.Get(k); ok {
return i.(*item) return i.(*item)
} }
@ -98,42 +99,35 @@ var (
Subsystem: "cache", Subsystem: "cache",
Name: "size", Name: "size",
Help: "The number of elements in the cache.", Help: "The number of elements in the cache.",
}, []string{"type"}) }, []string{"server", "type"})
cacheCapacity = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: plugin.Namespace,
Subsystem: "cache",
Name: "capacity",
Help: "The cache's capacity.",
}, []string{"type"})
cacheHits = prometheus.NewCounterVec(prometheus.CounterOpts{ cacheHits = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: plugin.Namespace, Namespace: plugin.Namespace,
Subsystem: "cache", Subsystem: "cache",
Name: "hits_total", Name: "hits_total",
Help: "The count of cache hits.", Help: "The count of cache hits.",
}, []string{"type"}) }, []string{"server", "type"})
cacheMisses = prometheus.NewCounter(prometheus.CounterOpts{ cacheMisses = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: plugin.Namespace, Namespace: plugin.Namespace,
Subsystem: "cache", Subsystem: "cache",
Name: "misses_total", Name: "misses_total",
Help: "The count of cache misses.", Help: "The count of cache misses.",
}) }, []string{"server"})
cachePrefetches = prometheus.NewCounter(prometheus.CounterOpts{ cachePrefetches = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: plugin.Namespace, Namespace: plugin.Namespace,
Subsystem: "cache", Subsystem: "cache",
Name: "prefetch_total", Name: "prefetch_total",
Help: "The number of time the cache has prefetched a cached item.", Help: "The number of time the cache has prefetched a cached item.",
}) }, []string{"server"})
cacheDrops = prometheus.NewCounter(prometheus.CounterOpts{ cacheDrops = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: plugin.Namespace, Namespace: plugin.Namespace,
Subsystem: "cache", Subsystem: "cache",
Name: "drops_total", Name: "drops_total",
Help: "The number responses that are not cached, because the reply is malformed.", Help: "The number responses that are not cached, because the reply is malformed.",
}) }, []string{"server"})
) )
var once sync.Once var once sync.Once

11
plugin/cache/setup.go vendored
View file

@ -36,21 +36,12 @@ func setup(c *caddy.Controller) error {
c.OnStartup(func() error { c.OnStartup(func() error {
once.Do(func() { once.Do(func() {
metrics.MustRegister(c, metrics.MustRegister(c,
cacheSize, cacheCapacity, cacheSize, cacheHits, cacheMisses,
cacheHits, cacheMisses,
cachePrefetches, cacheDrops) cachePrefetches, cacheDrops)
}) })
return nil return nil
}) })
// Initialize all counters and gauges.
cacheSize.WithLabelValues(Success)
cacheSize.WithLabelValues(Denial)
cacheCapacity.WithLabelValues(Success).Set(float64(ca.pcap))
cacheCapacity.WithLabelValues(Denial).Set(float64(ca.ncap))
cacheHits.WithLabelValues(Success)
cacheHits.WithLabelValues(Denial)
return nil return nil
} }