Cache metrics server (#1746)

* plugin/cache: per server metrics

Use per-server metrics in the cache plugin as well. This required
some plumbing changes. Also use request.Request in more places.

* fix cherry-pick
This commit is contained in:
Miek Gieben 2018-04-27 19:37:49 +01:00 committed by GitHub
parent 85f549b529
commit bfc647d4ed
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 44 additions and 59 deletions

View file

@ -62,13 +62,13 @@ Eviction is done per shard - i.e. when a shard reaches capacity, items are evict
If monitoring is enabled (via the *prometheus* directive) then the following metrics are exported:
* `coredns_cache_size{type}` - Total elements in the cache by cache type.
* `coredns_cache_capacity{type}` - Total capacity of the cache by cache type.
* `coredns_cache_hits_total{type}` - Counter of cache hits by cache type.
* `coredns_cache_misses_total{}` - Counter of cache misses.
* `coredns_cache_drops_total{}` - Counter of dropped messages.
* `coredns_cache_size{server, type}` - Total elements in the cache by cache type.
* `coredns_cache_hits_total{server, type}` - Counter of cache hits by cache type.
* `coredns_cache_misses_total{server}` - Counter of cache misses.
* `coredns_cache_drops_total{server}` - Counter of dropped messages.
Cache types are either "denial" or "success".
Cache types are either "denial" or "success". The `server` label identifies the server handling
the request; see the metrics plugin for documentation.
## Examples

View file

@ -102,7 +102,8 @@ func hash(qname string, qtype uint16, do bool) uint32 {
type ResponseWriter struct {
dns.ResponseWriter
*Cache
state request.Request
state request.Request
server string // Server handling the request.
prefetch bool // When true write nothing back to the client.
}
@ -132,11 +133,11 @@ func (w *ResponseWriter) WriteMsg(res *dns.Msg) error {
if w.state.Match(res) {
w.set(res, key, mt, duration)
cacheSize.WithLabelValues(Success).Set(float64(w.pcache.Len()))
cacheSize.WithLabelValues(Denial).Set(float64(w.ncache.Len()))
cacheSize.WithLabelValues(w.server, Success).Set(float64(w.pcache.Len()))
cacheSize.WithLabelValues(w.server, Denial).Set(float64(w.ncache.Len()))
} else {
// Don't log it, but increment counter
cacheDrops.Inc()
cacheDrops.WithLabelValues(w.server).Inc()
}
}

View file

@ -8,6 +8,7 @@ import (
"github.com/coredns/coredns/plugin"
"github.com/coredns/coredns/plugin/pkg/response"
"github.com/coredns/coredns/plugin/test"
"github.com/coredns/coredns/request"
"github.com/miekg/dns"
)
@ -162,21 +163,19 @@ func TestCache(t *testing.T) {
for _, tc := range cacheTestCases {
m := tc.in.Msg()
m = cacheMsg(m, tc)
do := tc.in.Do
state := request.Request{W: nil, Req: m}
mt, _ := response.Typify(m, utc)
k := key(m, mt, do)
k := key(m, mt, state.Do())
crr.set(m, k, mt, c.pttl)
name := plugin.Name(m.Question[0].Name).Normalize()
qtype := m.Question[0].Qtype
i, _ := c.get(time.Now().UTC(), name, qtype, do)
i, _ := c.get(time.Now().UTC(), state, "dns://:53")
ok := i != nil
if ok != tc.shouldCache {
t.Errorf("cached message that should not have been cached: %s", name)
t.Errorf("cached message that should not have been cached: %s", state.Name())
continue
}

View file

@ -7,6 +7,7 @@ import (
"time"
"github.com/coredns/coredns/plugin"
"github.com/coredns/coredns/plugin/metrics"
"github.com/coredns/coredns/request"
"github.com/miekg/dns"
@ -17,18 +18,16 @@ import (
func (c *Cache) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (int, error) {
state := request.Request{W: w, Req: r}
qname := state.Name()
qtype := state.QType()
zone := plugin.Zones(c.Zones).Matches(qname)
zone := plugin.Zones(c.Zones).Matches(state.Name())
if zone == "" {
return plugin.NextOrFailure(c.Name(), c.Next, ctx, w, r)
}
do := state.Do() // TODO(): might need more from OPT record? Like the actual bufsize?
now := c.now().UTC()
i, ttl := c.get(now, qname, qtype, do)
server := metrics.WithServer(ctx)
i, ttl := c.get(now, state, server)
if i != nil && ttl > 0 {
resp := i.toMsg(r, now)
@ -42,14 +41,16 @@ func (c *Cache) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg)
threshold := int(math.Ceil(float64(c.percentage) / 100 * float64(i.origTTL)))
if i.Freq.Hits() >= c.prefetch && ttl <= threshold {
go func() {
cachePrefetches.Inc()
cachePrefetches.WithLabelValues(server).Inc()
// When prefetching we lose the item i, and with it the frequency
// that we've gathered so far. So we copy the frequency info back
// into the new item that was stored in the cache.
prr := &ResponseWriter{ResponseWriter: w, Cache: c, prefetch: true, state: state}
prr := &ResponseWriter{ResponseWriter: w, Cache: c,
prefetch: true, state: state,
server: server}
plugin.NextOrFailure(c.Name(), c.Next, ctx, prr, r)
if i1 := c.exists(qname, qtype, do); i1 != nil {
if i1 := c.exists(state); i1 != nil {
i1.Freq.Reset(now, i.Freq.Hits())
}
}()
@ -58,31 +59,31 @@ func (c *Cache) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg)
return dns.RcodeSuccess, nil
}
crr := &ResponseWriter{ResponseWriter: w, Cache: c, state: state}
crr := &ResponseWriter{ResponseWriter: w, Cache: c, state: state, server: server}
return plugin.NextOrFailure(c.Name(), c.Next, ctx, crr, r)
}
// Name implements the Handler interface. It returns the plugin's name, "cache".
func (c *Cache) Name() string { return "cache" }
func (c *Cache) get(now time.Time, qname string, qtype uint16, do bool) (*item, int) {
k := hash(qname, qtype, do)
func (c *Cache) get(now time.Time, state request.Request, server string) (*item, int) {
k := hash(state.Name(), state.QType(), state.Do())
if i, ok := c.ncache.Get(k); ok {
cacheHits.WithLabelValues(Denial).Inc()
cacheHits.WithLabelValues(server, Denial).Inc()
return i.(*item), i.(*item).ttl(now)
}
if i, ok := c.pcache.Get(k); ok {
cacheHits.WithLabelValues(Success).Inc()
cacheHits.WithLabelValues(server, Success).Inc()
return i.(*item), i.(*item).ttl(now)
}
cacheMisses.Inc()
cacheMisses.WithLabelValues(server).Inc()
return nil, 0
}
func (c *Cache) exists(qname string, qtype uint16, do bool) *item {
k := hash(qname, qtype, do)
func (c *Cache) exists(state request.Request) *item {
k := hash(state.Name(), state.QType(), state.Do())
if i, ok := c.ncache.Get(k); ok {
return i.(*item)
}
@ -98,42 +99,35 @@ var (
Subsystem: "cache",
Name: "size",
Help: "The number of elements in the cache.",
}, []string{"type"})
cacheCapacity = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: plugin.Namespace,
Subsystem: "cache",
Name: "capacity",
Help: "The cache's capacity.",
}, []string{"type"})
}, []string{"server", "type"})
cacheHits = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: plugin.Namespace,
Subsystem: "cache",
Name: "hits_total",
Help: "The count of cache hits.",
}, []string{"type"})
}, []string{"server", "type"})
cacheMisses = prometheus.NewCounter(prometheus.CounterOpts{
cacheMisses = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: plugin.Namespace,
Subsystem: "cache",
Name: "misses_total",
Help: "The count of cache misses.",
})
}, []string{"server"})
cachePrefetches = prometheus.NewCounter(prometheus.CounterOpts{
cachePrefetches = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: plugin.Namespace,
Subsystem: "cache",
Name: "prefetch_total",
Help: "The number of time the cache has prefetched a cached item.",
})
}, []string{"server"})
cacheDrops = prometheus.NewCounter(prometheus.CounterOpts{
cacheDrops = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: plugin.Namespace,
Subsystem: "cache",
Name: "drops_total",
Help: "The number responses that are not cached, because the reply is malformed.",
})
}, []string{"server"})
)
var once sync.Once

11
plugin/cache/setup.go vendored
View file

@ -36,21 +36,12 @@ func setup(c *caddy.Controller) error {
c.OnStartup(func() error {
once.Do(func() {
metrics.MustRegister(c,
cacheSize, cacheCapacity,
cacheHits, cacheMisses,
cacheSize, cacheHits, cacheMisses,
cachePrefetches, cacheDrops)
})
return nil
})
// Initialize all counters and gauges.
cacheSize.WithLabelValues(Success)
cacheSize.WithLabelValues(Denial)
cacheCapacity.WithLabelValues(Success).Set(float64(ca.pcap))
cacheCapacity.WithLabelValues(Denial).Set(float64(ca.ncap))
cacheHits.WithLabelValues(Success)
cacheHits.WithLabelValues(Denial)
return nil
}