Cache metrics server (#1746)
* plugin/cache: per server metrics

  Use per-server metrics in the cache plugin as well. This required some plumbing changes. Also use request.Request more.

* fix cherry-pick
parent 85f549b529
commit bfc647d4ed
5 changed files with 44 additions and 59 deletions
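The plumbing the commit message refers to boils down to one pattern: the handler asks the metrics plugin which server block is handling the request and threads that value, as a label, into every cache metric. Below is a minimal sketch of that pattern, with illustrative package and metric names; only metrics.WithServer and the prometheus vector types are taken from the diffs that follow.

    package example

    import (
        "context"

        "github.com/coredns/coredns/plugin/metrics"
        "github.com/prometheus/client_golang/prometheus"
    )

    // Hypothetical counter, labelled per server block like the cache metrics below.
    var lookups = prometheus.NewCounterVec(prometheus.CounterOpts{
        Name: "example_cache_lookups_total",
        Help: "Cache lookups per server block.",
    }, []string{"server"})

    func observe(ctx context.Context) {
        // metrics.WithServer reads the address of the server block handling this
        // request out of the context (e.g. "dns://:53", the value the test below uses).
        server := metrics.WithServer(ctx)
        lookups.WithLabelValues(server).Inc()
    }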
plugin/cache/README.md (12 changes)
@@ -62,13 +62,13 @@ Eviction is done per shard - i.e. when a shard reaches capacity, items are evict
 
 If monitoring is enabled (via the *prometheus* directive) then the following metrics are exported:
 
-* `coredns_cache_size{type}` - Total elements in the cache by cache type.
-* `coredns_cache_capacity{type}` - Total capacity of the cache by cache type.
-* `coredns_cache_hits_total{type}` - Counter of cache hits by cache type.
-* `coredns_cache_misses_total{}` - Counter of cache misses.
-* `coredns_cache_drops_total{}` - Counter of dropped messages.
+* `coredns_cache_size{server, type}` - Total elements in the cache by cache type.
+* `coredns_cache_hits_total{server, type}` - Counter of cache hits by cache type.
+* `coredns_cache_misses_total{server}` - Counter of cache misses.
+* `coredns_cache_drops_total{server}` - Counter of dropped messages.
 
-Cache types are either "denial" or "success".
+Cache types are either "denial" or "success". `Server` is the server handling the request, see the
+metrics plugin for documentation.
 
 ## Examples
 
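For a sense of what the relabelled series look like once scraped, here is a small self-contained sketch (scratch registry and hard-coded values, not CoreDNS itself) that registers a counter with the same {server, type} labels and gathers it. The "success"/"denial" type values and the "dns://:53" server value match the ones used elsewhere in this diff.

    package main

    import (
        "fmt"

        "github.com/prometheus/client_golang/prometheus"
    )

    func main() {
        reg := prometheus.NewRegistry()
        hits := prometheus.NewCounterVec(prometheus.CounterOpts{
            Namespace: "coredns",
            Subsystem: "cache",
            Name:      "hits_total",
            Help:      "The count of cache hits.",
        }, []string{"server", "type"})
        reg.MustRegister(hits)

        // One child series per {server, type} combination.
        hits.WithLabelValues("dns://:53", "success").Inc()
        hits.WithLabelValues("dns://:53", "denial").Inc()

        mfs, _ := reg.Gather()
        for _, mf := range mfs {
            // Prints: coredns_cache_hits_total 2  (two labelled children in one family)
            fmt.Println(mf.GetName(), len(mf.GetMetric()))
        }
    }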
plugin/cache/cache.go (9 changes)
@@ -102,7 +102,8 @@ func hash(qname string, qtype uint16, do bool) uint32 {
 type ResponseWriter struct {
 	dns.ResponseWriter
 	*Cache
-	state request.Request
+	state  request.Request
+	server string // Server handling the request.
 
 	prefetch bool // When true write nothing back to the client.
 }
@@ -132,11 +133,11 @@ func (w *ResponseWriter) WriteMsg(res *dns.Msg) error {
 
 	if w.state.Match(res) {
 		w.set(res, key, mt, duration)
-		cacheSize.WithLabelValues(Success).Set(float64(w.pcache.Len()))
-		cacheSize.WithLabelValues(Denial).Set(float64(w.ncache.Len()))
+		cacheSize.WithLabelValues(w.server, Success).Set(float64(w.pcache.Len()))
+		cacheSize.WithLabelValues(w.server, Denial).Set(float64(w.ncache.Len()))
 	} else {
 		// Don't log it, but increment counter
-		cacheDrops.Inc()
+		cacheDrops.WithLabelValues(w.server).Inc()
 	}
 }
 
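The cache.go change gives the ResponseWriter wrapper a server field so that WriteMsg can attribute the size and drop metrics to the right server block. The wrapper pattern itself, stripped of the cache specifics, looks roughly like the sketch below (illustrative type and metric names; only the dns.ResponseWriter interface is real).

    package example

    import (
        "github.com/miekg/dns"
        "github.com/prometheus/client_golang/prometheus"
    )

    // Hypothetical per-server counter, analogous to cacheDrops/cacheSize above.
    var replies = prometheus.NewCounterVec(prometheus.CounterOpts{
        Name: "example_replies_total",
        Help: "Replies observed per server block.",
    }, []string{"server"})

    // taggingWriter embeds dns.ResponseWriter and carries per-request metadata,
    // mirroring cache.ResponseWriter's new server field.
    type taggingWriter struct {
        dns.ResponseWriter
        server string // filled in by the handler, e.g. from metrics.WithServer(ctx)
    }

    // WriteMsg records the labelled metric, then hands the reply to the real writer.
    func (w *taggingWriter) WriteMsg(res *dns.Msg) error {
        replies.WithLabelValues(w.server).Inc()
        return w.ResponseWriter.WriteMsg(res)
    }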
plugin/cache/cache_test.go (13 changes)
@@ -8,6 +8,7 @@ import (
 	"github.com/coredns/coredns/plugin"
 	"github.com/coredns/coredns/plugin/pkg/response"
 	"github.com/coredns/coredns/plugin/test"
+	"github.com/coredns/coredns/request"
 
 	"github.com/miekg/dns"
 )
@@ -162,21 +163,19 @@ func TestCache(t *testing.T) {
 	for _, tc := range cacheTestCases {
 		m := tc.in.Msg()
 		m = cacheMsg(m, tc)
-		do := tc.in.Do
+
+		state := request.Request{W: nil, Req: m}
 
 		mt, _ := response.Typify(m, utc)
-		k := key(m, mt, do)
+		k := key(m, mt, state.Do())
 
 		crr.set(m, k, mt, c.pttl)
 
-		name := plugin.Name(m.Question[0].Name).Normalize()
-		qtype := m.Question[0].Qtype
-
-		i, _ := c.get(time.Now().UTC(), name, qtype, do)
+		i, _ := c.get(time.Now().UTC(), state, "dns://:53")
 		ok := i != nil
 
 		if ok != tc.shouldCache {
-			t.Errorf("cached message that should not have been cached: %s", name)
+			t.Errorf("cached message that should not have been cached: %s", state.Name())
 			continue
 		}
 
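The test now builds a request.Request by hand instead of juggling do/name/qtype locals, which is the "use request.Request more" part of the commit message. A quick sketch of the accessors it relies on (the values in the comments are what I would expect for this query, not output taken from the test):

    package example

    import (
        "fmt"

        "github.com/coredns/coredns/request"
        "github.com/miekg/dns"
    )

    func demo() {
        m := new(dns.Msg)
        m.SetQuestion("example.org.", dns.TypeA)

        // A nil ResponseWriter is fine for the read-only accessors, as in the test.
        state := request.Request{W: nil, Req: m}

        fmt.Println(state.Name())  // "example.org."
        fmt.Println(state.QType()) // 1, i.e. dns.TypeA
        fmt.Println(state.Do())    // false: no OPT record with the DO bit attached
    }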
plugin/cache/handler.go (58 changes)
@@ -7,6 +7,7 @@ import (
 	"time"
 
 	"github.com/coredns/coredns/plugin"
+	"github.com/coredns/coredns/plugin/metrics"
 	"github.com/coredns/coredns/request"
 
 	"github.com/miekg/dns"
@@ -17,18 +18,16 @@
 func (c *Cache) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (int, error) {
 	state := request.Request{W: w, Req: r}
 
-	qname := state.Name()
-	qtype := state.QType()
-	zone := plugin.Zones(c.Zones).Matches(qname)
+	zone := plugin.Zones(c.Zones).Matches(state.Name())
 	if zone == "" {
 		return plugin.NextOrFailure(c.Name(), c.Next, ctx, w, r)
 	}
 
-	do := state.Do() // TODO(): might need more from OPT record? Like the actual bufsize?
-
 	now := c.now().UTC()
 
-	i, ttl := c.get(now, qname, qtype, do)
+	server := metrics.WithServer(ctx)
+
+	i, ttl := c.get(now, state, server)
 	if i != nil && ttl > 0 {
 		resp := i.toMsg(r, now)
 
@@ -42,14 +41,16 @@ func (c *Cache) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg)
 		threshold := int(math.Ceil(float64(c.percentage) / 100 * float64(i.origTTL)))
 		if i.Freq.Hits() >= c.prefetch && ttl <= threshold {
 			go func() {
-				cachePrefetches.Inc()
+				cachePrefetches.WithLabelValues(server).Inc()
 				// When prefetching we loose the item i, and with it the frequency
 				// that we've gathered sofar. See we copy the frequencies info back
 				// into the new item that was stored in the cache.
-				prr := &ResponseWriter{ResponseWriter: w, Cache: c, prefetch: true, state: state}
+				prr := &ResponseWriter{ResponseWriter: w, Cache: c,
+					prefetch: true, state: state,
+					server: server}
 				plugin.NextOrFailure(c.Name(), c.Next, ctx, prr, r)
 
-				if i1 := c.exists(qname, qtype, do); i1 != nil {
+				if i1 := c.exists(state); i1 != nil {
 					i1.Freq.Reset(now, i.Freq.Hits())
 				}
 			}()
@@ -58,31 +59,31 @@ func (c *Cache) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg)
 		return dns.RcodeSuccess, nil
 	}
 
-	crr := &ResponseWriter{ResponseWriter: w, Cache: c, state: state}
+	crr := &ResponseWriter{ResponseWriter: w, Cache: c, state: state, server: server}
 	return plugin.NextOrFailure(c.Name(), c.Next, ctx, crr, r)
 }
 
 // Name implements the Handler interface.
 func (c *Cache) Name() string { return "cache" }
 
-func (c *Cache) get(now time.Time, qname string, qtype uint16, do bool) (*item, int) {
-	k := hash(qname, qtype, do)
+func (c *Cache) get(now time.Time, state request.Request, server string) (*item, int) {
+	k := hash(state.Name(), state.QType(), state.Do())
 
 	if i, ok := c.ncache.Get(k); ok {
-		cacheHits.WithLabelValues(Denial).Inc()
+		cacheHits.WithLabelValues(server, Denial).Inc()
 		return i.(*item), i.(*item).ttl(now)
 	}
 
 	if i, ok := c.pcache.Get(k); ok {
-		cacheHits.WithLabelValues(Success).Inc()
+		cacheHits.WithLabelValues(server, Success).Inc()
 		return i.(*item), i.(*item).ttl(now)
 	}
-	cacheMisses.Inc()
+	cacheMisses.WithLabelValues(server).Inc()
 	return nil, 0
 }
 
-func (c *Cache) exists(qname string, qtype uint16, do bool) *item {
-	k := hash(qname, qtype, do)
+func (c *Cache) exists(state request.Request) *item {
+	k := hash(state.Name(), state.QType(), state.Do())
 	if i, ok := c.ncache.Get(k); ok {
 		return i.(*item)
 	}
@@ -98,42 +99,35 @@ var (
 		Subsystem: "cache",
 		Name:      "size",
 		Help:      "The number of elements in the cache.",
-	}, []string{"type"})
-
-	cacheCapacity = prometheus.NewGaugeVec(prometheus.GaugeOpts{
-		Namespace: plugin.Namespace,
-		Subsystem: "cache",
-		Name:      "capacity",
-		Help:      "The cache's capacity.",
-	}, []string{"type"})
+	}, []string{"server", "type"})
 
 	cacheHits = prometheus.NewCounterVec(prometheus.CounterOpts{
 		Namespace: plugin.Namespace,
 		Subsystem: "cache",
 		Name:      "hits_total",
 		Help:      "The count of cache hits.",
-	}, []string{"type"})
+	}, []string{"server", "type"})
 
-	cacheMisses = prometheus.NewCounter(prometheus.CounterOpts{
+	cacheMisses = prometheus.NewCounterVec(prometheus.CounterOpts{
 		Namespace: plugin.Namespace,
 		Subsystem: "cache",
 		Name:      "misses_total",
 		Help:      "The count of cache misses.",
-	})
+	}, []string{"server"})
 
-	cachePrefetches = prometheus.NewCounter(prometheus.CounterOpts{
+	cachePrefetches = prometheus.NewCounterVec(prometheus.CounterOpts{
 		Namespace: plugin.Namespace,
 		Subsystem: "cache",
 		Name:      "prefetch_total",
 		Help:      "The number of time the cache has prefetched a cached item.",
-	})
+	}, []string{"server"})
 
-	cacheDrops = prometheus.NewCounter(prometheus.CounterOpts{
+	cacheDrops = prometheus.NewCounterVec(prometheus.CounterOpts{
 		Namespace: plugin.Namespace,
 		Subsystem: "cache",
 		Name:      "drops_total",
 		Help:      "The number responses that are not cached, because the reply is malformed.",
-	})
+	}, []string{"server"})
 )
 
 var once sync.Once
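The var block above is where most of the mechanical churn comes from: cacheMisses, cachePrefetches and cacheDrops move from prometheus.NewCounter to prometheus.NewCounterVec, so every .Inc() call site has to pick a child with WithLabelValues first. A before/after sketch of just that API difference (hypothetical metric names, nothing CoreDNS-specific):

    package example

    import "github.com/prometheus/client_golang/prometheus"

    var (
        // Old style: a single, unlabelled series for the whole process.
        missesTotal = prometheus.NewCounter(prometheus.CounterOpts{
            Name: "example_misses_total",
            Help: "Cache misses.",
        })

        // New style: a family of series, one child per server label value.
        missesPerServer = prometheus.NewCounterVec(prometheus.CounterOpts{
            Name: "example_misses_per_server_total",
            Help: "Cache misses per server block.",
        }, []string{"server"})
    )

    func record(server string) {
        missesTotal.Inc()                             // no way to say which server missed
        missesPerServer.WithLabelValues(server).Inc() // child is created on first use
    }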
plugin/cache/setup.go (11 changes)
@@ -36,21 +36,12 @@ func setup(c *caddy.Controller) error {
 	c.OnStartup(func() error {
 		once.Do(func() {
 			metrics.MustRegister(c,
-				cacheSize, cacheCapacity,
-				cacheHits, cacheMisses,
+				cacheSize, cacheHits, cacheMisses,
 				cachePrefetches, cacheDrops)
 		})
 		return nil
 	})
 
-	// Initialize all counters and gauges.
-	cacheSize.WithLabelValues(Success)
-	cacheSize.WithLabelValues(Denial)
-	cacheCapacity.WithLabelValues(Success).Set(float64(ca.pcap))
-	cacheCapacity.WithLabelValues(Denial).Set(float64(ca.ncap))
-	cacheHits.WithLabelValues(Success)
-	cacheHits.WithLabelValues(Denial)
-
 	return nil
 }
 
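setup.go also drops the eager WithLabelValues initialisation (and cacheCapacity altogether). That seems consistent with the new server label, since presumably the full label set is only known once a request arrives on a concrete server block; a *Vec simply exports nothing until a child exists, as this small standalone check illustrates (scratch registry, not CoreDNS code):

    package main

    import (
        "fmt"

        "github.com/prometheus/client_golang/prometheus"
    )

    func main() {
        reg := prometheus.NewRegistry()
        size := prometheus.NewGaugeVec(prometheus.GaugeOpts{
            Name: "example_cache_size",
            Help: "Example gauge with a server label.",
        }, []string{"server", "type"})
        reg.MustRegister(size)

        mfs, _ := reg.Gather()
        fmt.Println(len(mfs)) // 0: no children yet, so the family is not exported

        size.WithLabelValues("dns://:53", "success").Set(42)
        mfs, _ = reg.Gather()
        fmt.Println(len(mfs)) // 1: the family shows up once a child has been created
    }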