plugin/forward: add hit/miss metrics for connection cache (#4114)

Signed-off-by: Ruslan Drozhdzh <rdrozhdzh@infoblox.com>
2020-09-14 12:42:55 +03:00 · 2020-09-14 12:42:55 +03:00 · 30a4a87eaa
commit 30a4a87eaa
parent 2fe5d684f9
3 changed files with 18 additions and 2 deletions
--- a/plugin/forward/README.md
+++ b/plugin/forward/README.md
@@ -112,10 +112,12 @@ If monitoring is enabled (via the *prometheus* plugin) then the following metric
 * `coredns_forward_healthcheck_failures_total{to}` - number of failed health checks per upstream.
 * `coredns_forward_healthcheck_broken_total{}` - counter of when all upstreams are unhealthy,
  and we are randomly (this always uses the `random` policy) spraying to an upstream.
-* `max_concurrent_rejects_total{}` - counter of the number of queries rejected because the
+* `coredns_forward_max_concurrent_rejects_total{}` - counter of the number of queries rejected because the
  number of concurrent queries was at maximum.
 * `coredns_forward_conn_cache_hits_total{to, proto}` - counter of connection cache hits per upstream and protocol.
 * `coredns_forward_conn_cache_misses_total{to, proto}` - counter of connection cache misses per upstream and protocol.
 Where `to` is one of the upstream servers (**TO** from the config), `rcode` is the returned RCODE
-from the upstream.
+from the upstream, and `proto` is the transport protocol, such as `udp`, `tcp` or `tcp-tls`.
 ## Examples
--- a/plugin/forward/connect.go
+++ b/plugin/forward/connect.go
@@ -54,8 +54,10 @@ func (t *Transport) Dial(proto string) (*persistConn, bool, error) {
 	pc := <-t.ret
 	if pc != nil {
 		ConnCacheHitsCount.WithLabelValues(t.addr, proto).Add(1)
 		return pc, true, nil
 	}
 	ConnCacheMissesCount.WithLabelValues(t.addr, proto).Add(1)
 	reqTime := time.Now()
 	timeout := t.dialTimeout()
--- a/plugin/forward/metrics.go
+++ b/plugin/forward/metrics.go
@@ -52,4 +52,16 @@ var (
 		Name:      "max_concurrent_rejects_total",
 		Help:      "Counter of the number of queries rejected because the concurrent queries were at maximum.",
 	})
 	ConnCacheHitsCount = promauto.NewCounterVec(prometheus.CounterOpts{
 		Namespace: plugin.Namespace,
 		Subsystem: "forward",
 		Name:      "conn_cache_hits_total",
 		Help:      "Counter of connection cache hits per upstream and protocol.",
 	}, []string{"to", "proto"})
 	ConnCacheMissesCount = promauto.NewCounterVec(prometheus.CounterOpts{
 		Namespace: plugin.Namespace,
 		Subsystem: "forward",
 		Name:      "conn_cache_misses_total",
 		Help:      "Counter of connection cache misses per upstream and protocol.",
 	}, []string{"to", "proto"})
 )