Metrics registered on wrong prometheus registry (#2246)
* - UT on metrics verifying that all plugins of all blocs have their metrics collectors declared * - fix error msg * - redirect Registry of metric to the one that handle the listener - allow duplicate of metrics collector on the same Registry (case of same plugin in 2 blocs listening metrics on the same address) * - fix change of signature * - ensure cleaning metrics before starting the test (metrics collectors are global vars .. and re-used by several tests) * - I think I fixed this test. Ensure correct mn of hits and clean metrics before test. * - fix typo in error msg - proposed at review * - fix typo in comment * - remove ResetMetrics functions - change a way to test the numeric metrics : get the diff between begin and end of test * - oops. removing debug logs
This commit is contained in:
parent
f5aa6cac67
commit
05204ef142
6 changed files with 145 additions and 34 deletions
|
@ -57,7 +57,15 @@ func New(addr string) *Metrics {
|
||||||
}
|
}
|
||||||
|
|
||||||
// MustRegister wraps m.Reg.MustRegister.
|
// MustRegister wraps m.Reg.MustRegister.
|
||||||
func (m *Metrics) MustRegister(c prometheus.Collector) { m.Reg.MustRegister(c) }
|
func (m *Metrics) MustRegister(c prometheus.Collector) {
|
||||||
|
err := m.Reg.Register(c)
|
||||||
|
if err != nil {
|
||||||
|
// ignore any duplicate error, but fatal on any other kind of error
|
||||||
|
if _, ok := err.(prometheus.AlreadyRegisteredError); !ok {
|
||||||
|
log.Fatalf("Cannot register metrics collector: %s", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// AddZone adds zone z to m.
|
// AddZone adds zone z to m.
|
||||||
func (m *Metrics) AddZone(z string) {
|
func (m *Metrics) AddZone(z string) {
|
||||||
|
|
|
@ -31,6 +31,13 @@ func setup(c *caddy.Controller) error {
|
||||||
return plugin.Error("prometheus", err)
|
return plugin.Error("prometheus", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// register the metrics to its address (ensure only one active metrics per address)
|
||||||
|
obj := uniqAddr.Set(m.Addr, m.OnStartup, m)
|
||||||
|
//propagate the real active Registry to current metrics
|
||||||
|
if om, ok := obj.(*Metrics); ok {
|
||||||
|
m.Reg = om.Reg
|
||||||
|
}
|
||||||
|
|
||||||
dnsserver.GetConfig(c).AddPlugin(func(next plugin.Handler) plugin.Handler {
|
dnsserver.GetConfig(c).AddPlugin(func(next plugin.Handler) plugin.Handler {
|
||||||
m.Next = next
|
m.Next = next
|
||||||
return m
|
return m
|
||||||
|
@ -55,10 +62,6 @@ func setup(c *caddy.Controller) error {
|
||||||
func prometheusParse(c *caddy.Controller) (*Metrics, error) {
|
func prometheusParse(c *caddy.Controller) (*Metrics, error) {
|
||||||
var met = New(defaultAddr)
|
var met = New(defaultAddr)
|
||||||
|
|
||||||
defer func() {
|
|
||||||
uniqAddr.Set(met.Addr, met.OnStartup)
|
|
||||||
}()
|
|
||||||
|
|
||||||
i := 0
|
i := 0
|
||||||
for c.Next() {
|
for c.Next() {
|
||||||
if i > 0 {
|
if i > 0 {
|
||||||
|
|
|
@ -27,6 +27,7 @@ import (
|
||||||
"io"
|
"io"
|
||||||
"mime"
|
"mime"
|
||||||
"net/http"
|
"net/http"
|
||||||
|
"strconv"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/matttproud/golang_protobuf_extensions/pbutil"
|
"github.com/matttproud/golang_protobuf_extensions/pbutil"
|
||||||
|
@ -78,6 +79,47 @@ func Scrape(t *testing.T, url string) []*MetricFamily {
|
||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ScrapeMetricAsInt provide a sum of all metrics collected for the name and label provided.
|
||||||
|
// if the metric is not a numeric value, it will be counted a 0.
|
||||||
|
func ScrapeMetricAsInt(t *testing.T, addr string, name string, label string, nometricvalue int) int {
|
||||||
|
|
||||||
|
valueToInt := func(m metric) int {
|
||||||
|
v := m.Value
|
||||||
|
r, err := strconv.Atoi(v)
|
||||||
|
if err != nil {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
return r
|
||||||
|
}
|
||||||
|
|
||||||
|
met := Scrape(t, fmt.Sprintf("http://%s/metrics", addr))
|
||||||
|
found := false
|
||||||
|
tot := 0
|
||||||
|
for _, mf := range met {
|
||||||
|
if mf.Name == name {
|
||||||
|
// Sum all metrics available
|
||||||
|
for _, m := range mf.Metrics {
|
||||||
|
if label == "" {
|
||||||
|
tot += valueToInt(m.(metric))
|
||||||
|
found = true
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
for _, v := range m.(metric).Labels {
|
||||||
|
if v == label {
|
||||||
|
tot += valueToInt(m.(metric))
|
||||||
|
found = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if !found {
|
||||||
|
return nometricvalue
|
||||||
|
}
|
||||||
|
return tot
|
||||||
|
}
|
||||||
|
|
||||||
// MetricValue returns the value associated with name as a string as well as the labels.
|
// MetricValue returns the value associated with name as a string as well as the labels.
|
||||||
// It only returns the first metrics of the slice.
|
// It only returns the first metrics of the slice.
|
||||||
func MetricValue(name string, mfs []*MetricFamily) (string, map[string]string) {
|
func MetricValue(name string, mfs []*MetricFamily) (string, map[string]string) {
|
||||||
|
|
|
@ -10,6 +10,7 @@ type U struct {
|
||||||
type item struct {
|
type item struct {
|
||||||
state int // either todo or done
|
state int // either todo or done
|
||||||
f func() error // function to be executed.
|
f func() error // function to be executed.
|
||||||
|
obj interface{} // any object to return when needed
|
||||||
}
|
}
|
||||||
|
|
||||||
// New returns a new initialized U.
|
// New returns a new initialized U.
|
||||||
|
@ -17,30 +18,21 @@ func New() U { return U{u: make(map[string]item)} }
|
||||||
|
|
||||||
// Set sets function f in U under key. If the key already exists
|
// Set sets function f in U under key. If the key already exists
|
||||||
// it is not overwritten.
|
// it is not overwritten.
|
||||||
func (u U) Set(key string, f func() error) {
|
func (u U) Set(key string, f func() error, o interface{}) interface{} {
|
||||||
if _, ok := u.u[key]; ok {
|
if item, ok := u.u[key]; ok {
|
||||||
return
|
return item.obj
|
||||||
}
|
}
|
||||||
u.u[key] = item{todo, f}
|
u.u[key] = item{todo, f, o}
|
||||||
|
return o
|
||||||
}
|
}
|
||||||
|
|
||||||
// Unset removes the 'todo' associated with a key
|
// Unset removes the key.
|
||||||
func (u U) Unset(key string) {
|
func (u U) Unset(key string) {
|
||||||
if _, ok := u.u[key]; ok {
|
if _, ok := u.u[key]; ok {
|
||||||
delete(u.u, key)
|
delete(u.u, key)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// SetTodo sets key to 'todo' again.
|
|
||||||
func (u U) SetTodo(key string) {
|
|
||||||
v, ok := u.u[key]
|
|
||||||
if !ok {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
v.state = todo
|
|
||||||
u.u[key] = v
|
|
||||||
}
|
|
||||||
|
|
||||||
// ForEach iterates for u executes f for each element that is 'todo' and sets it to 'done'.
|
// ForEach iterates for u executes f for each element that is 'todo' and sets it to 'done'.
|
||||||
func (u U) ForEach() error {
|
func (u U) ForEach() error {
|
||||||
for k, v := range u.u {
|
for k, v := range u.u {
|
||||||
|
|
|
@ -4,7 +4,7 @@ import "testing"
|
||||||
|
|
||||||
func TestForEach(t *testing.T) {
|
func TestForEach(t *testing.T) {
|
||||||
u, i := New(), 0
|
u, i := New(), 0
|
||||||
u.Set("test", func() error { i++; return nil })
|
u.Set("test", func() error { i++; return nil }, nil)
|
||||||
|
|
||||||
u.ForEach()
|
u.ForEach()
|
||||||
if i != 1 {
|
if i != 1 {
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
package test
|
package test
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"fmt"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
@ -15,8 +16,6 @@ import (
|
||||||
"github.com/miekg/dns"
|
"github.com/miekg/dns"
|
||||||
)
|
)
|
||||||
|
|
||||||
// fail when done in parallel
|
|
||||||
|
|
||||||
// Start test server that has metrics enabled. Then tear it down again.
|
// Start test server that has metrics enabled. Then tear it down again.
|
||||||
func TestMetricsServer(t *testing.T) {
|
func TestMetricsServer(t *testing.T) {
|
||||||
corefile := `example.org:0 {
|
corefile := `example.org:0 {
|
||||||
|
@ -72,11 +71,11 @@ func TestMetricsRefused(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO(miek): disabled for now - fails in weird ways in travis.
|
// TODO(miek): disabled for now - fails in weird ways in travis.
|
||||||
func testMetricsCache(t *testing.T) {
|
func TestMetricsCache(t *testing.T) {
|
||||||
cacheSizeMetricName := "coredns_cache_size"
|
cacheSizeMetricName := "coredns_cache_size"
|
||||||
cacheHitMetricName := "coredns_cache_hits_total"
|
cacheHitMetricName := "coredns_cache_hits_total"
|
||||||
|
|
||||||
corefile := `www.example.net:0 {
|
corefile := `example.net:0 {
|
||||||
proxy . 8.8.8.8:53
|
proxy . 8.8.8.8:53
|
||||||
prometheus localhost:0
|
prometheus localhost:0
|
||||||
cache
|
cache
|
||||||
|
@ -90,32 +89,45 @@ func testMetricsCache(t *testing.T) {
|
||||||
|
|
||||||
udp, _ := CoreDNSServerPorts(srv, 0)
|
udp, _ := CoreDNSServerPorts(srv, 0)
|
||||||
|
|
||||||
|
// send an initial query to set properly the cache size metric
|
||||||
m := new(dns.Msg)
|
m := new(dns.Msg)
|
||||||
|
m.SetQuestion("example.net.", dns.TypeA)
|
||||||
|
|
||||||
|
if _, err = dns.Exchange(m, udp); err != nil {
|
||||||
|
t.Fatalf("Could not send message: %s", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
beginCacheSizeSuccess := mtest.ScrapeMetricAsInt(t, metrics.ListenAddr, cacheSizeMetricName, cache.Success, 0)
|
||||||
|
beginCacheHitSuccess := mtest.ScrapeMetricAsInt(t, metrics.ListenAddr, cacheHitMetricName, cache.Success, 0)
|
||||||
|
|
||||||
|
m = new(dns.Msg)
|
||||||
m.SetQuestion("www.example.net.", dns.TypeA)
|
m.SetQuestion("www.example.net.", dns.TypeA)
|
||||||
|
|
||||||
if _, err = dns.Exchange(m, udp); err != nil {
|
if _, err = dns.Exchange(m, udp); err != nil {
|
||||||
t.Fatalf("Could not send message: %s", err)
|
t.Fatalf("Could not send message: %s", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
data := mtest.Scrape(t, "http://"+metrics.ListenAddr+"/metrics")
|
|
||||||
// Get the value for the cache size metric where the one of the labels values matches "success".
|
// Get the value for the cache size metric where the one of the labels values matches "success".
|
||||||
got, _ := mtest.MetricValueLabel(cacheSizeMetricName, cache.Success, data)
|
got := mtest.ScrapeMetricAsInt(t, metrics.ListenAddr, cacheSizeMetricName, cache.Success, 0)
|
||||||
|
|
||||||
if got != "1" {
|
if got-beginCacheSizeSuccess != 1 {
|
||||||
t.Errorf("Expected value %s for %s, but got %s", "1", cacheSizeMetricName, got)
|
t.Errorf("Expected value %d for %s, but got %d", 1, cacheSizeMetricName, got-beginCacheSizeSuccess)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Second request for the same response to test hit counter.
|
// Second request for the same response to test hit counter
|
||||||
|
if _, err = dns.Exchange(m, udp); err != nil {
|
||||||
|
t.Fatalf("Could not send message: %s", err)
|
||||||
|
}
|
||||||
|
// Third request for the same response to test hit counter for the second time
|
||||||
if _, err = dns.Exchange(m, udp); err != nil {
|
if _, err = dns.Exchange(m, udp); err != nil {
|
||||||
t.Fatalf("Could not send message: %s", err)
|
t.Fatalf("Could not send message: %s", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
data = mtest.Scrape(t, "http://"+metrics.ListenAddr+"/metrics")
|
|
||||||
// Get the value for the cache hit counter where the one of the labels values matches "success".
|
// Get the value for the cache hit counter where the one of the labels values matches "success".
|
||||||
got, _ = mtest.MetricValueLabel(cacheHitMetricName, cache.Success, data)
|
got = mtest.ScrapeMetricAsInt(t, metrics.ListenAddr, cacheHitMetricName, cache.Success, 0)
|
||||||
|
|
||||||
if got != "2" {
|
if got-beginCacheHitSuccess != 2 {
|
||||||
t.Errorf("Expected value %s for %s, but got %s", "2", cacheHitMetricName, got)
|
t.Errorf("Expected value %d for %s, but got %d", 2, cacheHitMetricName, got-beginCacheHitSuccess)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -182,3 +194,57 @@ func TestMetricsAuto(t *testing.T) {
|
||||||
t.Errorf("Expected value %s for %s, but got %s", "1", metricName, got)
|
t.Errorf("Expected value %s for %s, but got %s", "1", metricName, got)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Show that when 2 blocs share the same metric listener (they have a prometheus plugin on the same listening address),
|
||||||
|
// ALL the metrics of the second bloc in order are declared in prometheus, especially the plugins that are used ONLY in the second bloc
|
||||||
|
func TestMetricsSeveralBlocs(t *testing.T) {
|
||||||
|
cacheSizeMetricName := "coredns_cache_size"
|
||||||
|
addrMetrics := "localhost:9155"
|
||||||
|
|
||||||
|
corefile := fmt.Sprintf(`
|
||||||
|
example.org:0 {
|
||||||
|
prometheus %s
|
||||||
|
forward . 8.8.8.8:53 {
|
||||||
|
force_tcp
|
||||||
|
}
|
||||||
|
}
|
||||||
|
google.com:0 {
|
||||||
|
prometheus %s
|
||||||
|
forward . 8.8.8.8:53 {
|
||||||
|
force_tcp
|
||||||
|
}
|
||||||
|
cache
|
||||||
|
}
|
||||||
|
`, addrMetrics, addrMetrics)
|
||||||
|
|
||||||
|
i, udp, _, err := CoreDNSServerAndPorts(corefile)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Could not get CoreDNS serving instance: %s", err)
|
||||||
|
}
|
||||||
|
defer i.Stop()
|
||||||
|
|
||||||
|
// send an inital query to setup properly the cache size
|
||||||
|
m := new(dns.Msg)
|
||||||
|
m.SetQuestion("google.com.", dns.TypeA)
|
||||||
|
if _, err = dns.Exchange(m, udp); err != nil {
|
||||||
|
t.Fatalf("Could not send message: %s", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
beginCacheSize := mtest.ScrapeMetricAsInt(t, addrMetrics, cacheSizeMetricName, "", 0)
|
||||||
|
|
||||||
|
// send an query, different from initial to ensure we have another add to the cache
|
||||||
|
m = new(dns.Msg)
|
||||||
|
m.SetQuestion("www.google.com.", dns.TypeA)
|
||||||
|
|
||||||
|
if _, err = dns.Exchange(m, udp); err != nil {
|
||||||
|
t.Fatalf("Could not send message: %s", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
endCacheSize := mtest.ScrapeMetricAsInt(t, addrMetrics, cacheSizeMetricName, "", 0)
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("Unexpected metric data retrieved for %s : %s", cacheSizeMetricName, err)
|
||||||
|
}
|
||||||
|
if endCacheSize-beginCacheSize != 1 {
|
||||||
|
t.Errorf("Expected metric data retrieved for %s, expected %d, got %d", cacheSizeMetricName, 1, endCacheSize-beginCacheSize)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
Loading…
Add table
Reference in a new issue