From f9bdd382ddf911c88b33d2bbd8f8aa9be2885a39 Mon Sep 17 00:00:00 2001 From: Francois Tur Date: Wed, 19 Sep 2018 05:11:24 -0400 Subject: [PATCH] Ensure Re-register of metrics variables after a reload (#2080) * - ensure plugins that use prometheus.MustRegister, re-register after reload - removing once.Do on the startup function was simplest way to do it. * - fix underscored names (advice of bot) * - tune existing UT for reload, and add a test verifying failing reload does not prevent correct registering for metrics * - ensure different ports for tests that can run in same time .. --- plugin/autopath/metrics.go | 4 -- plugin/autopath/setup.go | 2 +- plugin/cache/handler.go | 3 - plugin/cache/setup.go | 8 +-- plugin/dnssec/handler.go | 3 - plugin/dnssec/setup.go | 4 +- plugin/forward/metrics.go | 4 -- plugin/forward/setup.go | 4 +- plugin/health/setup.go | 2 +- plugin/proxy/metrics.go | 4 -- plugin/proxy/setup.go | 2 +- plugin/template/metrics.go | 8 +-- test/reload_test.go | 138 +++++++++++++++++++++++++++++++++++-- 13 files changed, 143 insertions(+), 43 deletions(-) diff --git a/plugin/autopath/metrics.go b/plugin/autopath/metrics.go index c928bd2f3..ae25ab79e 100644 --- a/plugin/autopath/metrics.go +++ b/plugin/autopath/metrics.go @@ -1,8 +1,6 @@ package autopath import ( - "sync" - "github.com/coredns/coredns/plugin" "github.com/prometheus/client_golang/prometheus" @@ -16,5 +14,3 @@ var ( Help: "Counter of requests that did autopath.", }, []string{"server"}) ) - -var once sync.Once diff --git a/plugin/autopath/setup.go b/plugin/autopath/setup.go index edade9d20..4dbe139ca 100644 --- a/plugin/autopath/setup.go +++ b/plugin/autopath/setup.go @@ -26,7 +26,7 @@ func setup(c *caddy.Controller) error { } c.OnStartup(func() error { - once.Do(func() { metrics.MustRegister(c, autoPathCount) }) + metrics.MustRegister(c, autoPathCount) return nil }) diff --git a/plugin/cache/handler.go b/plugin/cache/handler.go index a44a4ec3b..2d608e8d3 100644 --- a/plugin/cache/handler.go +++ b/plugin/cache/handler.go @@ -3,7 +3,6 @@ package cache import ( "context" "math" - "sync" "time" "github.com/coredns/coredns/plugin" @@ -126,5 +125,3 @@ var ( Help: "The number responses that are not cached, because the reply is malformed.", }, []string{"server"}) ) - -var once sync.Once diff --git a/plugin/cache/setup.go b/plugin/cache/setup.go index 8464c27d9..d6052b162 100644 --- a/plugin/cache/setup.go +++ b/plugin/cache/setup.go @@ -34,11 +34,9 @@ func setup(c *caddy.Controller) error { }) c.OnStartup(func() error { - once.Do(func() { - metrics.MustRegister(c, - cacheSize, cacheHits, cacheMisses, - cachePrefetches, cacheDrops) - }) + metrics.MustRegister(c, + cacheSize, cacheHits, cacheMisses, + cachePrefetches, cacheDrops) return nil }) diff --git a/plugin/dnssec/handler.go b/plugin/dnssec/handler.go index 573f7371d..c8bddc01c 100644 --- a/plugin/dnssec/handler.go +++ b/plugin/dnssec/handler.go @@ -2,7 +2,6 @@ package dnssec import ( "context" - "sync" "github.com/coredns/coredns/plugin" "github.com/coredns/coredns/plugin/metrics" @@ -81,5 +80,3 @@ var ( // Name implements the Handler interface. func (d Dnssec) Name() string { return "dnssec" } - -var once sync.Once diff --git a/plugin/dnssec/setup.go b/plugin/dnssec/setup.go index 675a48d89..a476dcba7 100644 --- a/plugin/dnssec/setup.go +++ b/plugin/dnssec/setup.go @@ -35,9 +35,7 @@ func setup(c *caddy.Controller) error { }) c.OnStartup(func() error { - once.Do(func() { - metrics.MustRegister(c, cacheSize, cacheHits, cacheMisses) - }) + metrics.MustRegister(c, cacheSize, cacheHits, cacheMisses) return nil }) diff --git a/plugin/forward/metrics.go b/plugin/forward/metrics.go index 063e2dc00..0fe470926 100644 --- a/plugin/forward/metrics.go +++ b/plugin/forward/metrics.go @@ -1,8 +1,6 @@ package forward import ( - "sync" - "github.com/coredns/coredns/plugin" "github.com/prometheus/client_golang/prometheus" @@ -48,5 +46,3 @@ var ( Help: "Gauge of open sockets per upstream.", }, []string{"to"}) ) - -var once sync.Once diff --git a/plugin/forward/setup.go b/plugin/forward/setup.go index 5743d73b3..23d9acfed 100644 --- a/plugin/forward/setup.go +++ b/plugin/forward/setup.go @@ -38,9 +38,7 @@ func setup(c *caddy.Controller) error { }) c.OnStartup(func() error { - once.Do(func() { - metrics.MustRegister(c, RequestCount, RcodeCount, RequestDuration, HealthcheckFailureCount, SocketGauge) - }) + metrics.MustRegister(c, RequestCount, RcodeCount, RequestDuration, HealthcheckFailureCount, SocketGauge) return f.OnStartup() }) diff --git a/plugin/health/setup.go b/plugin/health/setup.go index 0b90d829a..5a1725125 100644 --- a/plugin/health/setup.go +++ b/plugin/health/setup.go @@ -55,7 +55,7 @@ func setup(c *caddy.Controller) error { }) c.OnStartup(func() error { - once.Do(func() { metrics.MustRegister(c, HealthDuration) }) + metrics.MustRegister(c, HealthDuration) return nil }) diff --git a/plugin/proxy/metrics.go b/plugin/proxy/metrics.go index e5d6139b4..0bea5ae4b 100644 --- a/plugin/proxy/metrics.go +++ b/plugin/proxy/metrics.go @@ -1,8 +1,6 @@ package proxy import ( - "sync" - "github.com/coredns/coredns/plugin" "github.com/prometheus/client_golang/prometheus" @@ -36,5 +34,3 @@ func familyToString(f int) string { } return "" } - -var once sync.Once diff --git a/plugin/proxy/setup.go b/plugin/proxy/setup.go index 279e02cac..2f3d7dd62 100644 --- a/plugin/proxy/setup.go +++ b/plugin/proxy/setup.go @@ -33,7 +33,7 @@ func setup(c *caddy.Controller) error { }) c.OnStartup(func() error { - once.Do(func() { metrics.MustRegister(c, RequestCount, RequestDuration) }) + metrics.MustRegister(c, RequestCount, RequestDuration) return nil }) diff --git a/plugin/template/metrics.go b/plugin/template/metrics.go index 25474fc6c..d5fe0a4d7 100644 --- a/plugin/template/metrics.go +++ b/plugin/template/metrics.go @@ -1,8 +1,6 @@ package template import ( - "sync" - "github.com/coredns/coredns/plugin" "github.com/coredns/coredns/plugin/metrics" @@ -34,13 +32,9 @@ var ( // OnStartupMetrics sets up the metrics on startup. func setupMetrics(c *caddy.Controller) error { c.OnStartup(func() error { - metricsOnce.Do(func() { - metrics.MustRegister(c, templateMatchesCount, templateFailureCount, templateRRFailureCount) - }) + metrics.MustRegister(c, templateMatchesCount, templateFailureCount, templateRRFailureCount) return nil }) return nil } - -var metricsOnce sync.Once diff --git a/test/reload_test.go b/test/reload_test.go index bc55071c6..e026a97ce 100644 --- a/test/reload_test.go +++ b/test/reload_test.go @@ -119,22 +119,24 @@ func TestReloadMetricsHealth(t *testing.T) { if err != nil { t.Fatal(err) } - const proc = "process_virtual_memory_bytes" + const proc = "coredns_build_info" metrics, _ := ioutil.ReadAll(resp.Body) if !bytes.Contains(metrics, []byte(proc)) { t.Errorf("Failed to see %s in metric output", proc) } } -func collectMetricsInfo(addr, proc string) error { +func collectMetricsInfo(addr string, procs ...string) error { cl := &http.Client{} resp, err := cl.Get(fmt.Sprintf("http://%s/metrics", addr)) if err != nil { return err } metrics, _ := ioutil.ReadAll(resp.Body) - if !bytes.Contains(metrics, []byte(proc)) { - return fmt.Errorf("failed to see %s in metric output", proc) + for _, p := range procs { + if !bytes.Contains(metrics, []byte(p)) { + return fmt.Errorf("failed to see %s in metric output \n%s", p, metrics) + } } return nil } @@ -202,4 +204,132 @@ func TestReloadSeveralTimeMetrics(t *testing.T) { } } +func TestMetricsAvailableAfterReload(t *testing.T) { + //TODO: add a tool that find an available port because this needs to be a port + // that is not used in another test + promAddress := "127.0.0.1:53186" + procMetric := "coredns_build_info" + procCache := "coredns_cache_size" + procForward := "coredns_dns_request_duration_seconds" + corefileWithMetrics := ` + .:0 { + prometheus ` + promAddress + ` + cache + forward . 8.8.8.8 { + force_tcp + } + }` + inst, _, tcp, err := CoreDNSServerAndPorts(corefileWithMetrics) + if err != nil { + if strings.Contains(err.Error(), inUse) { + return + } + t.Errorf("Could not get service instance: %s", err) + } + // send a query and check we can scrap corresponding metrics + cl := dns.Client{Net: "tcp"} + m := new(dns.Msg) + m.SetQuestion("www.example.org.", dns.TypeA) + + if _, _, err := cl.Exchange(m, tcp); err != nil { + t.Fatalf("Could not send message: %s", err) + } + + // we should have metrics from forward, cache, and metrics itself + if err := collectMetricsInfo(promAddress, procMetric, procCache, procForward); err != nil { + t.Errorf("Could not scrap one of expected stats : %s", err) + } + + // now reload + instReload, err := inst.Restart( + NewInput(corefileWithMetrics), + ) + if err != nil { + t.Errorf("Could not restart CoreDNS : %s", err) + instReload = inst + } + + // check the metrics are available still + if err := collectMetricsInfo(promAddress, procMetric, procCache, procForward); err != nil { + t.Errorf("Could not scrap one of expected stats : %s", err) + } + + instReload.Stop() + // verify that metrics have not been pushed +} + +func TestMetricsAvailableAfterReloadAndFailedReload(t *testing.T) { + //TODO: add a tool that find an available port because this needs to be a port + // that is not used in another test + promAddress := "127.0.0.1:53187" + procMetric := "coredns_build_info" + procCache := "coredns_cache_size" + procForward := "coredns_dns_request_duration_seconds" + corefileWithMetrics := ` + .:0 { + prometheus ` + promAddress + ` + cache + forward . 8.8.8.8 { + force_tcp + } + }` + invalidCorefileWithMetrics := ` + .:0 { + prometheus ` + promAddress + ` + cache + forward . 8.8.8.8 { + force_tcp + } + invalid + }` + inst, _, tcp, err := CoreDNSServerAndPorts(corefileWithMetrics) + if err != nil { + if strings.Contains(err.Error(), inUse) { + return + } + t.Errorf("Could not get service instance: %s", err) + } + // send a query and check we can scrap corresponding metrics + cl := dns.Client{Net: "tcp"} + m := new(dns.Msg) + m.SetQuestion("www.example.org.", dns.TypeA) + + if _, _, err := cl.Exchange(m, tcp); err != nil { + t.Fatalf("Could not send message: %s", err) + } + + // we should have metrics from forward, cache, and metrics itself + if err := collectMetricsInfo(promAddress, procMetric, procCache, procForward); err != nil { + t.Errorf("Could not scrap one of expected stats : %s", err) + } + + for i := 0; i < 2; i++ { + // now provide a failed reload + invInst, err := inst.Restart( + NewInput(invalidCorefileWithMetrics), + ) + if err == nil { + t.Errorf("Invalid test - this reload should fail") + inst = invInst + } + } + + // now reload with correct corefile + instReload, err := inst.Restart( + NewInput(corefileWithMetrics), + ) + if err != nil { + t.Errorf("Could not restart CoreDNS : %s", err) + instReload = inst + } + + // check the metrics are available still + if err := collectMetricsInfo(promAddress, procMetric, procCache, procForward); err != nil { + t.Errorf("Could not scrap one of expected stats : %s", err) + } + + instReload.Stop() + // verify that metrics have not been pushed +} + const inUse = "address already in use"