Ensure Re-register of metrics variables after a reload (#2080)

* Ensure plugins that use prometheus.MustRegister re-register their metrics after a reload. Removing once.Do from the startup function was the simplest way to do it.
* Fix underscored names (on the advice of the bot).
* Tune the existing unit tests (UT) for reload, and add a test verifying that a failing reload does not prevent metrics from being registered correctly.
* Ensure different ports are used for tests that can run at the same time.
2018-09-19 05:11:24 -04:00 · 2018-09-19 05:11:24 -04:00 · f9bdd382dd
commit f9bdd382dd
parent cb932ca231
13 changed files with 143 additions and 43 deletions
--- a/test/reload_test.go
+++ b/test/reload_test.go
@ -119,22 +119,24 @@ func TestReloadMetricsHealth(t *testing.T) {
 	if err != nil {
 		t.Fatal(err)
 	}
-	const proc = "process_virtual_memory_bytes"
+	const proc = "coredns_build_info"
 	metrics, _ := ioutil.ReadAll(resp.Body)
 	if !bytes.Contains(metrics, []byte(proc)) {
 		t.Errorf("Failed to see %s in metric output", proc)
 	}
 }

-func collectMetricsInfo(addr, proc string) error {
+// collectMetricsInfo fetches http://<addr>/metrics and returns an error if the
+// request fails, if the body cannot be read, or if any of the names in procs
+// does not appear in the response. It returns nil when every name is found.
+func collectMetricsInfo(addr string, procs ...string) error {
 	cl := &http.Client{}
 	resp, err := cl.Get(fmt.Sprintf("http://%s/metrics", addr))
 	if err != nil {
 		return err
 	}
-	metrics, _ := ioutil.ReadAll(resp.Body)
-	if !bytes.Contains(metrics, []byte(proc)) {
-		return fmt.Errorf("failed to see %s in metric output", proc)
+	// Always release the connection, including on the error returns below.
+	defer resp.Body.Close()
+
+	metrics, err := ioutil.ReadAll(resp.Body)
+	if err != nil {
+		// A partial body must not be reported as a missing metric.
+		return err
+	}
+	for _, p := range procs {
+		if !bytes.Contains(metrics, []byte(p)) {
+			return fmt.Errorf("failed to see %s in metric output \n%s", p, metrics)
+		}
 	}
 	return nil
 }
@ -202,4 +204,132 @@ func TestReloadSeveralTimeMetrics(t *testing.T) {
 	}
 }

+// TestMetricsAvailableAfterReload starts a server whose Corefile enables the
+// prometheus, cache and forward plugins, sends one query over TCP, and checks
+// that the three expected metric names appear in the prometheus output both
+// before and after the instance is restarted with the same Corefile.
+func TestMetricsAvailableAfterReload(t *testing.T) {
+	// TODO: add a tool that finds an available port, because this needs to be
+	// a port that is not used in another test.
+	promAddress := "127.0.0.1:53186"
+	procMetric := "coredns_build_info"
+	procCache := "coredns_cache_size"
+	procForward := "coredns_dns_request_duration_seconds"
+	corefileWithMetrics := `
+	.:0 {
+		prometheus ` + promAddress + `
+		cache
+		forward . 8.8.8.8 {
+           force_tcp
+		}
+	}`
+	inst, _, tcp, err := CoreDNSServerAndPorts(corefileWithMetrics)
+	if err != nil {
+		// An "address already in use" failure is tolerated: the test is
+		// silently abandoned rather than reported as failed.
+		if strings.Contains(err.Error(), inUse) {
+			return
+		}
+		// Nothing below can work without a running instance, so stop here.
+		t.Fatalf("Could not get service instance: %s", err)
+	}
+	// Stop whichever instance is current when the test exits, including on
+	// t.Fatalf, so that a failure does not leave the server running.
+	defer func() { inst.Stop() }()
+
+	// Send a query and check we can scrape the corresponding metrics.
+	cl := dns.Client{Net: "tcp"}
+	m := new(dns.Msg)
+	m.SetQuestion("www.example.org.", dns.TypeA)
+
+	if _, _, err := cl.Exchange(m, tcp); err != nil {
+		t.Fatalf("Could not send message: %s", err)
+	}
+
+	// we should have metrics from forward, cache, and metrics itself
+	if err := collectMetricsInfo(promAddress, procMetric, procCache, procForward); err != nil {
+		t.Errorf("Could not scrap one of expected stats : %s", err)
+	}
+
+	// Now reload with the same Corefile.
+	instReload, err := inst.Restart(
+		NewInput(corefileWithMetrics),
+	)
+	if err != nil {
+		// Keep using the original instance so the check below still runs.
+		t.Errorf("Could not restart CoreDNS : %s", err)
+	} else {
+		inst = instReload
+	}
+
+	// Check the metrics are still available.
+	if err := collectMetricsInfo(promAddress, procMetric, procCache, procForward); err != nil {
+		t.Errorf("Could not scrap one of expected stats : %s", err)
+	}
+}
+
+// TestMetricsAvailableAfterReloadAndFailedReload starts a server whose
+// Corefile enables the prometheus, cache and forward plugins, sends one query
+// over TCP, then attempts two restarts with an invalid Corefile followed by one
+// restart with the valid Corefile. It checks that the three expected metric
+// names appear in the prometheus output before the restarts and after the
+// final one.
+func TestMetricsAvailableAfterReloadAndFailedReload(t *testing.T) {
+	// TODO: add a tool that finds an available port, because this needs to be
+	// a port that is not used in another test.
+	promAddress := "127.0.0.1:53187"
+	procMetric := "coredns_build_info"
+	procCache := "coredns_cache_size"
+	procForward := "coredns_dns_request_duration_seconds"
+	corefileWithMetrics := `
+	.:0 {
+		prometheus ` + promAddress + `
+		cache
+		forward . 8.8.8.8 {
+           force_tcp
+		}
+	}`
+	invalidCorefileWithMetrics := `
+	.:0 {
+		prometheus ` + promAddress + `
+		cache
+		forward . 8.8.8.8 {
+           force_tcp
+		}
+		invalid
+	}`
+	inst, _, tcp, err := CoreDNSServerAndPorts(corefileWithMetrics)
+	if err != nil {
+		// An "address already in use" failure is tolerated: the test is
+		// silently abandoned rather than reported as failed.
+		if strings.Contains(err.Error(), inUse) {
+			return
+		}
+		// Nothing below can work without a running instance, so stop here.
+		t.Fatalf("Could not get service instance: %s", err)
+	}
+	// Stop whichever instance is current when the test exits, including on
+	// t.Fatalf, so that a failure does not leave the server running.
+	defer func() { inst.Stop() }()
+
+	// Send a query and check we can scrape the corresponding metrics.
+	cl := dns.Client{Net: "tcp"}
+	m := new(dns.Msg)
+	m.SetQuestion("www.example.org.", dns.TypeA)
+
+	if _, _, err := cl.Exchange(m, tcp); err != nil {
+		t.Fatalf("Could not send message: %s", err)
+	}
+
+	// we should have metrics from forward, cache, and metrics itself
+	if err := collectMetricsInfo(promAddress, procMetric, procCache, procForward); err != nil {
+		t.Errorf("Could not scrap one of expected stats : %s", err)
+	}
+
+	for i := 0; i < 2; i++ {
+		// Now attempt a reload that is expected to fail.
+		invInst, err := inst.Restart(
+			NewInput(invalidCorefileWithMetrics),
+		)
+		if err == nil {
+			// The reload unexpectedly succeeded: report it and continue
+			// with the instance that is now current.
+			t.Errorf("Invalid test - this reload should fail")
+			inst = invInst
+		}
+	}
+
+	// Now reload with the correct Corefile.
+	instReload, err := inst.Restart(
+		NewInput(corefileWithMetrics),
+	)
+	if err != nil {
+		// Keep using the previous instance so the check below still runs.
+		t.Errorf("Could not restart CoreDNS : %s", err)
+	} else {
+		inst = instReload
+	}
+
+	// Check the metrics are still available.
+	if err := collectMetricsInfo(promAddress, procMetric, procCache, procForward); err != nil {
+		t.Errorf("Could not scrap one of expected stats : %s", err)
+	}
+}
+
+// inUse is the substring looked for in a server start-up error; the tests in
+// this file that check for it return without failing when it is present.
 const inUse = "address already in use"