diff --git a/plugin/metrics/metrics.go b/plugin/metrics/metrics.go index af4afc6ac..8efeff1e7 100644 --- a/plugin/metrics/metrics.go +++ b/plugin/metrics/metrics.go @@ -2,10 +2,12 @@ package metrics import ( + "context" "net" "net/http" "os" "sync" + "time" "github.com/coredns/coredns/plugin" "github.com/coredns/coredns/plugin/metrics/vars" @@ -22,6 +24,7 @@ type Metrics struct { ln net.Listener lnSetup bool mux *http.ServeMux + srv *http.Server zoneNames []string zoneMap map[string]bool @@ -94,9 +97,9 @@ func (m *Metrics) OnStartup() error { m.mux = http.NewServeMux() m.mux.Handle("/metrics", promhttp.HandlerFor(m.Reg, promhttp.HandlerOpts{})) - + m.srv = &http.Server{Handler: m.mux} go func() { - http.Serve(m.ln, m.mux) + m.srv.Serve(m.ln) }() return nil } @@ -106,24 +109,28 @@ func (m *Metrics) OnRestart() error { if !m.lnSetup { return nil } + uniqAddr.Unset(m.Addr) + return m.stopServer() +} - uniqAddr.SetTodo(m.Addr) - - m.ln.Close() +func (m *Metrics) stopServer() error { + if !m.lnSetup { + return nil + } + ctx, cancel := context.WithTimeout(context.Background(), shutdownTimeout) + defer cancel() + if err := m.srv.Shutdown(ctx); err != nil { + log.Infof("Failed to stop prometheus http server: %s", err) + return err + } m.lnSetup = false + m.ln.Close() return nil } // OnFinalShutdown tears down the metrics listener on shutdown and restart. func (m *Metrics) OnFinalShutdown() error { - // We allow prometheus statements in multiple Server Blocks, but only the first - // will open the listener, for the rest they are all nil; guard against that. - if !m.lnSetup { - return nil - } - - m.lnSetup = false - return m.ln.Close() + return m.stopServer() } func keys(m map[string]bool) []string { @@ -138,6 +145,10 @@ func keys(m map[string]bool) []string { // we listen on "localhost:0" and need to retrieve the actual address. var ListenAddr string +// shutdownTimeout is the maximum amount of time the metrics plugin will wait +// before erroring when it tries to close the metrics server +const shutdownTimeout time.Duration = time.Second * 5 + var buildInfo = prometheus.NewGaugeVec(prometheus.GaugeOpts{ Namespace: plugin.Namespace, Name: "build_info", diff --git a/plugin/pkg/uniq/uniq.go b/plugin/pkg/uniq/uniq.go index 3e50d64b5..6dd74883d 100644 --- a/plugin/pkg/uniq/uniq.go +++ b/plugin/pkg/uniq/uniq.go @@ -24,6 +24,13 @@ func (u U) Set(key string, f func() error) { u.u[key] = item{todo, f} } +// Unset removes the 'todo' associated with a key +func (u U) Unset(key string) { + if _, ok := u.u[key]; ok { + delete(u.u, key) + } +} + // SetTodo sets key to 'todo' again. func (u U) SetTodo(key string) { v, ok := u.u[key] diff --git a/test/reload_test.go b/test/reload_test.go index 18639ff03..bc55071c6 100644 --- a/test/reload_test.go +++ b/test/reload_test.go @@ -2,6 +2,7 @@ package test import ( "bytes" + "fmt" "io/ioutil" "net/http" "strings" @@ -125,4 +126,80 @@ func TestReloadMetricsHealth(t *testing.T) { } } +func collectMetricsInfo(addr, proc string) error { + cl := &http.Client{} + resp, err := cl.Get(fmt.Sprintf("http://%s/metrics", addr)) + if err != nil { + return err + } + metrics, _ := ioutil.ReadAll(resp.Body) + if !bytes.Contains(metrics, []byte(proc)) { + return fmt.Errorf("failed to see %s in metric output", proc) + } + return nil +} + +// TestReloadSeveralTimeMetrics ensures that metrics are not pushed to +// prometheus once the metrics plugin is removed and a coredns +// reload is triggered +// 1. check that metrics have not been exported to prometheus before coredns starts +// 2. ensure that build-related metrics have been exported once coredns starts +// 3. trigger multiple reloads without changing the corefile +// 4. remove the metrics plugin and trigger a final reload +// 5. ensure the original prometheus exporter has not received more metrics +func TestReloadSeveralTimeMetrics(t *testing.T) { + //TODO: add a tool that find an available port because this needs to be a port + // that is not used in another test + promAddress := "127.0.0.1:53185" + proc := "coredns_build_info" + corefileWithMetrics := ` + .:0 { + prometheus ` + promAddress + ` + whoami + }` + corefileWithoutMetrics := ` + .:0 { + whoami + }` + if err := collectMetricsInfo(promAddress, proc); err == nil { + t.Errorf("Prometheus is listening before the test started") + } + serverWithMetrics, err := CoreDNSServer(corefileWithMetrics) + if err != nil { + if strings.Contains(err.Error(), inUse) { + return + } + t.Errorf("Could not get service instance: %s", err) + } + // verify prometheus is running + if err := collectMetricsInfo(promAddress, proc); err != nil { + t.Errorf("Prometheus is not listening : %s", err) + } + reloadCount := 2 + for i := 0; i < reloadCount; i++ { + serverReload, err := serverWithMetrics.Restart( + NewInput(corefileWithMetrics), + ) + if err != nil { + t.Errorf("Could not restart CoreDNS : %s, at loop %v", err, i) + } + if err := collectMetricsInfo(promAddress, proc); err != nil { + t.Errorf("Prometheus is not listening : %s", err) + } + serverWithMetrics = serverReload + } + // reload without prometheus + serverWithoutMetrics, err := serverWithMetrics.Restart( + NewInput(corefileWithoutMetrics), + ) + if err != nil { + t.Errorf("Could not restart a second time CoreDNS : %s", err) + } + serverWithoutMetrics.Stop() + // verify that metrics have not been pushed + if err := collectMetricsInfo(promAddress, proc); err == nil { + t.Errorf("Prometheus is still listening") + } +} + const inUse = "address already in use"