Metrics listener fix (#2036)

* Create test to verify correct listener behavior

* Create Unset function to remove todo items

* Reset address for prometheus listener before restarting

* Add inline documentation for Unset function

* Make shutdownTimeout a constant and change to five seconds

* Revert ForEach behavior in uniq package
This commit is contained in:
Zach Eddy 2018-08-21 08:52:25 -07:00 committed by Francois Tur
parent b87ed01bb2
commit 8aa55c5ff2
3 changed files with 108 additions and 13 deletions

View file

@ -2,10 +2,12 @@
package metrics
import (
"context"
"net"
"net/http"
"os"
"sync"
"time"
"github.com/coredns/coredns/plugin"
"github.com/coredns/coredns/plugin/metrics/vars"
@ -22,6 +24,7 @@ type Metrics struct {
ln net.Listener
lnSetup bool
mux *http.ServeMux
srv *http.Server
zoneNames []string
zoneMap map[string]bool
@ -94,9 +97,9 @@ func (m *Metrics) OnStartup() error {
m.mux = http.NewServeMux()
m.mux.Handle("/metrics", promhttp.HandlerFor(m.Reg, promhttp.HandlerOpts{}))
m.srv = &http.Server{Handler: m.mux}
go func() {
http.Serve(m.ln, m.mux)
m.srv.Serve(m.ln)
}()
return nil
}
@ -106,24 +109,28 @@ func (m *Metrics) OnRestart() error {
if !m.lnSetup {
return nil
}
uniqAddr.Unset(m.Addr)
return m.stopServer()
}
uniqAddr.SetTodo(m.Addr)
m.ln.Close()
func (m *Metrics) stopServer() error {
if !m.lnSetup {
return nil
}
ctx, cancel := context.WithTimeout(context.Background(), shutdownTimeout)
defer cancel()
if err := m.srv.Shutdown(ctx); err != nil {
log.Infof("Failed to stop prometheus http server: %s", err)
return err
}
m.lnSetup = false
m.ln.Close()
return nil
}
// OnFinalShutdown tears down the metrics listener on shutdown and restart.
func (m *Metrics) OnFinalShutdown() error {
// We allow prometheus statements in multiple Server Blocks, but only the first
// will open the listener, for the rest they are all nil; guard against that.
if !m.lnSetup {
return nil
}
m.lnSetup = false
return m.ln.Close()
return m.stopServer()
}
func keys(m map[string]bool) []string {
@ -138,6 +145,10 @@ func keys(m map[string]bool) []string {
// we listen on "localhost:0" and need to retrieve the actual address.
var ListenAddr string
// shutdownTimeout is the maximum amount of time the metrics plugin will wait
// before erroring when it tries to close the metrics server
const shutdownTimeout time.Duration = time.Second * 5
var buildInfo = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: plugin.Namespace,
Name: "build_info",