From 076b8d4fbae050f1f04475ad47709de872899374 Mon Sep 17 00:00:00 2001 From: Miek Gieben Date: Sat, 4 May 2019 21:06:25 +0100 Subject: [PATCH] plugin/health: add OnRestartFailed (#2812) Add OnRestartFailed, which makes the health plugin stay up if the Corefile is corrupt and we revert to the previous version. Also needs a fix for the channel handling. See #2659. Testing it will log the following when restarting with a corrupted Corefile: ~~~ 2019-05-04T18:01:59.431Z [INFO] linux/amd64, go1.12.4, CoreDNS-1.5.0 linux/amd64, go1.12.4, [INFO] SIGUSR1: Reloading [INFO] Reloading [ERROR] Restart failed: Corefile:5 - Error during parsing: Unknown directive 'bdhfhdhj' [ERROR] SIGUSR1: starting with listener file descriptors: Corefile:5 - Error during parsing: Unknown directive 'bdhfhdhj' ~~~ After this, the curl still works. This also needed a change to reset the channel used for the metrics go-routine, which gets closed on shutdown; otherwise you'll see: ~~~ ^C[INFO] SIGINT: Shutting down panic: close of closed channel goroutine 90 [running]: github.com/coredns/coredns/plugin/health.(*health).OnFinalShutdown(0xc000089bc0, 0xc000063d88, 0x4afe6d) ~~~ Signed-off-by: Miek Gieben --- plugin/health/README.md | 7 ------- plugin/health/health.go | 3 +-- plugin/health/setup.go | 3 ++- 3 files changed, 3 insertions(+), 10 deletions(-) diff --git a/plugin/health/README.md b/plugin/health/README.md index a62a55e66..eb86635ad 100644 --- a/plugin/health/README.md +++ b/plugin/health/README.md @@ -76,10 +76,3 @@ Set a lameduck duration of 1 second: } } ~~~ - -## Bugs - -When reloading, the health handler is stopped before the new server instance is started. If that -new server fails to start, then the initial server instance is still available and DNS queries still -served, but health handler stays down. Health will not reply HTTP request until a successful reload -or a complete restart of CoreDNS. 
diff --git a/plugin/health/health.go b/plugin/health/health.go index eef45d2cf..55ff68407 100644 --- a/plugin/health/health.go +++ b/plugin/health/health.go @@ -28,6 +28,7 @@ func (h *health) OnStartup() error { if h.Addr == "" { h.Addr = ":8080" } + h.stop = make(chan bool) ln, err := net.Listen("tcp", h.Addr) if err != nil { @@ -51,8 +52,6 @@ func (h *health) OnStartup() error { return nil } -func (h *health) OnRestart() error { return h.OnFinalShutdown() } - func (h *health) OnFinalShutdown() error { if !h.nlSetup { return nil diff --git a/plugin/health/setup.go b/plugin/health/setup.go index 0e51ec5ea..11d498b09 100644 --- a/plugin/health/setup.go +++ b/plugin/health/setup.go @@ -32,8 +32,9 @@ func setup(c *caddy.Controller) error { }) c.OnStartup(h.OnStartup) - c.OnRestart(h.OnRestart) + c.OnRestart(h.OnFinalShutdown) c.OnFinalShutdown(h.OnFinalShutdown) + c.OnRestartFailed(h.OnStartup) // Don't do AddPlugin, as health is not *really* a plugin just a separate webserver running. return nil