From c778b3a67ca43124a539acda21eca4934c8110f6 Mon Sep 17 00:00:00 2001 From: Miek Gieben Date: Thu, 7 Mar 2019 22:13:47 +0000 Subject: [PATCH] plugin/health: remove ability to poll other plugins (#2547) * plugin/health: remove ability to poll other plugins This mechanism defeats the purpose any plugin (mostly) caching can still be alive, we can probably forward queries still. Don't poll plugins, just tell the world we're up and running. It was only actually used in kubernetes; and there specifically would mean any network hiccup would NACK the entire server health. Fixes: #2534 Signed-off-by: Miek Gieben * update docs based on feedback Signed-off-by: Miek Gieben --- plugin.md | 5 +---- plugin/erratic/README.md | 4 ---- plugin/erratic/health.go | 14 ------------- plugin/health/README.md | 39 +++++++++--------------------------- plugin/health/health.go | 28 +++++++------------------- plugin/health/health_test.go | 28 -------------------------- plugin/health/healther.go | 36 --------------------------------- plugin/health/setup.go | 27 ------------------------- plugin/kubernetes/README.md | 6 ------ plugin/kubernetes/health.go | 4 ---- 10 files changed, 18 insertions(+), 173 deletions(-) delete mode 100644 plugin/erratic/health.go delete mode 100644 plugin/health/healther.go delete mode 100644 plugin/kubernetes/health.go diff --git a/plugin.md b/plugin.md index 81855ec38..9fa3dbb0c 100644 --- a/plugin.md +++ b/plugin.md @@ -59,10 +59,7 @@ When exporting metrics the *Namespace* should be `plugin.Namespace` (="coredns") *Subsystem* should be the name of the plugin. The README.md for the plugin should then also contain a *Metrics* section detailing the metrics. -If the plugin supports dynamic health reporting it should also have *Health* section detailing on -some of its inner workings. - -If the plugins supports signalling readiness it should have a *Ready* section detailing how it +If the plugin supports signalling readiness it should have a *Ready* section detailing how it works. ## Documentation diff --git a/plugin/erratic/README.md b/plugin/erratic/README.md index a731bed0f..e3bd79ae8 100644 --- a/plugin/erratic/README.md +++ b/plugin/erratic/README.md @@ -33,10 +33,6 @@ erratic { In case of a zone transfer and truncate the final SOA record *isn't* added to the response. -## Health - -This plugin implements dynamic health checking. For every dropped query it turns unhealthy. - ## Ready This plugin reports readiness to the ready plugin. diff --git a/plugin/erratic/health.go b/plugin/erratic/health.go deleted file mode 100644 index 1d9625e16..000000000 --- a/plugin/erratic/health.go +++ /dev/null @@ -1,14 +0,0 @@ -package erratic - -import ( - "sync/atomic" -) - -// Health implements the health.Healther interface. -func (e *Erratic) Health() bool { - q := atomic.LoadUint64(&e.q) - if e.drop > 0 && q%e.drop == 0 { - return false - } - return true -} diff --git a/plugin/health/README.md b/plugin/health/README.md index 6d402c4a8..68afeac06 100644 --- a/plugin/health/README.md +++ b/plugin/health/README.md @@ -6,9 +6,8 @@ ## Description -By enabling *health* any plugin that implements -[health.Healther interface](https://godoc.org/github.com/coredns/coredns/plugin/health#Healther) -will be queried for it's health. The combined health is exported, by default, on port 8080/health . +Enabled process wide health endpoint. When CoreDNS is up and running this returns a 200 OK http +status code. The health is exported, by default, on port 8080/health . ## Syntax @@ -17,12 +16,9 @@ health [ADDRESS] ~~~ Optionally takes an address; the default is `:8080`. The health path is fixed to `/health`. The -health endpoint returns a 200 response code and the word "OK" when this server is healthy. It returns -a 503. *health* periodically (1s) polls plugins that exports health information. If any of the -plugins signals that it is unhealthy, the server will go unhealthy too. Each plugin that supports -health checks has a section "Health" in their README. +health endpoint returns a 200 response code and the word "OK" when this server is healthy. -More options can be set with this extended syntax: +An extra option can be set with this extended syntax: ~~~ health [ADDRESS] { @@ -33,8 +29,8 @@ health [ADDRESS] { * Where `lameduck` will make the process unhealthy then *wait* for **DURATION** before the process shuts down. -If you have multiple Server Blocks and need to export health for each of the plugins, you must run -health endpoints on different ports: +If you have multiple Server Blocks, *health* should only be enabled in one of them (as it is process +wide). If you really need multiple endpoints, you must run health endpoints on different ports: ~~~ corefile com { @@ -48,21 +44,6 @@ net { } ~~~ -Note that if you format this in one server block you will get an error on startup, that the second -server can't setup the health plugin (on the same port). - -~~~ txt -com net { - whoami - erratic - health :8080 -} -~~~ - -## Plugins - -Any plugin that implements the Healther interface will be used to report health. - ## Metrics If monitoring is enabled (via the *prometheus* directive) then the following metric is exported: @@ -96,7 +77,7 @@ Set a lameduck duration of 1 second: ## Bugs -When reloading, the Health handler is stopped before the new server instance is started. -If that new server fails to start, then the initial server instance is still available and DNS queries still served, -but Health handler stays down. -Health will not reply HTTP request until a successful reload or a complete restart of CoreDNS. +When reloading, the health handler is stopped before the new server instance is started. If that +new server fails to start, then the initial server instance is still available and DNS queries still +served, but health handler stays down. Health will not reply HTTP request until a successful reload +or a complete restart of CoreDNS. diff --git a/plugin/health/health.go b/plugin/health/health.go index 7f35b0709..895704409 100644 --- a/plugin/health/health.go +++ b/plugin/health/health.go @@ -5,7 +5,6 @@ import ( "io" "net" "net/http" - "sync" "time" clog "github.com/coredns/coredns/plugin/pkg/log" @@ -22,18 +21,12 @@ type health struct { nlSetup bool mux *http.ServeMux - // A slice of Healthers that the health plugin will poll every second for their health status. - h []Healther - sync.RWMutex - ok bool // ok is the global boolean indicating an all healthy plugin stack - - stop chan bool - pollstop chan bool + stop chan bool } // newHealth returns a new initialized health. func newHealth(addr string) *health { - return &health{Addr: addr, stop: make(chan bool), pollstop: make(chan bool)} + return &health{Addr: addr, stop: make(chan bool)} } func (h *health) OnStartup() error { @@ -51,12 +44,10 @@ func (h *health) OnStartup() error { h.nlSetup = true h.mux.HandleFunc(path, func(w http.ResponseWriter, r *http.Request) { - if h.Ok() { - w.WriteHeader(http.StatusOK) - io.WriteString(w, ok) - return - } - w.WriteHeader(http.StatusServiceUnavailable) + // We're always healthy. + w.WriteHeader(http.StatusOK) + io.WriteString(w, ok) + return }) go func() { http.Serve(h.ln, h.mux) }() @@ -72,11 +63,6 @@ func (h *health) OnFinalShutdown() error { return nil } - // Stop polling plugins - h.pollstop <- true - // NACK health - h.SetOk(false) - if h.lameduck > 0 { log.Infof("Going into lameduck mode for %s", h.lameduck) time.Sleep(h.lameduck) @@ -84,8 +70,8 @@ func (h *health) OnFinalShutdown() error { h.ln.Close() - h.stop <- true h.nlSetup = false + close(h.stop) return nil } diff --git a/plugin/health/health_test.go b/plugin/health/health_test.go index acd06ea37..21433c975 100644 --- a/plugin/health/health_test.go +++ b/plugin/health/health_test.go @@ -6,43 +6,23 @@ import ( "net/http" "testing" "time" - - "github.com/coredns/coredns/plugin/erratic" ) func TestHealth(t *testing.T) { h := newHealth(":0") - h.h = append(h.h, &erratic.Erratic{}) if err := h.OnStartup(); err != nil { t.Fatalf("Unable to startup the health server: %v", err) } defer h.OnFinalShutdown() - go func() { - <-h.pollstop - return - }() - // Reconstruct the http address based on the port allocated by operating system. address := fmt.Sprintf("http://%s%s", h.ln.Addr().String(), path) - // Nothing set should return unhealthy response, err := http.Get(address) if err != nil { t.Fatalf("Unable to query %s: %v", address, err) } - if response.StatusCode != 503 { - t.Errorf("Invalid status code: expecting '503', got '%d'", response.StatusCode) - } - response.Body.Close() - - h.poll() - - response, err = http.Get(address) - if err != nil { - t.Fatalf("Unable to query %s: %v", address, err) - } if response.StatusCode != 200 { t.Errorf("Invalid status code: expecting '200', got '%d'", response.StatusCode) } @@ -60,18 +40,10 @@ func TestHealth(t *testing.T) { func TestHealthLameduck(t *testing.T) { h := newHealth(":0") h.lameduck = 250 * time.Millisecond - h.h = append(h.h, &erratic.Erratic{}) if err := h.OnStartup(); err != nil { t.Fatalf("Unable to startup the health server: %v", err) } - // Both these things are behind a sync.Once, fake reading from the channels. - go func() { - <-h.pollstop - <-h.stop - return - }() - h.OnFinalShutdown() } diff --git a/plugin/health/healther.go b/plugin/health/healther.go deleted file mode 100644 index 8bb6c907c..000000000 --- a/plugin/health/healther.go +++ /dev/null @@ -1,36 +0,0 @@ -package health - -// Healther interface needs to be implemented by each plugin willing to provide -// healthhceck information to the health plugin. Note this method should return -// quickly, i.e. just checking a boolean status, as it is called every second -// from the health plugin. -type Healther interface { - // Health returns a boolean indicating the health status of a plugin. - // False indicates unhealthy. - Health() bool -} - -// Ok returns the global health status of all plugin configured in this server. -func (h *health) Ok() bool { - h.RLock() - defer h.RUnlock() - return h.ok -} - -// SetOk sets the global health status of all plugin configured in this server. -func (h *health) SetOk(ok bool) { - h.Lock() - defer h.Unlock() - h.ok = ok -} - -// poll polls all healthers and sets the global state. -func (h *health) poll() { - for _, m := range h.h { - if !m.Health() { - h.SetOk(false) - return - } - } - h.SetOk(true) -} diff --git a/plugin/health/setup.go b/plugin/health/setup.go index 5a1725125..19aeba58d 100644 --- a/plugin/health/setup.go +++ b/plugin/health/setup.go @@ -5,7 +5,6 @@ import ( "net" "time" - "github.com/coredns/coredns/core/dnsserver" "github.com/coredns/coredns/plugin" "github.com/coredns/coredns/plugin/metrics" @@ -28,32 +27,6 @@ func setup(c *caddy.Controller) error { h := newHealth(addr) h.lameduck = lame - c.OnStartup(func() error { - plugins := dnsserver.GetConfig(c).Handlers() - for _, p := range plugins { - if x, ok := p.(Healther); ok { - h.h = append(h.h, x) - } - } - return nil - }) - - c.OnStartup(func() error { - // Poll all middleware every second. - h.poll() - go func() { - for { - select { - case <-time.After(1 * time.Second): - h.poll() - case <-h.pollstop: - return - } - } - }() - return nil - }) - c.OnStartup(func() error { metrics.MustRegister(c, HealthDuration) return nil diff --git a/plugin/kubernetes/README.md b/plugin/kubernetes/README.md index 552050b0b..672e167df 100644 --- a/plugin/kubernetes/README.md +++ b/plugin/kubernetes/README.md @@ -107,11 +107,6 @@ kubernetes [ZONES...] { This allows the querying pod to continue searching for the service in the search path. The search path could, for example, include another Kubernetes cluster. -## Health - -This plugin implements dynamic health checking. Currently this is limited to reporting healthy when -the API has synced. - ## Examples Handle all queries in the `cluster.local` zone. Connect to Kubernetes in-cluster. Also handle all @@ -144,7 +139,6 @@ kubernetes cluster.local { } ~~~ - ## stubDomains and upstreamNameservers Here we use the *forward* plugin to implement a stubDomain that forwards `example.local` to the nameserver `10.100.0.10:53`. diff --git a/plugin/kubernetes/health.go b/plugin/kubernetes/health.go deleted file mode 100644 index 243749210..000000000 --- a/plugin/kubernetes/health.go +++ /dev/null @@ -1,4 +0,0 @@ -package kubernetes - -// Health implements the health.Healther interface. -func (k *Kubernetes) Health() bool { return k.APIConn.HasSynced() }