From 5370f2c0bec24efc059094e4771f779923bdea29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Diogo=20M=C3=B3nica?= Date: Wed, 25 Feb 2015 18:15:07 -0800 Subject: [PATCH] Adding first version of HealthCheck Added a expvar style handler for the debug http server to allow health checks (/debug/health). Signed-off-by: Diogo Monica --- cmd/registry/main.go | 1 + health/api/api.go | 37 +++++++ health/api/api_test.go | 86 ++++++++++++++++ health/checks/checks.go | 35 +++++++ health/doc.go | 130 ++++++++++++++++++++++++ health/health.go | 212 ++++++++++++++++++++++++++++++++++++++++ health/health_test.go | 47 +++++++++ 7 files changed, 548 insertions(+) create mode 100644 health/api/api.go create mode 100644 health/api/api_test.go create mode 100644 health/checks/checks.go create mode 100644 health/doc.go create mode 100644 health/health.go create mode 100644 health/health_test.go diff --git a/cmd/registry/main.go b/cmd/registry/main.go index 79f45ab0c..5fa3c8553 100644 --- a/cmd/registry/main.go +++ b/cmd/registry/main.go @@ -12,6 +12,7 @@ import ( "github.com/bugsnag/bugsnag-go" "github.com/docker/distribution/configuration" ctxu "github.com/docker/distribution/context" + _ "github.com/docker/distribution/health" _ "github.com/docker/distribution/registry/auth/silly" _ "github.com/docker/distribution/registry/auth/token" "github.com/docker/distribution/registry/handlers" diff --git a/health/api/api.go b/health/api/api.go new file mode 100644 index 000000000..73fcc4535 --- /dev/null +++ b/health/api/api.go @@ -0,0 +1,37 @@ +package api + +import ( + "errors" + "net/http" + + "github.com/docker/distribution/health" +) + +var ( + updater = health.NewStatusUpdater() +) + +// DownHandler registers a manual_http_status that always returns an Error +func DownHandler(w http.ResponseWriter, r *http.Request) { + if r.Method == "POST" { + updater.Update(errors.New("Manual Check")) + } else { + w.WriteHeader(http.StatusNotFound) + } +} + +// UpHandler registers a manual_http_status that always returns nil +func UpHandler(w http.ResponseWriter, r *http.Request) { + if r.Method == "POST" { + updater.Update(nil) + } else { + w.WriteHeader(http.StatusNotFound) + } +} + +// init sets up the two endpoints to bring the service up and down +func init() { + health.Register("manual_http_status", updater) + http.HandleFunc("/debug/health/down", DownHandler) + http.HandleFunc("/debug/health/up", UpHandler) +} diff --git a/health/api/api_test.go b/health/api/api_test.go new file mode 100644 index 000000000..ec82154f6 --- /dev/null +++ b/health/api/api_test.go @@ -0,0 +1,86 @@ +package api + +import ( + "net/http" + "net/http/httptest" + "testing" + + "github.com/docker/distribution/health" +) + +// TestGETDownHandlerDoesNotChangeStatus ensures that calling the endpoint +// /debug/health/down with METHOD GET returns a 404 +func TestGETDownHandlerDoesNotChangeStatus(t *testing.T) { + recorder := httptest.NewRecorder() + + req, err := http.NewRequest("GET", "https://fakeurl.com/debug/health/down", nil) + if err != nil { + t.Errorf("Failed to create request.") + } + + DownHandler(recorder, req) + + if recorder.Code != 404 { + t.Errorf("Did not get a 404.") + } +} + +// TestGETUpHandlerDoesNotChangeStatus ensures that calling the endpoint +// /debug/health/down with METHOD GET returns a 404 +func TestGETUpHandlerDoesNotChangeStatus(t *testing.T) { + recorder := httptest.NewRecorder() + + req, err := http.NewRequest("GET", "https://fakeurl.com/debug/health/up", nil) + if err != nil { + t.Errorf("Failed to create request.") + } + + DownHandler(recorder, req) + + if recorder.Code != 404 { + t.Errorf("Did not get a 404.") + } +} + +// TestPOSTDownHandlerChangeStatus ensures the endpoint /debug/health/down changes +// the status code of the response to 503 +// This test is order dependent, and should come before TestPOSTUpHandlerChangeStatus +func TestPOSTDownHandlerChangeStatus(t *testing.T) { + recorder := httptest.NewRecorder() + + req, err := http.NewRequest("POST", "https://fakeurl.com/debug/health/down", nil) + if err != nil { + t.Errorf("Failed to create request.") + } + + DownHandler(recorder, req) + + if recorder.Code != 200 { + t.Errorf("Did not get a 200.") + } + + if len(health.CheckStatus()) != 1 { + t.Errorf("DownHandler didn't add an error check.") + } +} + +// TestPOSTUpHandlerChangeStatus ensures the endpoint /debug/health/up changes +// the status code of the response to 200 +func TestPOSTUpHandlerChangeStatus(t *testing.T) { + recorder := httptest.NewRecorder() + + req, err := http.NewRequest("POST", "https://fakeurl.com/debug/health/up", nil) + if err != nil { + t.Errorf("Failed to create request.") + } + + UpHandler(recorder, req) + + if recorder.Code != 200 { + t.Errorf("Did not get a 200.") + } + + if len(health.CheckStatus()) != 0 { + t.Errorf("UpHandler didn't remove the error check.") + } +} diff --git a/health/checks/checks.go b/health/checks/checks.go new file mode 100644 index 000000000..9de140107 --- /dev/null +++ b/health/checks/checks.go @@ -0,0 +1,35 @@ +package checks + +import ( + "errors" + "github.com/docker/distribution/health" + "net/http" + "os" +) + +// FileChecker checks the existence of a file and returns and error +// if the file exists, taking the application out of rotation +func FileChecker(f string) health.Checker { + return health.CheckFunc(func() error { + if _, err := os.Stat(f); err == nil { + return errors.New("file exists") + } + return nil + }) +} + +// HTTPChecker does a HEAD request and verifies if the HTTP status +// code return is a 200, taking the application out of rotation if +// otherwise +func HTTPChecker(r string) health.Checker { + return health.CheckFunc(func() error { + response, err := http.Head(r) + if err != nil { + return errors.New("error while checking: " + r) + } + if response.StatusCode != http.StatusOK { + return errors.New("downstream service returned unexpected status: " + string(response.StatusCode)) + } + return nil + }) +} diff --git a/health/doc.go b/health/doc.go new file mode 100644 index 000000000..8faa32f7c --- /dev/null +++ b/health/doc.go @@ -0,0 +1,130 @@ +// Package health provides a generic health checking framework. +// The health package works expvar style. By importing the package the debug +// server is getting a "/debug/health" endpoint that returns the current +// status of the application. +// If there are no errors, "/debug/health" will return a HTTP 200 status, +// together with an empty JSON reply "{}". If there are any checks +// with errors, the JSON reply will include all the failed checks, and the +// response will be have an HTTP 503 status. +// +// A Check can either be run synchronously, or asynchronously. We recommend +// that most checks are registered as an asynchronous check, so a call to the +// "/debug/health" endpoint always returns immediately. This pattern is +// particularly useful for checks that verify upstream connectivity or +// database status, since they might take a long time to return/timeout. +// +// Installing +// +// To install health, just import it in your application: +// +// import "github.com/docker/distribution/health" +// +// You can also (optionally) import "health/api" that will add two convenience +// endpoints: "/debug/health/down" and "/debug/health/up". These endpoints add +// "manual" checks that allow the service to quickly be brought in/out of +// rotation. +// +// import _ "github.com/docker/distribution/registry/health/api" +// +// # curl localhost:5001/debug/health +// {} +// # curl -X POST localhost:5001/debug/health/down +// # curl localhost:5001/debug/health +// {"manual_http_status":"Manual Check"} +// +// After importing these packages to your main application, you can start +// registering checks. +// +// Registering Checks +// +// The recommended way of registering checks is using a periodic Check. +// PeriodicChecks run on a certain schedule and asynchronously update the +// status of the check. This allows "CheckStatus()" to return without blocking +// on an expensive check. +// +// A trivial example of a check that runs every 5 seconds and shuts down our +// server if the current minute is even, could be added as follows: +// +// func currentMinuteEvenCheck() error { +// m := time.Now().Minute() +// if m%2 == 0 { +// return errors.New("Current minute is even!") +// } +// return nil +// } +// +// health.RegisterPeriodicFunc("minute_even", currentMinuteEvenCheck, time.Second*5) +// +// Alternatively, you can also make use of "RegisterPeriodicThresholdFunc" to +// implement the exact same check, but add a threshold of failures after which +// the check will be unhealthy. This is particularly useful for flaky Checks, +// ensuring some stability of the service when handling them. +// +// health.RegisterPeriodicThresholdFunc("minute_even", currentMinuteEvenCheck, time.Second*5, 4) +// +// The lowest-level way to interact with the health package is calling +// "Register" directly. Register allows you to pass in an arbitrary string and +// something that implements "Checker" and runs your check. If your method +// returns an error with nil, it is considered a healthy check, otherwise it +// will make the health check endpoint "/debug/health" start returning a 503 +// and list the specific check that failed. +// +// Assuming you wish to register a method called "currentMinuteEvenCheck() +// error" you could do that by doing: +// +// health.Register("even_minute", health.CheckFunc(currentMinuteEvenCheck)) +// +// CheckFunc is a convenience type that implements Checker. +// +// Another way of registering a check could be by using an anonymous function +// and the convenience method RegisterFunc. An example that makes the status +// endpoint always return an error: +// +// health.RegisterFunc("my_check", func() error { +// return Errors.new("This is an error!") +// })) +// +// Examples +// +// You could also use the health checker mechanism to ensure your application +// only comes up if certain conditions are met, or to allow the developer to +// take the service out of rotation immediately. An example that checks +// database connectivity and immediately takes the server out of rotation on +// err: +// +// updater = health.NewStatusUpdater() +// health.RegisterFunc("database_check", func() error { +// return updater.Check() +// })) +// +// conn, err := Connect(...) // database call here +// if err != nil { +// updater.Update(errors.New("Error connecting to the database: " + err.Error())) +// } +// +// You can also use the predefined Checkers that come included with the health +// package. First, import the checks: +// +// import "github.com/docker/distribution/health/checks +// +// After that you can make use of any of the provided checks. An example of +// using a `FileChecker` to take the application out of rotation if a certain +// file exists can be done as follows: +// +// health.Register("fileChecker", health.PeriodicChecker(checks.FileChecker("/tmp/disable"), time.Second*5)) +// +// After registering the check, it is trivial to take an application out of +// rotation from the console: +// +// # curl localhost:5001/debug/health +// {} +// # touch /tmp/disable +// # curl localhost:5001/debug/health +// {"fileChecker":"file exists"} +// +// You could also test the connectivity to a downstream service by using a +// "HTTPChecker", but ensure that you only mark the test unhealthy if there +// are a minimum of two failures in a row: +// +// health.Register("httpChecker", health.PeriodicThresholdChecker(checks.HTTPChecker("https://www.google.pt"), time.Second*5, 2)) +package health diff --git a/health/health.go b/health/health.go new file mode 100644 index 000000000..512539c1c --- /dev/null +++ b/health/health.go @@ -0,0 +1,212 @@ +package health + +import ( + "encoding/json" + "net/http" + "sync" + "time" +) + +var ( + mutex sync.RWMutex + registeredChecks = make(map[string]Checker) +) + +// Checker is the interface for a Health Checker +type Checker interface { + // Check returns nil if the service is okay. + Check() error +} + +// CheckFunc is a convenience type to create functions that implement +// the Checker interface +type CheckFunc func() error + +// Check Implements the Checker interface to allow for any func() error method +// to be passed as a Checker +func (cf CheckFunc) Check() error { + return cf() +} + +// Updater implements a health check that is explicitly set. +type Updater interface { + Checker + + // Update updates the current status of the health check. + Update(status error) +} + +// updater implements Checker and Updater, providing an asynchronous Update +// method. +// This allows us to have a Checker that returns the Check() call immediately +// not blocking on a potentially expensive check. +type updater struct { + mu sync.Mutex + status error +} + +// Check implements the Checker interface +func (u *updater) Check() error { + u.mu.Lock() + defer u.mu.Unlock() + + return u.status +} + +// Update implements the Updater interface, allowing asynchronous access to +// the status of a Checker. +func (u *updater) Update(status error) { + u.mu.Lock() + defer u.mu.Unlock() + + u.status = status +} + +// NewStatusUpdater returns a new updater +func NewStatusUpdater() Updater { + return &updater{} +} + +// thresholdUpdater implements Checker and Updater, providing an asynchronous Update +// method. +// This allows us to have a Checker that returns the Check() call immediately +// not blocking on a potentially expensive check. +type thresholdUpdater struct { + mu sync.Mutex + status error + threshold int + count int +} + +// Check implements the Checker interface +func (tu *thresholdUpdater) Check() error { + tu.mu.Lock() + defer tu.mu.Unlock() + + if tu.count >= tu.threshold { + return tu.status + } + + return nil +} + +// thresholdUpdater implements the Updater interface, allowing asynchronous +// access to the status of a Checker. +func (tu *thresholdUpdater) Update(status error) { + tu.mu.Lock() + defer tu.mu.Unlock() + + if status == nil { + tu.count = 0 + } else if tu.count < tu.threshold { + tu.count++ + } + + tu.status = status +} + +// NewThresholdStatusUpdater returns a new thresholdUpdater +func NewThresholdStatusUpdater(t int) Updater { + return &thresholdUpdater{threshold: t} +} + +// PeriodicChecker wraps an updater to provide a periodic checker +func PeriodicChecker(check Checker, period time.Duration) Checker { + u := NewStatusUpdater() + go func() { + t := time.NewTicker(period) + for { + <-t.C + u.Update(check.Check()) + } + }() + + return u +} + +// PeriodicThresholdChecker wraps an updater to provide a periodic checker that +// uses a threshold before it changes status +func PeriodicThresholdChecker(check Checker, period time.Duration, threshold int) Checker { + tu := NewThresholdStatusUpdater(threshold) + go func() { + t := time.NewTicker(period) + for { + <-t.C + tu.Update(check.Check()) + } + }() + + return tu +} + +// CheckStatus returns a map with all the current health check errors +func CheckStatus() map[string]string { + mutex.RLock() + defer mutex.RUnlock() + statusKeys := make(map[string]string) + for k, v := range registeredChecks { + err := v.Check() + if err != nil { + statusKeys[k] = err.Error() + } + } + + return statusKeys +} + +// Register associates the checker with the provided name. We allow +// overwrites to a specific check status. +func Register(name string, check Checker) { + mutex.Lock() + defer mutex.Unlock() + _, ok := registeredChecks[name] + if ok { + panic("Check already exists: " + name) + } + registeredChecks[name] = check +} + +// RegisterFunc allows the convenience of registering a checker directly +// from an arbitrary func() error +func RegisterFunc(name string, check func() error) { + Register(name, CheckFunc(check)) +} + +// RegisterPeriodicFunc allows the convenience of registering a PeriodicChecker +// from an arbitrary func() error +func RegisterPeriodicFunc(name string, check func() error, period time.Duration) { + Register(name, PeriodicChecker(CheckFunc(check), period)) +} + +// RegisterPeriodicThresholdFunc allows the convenience of registering a +// PeriodicChecker from an arbitrary func() error +func RegisterPeriodicThresholdFunc(name string, check func() error, period time.Duration, threshold int) { + Register(name, PeriodicThresholdChecker(CheckFunc(check), period, threshold)) +} + +// StatusHandler returns a JSON blob with all the currently registered Health Checks +// and their corresponding status. +// Returns 503 if any Error status exists, 200 otherwise +func StatusHandler(w http.ResponseWriter, r *http.Request) { + if r.Method == "GET" { + w.Header().Set("Content-Type", "application/json; charset=utf-8") + checksStatus := CheckStatus() + // If there is an error, return 503 + if len(checksStatus) != 0 { + w.WriteHeader(http.StatusServiceUnavailable) + } + err := json.NewEncoder(w).Encode(checksStatus) + + // Parsing of the JSON failed. Returning generic error message + if err != nil { + w.Write([]byte("{server_error: 'Could not parse error message'}")) + } + } else { + w.WriteHeader(http.StatusNotFound) + } +} + +// Registers global /debug/health api endpoint +func init() { + http.HandleFunc("/debug/health", StatusHandler) +} diff --git a/health/health_test.go b/health/health_test.go new file mode 100644 index 000000000..7989f0b28 --- /dev/null +++ b/health/health_test.go @@ -0,0 +1,47 @@ +package health + +import ( + "errors" + "net/http" + "net/http/httptest" + "testing" +) + +// TestReturns200IfThereAreNoChecks ensures that the result code of the health +// endpoint is 200 if there are not currently registered checks. +func TestReturns200IfThereAreNoChecks(t *testing.T) { + recorder := httptest.NewRecorder() + + req, err := http.NewRequest("GET", "https://fakeurl.com/debug/health", nil) + if err != nil { + t.Errorf("Failed to create request.") + } + + StatusHandler(recorder, req) + + if recorder.Code != 200 { + t.Errorf("Did not get a 200.") + } +} + +// TestReturns500IfThereAreErrorChecks ensures that the result code of the +// health endpoint is 500 if there are health checks with errors +func TestReturns503IfThereAreErrorChecks(t *testing.T) { + recorder := httptest.NewRecorder() + + req, err := http.NewRequest("GET", "https://fakeurl.com/debug/health", nil) + if err != nil { + t.Errorf("Failed to create request.") + } + + // Create a manual error + Register("some_check", CheckFunc(func() error { + return errors.New("This Check did not succeed") + })) + + StatusHandler(recorder, req) + + if recorder.Code != 503 { + t.Errorf("Did not get a 503.") + } +}