Adding first version of HealthCheck

Added an expvar-style handler to the debug HTTP server to allow health checks (/debug/health).

Signed-off-by: Diogo Monica <diogo@docker.com>
pull/230/head
Diogo Mónica 2015-02-25 18:15:07 -08:00 committed by Diogo Monica
parent 47a8ad7a61
commit 5370f2c0be
7 changed files with 548 additions and 0 deletions

View File

@ -12,6 +12,7 @@ import (
"github.com/bugsnag/bugsnag-go"
"github.com/docker/distribution/configuration"
ctxu "github.com/docker/distribution/context"
_ "github.com/docker/distribution/health"
_ "github.com/docker/distribution/registry/auth/silly"
_ "github.com/docker/distribution/registry/auth/token"
"github.com/docker/distribution/registry/handlers"

37
health/api/api.go 100644
View File

@ -0,0 +1,37 @@
package api
import (
"errors"
"net/http"
"github.com/docker/distribution/health"
)
// updater holds the manually controlled health status of this package. It is
// set to an error by DownHandler, reset to nil by UpHandler, and registered
// with the health package under the name "manual_http_status" in init.
var (
updater = health.NewStatusUpdater()
)
// DownHandler marks the manual health check as failing when it receives a
// POST request. Requests using any other HTTP method are answered with a 404
// and leave the current status untouched.
func DownHandler(w http.ResponseWriter, r *http.Request) {
	if r.Method != "POST" {
		w.WriteHeader(http.StatusNotFound)
		return
	}
	updater.Update(errors.New("Manual Check"))
}
// UpHandler clears the manual health check (sets its status to nil) when it
// receives a POST request. Requests using any other HTTP method are answered
// with a 404 and leave the current status untouched.
func UpHandler(w http.ResponseWriter, r *http.Request) {
	if r.Method != "POST" {
		w.WriteHeader(http.StatusNotFound)
		return
	}
	updater.Update(nil)
}
// init registers the manual check under the name "manual_http_status" and
// installs the two endpoints, on the default HTTP mux, that bring the service
// down and up.
func init() {
	health.Register("manual_http_status", updater)

	http.Handle("/debug/health/down", http.HandlerFunc(DownHandler))
	http.Handle("/debug/health/up", http.HandlerFunc(UpHandler))
}

View File

@ -0,0 +1,86 @@
package api
import (
"net/http"
"net/http/httptest"
"testing"
"github.com/docker/distribution/health"
)
// TestGETDownHandlerDoesNotChangeStatus verifies that a GET request sent to
// /debug/health/down is answered with a 404.
func TestGETDownHandlerDoesNotChangeStatus(t *testing.T) {
	req, err := http.NewRequest("GET", "https://fakeurl.com/debug/health/down", nil)
	if err != nil {
		t.Errorf("Failed to create request.")
	}

	rec := httptest.NewRecorder()
	DownHandler(rec, req)

	if got := rec.Code; got != http.StatusNotFound {
		t.Errorf("Did not get a 404.")
	}
}
// TestGETUpHandlerDoesNotChangeStatus ensures that calling the endpoint
// /debug/health/down with METHOD GET returns a 404
func TestGETUpHandlerDoesNotChangeStatus(t *testing.T) {
recorder := httptest.NewRecorder()
req, err := http.NewRequest("GET", "https://fakeurl.com/debug/health/up", nil)
if err != nil {
t.Errorf("Failed to create request.")
}
DownHandler(recorder, req)
if recorder.Code != 404 {
t.Errorf("Did not get a 404.")
}
}
// TestPOSTDownHandlerChangeStatus verifies that a POST to /debug/health/down
// is answered with a 200 and leaves exactly one failing check reported by
// health.CheckStatus.
// This test is order dependent and must run before
// TestPOSTUpHandlerChangeStatus.
func TestPOSTDownHandlerChangeStatus(t *testing.T) {
	req, err := http.NewRequest("POST", "https://fakeurl.com/debug/health/down", nil)
	if err != nil {
		t.Errorf("Failed to create request.")
	}

	rec := httptest.NewRecorder()
	DownHandler(rec, req)

	if rec.Code != http.StatusOK {
		t.Errorf("Did not get a 200.")
	}

	if failing := health.CheckStatus(); len(failing) != 1 {
		t.Errorf("DownHandler didn't add an error check.")
	}
}
// TestPOSTUpHandlerChangeStatus verifies that a POST to /debug/health/up is
// answered with a 200 and that health.CheckStatus reports no failing checks
// afterwards.
func TestPOSTUpHandlerChangeStatus(t *testing.T) {
	req, err := http.NewRequest("POST", "https://fakeurl.com/debug/health/up", nil)
	if err != nil {
		t.Errorf("Failed to create request.")
	}

	rec := httptest.NewRecorder()
	UpHandler(rec, req)

	if rec.Code != http.StatusOK {
		t.Errorf("Did not get a 200.")
	}

	if failing := health.CheckStatus(); len(failing) != 0 {
		t.Errorf("UpHandler didn't remove the error check.")
	}
}

View File

@ -0,0 +1,35 @@
package checks
import (
	"errors"
	"net/http"
	"os"
	"strconv"

	"github.com/docker/distribution/health"
)
// FileChecker returns a Checker that reports the error "file exists" whenever
// os.Stat succeeds on the path f, taking the application out of rotation.
// If os.Stat fails for any reason the check is considered healthy.
func FileChecker(f string) health.Checker {
	check := func() error {
		_, statErr := os.Stat(f)
		if statErr != nil {
			return nil
		}
		return errors.New("file exists")
	}
	return health.CheckFunc(check)
}
// HTTPChecker returns a Checker that issues a HEAD request to the URL r and
// reports an error, taking the application out of rotation, when the request
// fails or when the response status code is anything other than 200.
func HTTPChecker(r string) health.Checker {
	return health.CheckFunc(func() error {
		response, err := http.Head(r)
		if err != nil {
			return errors.New("error while checking: " + r)
		}
		// Always close the body so the underlying connection is released;
		// this checker is meant to be run repeatedly.
		defer response.Body.Close()

		if response.StatusCode != http.StatusOK {
			// strconv.Itoa renders the code as decimal text; a plain
			// string(int) conversion would produce a single rune instead.
			return errors.New("downstream service returned unexpected status: " + strconv.Itoa(response.StatusCode))
		}
		return nil
	})
}

130
health/doc.go 100644
View File

@ -0,0 +1,130 @@
// Package health provides a generic health checking framework.
// The health package works expvar style. By importing the package the debug
// server is getting a "/debug/health" endpoint that returns the current
// status of the application.
// If there are no errors, "/debug/health" will return a HTTP 200 status,
// together with an empty JSON reply "{}". If there are any checks
// with errors, the JSON reply will include all the failed checks, and the
// response will have an HTTP 503 status.
//
// A Check can either be run synchronously, or asynchronously. We recommend
// that most checks are registered as an asynchronous check, so a call to the
// "/debug/health" endpoint always returns immediately. This pattern is
// particularly useful for checks that verify upstream connectivity or
// database status, since they might take a long time to return/timeout.
//
// Installing
//
// To install health, just import it in your application:
//
// import "github.com/docker/distribution/health"
//
// You can also (optionally) import "health/api" that will add two convenience
// endpoints: "/debug/health/down" and "/debug/health/up". These endpoints add
// "manual" checks that allow the service to quickly be brought in/out of
// rotation.
//
// import _ "github.com/docker/distribution/health/api"
//
// # curl localhost:5001/debug/health
// {}
// # curl -X POST localhost:5001/debug/health/down
// # curl localhost:5001/debug/health
// {"manual_http_status":"Manual Check"}
//
// After importing these packages to your main application, you can start
// registering checks.
//
// Registering Checks
//
// The recommended way of registering checks is using a periodic Check.
// PeriodicChecks run on a certain schedule and asynchronously update the
// status of the check. This allows "CheckStatus()" to return without blocking
// on an expensive check.
//
// A trivial example of a check that runs every 5 seconds and shuts down our
// server if the current minute is even, could be added as follows:
//
// func currentMinuteEvenCheck() error {
// m := time.Now().Minute()
// if m%2 == 0 {
// return errors.New("Current minute is even!")
// }
// return nil
// }
//
// health.RegisterPeriodicFunc("minute_even", currentMinuteEvenCheck, time.Second*5)
//
// Alternatively, you can also make use of "RegisterPeriodicThresholdFunc" to
// implement the exact same check, but add a threshold of failures after which
// the check will be unhealthy. This is particularly useful for flaky Checks,
// ensuring some stability of the service when handling them.
//
// health.RegisterPeriodicThresholdFunc("minute_even", currentMinuteEvenCheck, time.Second*5, 4)
//
// The lowest-level way to interact with the health package is calling
// "Register" directly. Register allows you to pass in an arbitrary string and
// something that implements "Checker" and runs your check. If your method
// returns a nil error, it is considered a healthy check, otherwise it
// will make the health check endpoint "/debug/health" start returning a 503
// and list the specific check that failed.
//
// Assuming you wish to register a method called "currentMinuteEvenCheck()
// error" you could do that by doing:
//
// health.Register("even_minute", health.CheckFunc(currentMinuteEvenCheck))
//
// CheckFunc is a convenience type that implements Checker.
//
// Another way of registering a check could be by using an anonymous function
// and the convenience method RegisterFunc. An example that makes the status
// endpoint always return an error:
//
// health.RegisterFunc("my_check", func() error {
// return errors.New("This is an error!")
// })
//
// Examples
//
// You could also use the health checker mechanism to ensure your application
// only comes up if certain conditions are met, or to allow the developer to
// take the service out of rotation immediately. An example that checks
// database connectivity and immediately takes the server out of rotation on
// err:
//
// updater := health.NewStatusUpdater()
// health.RegisterFunc("database_check", func() error {
// return updater.Check()
// })
//
// conn, err := Connect(...) // database call here
// if err != nil {
// updater.Update(errors.New("Error connecting to the database: " + err.Error()))
// }
//
// You can also use the predefined Checkers that come included with the health
// package. First, import the checks:
//
// import "github.com/docker/distribution/health/checks"
//
// After that you can make use of any of the provided checks. An example of
// using a `FileChecker` to take the application out of rotation if a certain
// file exists can be done as follows:
//
// health.Register("fileChecker", health.PeriodicChecker(checks.FileChecker("/tmp/disable"), time.Second*5))
//
// After registering the check, it is trivial to take an application out of
// rotation from the console:
//
// # curl localhost:5001/debug/health
// {}
// # touch /tmp/disable
// # curl localhost:5001/debug/health
// {"fileChecker":"file exists"}
//
// You could also test the connectivity to a downstream service by using a
// "HTTPChecker", but ensure that you only mark the test unhealthy if there
// are a minimum of two failures in a row:
//
// health.Register("httpChecker", health.PeriodicThresholdChecker(checks.HTTPChecker("https://www.google.pt"), time.Second*5, 2))
package health

212
health/health.go 100644
View File

@ -0,0 +1,212 @@
package health
import (
"encoding/json"
"net/http"
"sync"
"time"
)
var (
// mutex guards registeredChecks: CheckStatus takes the read lock while
// iterating, Register takes the write lock while inserting.
mutex sync.RWMutex
// registeredChecks maps each check name to the Checker registered under it.
registeredChecks = make(map[string]Checker)
)
// Checker is the interface implemented by every health check that can be
// registered with this package.
type Checker interface {
// Check returns nil if the service is okay, or an error describing the
// failure otherwise.
Check() error
}
// CheckFunc adapts a plain func() error so that it can be used wherever a
// Checker is expected.
type CheckFunc func() error

// Check satisfies the Checker interface by invoking the wrapped function and
// returning its result unchanged.
func (cf CheckFunc) Check() error {
	result := cf()
	return result
}
// Updater is a Checker whose status is set explicitly by the caller rather
// than computed when Check is invoked.
type Updater interface {
Checker
// Update updates the current status of the health check. A nil status
// marks the check as healthy.
Update(status error)
}
// updater is the basic Updater implementation: it stores the most recently
// supplied status and hands it back from Check. Because Check only reads the
// stored value, it returns immediately instead of blocking on a potentially
// expensive check.
type updater struct {
	mu     sync.Mutex
	status error
}

// Check implements the Checker interface by returning the last status that
// was passed to Update (nil if Update was never called).
func (u *updater) Check() error {
	u.mu.Lock()
	current := u.status
	u.mu.Unlock()
	return current
}

// Update implements the Updater interface by replacing the stored status
// with the given one.
func (u *updater) Update(status error) {
	u.mu.Lock()
	u.status = status
	u.mu.Unlock()
}
// NewStatusUpdater returns a fresh Updater whose initial status is nil.
func NewStatusUpdater() Updater {
	return new(updater)
}
// thresholdUpdater is an Updater that only reports a failure once the number
// of consecutive failing updates has reached a configured threshold. Like
// updater, its Check only reads stored state and therefore returns
// immediately instead of blocking on a potentially expensive check.
type thresholdUpdater struct {
	mu        sync.Mutex
	status    error
	threshold int
	count     int
}

// Check implements the Checker interface. It returns nil while the failure
// count is below the threshold, and the most recently stored status once the
// threshold has been reached.
func (tu *thresholdUpdater) Check() error {
	tu.mu.Lock()
	defer tu.mu.Unlock()

	if tu.count < tu.threshold {
		return nil
	}
	return tu.status
}

// Update implements the Updater interface. A nil status resets the failure
// count to zero; a non-nil status increments it, but never beyond the
// threshold. The supplied status is stored in either case.
func (tu *thresholdUpdater) Update(status error) {
	tu.mu.Lock()
	defer tu.mu.Unlock()

	tu.status = status
	switch {
	case status == nil:
		tu.count = 0
	case tu.count < tu.threshold:
		tu.count++
	}
}
// NewThresholdStatusUpdater returns a fresh threshold-based Updater that
// uses t as its failure threshold.
func NewThresholdStatusUpdater(t int) Updater {
	tu := new(thresholdUpdater)
	tu.threshold = t
	return tu
}
// PeriodicChecker returns a Checker whose status is refreshed in the
// background: a goroutine runs check once per period and stores the result
// in a status updater. The returned Checker reports nil until the first
// period has elapsed. The goroutine loops forever; no way to stop it is
// provided.
func PeriodicChecker(check Checker, period time.Duration) Checker {
	status := NewStatusUpdater()

	refresh := func() {
		ticker := time.NewTicker(period)
		for range ticker.C {
			status.Update(check.Check())
		}
	}
	go refresh()

	return status
}
// PeriodicThresholdChecker returns a Checker whose status is refreshed in the
// background: a goroutine runs check once per period and feeds the result
// into a threshold updater created with the given threshold, so a failure is
// only reported once that threshold has been reached. The goroutine loops
// forever; no way to stop it is provided.
func PeriodicThresholdChecker(check Checker, period time.Duration, threshold int) Checker {
	status := NewThresholdStatusUpdater(threshold)

	refresh := func() {
		ticker := time.NewTicker(period)
		for range ticker.C {
			status.Update(check.Check())
		}
	}
	go refresh()

	return status
}
// CheckStatus runs every registered check and returns a map from check name
// to error message containing only the checks that currently fail. The map
// is empty when all checks pass.
func CheckStatus() map[string]string {
	mutex.RLock()
	defer mutex.RUnlock()

	failures := make(map[string]string)
	for name, checker := range registeredChecks {
		if err := checker.Check(); err != nil {
			failures[name] = err.Error()
		}
	}
	return failures
}
// Register associates the checker with the provided name. Registering a
// second checker under a name that is already in use panics.
func Register(name string, check Checker) {
	mutex.Lock()
	defer mutex.Unlock()

	if _, exists := registeredChecks[name]; exists {
		panic("Check already exists: " + name)
	}
	registeredChecks[name] = check
}
// RegisterFunc allows the convenience of registering a checker directly
// from an arbitrary func() error
func RegisterFunc(name string, check func() error) {
Register(name, CheckFunc(check))
}
// RegisterPeriodicFunc allows the convenience of registering a PeriodicChecker
// from an arbitrary func() error
func RegisterPeriodicFunc(name string, check func() error, period time.Duration) {
Register(name, PeriodicChecker(CheckFunc(check), period))
}
// RegisterPeriodicThresholdFunc allows the convenience of registering a
// PeriodicChecker from an arbitrary func() error
func RegisterPeriodicThresholdFunc(name string, check func() error, period time.Duration, threshold int) {
Register(name, PeriodicThresholdChecker(CheckFunc(check), period, threshold))
}
// StatusHandler answers GET requests with a JSON object that maps the name
// of every failing health check to its error message.
//
// The response status is 200 when no check fails and 503 when at least one
// does. If the status map cannot be serialized, a 500 with a JSON error
// object is returned instead. Requests using any method other than GET are
// answered with a 404.
func StatusHandler(w http.ResponseWriter, r *http.Request) {
	if r.Method != "GET" {
		w.WriteHeader(http.StatusNotFound)
		return
	}

	checksStatus := CheckStatus()

	// Serialize before touching the response, so that a failure can still be
	// reported with its own status code and a well-formed JSON body rather
	// than being appended to a partially written reply.
	body, err := json.Marshal(checksStatus)

	w.Header().Set("Content-Type", "application/json; charset=utf-8")
	if err != nil {
		w.WriteHeader(http.StatusInternalServerError)
		w.Write([]byte(`{"server_error":"Could not parse error message"}` + "\n"))
		return
	}

	// If there is an error, return 503
	if len(checksStatus) != 0 {
		w.WriteHeader(http.StatusServiceUnavailable)
	}

	// Keep the trailing newline that json.Encoder used to emit. A failed
	// write means the client went away; there is nothing left to report.
	w.Write(append(body, '\n'))
}
// init installs StatusHandler on the default HTTP mux under the global
// /debug/health endpoint.
func init() {
	http.Handle("/debug/health", http.HandlerFunc(StatusHandler))
}

View File

@ -0,0 +1,47 @@
package health
import (
"errors"
"net/http"
"net/http/httptest"
"testing"
)
// TestReturns200IfThereAreNoChecks verifies that the health endpoint answers
// a GET request with a 200 when no checks are currently registered.
func TestReturns200IfThereAreNoChecks(t *testing.T) {
	req, err := http.NewRequest("GET", "https://fakeurl.com/debug/health", nil)
	if err != nil {
		t.Errorf("Failed to create request.")
	}

	rec := httptest.NewRecorder()
	StatusHandler(rec, req)

	if got := rec.Code; got != http.StatusOK {
		t.Errorf("Did not get a 200.")
	}
}
// TestReturns500IfThereAreErrorChecks ensures that the result code of the
// health endpoint is 500 if there are health checks with errors
func TestReturns503IfThereAreErrorChecks(t *testing.T) {
recorder := httptest.NewRecorder()
req, err := http.NewRequest("GET", "https://fakeurl.com/debug/health", nil)
if err != nil {
t.Errorf("Failed to create request.")
}
// Create a manual error
Register("some_check", CheckFunc(func() error {
return errors.New("This Check did not succeed")
}))
StatusHandler(recorder, req)
if recorder.Code != 503 {
t.Errorf("Did not get a 503.")
}
}