forked from TrueCloudLab/distribution
Provide simple storage driver health check
To ensure the ensure the web application is properly operating, we've added a periodic health check for the storage driver. If the health check fails three times in a row, the registry will serve 503 response status for any request until the condition is resolved. The condition is reported in the response body and via the /debug/health endpoint. To ensure that all drivers will properly operate with this health check, a function has been added to the driver testsuite. Signed-off-by: Stephen J Day <stephen.day@docker.com>
This commit is contained in:
parent
1d5b311fc4
commit
6ba799b69e
4 changed files with 135 additions and 20 deletions
|
@ -17,7 +17,7 @@ import (
|
||||||
"github.com/bugsnag/bugsnag-go"
|
"github.com/bugsnag/bugsnag-go"
|
||||||
"github.com/docker/distribution/configuration"
|
"github.com/docker/distribution/configuration"
|
||||||
"github.com/docker/distribution/context"
|
"github.com/docker/distribution/context"
|
||||||
_ "github.com/docker/distribution/health"
|
"github.com/docker/distribution/health"
|
||||||
_ "github.com/docker/distribution/registry/auth/htpasswd"
|
_ "github.com/docker/distribution/registry/auth/htpasswd"
|
||||||
_ "github.com/docker/distribution/registry/auth/silly"
|
_ "github.com/docker/distribution/registry/auth/silly"
|
||||||
_ "github.com/docker/distribution/registry/auth/token"
|
_ "github.com/docker/distribution/registry/auth/token"
|
||||||
|
@ -70,8 +70,10 @@ func main() {
|
||||||
uuid.Loggerf = context.GetLogger(ctx).Warnf
|
uuid.Loggerf = context.GetLogger(ctx).Warnf
|
||||||
|
|
||||||
app := handlers.NewApp(ctx, *config)
|
app := handlers.NewApp(ctx, *config)
|
||||||
|
app.RegisterHealthChecks()
|
||||||
handler := configureReporting(app)
|
handler := configureReporting(app)
|
||||||
handler = panicHandler(handler)
|
handler = panicHandler(handler)
|
||||||
|
handler = health.Handler(handler)
|
||||||
handler = gorhandlers.CombinedLoggingHandler(os.Stdout, handler)
|
handler = gorhandlers.CombinedLoggingHandler(os.Stdout, handler)
|
||||||
|
|
||||||
if config.HTTP.Debug.Addr != "" {
|
if config.HTTP.Debug.Addr != "" {
|
||||||
|
|
|
@ -2,9 +2,12 @@ package health
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
"net/http"
|
"net/http"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/docker/distribution/context"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
|
@ -140,7 +143,7 @@ func PeriodicThresholdChecker(check Checker, period time.Duration, threshold int
|
||||||
}
|
}
|
||||||
|
|
||||||
// CheckStatus returns a map with all the current health check errors
|
// CheckStatus returns a map with all the current health check errors
|
||||||
func CheckStatus() map[string]string {
|
func CheckStatus() map[string]string { // TODO(stevvooe) this needs a proper type
|
||||||
mutex.RLock()
|
mutex.RLock()
|
||||||
defer mutex.RUnlock()
|
defer mutex.RUnlock()
|
||||||
statusKeys := make(map[string]string)
|
statusKeys := make(map[string]string)
|
||||||
|
@ -174,13 +177,13 @@ func RegisterFunc(name string, check func() error) {
|
||||||
|
|
||||||
// RegisterPeriodicFunc allows the convenience of registering a PeriodicChecker
|
// RegisterPeriodicFunc allows the convenience of registering a PeriodicChecker
|
||||||
// from an arbitrary func() error
|
// from an arbitrary func() error
|
||||||
func RegisterPeriodicFunc(name string, check func() error, period time.Duration) {
|
func RegisterPeriodicFunc(name string, period time.Duration, check CheckFunc) {
|
||||||
Register(name, PeriodicChecker(CheckFunc(check), period))
|
Register(name, PeriodicChecker(CheckFunc(check), period))
|
||||||
}
|
}
|
||||||
|
|
||||||
// RegisterPeriodicThresholdFunc allows the convenience of registering a
|
// RegisterPeriodicThresholdFunc allows the convenience of registering a
|
||||||
// PeriodicChecker from an arbitrary func() error
|
// PeriodicChecker from an arbitrary func() error
|
||||||
func RegisterPeriodicThresholdFunc(name string, check func() error, period time.Duration, threshold int) {
|
func RegisterPeriodicThresholdFunc(name string, period time.Duration, threshold int, check CheckFunc) {
|
||||||
Register(name, PeriodicThresholdChecker(CheckFunc(check), period, threshold))
|
Register(name, PeriodicThresholdChecker(CheckFunc(check), period, threshold))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -189,25 +192,60 @@ func RegisterPeriodicThresholdFunc(name string, check func() error, period time.
|
||||||
// Returns 503 if any Error status exists, 200 otherwise
|
// Returns 503 if any Error status exists, 200 otherwise
|
||||||
func StatusHandler(w http.ResponseWriter, r *http.Request) {
|
func StatusHandler(w http.ResponseWriter, r *http.Request) {
|
||||||
if r.Method == "GET" {
|
if r.Method == "GET" {
|
||||||
w.Header().Set("Content-Type", "application/json; charset=utf-8")
|
checks := CheckStatus()
|
||||||
checksStatus := CheckStatus()
|
status := http.StatusOK
|
||||||
// If there is an error, return 503
|
|
||||||
if len(checksStatus) != 0 {
|
|
||||||
w.WriteHeader(http.StatusServiceUnavailable)
|
|
||||||
}
|
|
||||||
encoder := json.NewEncoder(w)
|
|
||||||
err := encoder.Encode(checksStatus)
|
|
||||||
|
|
||||||
// Parsing of the JSON failed. Returning generic error message
|
// If there is an error, return 503
|
||||||
|
if len(checks) != 0 {
|
||||||
|
status = http.StatusServiceUnavailable
|
||||||
|
}
|
||||||
|
|
||||||
|
statusResponse(w, r, status, checks)
|
||||||
|
} else {
|
||||||
|
http.NotFound(w, r)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handler returns a handler that will return 503 response code if the health
|
||||||
|
// checks have failed. If everything is okay with the health checks, the
|
||||||
|
// handler will pass through to the provided handler. Use this handler to
|
||||||
|
// disable a web application when the health checks fail.
|
||||||
|
func Handler(handler http.Handler) http.Handler {
|
||||||
|
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
checks := CheckStatus()
|
||||||
|
if len(checks) != 0 {
|
||||||
|
statusResponse(w, r, http.StatusServiceUnavailable, checks)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
handler.ServeHTTP(w, r) // pass through
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// statusResponse completes the request with a response describing the health
|
||||||
|
// of the service.
|
||||||
|
func statusResponse(w http.ResponseWriter, r *http.Request, status int, checks map[string]string) {
|
||||||
|
p, err := json.Marshal(checks)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
encoder.Encode(struct {
|
context.GetLogger(context.Background()).Errorf("error serializing health status: %v", err)
|
||||||
|
p, err = json.Marshal(struct {
|
||||||
ServerError string `json:"server_error"`
|
ServerError string `json:"server_error"`
|
||||||
}{
|
}{
|
||||||
ServerError: "Could not parse error message",
|
ServerError: "Could not parse error message",
|
||||||
})
|
})
|
||||||
|
status = http.StatusInternalServerError
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
context.GetLogger(context.Background()).Errorf("error serializing health status failure message: %v", err)
|
||||||
|
return
|
||||||
}
|
}
|
||||||
} else {
|
}
|
||||||
w.WriteHeader(http.StatusNotFound)
|
|
||||||
|
w.Header().Set("Content-Type", "application/json; charset=utf-8")
|
||||||
|
w.Header().Set("Content-Length", fmt.Sprint(len(p)))
|
||||||
|
w.WriteHeader(status)
|
||||||
|
if _, err := w.Write(p); err != nil {
|
||||||
|
context.GetLogger(context.Background()).Errorf("error writing health status response body: %v", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -2,6 +2,7 @@ package health
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"errors"
|
"errors"
|
||||||
|
"fmt"
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/http/httptest"
|
"net/http/httptest"
|
||||||
"testing"
|
"testing"
|
||||||
|
@ -45,3 +46,62 @@ func TestReturns503IfThereAreErrorChecks(t *testing.T) {
|
||||||
t.Errorf("Did not get a 503.")
|
t.Errorf("Did not get a 503.")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestHealthHandler ensures that our handler implementation correct protects
|
||||||
|
// the web application when things aren't so healthy.
|
||||||
|
func TestHealthHandler(t *testing.T) {
|
||||||
|
// clear out existing checks.
|
||||||
|
registeredChecks = make(map[string]Checker)
|
||||||
|
|
||||||
|
// protect an http server
|
||||||
|
handler := http.Handler(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
w.WriteHeader(http.StatusNoContent)
|
||||||
|
}))
|
||||||
|
|
||||||
|
// wrap it in our health handler
|
||||||
|
handler = Handler(handler)
|
||||||
|
|
||||||
|
// use this swap check status
|
||||||
|
updater := NewStatusUpdater()
|
||||||
|
Register("test_check", updater)
|
||||||
|
|
||||||
|
// now, create a test server
|
||||||
|
server := httptest.NewServer(handler)
|
||||||
|
|
||||||
|
checkUp := func(t *testing.T, message string) {
|
||||||
|
resp, err := http.Get(server.URL)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("error getting success status: %v", err)
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
if resp.StatusCode != http.StatusNoContent {
|
||||||
|
t.Fatalf("unexpected response code from server when %s: %d != %d", message, resp.StatusCode, http.StatusNoContent)
|
||||||
|
}
|
||||||
|
// NOTE(stevvooe): we really don't care about the body -- the format is
|
||||||
|
// not standardized or supported, yet.
|
||||||
|
}
|
||||||
|
|
||||||
|
checkDown := func(t *testing.T, message string) {
|
||||||
|
resp, err := http.Get(server.URL)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("error getting down status: %v", err)
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
if resp.StatusCode != http.StatusServiceUnavailable {
|
||||||
|
t.Fatalf("unexpected response code from server when %s: %d != %d", message, resp.StatusCode, http.StatusServiceUnavailable)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// server should be up
|
||||||
|
checkUp(t, "initial health check")
|
||||||
|
|
||||||
|
// now, we fail the health check
|
||||||
|
updater.Update(fmt.Errorf("the server is now out of commission"))
|
||||||
|
checkDown(t, "server should be down") // should be down
|
||||||
|
|
||||||
|
// bring server back up
|
||||||
|
updater.Update(nil)
|
||||||
|
checkUp(t, "when server is back up") // now we should be back up.
|
||||||
|
}
|
||||||
|
|
|
@ -14,6 +14,7 @@ import (
|
||||||
"github.com/docker/distribution"
|
"github.com/docker/distribution"
|
||||||
"github.com/docker/distribution/configuration"
|
"github.com/docker/distribution/configuration"
|
||||||
ctxu "github.com/docker/distribution/context"
|
ctxu "github.com/docker/distribution/context"
|
||||||
|
"github.com/docker/distribution/health"
|
||||||
"github.com/docker/distribution/notifications"
|
"github.com/docker/distribution/notifications"
|
||||||
"github.com/docker/distribution/registry/api/errcode"
|
"github.com/docker/distribution/registry/api/errcode"
|
||||||
"github.com/docker/distribution/registry/api/v2"
|
"github.com/docker/distribution/registry/api/v2"
|
||||||
|
@ -203,6 +204,20 @@ func NewApp(ctx context.Context, configuration configuration.Configuration) *App
|
||||||
return app
|
return app
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// RegisterHealthChecks is an awful hack to defer health check registration
|
||||||
|
// control to callers. This should only ever be called once per registry
|
||||||
|
// process, typically in a main function. The correct way would be register
|
||||||
|
// health checks outside of app, since multiple apps may exist in the same
|
||||||
|
// process. Because the configuration and app are tightly coupled,
|
||||||
|
// implementing this properly will require a refactor. This method may panic
|
||||||
|
// if called twice in the same process.
|
||||||
|
func (app *App) RegisterHealthChecks() {
|
||||||
|
health.RegisterPeriodicThresholdFunc("storagedriver_"+app.Config.Storage.Type(), 10*time.Second, 3, func() error {
|
||||||
|
_, err := app.driver.List(app, "/") // "/" should always exist
|
||||||
|
return err // any error will be treated as failure
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
// register a handler with the application, by route name. The handler will be
|
// register a handler with the application, by route name. The handler will be
|
||||||
// passed through the application filters and context will be constructed at
|
// passed through the application filters and context will be constructed at
|
||||||
// request time.
|
// request time.
|
||||||
|
|
Loading…
Reference in a new issue