diff --git a/configuration/configuration.go b/configuration/configuration.go
index c6554f45..60440e05 100644
--- a/configuration/configuration.go
+++ b/configuration/configuration.go
@@ -135,6 +135,8 @@ type Configuration struct {
} `yaml:"pool,omitempty"`
} `yaml:"redis,omitempty"`
+ Health Health `yaml:"health,omitempty"`
+
Proxy Proxy `yaml:"proxy,omitempty"`
}
@@ -179,6 +181,37 @@ type MailOptions struct {
To []string `yaml:"to,omitempty"`
}
+// FileChecker is a type of entry in the health section for checking files.
+type FileChecker struct {
+ // Interval is the duration to wait between checks (e.g. 10s), not a bare number of seconds.
+ Interval time.Duration `yaml:"interval,omitempty"`
+ // File is the path to check; per the docs, the check fails while a file exists there.
+ File string `yaml:"file,omitempty"`
+ // Threshold is the number of times a check must fail to trigger an
+ // unhealthy state; zero (omitted) means a single failure is enough.
+ Threshold int `yaml:"threshold,omitempty"`
+}
+
+// HTTPChecker is a type of entry in the health section for checking HTTP
+// URIs.
+type HTTPChecker struct {
+ // Interval is the duration to wait between checks (e.g. 10s), not a bare number of seconds.
+ Interval time.Duration `yaml:"interval,omitempty"`
+ // URI is the HTTP URI to check; per the docs, any status other than 200 fails the check.
+ URI string `yaml:"uri,omitempty"`
+ // Threshold is the number of times a check must fail to trigger an
+ // unhealthy state; zero (omitted) means a single failure is enough.
+ Threshold int `yaml:"threshold,omitempty"`
+}
+
+// Health provides the configuration section for health checks.
+type Health struct {
+ // FileCheckers is a list of paths to check (YAML key "file").
+ FileCheckers []FileChecker `yaml:"file,omitempty"`
+ // HTTPCheckers is a list of URIs to check (YAML key "http").
+ HTTPCheckers []HTTPChecker `yaml:"http,omitempty"`
+}
+
// v0_1Configuration is a Version 0.1 Configuration struct
// This is currently aliased to Configuration, as it is the current version
type v0_1Configuration Configuration
diff --git a/docs/configuration.md b/docs/configuration.md
index a9017bb5..3e16ca5d 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -195,6 +195,15 @@ information about each option that appears later in this page.
maxidle: 16
maxactive: 64
idletimeout: 300s
+ health:
+ file:
+ - file: /path/to/checked/file
+ interval: 10s
+ threshold: 3
+ http:
+ - uri: http://server.to.check/must/return/200
+ interval: 10s
+ threshold: 3
In some instances a configuration option is **optional** but it contains child
options marked as **required**. This indicates that you can omit the parent with
@@ -1588,6 +1597,141 @@ Configure the behavior of the Redis connection pool.
+## health
+
+ health:
+ file:
+ - file: /path/to/checked/file
+ interval: 10s
+ threshold: 3
+ http:
+ - uri: http://server.to.check/must/return/200
+ interval: 10s
+ threshold: 3
+
+The health option is **optional**. It may contain lists of file checkers
+and/or HTTP checkers.
+
+### file
+
+file is a list of paths to be periodically checked for the existence of a file.
+If a file exists at the given path, the health check will fail. This can be
+used as a way of bringing a registry out of rotation by creating a file.
+
+
+
+ Parameter |
+ Required |
+ Description |
+
+
+
+ file
+ |
+
+ yes
+ |
+
+The path to check for the existence of a file.
+ |
+
+
+
+ interval
+ |
+
+ no
+ |
+
+ The length of time to wait between repetitions of the check. This field
+ takes a positive integer and an optional suffix indicating the unit of
+ time. Possible units are:
+
+ ns (nanoseconds)
+ us (microseconds)
+ ms (milliseconds)
+ s (seconds)
+ m (minutes)
+ h (hours)
+
+ If you omit the suffix, the system interprets the value as nanoseconds.
+ The default value is 10 seconds if this field is omitted.
+ |
+
+
+
+ threshold
+ |
+
+ no
+ |
+
+ An integer specifying the number of times the check must fail before the
+ check triggers an unhealthy state. If this field is not specified, a
+ single failure will trigger an unhealthy state.
+ |
+
+
+
+### http
+
+http is a list of HTTP URIs to be periodically checked with HEAD requests. If
+a HEAD request returns a status code other than 200, the health check will fail.
+
+
+
+ Parameter |
+ Required |
+ Description |
+
+
+
+ uri
+ |
+
+ yes
+ |
+
+The URI to check.
+ |
+
+
+
+ interval
+ |
+
+ no
+ |
+
+ The length of time to wait between repetitions of the check. This field
+ takes a positive integer and an optional suffix indicating the unit of
+ time. Possible units are:
+
+ ns (nanoseconds)
+ us (microseconds)
+ ms (milliseconds)
+ s (seconds)
+ m (minutes)
+ h (hours)
+
+ If you omit the suffix, the system interprets the value as nanoseconds.
+ The default value is 10 seconds if this field is omitted.
+ |
+
+
+
+ threshold
+ |
+
+ no
+ |
+
+ An integer specifying the number of times the check must fail before the
+ check triggers an unhealthy state. If this field is not specified, a
+ single failure will trigger an unhealthy state.
+ |
+
+
## Example: Development configuration
diff --git a/health/checks/checks.go b/health/checks/checks.go
index 9de14010..89d5f3db 100644
--- a/health/checks/checks.go
+++ b/health/checks/checks.go
@@ -2,9 +2,11 @@ package checks
import (
"errors"
- "github.com/docker/distribution/health"
"net/http"
"os"
+ "strconv"
+
+ "github.com/docker/distribution/health"
)
// FileChecker checks the existence of a file and returns and error
@@ -28,7 +30,7 @@ func HTTPChecker(r string) health.Checker {
return errors.New("error while checking: " + r)
}
if response.StatusCode != http.StatusOK {
- return errors.New("downstream service returned unexpected status: " + string(response.StatusCode))
+ return errors.New("downstream service returned unexpected status: " + strconv.Itoa(response.StatusCode))
}
return nil
})
diff --git a/health/health.go b/health/health.go
index ba954919..dab2794d 100644
--- a/health/health.go
+++ b/health/health.go
@@ -170,6 +170,20 @@ func Register(name string, check Checker) {
registeredChecks[name] = check
}
+// Unregister removes the checker registered under name; an unknown name is a no-op.
+func Unregister(name string) {
+ mutex.Lock() // hold the package mutex while mutating registeredChecks
+ defer mutex.Unlock()
+ delete(registeredChecks, name)
+}
+
+// UnregisterAll removes all registered checkers by swapping in a fresh, empty map.
+func UnregisterAll() {
+ mutex.Lock() // hold the package mutex while replacing registeredChecks
+ defer mutex.Unlock()
+ registeredChecks = make(map[string]Checker) // the previous map is dropped rather than cleared in place
+}
+
// RegisterFunc allows the convenience of registering a checker directly
// from an arbitrary func() error
func RegisterFunc(name string, check func() error) {
diff --git a/registry/handlers/app.go b/registry/handlers/app.go
index 7d1f1cf5..8b8543dd 100644
--- a/registry/handlers/app.go
+++ b/registry/handlers/app.go
@@ -15,6 +15,7 @@ import (
"github.com/docker/distribution/configuration"
ctxu "github.com/docker/distribution/context"
"github.com/docker/distribution/health"
+ "github.com/docker/distribution/health/checks"
"github.com/docker/distribution/notifications"
"github.com/docker/distribution/registry/api/errcode"
"github.com/docker/distribution/registry/api/v2"
@@ -37,6 +38,9 @@ import (
// was specified.
const randomSecretSize = 32
+// defaultCheckInterval is the default time in between health checks
+const defaultCheckInterval = 10 * time.Second
+
// App is a global registry application object. Shared resources can be placed
// on this object that will be accessible from all requests. Any writable
// fields should be protected.
@@ -231,10 +235,38 @@ func NewApp(ctx context.Context, configuration configuration.Configuration) *App
// implementing this properly will require a refactor. This method may panic
// if called twice in the same process.
func (app *App) RegisterHealthChecks() {
- health.RegisterPeriodicThresholdFunc("storagedriver_"+app.Config.Storage.Type(), 10*time.Second, 3, func() error {
+ health.RegisterPeriodicThresholdFunc("storagedriver_"+app.Config.Storage.Type(), defaultCheckInterval, 3, func() error {
_, err := app.driver.List(app, "/") // "/" should always exist
return err // any error will be treated as failure
})
+
+ for _, fileChecker := range app.Config.Health.FileCheckers { // each check is registered under its file path
+ interval := fileChecker.Interval // logged with %s below so sub-second intervals are not truncated to 0
+ if interval == 0 { // no interval configured: fall back to the default
+ interval = defaultCheckInterval
+ }
+ if fileChecker.Threshold != 0 { // a threshold is configured: wrap the check in a threshold checker
+ ctxu.GetLogger(app).Infof("configuring file health check path=%s, interval=%s, threshold=%d", fileChecker.File, interval, fileChecker.Threshold)
+ health.Register(fileChecker.File, health.PeriodicThresholdChecker(checks.FileChecker(fileChecker.File), interval, fileChecker.Threshold))
+ } else {
+ ctxu.GetLogger(app).Infof("configuring file health check path=%s, interval=%s", fileChecker.File, interval)
+ health.Register(fileChecker.File, health.PeriodicChecker(checks.FileChecker(fileChecker.File), interval))
+ }
+ }
+
+ for _, httpChecker := range app.Config.Health.HTTPCheckers { // each check is registered under its URI
+ interval := httpChecker.Interval // logged with %s below so sub-second intervals are not truncated to 0
+ if interval == 0 { // no interval configured: fall back to the default
+ interval = defaultCheckInterval
+ }
+ if httpChecker.Threshold != 0 { // a threshold is configured: wrap the check in a threshold checker
+ ctxu.GetLogger(app).Infof("configuring HTTP health check uri=%s, interval=%s, threshold=%d", httpChecker.URI, interval, httpChecker.Threshold)
+ health.Register(httpChecker.URI, health.PeriodicThresholdChecker(checks.HTTPChecker(httpChecker.URI), interval, httpChecker.Threshold))
+ } else {
+ ctxu.GetLogger(app).Infof("configuring HTTP health check uri=%s, interval=%s", httpChecker.URI, interval)
+ health.Register(httpChecker.URI, health.PeriodicChecker(checks.HTTPChecker(httpChecker.URI), interval))
+ }
+ }
}
// register a handler with the application, by route name. The handler will be
diff --git a/registry/handlers/health_test.go b/registry/handlers/health_test.go
new file mode 100644
index 00000000..ce5860a8
--- /dev/null
+++ b/registry/handlers/health_test.go
@@ -0,0 +1,200 @@
+package handlers
+
+import (
+ "encoding/json"
+ "io/ioutil"
+ "net/http"
+ "net/http/httptest"
+ "os"
+ "testing"
+ "time"
+
+ "github.com/docker/distribution/configuration"
+ "github.com/docker/distribution/health"
+ "golang.org/x/net/context"
+)
+
+// TestFileHealthCheck verifies that a configured file checker is reported on
+// /debug/health while the file exists and clears after the file is removed.
+func TestFileHealthCheck(t *testing.T) {
+ health.UnregisterAll() // in case other tests registered checks before this one
+
+ interval := time.Second
+
+ tmpfile, err := ioutil.TempFile(os.TempDir(), "healthcheck")
+ if err != nil {
+ t.Fatalf("could not create temporary file: %v", err)
+ }
+ defer os.Remove(tmpfile.Name()) // best-effort cleanup if the test fails before the explicit removal
+ defer tmpfile.Close()
+
+ config := configuration.Configuration{
+ Storage: configuration.Storage{
+ "inmemory": configuration.Parameters{},
+ },
+ Health: configuration.Health{
+ FileCheckers: []configuration.FileChecker{
+ {
+ Interval: interval,
+ File: tmpfile.Name(),
+ },
+ },
+ },
+ }
+
+ app := NewApp(context.Background(), config)
+ app.RegisterHealthChecks()
+
+ // NOTE(review): a nil handler presumably serves http.DefaultServeMux, where /debug/health is expected to be registered - confirm.
+ debugServer := httptest.NewServer(nil)
+ defer debugServer.Close()
+
+ // Wait for health check to happen
+ time.Sleep(2 * interval)
+
+ resp, err := http.Get(debugServer.URL + "/debug/health")
+ if err != nil {
+ t.Fatalf("error performing HTTP GET: %v", err)
+ }
+ body, err := ioutil.ReadAll(resp.Body)
+ if err != nil {
+ t.Fatalf("error reading HTTP body: %v", err)
+ }
+ resp.Body.Close()
+ var decoded map[string]string
+ if err := json.Unmarshal(body, &decoded); err != nil {
+ t.Fatalf("error unmarshaling json: %v", err)
+ }
+ if len(decoded) != 1 {
+ t.Fatal("expected 1 item in returned json")
+ }
+ if decoded[tmpfile.Name()] != "file exists" {
+ t.Fatal(`did not get "file exists" result for health check`)
+ }
+
+ // Removing the file should let the next periodic check pass.
+ os.Remove(tmpfile.Name())
+ time.Sleep(2 * interval)
+ resp, err = http.Get(debugServer.URL + "/debug/health")
+ if err != nil {
+ t.Fatalf("error performing HTTP GET: %v", err)
+ }
+ body, err = ioutil.ReadAll(resp.Body)
+ if err != nil {
+ t.Fatalf("error reading HTTP body: %v", err)
+ }
+ resp.Body.Close()
+ var decoded2 map[string]string
+ if err := json.Unmarshal(body, &decoded2); err != nil {
+ t.Fatalf("error unmarshaling json: %v", err)
+ }
+ if len(decoded2) != 0 {
+ t.Fatal("expected 0 items in returned json")
+ }
+}
+
+// TestHTTPHealthCheck verifies that a configured HTTP checker reports a failing
+// downstream only after the threshold is passed, and clears once it returns 200.
+func TestHTTPHealthCheck(t *testing.T) {
+ health.UnregisterAll() // in case other tests registered checks before this one
+
+ interval := time.Second
+ threshold := 3
+
+ stopFailing := make(chan struct{})
+
+ checkedServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ if r.Method != "HEAD" {
+ // Errorf, not Fatalf: FailNow may only be called from the test goroutine.
+ t.Errorf("expected HEAD request, got %s", r.Method)
+ }
+ select {
+ case <-stopFailing:
+ w.WriteHeader(http.StatusOK)
+ default:
+ w.WriteHeader(http.StatusInternalServerError)
+ }
+ }))
+ defer checkedServer.Close()
+
+ config := configuration.Configuration{
+ Storage: configuration.Storage{
+ "inmemory": configuration.Parameters{},
+ },
+ Health: configuration.Health{
+ HTTPCheckers: []configuration.HTTPChecker{
+ {
+ Interval: interval,
+ URI: checkedServer.URL,
+ Threshold: threshold,
+ },
+ },
+ },
+ }
+
+ app := NewApp(context.Background(), config)
+ app.RegisterHealthChecks()
+
+ debugServer := httptest.NewServer(nil)
+ defer debugServer.Close()
+
+ for i := 0; ; i++ {
+ time.Sleep(interval)
+
+ resp, err := http.Get(debugServer.URL + "/debug/health")
+ if err != nil {
+ t.Fatalf("error performing HTTP GET: %v", err)
+ }
+ body, err := ioutil.ReadAll(resp.Body)
+ if err != nil {
+ t.Fatalf("error reading HTTP body: %v", err)
+ }
+ resp.Body.Close()
+ var decoded map[string]string
+ if err := json.Unmarshal(body, &decoded); err != nil {
+ t.Fatalf("error unmarshaling json: %v", err)
+ }
+
+ if i < threshold-1 {
+ // definitely shouldn't have hit the threshold yet
+ if len(decoded) != 0 {
+ t.Fatal("expected 0 items in returned json")
+ }
+ continue
+ }
+ if i < threshold+1 {
+ // right on the threshold - don't expect a failure yet
+ continue
+ }
+
+ if len(decoded) != 1 {
+ t.Fatal("expected 1 item in returned json")
+ }
+ if decoded[checkedServer.URL] != "downstream service returned unexpected status: 500" {
+ t.Fatal("did not get expected result for health check")
+ }
+
+ break
+ }
+
+ // Signal HTTP handler to start returning 200
+ close(stopFailing)
+
+ time.Sleep(2 * interval)
+ resp, err := http.Get(debugServer.URL + "/debug/health")
+ if err != nil {
+ t.Fatalf("error performing HTTP GET: %v", err)
+ }
+ body, err := ioutil.ReadAll(resp.Body)
+ if err != nil {
+ t.Fatalf("error reading HTTP body: %v", err)
+ }
+ resp.Body.Close()
+ var decoded map[string]string
+ if err := json.Unmarshal(body, &decoded); err != nil {
+ t.Fatalf("error unmarshaling json: %v", err)
+ }
+ if len(decoded) != 0 {
+ t.Fatal("expected 0 items in returned json")
+ }
+}