diff --git a/cmd/registry/config-cache.yml b/cmd/registry/config-cache.yml index 6566130bb..7a274ea59 100644 --- a/cmd/registry/config-cache.yml +++ b/cmd/registry/config-cache.yml @@ -48,3 +48,8 @@ proxy: remoteurl: https://registry-1.docker.io username: username password: password +health: + storagedriver: + enabled: true + interval: 10s + threshold: 3 diff --git a/cmd/registry/config-dev.yml b/cmd/registry/config-dev.yml index 729e7fd2c..b6438be50 100644 --- a/cmd/registry/config-dev.yml +++ b/cmd/registry/config-dev.yml @@ -59,4 +59,8 @@ notifications: threshold: 10 backoff: 1s disabled: true - +health: + storagedriver: + enabled: true + interval: 10s + threshold: 3 diff --git a/cmd/registry/config-example.yml b/cmd/registry/config-example.yml index ef5994e59..b5700e192 100644 --- a/cmd/registry/config-example.yml +++ b/cmd/registry/config-example.yml @@ -11,3 +11,8 @@ http: addr: :5000 headers: X-Content-Type-Options: [nosniff] +health: + storagedriver: + enabled: true + interval: 10s + threshold: 3 diff --git a/configuration/configuration.go b/configuration/configuration.go index c6554f45b..1b3519d52 100644 --- a/configuration/configuration.go +++ b/configuration/configuration.go @@ -135,6 +135,8 @@ type Configuration struct { } `yaml:"pool,omitempty"` } `yaml:"redis,omitempty"` + Health Health `yaml:"health,omitempty"` + Proxy Proxy `yaml:"proxy,omitempty"` } @@ -179,6 +181,68 @@ type MailOptions struct { To []string `yaml:"to,omitempty"` } +// FileChecker is a type of entry in the health section for checking files. +type FileChecker struct { + // Interval is the duration in between checks + Interval time.Duration `yaml:"interval,omitempty"` + // File is the path to check + File string `yaml:"file,omitempty"` + // Threshold is the number of times a check must fail to trigger an + // unhealthy state + Threshold int `yaml:"threshold,omitempty"` +} + +// HTTPChecker is a type of entry in the health section for checking HTTP URIs. +type HTTPChecker struct { + // Timeout is the duration to wait before timing out the HTTP request + Timeout time.Duration `yaml:"interval,omitempty"` + // StatusCode is the expected status code + StatusCode int + // Interval is the duration in between checks + Interval time.Duration `yaml:"interval,omitempty"` + // URI is the HTTP URI to check + URI string `yaml:"uri,omitempty"` + // Headers lists static headers that should be added to all requests + Headers http.Header `yaml:"headers"` + // Threshold is the number of times a check must fail to trigger an + // unhealthy state + Threshold int `yaml:"threshold,omitempty"` +} + +// TCPChecker is a type of entry in the health section for checking TCP servers. +type TCPChecker struct { + // Timeout is the duration to wait before timing out the TCP connection + Timeout time.Duration `yaml:"interval,omitempty"` + // Interval is the duration in between checks + Interval time.Duration `yaml:"interval,omitempty"` + // Addr is the TCP address to check + Addr string `yaml:"addr,omitempty"` + // Threshold is the number of times a check must fail to trigger an + // unhealthy state + Threshold int `yaml:"threshold,omitempty"` +} + +// Health provides the configuration section for health checks. +type Health struct { + // FileCheckers is a list of paths to check + FileCheckers []FileChecker `yaml:"file,omitempty"` + // HTTPCheckers is a list of URIs to check + HTTPCheckers []HTTPChecker `yaml:"http,omitempty"` + // TCPCheckers is a list of URIs to check + TCPCheckers []TCPChecker `yaml:"tcp,omitempty"` + // StorageDriver configures a health check on the configured storage + // driver + StorageDriver struct { + // Enabled turns on the health check for the storage driver + Enabled bool `yaml:"enabled,omitempty"` + // Interval is the duration in between checks + Interval time.Duration `yaml:"interval,omitempty"` + // Threshold is the number of times a check must fail to trigger an + // unhealthy state + Threshold int `yaml:"threshold,omitempty"` + } `yaml:"storagedriver,omitempty"` +} + // v0_1Configuration is a Version 0.1 Configuration struct // This is currently aliased to Configuration, as it is the current version type v0_1Configuration Configuration diff --git a/docs/configuration.md b/docs/configuration.md index a9017bb5a..3563ef515 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -195,6 +195,27 @@ information about each option that appears later in this page. maxidle: 16 maxactive: 64 idletimeout: 300s + health: + storagedriver: + enabled: true + interval: 10s + threshold: 3 + file: + - file: /path/to/checked/file + interval: 10s + http: + - uri: http://server.to.check/must/return/200 + headers: + Authorization: [Basic QWxhZGRpbjpvcGVuIHNlc2FtZQ==] + statuscode: 200 + timeout: 3s + interval: 10s + threshold: 3 + tcp: + - addr: redis-server.domain.com:6379 + timeout: 3s + interval: 10s + threshold: 3 In some instances a configuration option is **optional** but it contains child options marked as **required**. This indicates that you can omit the parent with @@ -1381,7 +1402,9 @@ The URL to which events should be published. yes - Static headers to add to each request. + Static headers to add to each request. Each header's name should be a key + underneath headers, and each value is a list of payloads for that + header name. Note that values must always be lists. @@ -1588,6 +1611,334 @@ Configure the behavior of the Redis connection pool. +## health + + health: + storagedriver: + enabled: true + interval: 10s + threshold: 3 + file: + - file: /path/to/checked/file + interval: 10s + http: + - uri: http://server.to.check/must/return/200 + headers: + Authorization: [Basic QWxhZGRpbjpvcGVuIHNlc2FtZQ==] + statuscode: 200 + timeout: 3s + interval: 10s + threshold: 3 + tcp: + - addr: redis-server.domain.com:6379 + timeout: 3s + interval: 10s + threshold: 3 + +The health option is **optional**. It may contain preferences for a periodic +health check on the storage driver's backend storage, and optional periodic +checks on local files, HTTP URIs, and/or TCP servers. The results of the health +checks are available at /debug/health on the debug HTTP server if the debug +HTTP server is enabled (see http section). + +### storagedriver + +storagedriver contains options for a health check on the configured storage +driver's backend storage. enabled must be set to true for this health check to +be active. + + + + + + + + + + + + + + + + + + + + + + +
ParameterRequiredDescription
+ enabled + + yes + +"true" to enable the storage driver health check or "false" to disable it. +
+ interval + + no + + The length of time to wait between repetitions of the check. This field + takes a positive integer and an optional suffix indicating the unit of + time. Possible units are: +
    +
  • ns (nanoseconds)
  • +
  • us (microseconds)
  • +
  • ms (milliseconds)
  • +
  • s (seconds)
  • +
  • m (minutes)
  • +
  • h (hours)
  • +
+ If you omit the suffix, the system interprets the value as nanoseconds. + The default value is 10 seconds if this field is omitted. +
+ threshold + + no + + An integer specifying the number of times the check must fail before the + check triggers an unhealthy state. If this filed is not specified, a + single failure will trigger an unhealthy state. +
+ +### file + +file is a list of paths to be periodically checked for the existence of a file. +If a file exists at the given path, the health check will fail. This can be +used as a way of bringing a registry out of rotation by creating a file. + + + + + + + + + + + + + + + + + +
ParameterRequiredDescription
+ file + + yes + +The path to check for the existence of a file. +
+ interval + + no + + The length of time to wait between repetitions of the check. This field + takes a positive integer and an optional suffix indicating the unit of + time. Possible units are: +
    +
  • ns (nanoseconds)
  • +
  • us (microseconds)
  • +
  • ms (milliseconds)
  • +
  • s (seconds)
  • +
  • m (minutes)
  • +
  • h (hours)
  • +
+ If you omit the suffix, the system interprets the value as nanoseconds. + The default value is 10 seconds if this field is omitted. +
+ +### http + +http is a list of HTTP URIs to be periodically checked with HEAD requests. If +a HEAD request doesn't complete or returns an unexpected status code, the +health check will fail. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ParameterRequiredDescription
+ uri + + yes + +The URI to check. +
+ headers + + no + + Static headers to add to each request. Each header's name should be a key + underneath headers, and each value is a list of payloads for that + header name. Note that values must always be lists. +
+ statuscode + + no + +Expected status code from the HTTP URI. Defaults to 200. +
+ timeout + + no + + The length of time to wait before timing out the HTTP request. This field + takes a positive integer and an optional suffix indicating the unit of + time. Possible units are: +
    +
  • ns (nanoseconds)
  • +
  • us (microseconds)
  • +
  • ms (milliseconds)
  • +
  • s (seconds)
  • +
  • m (minutes)
  • +
  • h (hours)
  • +
+ If you omit the suffix, the system interprets the value as nanoseconds. +
+ interval + + no + + The length of time to wait between repetitions of the check. This field + takes a positive integer and an optional suffix indicating the unit of + time. Possible units are: +
    +
  • ns (nanoseconds)
  • +
  • us (microseconds)
  • +
  • ms (milliseconds)
  • +
  • s (seconds)
  • +
  • m (minutes)
  • +
  • h (hours)
  • +
+ If you omit the suffix, the system interprets the value as nanoseconds. + The default value is 10 seconds if this field is omitted. +
+ threshold + + no + + An integer specifying the number of times the check must fail before the + check triggers an unhealthy state. If this filed is not specified, a + single failure will trigger an unhealthy state. +
+ +### tcp + +tcp is a list of TCP addresses to be periodically checked with connection +attempts. The addresses must include port numbers. If a connection attempt +fails, the health check will fail. + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ParameterRequiredDescription
+ addr + + yes + +The TCP address to connect to, including a port number. +
+ timeout + + no + + The length of time to wait before timing out the TCP connection. This + field takes a positive integer and an optional suffix indicating the unit + of time. Possible units are: +
    +
  • ns (nanoseconds)
  • +
  • us (microseconds)
  • +
  • ms (milliseconds)
  • +
  • s (seconds)
  • +
  • m (minutes)
  • +
  • h (hours)
  • +
+ If you omit the suffix, the system interprets the value as nanoseconds. +
+ interval + + no + + The length of time to wait between repetitions of the check. This field + takes a positive integer and an optional suffix indicating the unit of + time. Possible units are: +
    +
  • ns (nanoseconds)
  • +
  • us (microseconds)
  • +
  • ms (milliseconds)
  • +
  • s (seconds)
  • +
  • m (minutes)
  • +
  • h (hours)
  • +
+ If you omit the suffix, the system interprets the value as nanoseconds. + The default value is 10 seconds if this field is omitted. +
+ threshold + + no + + An integer specifying the number of times the check must fail before the + check triggers an unhealthy state. If this filed is not specified, a + single failure will trigger an unhealthy state. +
## Example: Development configuration diff --git a/health/checks/checks.go b/health/checks/checks.go index 9de140107..e3c3b08d3 100644 --- a/health/checks/checks.go +++ b/health/checks/checks.go @@ -2,13 +2,17 @@ package checks import ( "errors" - "github.com/docker/distribution/health" + "net" "net/http" "os" + "strconv" + "time" + + "github.com/docker/distribution/health" ) -// FileChecker checks the existence of a file and returns and error -// if the file exists, taking the application out of rotation +// FileChecker checks the existence of a file and returns an error +// if the file exists. func FileChecker(f string) health.Checker { return health.CheckFunc(func() error { if _, err := os.Stat(f); err == nil { @@ -18,18 +22,41 @@ func FileChecker(f string) health.Checker { }) } -// HTTPChecker does a HEAD request and verifies if the HTTP status -// code return is a 200, taking the application out of rotation if -// otherwise -func HTTPChecker(r string) health.Checker { +// HTTPChecker does a HEAD request and verifies that the HTTP status code +// returned matches statusCode. +func HTTPChecker(r string, statusCode int, timeout time.Duration, headers http.Header) health.Checker { return health.CheckFunc(func() error { - response, err := http.Head(r) + client := http.Client{ + Timeout: timeout, + } + req, err := http.NewRequest("HEAD", r, nil) + if err != nil { + return errors.New("error creating request: " + r) + } + for headerName, headerValues := range headers { + for _, headerValue := range headerValues { + req.Header.Add(headerName, headerValue) + } + } + response, err := client.Do(req) if err != nil { return errors.New("error while checking: " + r) } - if response.StatusCode != http.StatusOK { - return errors.New("downstream service returned unexpected status: " + string(response.StatusCode)) + if response.StatusCode != statusCode { + return errors.New("downstream service returned unexpected status: " + strconv.Itoa(response.StatusCode)) } return nil }) } + +// TCPChecker attempts to open a TCP connection. +func TCPChecker(addr string, timeout time.Duration) health.Checker { + return health.CheckFunc(func() error { + conn, err := net.DialTimeout("tcp", addr, timeout) + if err != nil { + return errors.New("connection to " + addr + " failed") + } + conn.Close() + return nil + }) +} diff --git a/health/checks/checks_test.go b/health/checks/checks_test.go index 4e49d1182..6b6dd14fa 100644 --- a/health/checks/checks_test.go +++ b/health/checks/checks_test.go @@ -15,11 +15,11 @@ func TestFileChecker(t *testing.T) { } func TestHTTPChecker(t *testing.T) { - if err := HTTPChecker("https://www.google.cybertron").Check(); err == nil { + if err := HTTPChecker("https://www.google.cybertron", 200, 0, nil).Check(); err == nil { t.Errorf("Google on Cybertron was expected as not exists") } - if err := HTTPChecker("https://www.google.pt").Check(); err != nil { + if err := HTTPChecker("https://www.google.pt", 200, 0, nil).Check(); err != nil { t.Errorf("Google at Portugal was expected as exists, error:%v", err) } } diff --git a/health/doc.go b/health/doc.go index 8faa32f7c..194b8a566 100644 --- a/health/doc.go +++ b/health/doc.go @@ -39,7 +39,7 @@ // // The recommended way of registering checks is using a periodic Check. // PeriodicChecks run on a certain schedule and asynchronously update the -// status of the check. This allows "CheckStatus()" to return without blocking +// status of the check. This allows CheckStatus to return without blocking // on an expensive check. // // A trivial example of a check that runs every 5 seconds and shuts down our diff --git a/health/health.go b/health/health.go index ba9549192..220282dcd 100644 --- a/health/health.go +++ b/health/health.go @@ -11,10 +11,26 @@ import ( "github.com/docker/distribution/registry/api/errcode" ) -var ( - mutex sync.RWMutex - registeredChecks = make(map[string]Checker) -) +// A Registry is a collection of checks. Most applications will use the global +// registry defined in DefaultRegistry. However, unit tests may need to create +// separate registries to isolate themselves from other tests. +type Registry struct { + mu sync.RWMutex + registeredChecks map[string]Checker +} + +// NewRegistry creates a new registry. This isn't necessary for normal use of +// the package, but may be useful for unit tests so individual tests have their +// own set of checks. +func NewRegistry() *Registry { + return &Registry{ + registeredChecks: make(map[string]Checker), + } +} + +// DefaultRegistry is the default registry where checks are registered. It is +// the registry used by the HTTP handler. +var DefaultRegistry *Registry // Checker is the interface for a Health Checker type Checker interface { @@ -144,11 +160,11 @@ func PeriodicThresholdChecker(check Checker, period time.Duration, threshold int } // CheckStatus returns a map with all the current health check errors -func CheckStatus() map[string]string { // TODO(stevvooe) this needs a proper type - mutex.RLock() - defer mutex.RUnlock() +func (registry *Registry) CheckStatus() map[string]string { // TODO(stevvooe) this needs a proper type + registry.mu.RLock() + defer registry.mu.RUnlock() statusKeys := make(map[string]string) - for k, v := range registeredChecks { + for k, v := range registry.registeredChecks { err := v.Check() if err != nil { statusKeys[k] = err.Error() @@ -158,34 +174,66 @@ func CheckStatus() map[string]string { // TODO(stevvooe) this needs a proper typ return statusKeys } -// Register associates the checker with the provided name. We allow -// overwrites to a specific check status. -func Register(name string, check Checker) { - mutex.Lock() - defer mutex.Unlock() - _, ok := registeredChecks[name] +// CheckStatus returns a map with all the current health check errors from the +// default registry. +func CheckStatus() map[string]string { + return DefaultRegistry.CheckStatus() +} + +// Register associates the checker with the provided name. +func (registry *Registry) Register(name string, check Checker) { + if registry == nil { + registry = DefaultRegistry + } + registry.mu.Lock() + defer registry.mu.Unlock() + _, ok := registry.registeredChecks[name] if ok { panic("Check already exists: " + name) } - registeredChecks[name] = check + registry.registeredChecks[name] = check } -// RegisterFunc allows the convenience of registering a checker directly -// from an arbitrary func() error +// Register associates the checker with the provided name in the default +// registry. +func Register(name string, check Checker) { + DefaultRegistry.Register(name, check) +} + +// RegisterFunc allows the convenience of registering a checker directly from +// an arbitrary func() error. +func (registry *Registry) RegisterFunc(name string, check func() error) { + registry.Register(name, CheckFunc(check)) +} + +// RegisterFunc allows the convenience of registering a checker in the default +// registry directly from an arbitrary func() error. func RegisterFunc(name string, check func() error) { - Register(name, CheckFunc(check)) + DefaultRegistry.RegisterFunc(name, check) } // RegisterPeriodicFunc allows the convenience of registering a PeriodicChecker -// from an arbitrary func() error +// from an arbitrary func() error. +func (registry *Registry) RegisterPeriodicFunc(name string, period time.Duration, check CheckFunc) { + registry.Register(name, PeriodicChecker(CheckFunc(check), period)) +} + +// RegisterPeriodicFunc allows the convenience of registering a PeriodicChecker +// in the default registry from an arbitrary func() error. func RegisterPeriodicFunc(name string, period time.Duration, check CheckFunc) { - Register(name, PeriodicChecker(CheckFunc(check), period)) + DefaultRegistry.RegisterPeriodicFunc(name, period, check) } // RegisterPeriodicThresholdFunc allows the convenience of registering a -// PeriodicChecker from an arbitrary func() error +// PeriodicChecker from an arbitrary func() error. +func (registry *Registry) RegisterPeriodicThresholdFunc(name string, period time.Duration, threshold int, check CheckFunc) { + registry.Register(name, PeriodicThresholdChecker(CheckFunc(check), period, threshold)) +} + +// RegisterPeriodicThresholdFunc allows the convenience of registering a +// PeriodicChecker in the default registry from an arbitrary func() error. func RegisterPeriodicThresholdFunc(name string, period time.Duration, threshold int, check CheckFunc) { - Register(name, PeriodicThresholdChecker(CheckFunc(check), period, threshold)) + DefaultRegistry.RegisterPeriodicThresholdFunc(name, period, threshold, check) } // StatusHandler returns a JSON blob with all the currently registered Health Checks @@ -251,7 +299,8 @@ func statusResponse(w http.ResponseWriter, r *http.Request, status int, checks m } } -// Registers global /debug/health api endpoint +// Registers global /debug/health api endpoint, creates default registry func init() { + DefaultRegistry = NewRegistry() http.HandleFunc("/debug/health", StatusHandler) } diff --git a/health/health_test.go b/health/health_test.go index 3228cb801..766fe159f 100644 --- a/health/health_test.go +++ b/health/health_test.go @@ -51,7 +51,7 @@ func TestReturns503IfThereAreErrorChecks(t *testing.T) { // the web application when things aren't so healthy. func TestHealthHandler(t *testing.T) { // clear out existing checks. - registeredChecks = make(map[string]Checker) + DefaultRegistry = NewRegistry() // protect an http server handler := http.Handler(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { diff --git a/registry/auth/silly/access_test.go b/registry/auth/silly/access_test.go index 8b5ecb801..ff2155b18 100644 --- a/registry/auth/silly/access_test.go +++ b/registry/auth/silly/access_test.go @@ -5,8 +5,8 @@ import ( "net/http/httptest" "testing" + "github.com/docker/distribution/context" "github.com/docker/distribution/registry/auth" - "golang.org/x/net/context" ) func TestSillyAccessController(t *testing.T) { diff --git a/registry/auth/token/token_test.go b/registry/auth/token/token_test.go index 9d84d4efb..119aa738a 100644 --- a/registry/auth/token/token_test.go +++ b/registry/auth/token/token_test.go @@ -15,9 +15,9 @@ import ( "testing" "time" + "github.com/docker/distribution/context" "github.com/docker/distribution/registry/auth" "github.com/docker/libtrust" - "golang.org/x/net/context" ) func makeRootKeys(numKeys int) ([]libtrust.PrivateKey, error) { diff --git a/registry/handlers/api_test.go b/registry/handlers/api_test.go index e351cb95e..a975bd339 100644 --- a/registry/handlers/api_test.go +++ b/registry/handlers/api_test.go @@ -19,6 +19,7 @@ import ( "testing" "github.com/docker/distribution/configuration" + "github.com/docker/distribution/context" "github.com/docker/distribution/digest" "github.com/docker/distribution/manifest" "github.com/docker/distribution/registry/api/errcode" @@ -27,7 +28,6 @@ import ( "github.com/docker/distribution/testutil" "github.com/docker/libtrust" "github.com/gorilla/handlers" - "golang.org/x/net/context" ) var headerConfig = http.Header{ diff --git a/registry/handlers/app.go b/registry/handlers/app.go index 7d1f1cf53..b1e46b021 100644 --- a/registry/handlers/app.go +++ b/registry/handlers/app.go @@ -15,6 +15,7 @@ import ( "github.com/docker/distribution/configuration" ctxu "github.com/docker/distribution/context" "github.com/docker/distribution/health" + "github.com/docker/distribution/health/checks" "github.com/docker/distribution/notifications" "github.com/docker/distribution/registry/api/errcode" "github.com/docker/distribution/registry/api/v2" @@ -37,6 +38,9 @@ import ( // was specified. const randomSecretSize = 32 +// defaultCheckInterval is the default time in between health checks +const defaultCheckInterval = 10 * time.Second + // App is a global registry application object. Shared resources can be placed // on this object that will be accessible from all requests. Any writable // fields should be protected. @@ -230,11 +234,80 @@ func NewApp(ctx context.Context, configuration configuration.Configuration) *App // process. Because the configuration and app are tightly coupled, // implementing this properly will require a refactor. This method may panic // if called twice in the same process. -func (app *App) RegisterHealthChecks() { - health.RegisterPeriodicThresholdFunc("storagedriver_"+app.Config.Storage.Type(), 10*time.Second, 3, func() error { - _, err := app.driver.List(app, "/") // "/" should always exist - return err // any error will be treated as failure - }) +func (app *App) RegisterHealthChecks(healthRegistries ...*health.Registry) { + if len(healthRegistries) > 1 { + panic("RegisterHealthChecks called with more than one registry") + } + healthRegistry := health.DefaultRegistry + if len(healthRegistries) == 1 { + healthRegistry = healthRegistries[0] + } + + if app.Config.Health.StorageDriver.Enabled { + interval := app.Config.Health.StorageDriver.Interval + if interval == 0 { + interval = defaultCheckInterval + } + + storageDriverCheck := func() error { + _, err := app.driver.List(app, "/") // "/" should always exist + return err // any error will be treated as failure + } + + if app.Config.Health.StorageDriver.Threshold != 0 { + healthRegistry.RegisterPeriodicThresholdFunc("storagedriver_"+app.Config.Storage.Type(), interval, app.Config.Health.StorageDriver.Threshold, storageDriverCheck) + } else { + healthRegistry.RegisterPeriodicFunc("storagedriver_"+app.Config.Storage.Type(), interval, storageDriverCheck) + } + } + + for _, fileChecker := range app.Config.Health.FileCheckers { + interval := fileChecker.Interval + if interval == 0 { + interval = defaultCheckInterval + } + ctxu.GetLogger(app).Infof("configuring file health check path=%s, interval=%d", fileChecker.File, interval/time.Second) + healthRegistry.Register(fileChecker.File, health.PeriodicChecker(checks.FileChecker(fileChecker.File), interval)) + } + + for _, httpChecker := range app.Config.Health.HTTPCheckers { + interval := httpChecker.Interval + if interval == 0 { + interval = defaultCheckInterval + } + + statusCode := httpChecker.StatusCode + if statusCode == 0 { + statusCode = 200 + } + + checker := checks.HTTPChecker(httpChecker.URI, statusCode, httpChecker.Timeout, httpChecker.Headers) + + if httpChecker.Threshold != 0 { + ctxu.GetLogger(app).Infof("configuring HTTP health check uri=%s, interval=%d, threshold=%d", httpChecker.URI, interval/time.Second, httpChecker.Threshold) + healthRegistry.Register(httpChecker.URI, health.PeriodicThresholdChecker(checker, interval, httpChecker.Threshold)) + } else { + ctxu.GetLogger(app).Infof("configuring HTTP health check uri=%s, interval=%d", httpChecker.URI, interval/time.Second) + healthRegistry.Register(httpChecker.URI, health.PeriodicChecker(checker, interval)) + } + } + + for _, tcpChecker := range app.Config.Health.TCPCheckers { + interval := tcpChecker.Interval + if interval == 0 { + interval = defaultCheckInterval + } + + checker := checks.TCPChecker(tcpChecker.Addr, tcpChecker.Timeout) + + if tcpChecker.Threshold != 0 { + ctxu.GetLogger(app).Infof("configuring TCP health check addr=%s, interval=%d, threshold=%d", tcpChecker.Addr, interval/time.Second, tcpChecker.Threshold) + healthRegistry.Register(tcpChecker.Addr, health.PeriodicThresholdChecker(checker, interval, tcpChecker.Threshold)) + } else { + ctxu.GetLogger(app).Infof("configuring TCP health check addr=%s, interval=%d", tcpChecker.Addr, interval/time.Second) + healthRegistry.Register(tcpChecker.Addr, health.PeriodicChecker(checker, interval)) + } + } } // register a handler with the application, by route name. The handler will be diff --git a/registry/handlers/app_test.go b/registry/handlers/app_test.go index fbb0b1885..0038a97d4 100644 --- a/registry/handlers/app_test.go +++ b/registry/handlers/app_test.go @@ -9,6 +9,7 @@ import ( "testing" "github.com/docker/distribution/configuration" + "github.com/docker/distribution/context" "github.com/docker/distribution/registry/api/errcode" "github.com/docker/distribution/registry/api/v2" "github.com/docker/distribution/registry/auth" @@ -16,7 +17,6 @@ import ( "github.com/docker/distribution/registry/storage" memorycache "github.com/docker/distribution/registry/storage/cache/memory" "github.com/docker/distribution/registry/storage/driver/inmemory" - "golang.org/x/net/context" ) // TestAppDispatcher builds an application with a test dispatcher and ensures diff --git a/registry/handlers/health_test.go b/registry/handlers/health_test.go new file mode 100644 index 000000000..bb460b47a --- /dev/null +++ b/registry/handlers/health_test.go @@ -0,0 +1,201 @@ +package handlers + +import ( + "io/ioutil" + "net" + "net/http" + "net/http/httptest" + "os" + "testing" + "time" + + "github.com/docker/distribution/configuration" + "github.com/docker/distribution/context" + "github.com/docker/distribution/health" +) + +func TestFileHealthCheck(t *testing.T) { + interval := time.Second + + tmpfile, err := ioutil.TempFile(os.TempDir(), "healthcheck") + if err != nil { + t.Fatalf("could not create temporary file: %v", err) + } + defer tmpfile.Close() + + config := configuration.Configuration{ + Storage: configuration.Storage{ + "inmemory": configuration.Parameters{}, + }, + Health: configuration.Health{ + FileCheckers: []configuration.FileChecker{ + { + Interval: interval, + File: tmpfile.Name(), + }, + }, + }, + } + + ctx := context.Background() + + app := NewApp(ctx, config) + healthRegistry := health.NewRegistry() + app.RegisterHealthChecks(healthRegistry) + + // Wait for health check to happen + <-time.After(2 * interval) + + status := healthRegistry.CheckStatus() + if len(status) != 1 { + t.Fatal("expected 1 item in health check results") + } + if status[tmpfile.Name()] != "file exists" { + t.Fatal(`did not get "file exists" result for health check`) + } + + os.Remove(tmpfile.Name()) + + <-time.After(2 * interval) + if len(healthRegistry.CheckStatus()) != 0 { + t.Fatal("expected 0 items in health check results") + } +} + +func TestTCPHealthCheck(t *testing.T) { + interval := time.Second + + ln, err := net.Listen("tcp", "127.0.0.1:0") + if err != nil { + t.Fatalf("could not create listener: %v", err) + } + addrStr := ln.Addr().String() + + // Start accepting + go func() { + for { + conn, err := ln.Accept() + if err != nil { + // listener was closed + return + } + defer conn.Close() + } + }() + + config := configuration.Configuration{ + Storage: configuration.Storage{ + "inmemory": configuration.Parameters{}, + }, + Health: configuration.Health{ + TCPCheckers: []configuration.TCPChecker{ + { + Interval: interval, + Addr: addrStr, + Timeout: 500 * time.Millisecond, + }, + }, + }, + } + + ctx := context.Background() + + app := NewApp(ctx, config) + healthRegistry := health.NewRegistry() + app.RegisterHealthChecks(healthRegistry) + + // Wait for health check to happen + <-time.After(2 * interval) + + if len(healthRegistry.CheckStatus()) != 0 { + t.Fatal("expected 0 items in health check results") + } + + ln.Close() + <-time.After(2 * interval) + + // Health check should now fail + status := healthRegistry.CheckStatus() + if len(status) != 1 { + t.Fatal("expected 1 item in health check results") + } + if status[addrStr] != "connection to "+addrStr+" failed" { + t.Fatal(`did not get "connection failed" result for health check`) + } +} + +func TestHTTPHealthCheck(t *testing.T) { + interval := time.Second + threshold := 3 + + stopFailing := make(chan struct{}) + + checkedServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Method != "HEAD" { + t.Fatalf("expected HEAD request, got %s", r.Method) + } + select { + case <-stopFailing: + w.WriteHeader(http.StatusOK) + default: + w.WriteHeader(http.StatusInternalServerError) + } + })) + + config := configuration.Configuration{ + Storage: configuration.Storage{ + "inmemory": configuration.Parameters{}, + }, + Health: configuration.Health{ + HTTPCheckers: []configuration.HTTPChecker{ + { + Interval: interval, + URI: checkedServer.URL, + Threshold: threshold, + }, + }, + }, + } + + ctx := context.Background() + + app := NewApp(ctx, config) + healthRegistry := health.NewRegistry() + app.RegisterHealthChecks(healthRegistry) + + for i := 0; ; i++ { + <-time.After(interval) + + status := healthRegistry.CheckStatus() + + if i < threshold-1 { + // definitely shouldn't have hit the threshold yet + if len(status) != 0 { + t.Fatal("expected 1 item in health check results") + } + continue + } + if i < threshold+1 { + // right on the threshold - don't expect a failure yet + continue + } + + if len(status) != 1 { + t.Fatal("expected 1 item in health check results") + } + if status[checkedServer.URL] != "downstream service returned unexpected status: 500" { + t.Fatal("did not get expected result for health check") + } + + break + } + + // Signal HTTP handler to start returning 200 + close(stopFailing) + + <-time.After(2 * interval) + + if len(healthRegistry.CheckStatus()) != 0 { + t.Fatal("expected 0 items in health check results") + } +}