Merge pull request #901 from aaronlehmann/configurable-health-checks

Add configurable file-existence and HTTP health checks
This commit is contained in:
Richard Scothern 2015-08-21 10:53:32 -07:00
commit 37d4ad081f
16 changed files with 827 additions and 48 deletions

View file

@ -48,3 +48,8 @@ proxy:
remoteurl: https://registry-1.docker.io remoteurl: https://registry-1.docker.io
username: username username: username
password: password password: password
health:
storagedriver:
enabled: true
interval: 10s
threshold: 3

View file

@ -59,4 +59,8 @@ notifications:
threshold: 10 threshold: 10
backoff: 1s backoff: 1s
disabled: true disabled: true
health:
storagedriver:
enabled: true
interval: 10s
threshold: 3

View file

@ -11,3 +11,8 @@ http:
addr: :5000 addr: :5000
headers: headers:
X-Content-Type-Options: [nosniff] X-Content-Type-Options: [nosniff]
health:
storagedriver:
enabled: true
interval: 10s
threshold: 3

View file

@ -135,6 +135,8 @@ type Configuration struct {
} `yaml:"pool,omitempty"` } `yaml:"pool,omitempty"`
} `yaml:"redis,omitempty"` } `yaml:"redis,omitempty"`
Health Health `yaml:"health,omitempty"`
Proxy Proxy `yaml:"proxy,omitempty"` Proxy Proxy `yaml:"proxy,omitempty"`
} }
@ -179,6 +181,68 @@ type MailOptions struct {
To []string `yaml:"to,omitempty"` To []string `yaml:"to,omitempty"`
} }
// FileChecker is a type of entry in the health section for checking files.
// Each entry configures one periodic check on the path named by File.
type FileChecker struct {
// Interval is the duration in between checks. When it is left at zero the
// registration code in this change substitutes its 10-second default.
Interval time.Duration `yaml:"interval,omitempty"`
// File is the path to check
File string `yaml:"file,omitempty"`
// Threshold is the number of times a check must fail to trigger an
// unhealthy state
// NOTE(review): the registration code visible in this change never reads
// this field for file checks; they are always registered without a
// threshold. Confirm whether that is intentional.
Threshold int `yaml:"threshold,omitempty"`
}
// HTTPChecker is a type of entry in the health section for checking HTTP URIs.
// Each entry configures one periodic HEAD request against URI.
type HTTPChecker struct {
	// Timeout is the duration to wait before timing out the HTTP request.
	// It is read from the "timeout" key; it previously shared the "interval"
	// key with Interval, so a configured timeout never reached this field.
	Timeout time.Duration `yaml:"timeout,omitempty"`
	// StatusCode is the expected status code. A zero value is treated as 200
	// by the registration code.
	// NOTE(review): the field has no yaml tag; presumably the YAML library
	// maps it to the lowercase key "statuscode" used in the documentation —
	// confirm.
	StatusCode int
	// Interval is the duration in between checks. A zero value is replaced by
	// the 10-second default when the check is registered.
	Interval time.Duration `yaml:"interval,omitempty"`
	// URI is the HTTP URI to check
	URI string `yaml:"uri,omitempty"`
	// Headers lists static headers that should be added to all requests
	Headers http.Header `yaml:"headers"`
	// Threshold is the number of times a check must fail to trigger an
	// unhealthy state. A zero value registers the check without a threshold.
	Threshold int `yaml:"threshold,omitempty"`
}
// TCPChecker is a type of entry in the health section for checking TCP servers.
// Each entry configures one periodic connection attempt to Addr.
type TCPChecker struct {
	// Timeout is the duration to wait before timing out the TCP connection.
	// It is read from the "timeout" key; it previously shared the "interval"
	// key with Interval, so a configured timeout never reached this field.
	Timeout time.Duration `yaml:"timeout,omitempty"`
	// Interval is the duration in between checks. A zero value is replaced by
	// the 10-second default when the check is registered.
	Interval time.Duration `yaml:"interval,omitempty"`
	// Addr is the TCP address to check
	Addr string `yaml:"addr,omitempty"`
	// Threshold is the number of times a check must fail to trigger an
	// unhealthy state. A zero value registers the check without a threshold.
	Threshold int `yaml:"threshold,omitempty"`
}
// Health provides the configuration section for health checks.
// The three lists configure file, HTTP and TCP checks; StorageDriver
// configures the check on the storage driver.
type Health struct {
// FileCheckers is a list of paths to check
FileCheckers []FileChecker `yaml:"file,omitempty"`
// HTTPCheckers is a list of URIs to check
HTTPCheckers []HTTPChecker `yaml:"http,omitempty"`
// TCPCheckers is a list of TCP addresses to check
TCPCheckers []TCPChecker `yaml:"tcp,omitempty"`
// StorageDriver configures a health check on the configured storage
// driver
StorageDriver struct {
// Enabled turns on the health check for the storage driver; the check
// is only registered when this is true.
Enabled bool `yaml:"enabled,omitempty"`
// Interval is the duration in between checks. A zero value is replaced
// by the 10-second default when the check is registered.
Interval time.Duration `yaml:"interval,omitempty"`
// Threshold is the number of times a check must fail to trigger an
// unhealthy state. A zero value registers the check without a
// threshold.
Threshold int `yaml:"threshold,omitempty"`
} `yaml:"storagedriver,omitempty"`
}
// v0_1Configuration is a Version 0.1 Configuration struct // v0_1Configuration is a Version 0.1 Configuration struct
// This is currently aliased to Configuration, as it is the current version // This is currently aliased to Configuration, as it is the current version
type v0_1Configuration Configuration type v0_1Configuration Configuration

View file

@ -195,6 +195,27 @@ information about each option that appears later in this page.
maxidle: 16 maxidle: 16
maxactive: 64 maxactive: 64
idletimeout: 300s idletimeout: 300s
health:
storagedriver:
enabled: true
interval: 10s
threshold: 3
file:
- file: /path/to/checked/file
interval: 10s
http:
- uri: http://server.to.check/must/return/200
headers:
Authorization: [Basic QWxhZGRpbjpvcGVuIHNlc2FtZQ==]
statuscode: 200
timeout: 3s
interval: 10s
threshold: 3
tcp:
- addr: redis-server.domain.com:6379
timeout: 3s
interval: 10s
threshold: 3
In some instances a configuration option is **optional** but it contains child In some instances a configuration option is **optional** but it contains child
options marked as **required**. This indicates that you can omit the parent with options marked as **required**. This indicates that you can omit the parent with
@ -1381,7 +1402,9 @@ The URL to which events should be published.
yes yes
</td> </td>
<td> <td>
Static headers to add to each request. Static headers to add to each request. Each header's name should be a key
underneath headers, and each value is a list of payloads for that
header name. Note that values must always be lists.
</td> </td>
</tr> </tr>
<tr> <tr>
@ -1588,6 +1611,334 @@ Configure the behavior of the Redis connection pool.
</tr> </tr>
</table> </table>
## health
health:
storagedriver:
enabled: true
interval: 10s
threshold: 3
file:
- file: /path/to/checked/file
interval: 10s
http:
- uri: http://server.to.check/must/return/200
headers:
Authorization: [Basic QWxhZGRpbjpvcGVuIHNlc2FtZQ==]
statuscode: 200
timeout: 3s
interval: 10s
threshold: 3
tcp:
- addr: redis-server.domain.com:6379
timeout: 3s
interval: 10s
threshold: 3
The health option is **optional**. It may contain preferences for a periodic
health check on the storage driver's backend storage, and optional periodic
checks on local files, HTTP URIs, and/or TCP servers. The results of the health
checks are available at /debug/health on the debug HTTP server if the debug
HTTP server is enabled (see http section).
### storagedriver
storagedriver contains options for a health check on the configured storage
driver's backend storage. enabled must be set to true for this health check to
be active.
<table>
<tr>
<th>Parameter</th>
<th>Required</th>
<th>Description</th>
</tr>
<tr>
<td>
<code>enabled</code>
</td>
<td>
yes
</td>
<td>
"true" to enable the storage driver health check or "false" to disable it.
</td>
</tr>
<tr>
<td>
<code>interval</code>
</td>
<td>
no
</td>
<td>
The length of time to wait between repetitions of the check. This field
takes a positive integer and an optional suffix indicating the unit of
time. Possible units are:
<ul>
<li><code>ns</code> (nanoseconds)</li>
<li><code>us</code> (microseconds)</li>
<li><code>ms</code> (milliseconds)</li>
<li><code>s</code> (seconds)</li>
<li><code>m</code> (minutes)</li>
<li><code>h</code> (hours)</li>
</ul>
If you omit the suffix, the system interprets the value as nanoseconds.
The default value is 10 seconds if this field is omitted.
</td>
</tr>
<tr>
<td>
<code>threshold</code>
</td>
<td>
no
</td>
<td>
An integer specifying the number of times the check must fail before the
check triggers an unhealthy state. If this field is not specified, a
single failure will trigger an unhealthy state.
</td>
</tr>
</table>
### file
file is a list of paths to be periodically checked for the existence of a file.
If a file exists at the given path, the health check will fail. This can be
used as a way of bringing a registry out of rotation by creating a file.
<table>
<tr>
<th>Parameter</th>
<th>Required</th>
<th>Description</th>
</tr>
<tr>
<td>
<code>file</code>
</td>
<td>
yes
</td>
<td>
The path to check for the existence of a file.
</td>
</tr>
<tr>
<td>
<code>interval</code>
</td>
<td>
no
</td>
<td>
The length of time to wait between repetitions of the check. This field
takes a positive integer and an optional suffix indicating the unit of
time. Possible units are:
<ul>
<li><code>ns</code> (nanoseconds)</li>
<li><code>us</code> (microseconds)</li>
<li><code>ms</code> (milliseconds)</li>
<li><code>s</code> (seconds)</li>
<li><code>m</code> (minutes)</li>
<li><code>h</code> (hours)</li>
</ul>
If you omit the suffix, the system interprets the value as nanoseconds.
The default value is 10 seconds if this field is omitted.
</td>
</tr>
</table>
### http
http is a list of HTTP URIs to be periodically checked with HEAD requests. If
a HEAD request doesn't complete or returns an unexpected status code, the
health check will fail.
<table>
<tr>
<th>Parameter</th>
<th>Required</th>
<th>Description</th>
</tr>
<tr>
<td>
<code>uri</code>
</td>
<td>
yes
</td>
<td>
The URI to check.
</td>
</tr>
<tr>
<td>
<code>headers</code>
</td>
<td>
no
</td>
<td>
Static headers to add to each request. Each header's name should be a key
underneath headers, and each value is a list of payloads for that
header name. Note that values must always be lists.
</td>
</tr>
<tr>
<td>
<code>statuscode</code>
</td>
<td>
no
</td>
<td>
Expected status code from the HTTP URI. Defaults to 200.
</td>
</tr>
<tr>
<td>
<code>timeout</code>
</td>
<td>
no
</td>
<td>
The length of time to wait before timing out the HTTP request. This field
takes a positive integer and an optional suffix indicating the unit of
time. Possible units are:
<ul>
<li><code>ns</code> (nanoseconds)</li>
<li><code>us</code> (microseconds)</li>
<li><code>ms</code> (milliseconds)</li>
<li><code>s</code> (seconds)</li>
<li><code>m</code> (minutes)</li>
<li><code>h</code> (hours)</li>
</ul>
If you omit the suffix, the system interprets the value as nanoseconds.
</td>
</tr>
<tr>
<td>
<code>interval</code>
</td>
<td>
no
</td>
<td>
The length of time to wait between repetitions of the check. This field
takes a positive integer and an optional suffix indicating the unit of
time. Possible units are:
<ul>
<li><code>ns</code> (nanoseconds)</li>
<li><code>us</code> (microseconds)</li>
<li><code>ms</code> (milliseconds)</li>
<li><code>s</code> (seconds)</li>
<li><code>m</code> (minutes)</li>
<li><code>h</code> (hours)</li>
</ul>
If you omit the suffix, the system interprets the value as nanoseconds.
The default value is 10 seconds if this field is omitted.
</td>
</tr>
<tr>
<td>
<code>threshold</code>
</td>
<td>
no
</td>
<td>
An integer specifying the number of times the check must fail before the
check triggers an unhealthy state. If this field is not specified, a
single failure will trigger an unhealthy state.
</td>
</tr>
</table>
### tcp
tcp is a list of TCP addresses to be periodically checked with connection
attempts. The addresses must include port numbers. If a connection attempt
fails, the health check will fail.
<table>
<tr>
<th>Parameter</th>
<th>Required</th>
<th>Description</th>
</tr>
<tr>
<td>
<code>addr</code>
</td>
<td>
yes
</td>
<td>
The TCP address to connect to, including a port number.
</td>
</tr>
<tr>
<td>
<code>timeout</code>
</td>
<td>
no
</td>
<td>
The length of time to wait before timing out the TCP connection. This
field takes a positive integer and an optional suffix indicating the unit
of time. Possible units are:
<ul>
<li><code>ns</code> (nanoseconds)</li>
<li><code>us</code> (microseconds)</li>
<li><code>ms</code> (milliseconds)</li>
<li><code>s</code> (seconds)</li>
<li><code>m</code> (minutes)</li>
<li><code>h</code> (hours)</li>
</ul>
If you omit the suffix, the system interprets the value as nanoseconds.
</td>
</tr>
<tr>
<td>
<code>interval</code>
</td>
<td>
no
</td>
<td>
The length of time to wait between repetitions of the check. This field
takes a positive integer and an optional suffix indicating the unit of
time. Possible units are:
<ul>
<li><code>ns</code> (nanoseconds)</li>
<li><code>us</code> (microseconds)</li>
<li><code>ms</code> (milliseconds)</li>
<li><code>s</code> (seconds)</li>
<li><code>m</code> (minutes)</li>
<li><code>h</code> (hours)</li>
</ul>
If you omit the suffix, the system interprets the value as nanoseconds.
The default value is 10 seconds if this field is omitted.
</td>
</tr>
<tr>
<td>
<code>threshold</code>
</td>
<td>
no
</td>
<td>
An integer specifying the number of times the check must fail before the
check triggers an unhealthy state. If this field is not specified, a
single failure will trigger an unhealthy state.
</td>
</tr>
</table>
## Example: Development configuration ## Example: Development configuration

View file

@ -2,13 +2,17 @@ package checks
import ( import (
"errors" "errors"
"github.com/docker/distribution/health" "net"
"net/http" "net/http"
"os" "os"
"strconv"
"time"
"github.com/docker/distribution/health"
) )
// FileChecker checks the existence of a file and returns and error // FileChecker checks the existence of a file and returns an error
// if the file exists, taking the application out of rotation // if the file exists.
func FileChecker(f string) health.Checker { func FileChecker(f string) health.Checker {
return health.CheckFunc(func() error { return health.CheckFunc(func() error {
if _, err := os.Stat(f); err == nil { if _, err := os.Stat(f); err == nil {
@ -18,18 +22,41 @@ func FileChecker(f string) health.Checker {
}) })
} }
// HTTPChecker does a HEAD request and verifies if the HTTP status // HTTPChecker does a HEAD request and verifies that the HTTP status code
// code return is a 200, taking the application out of rotation if // returned matches statusCode.
// otherwise func HTTPChecker(r string, statusCode int, timeout time.Duration, headers http.Header) health.Checker {
func HTTPChecker(r string) health.Checker {
return health.CheckFunc(func() error { return health.CheckFunc(func() error {
response, err := http.Head(r) client := http.Client{
Timeout: timeout,
}
req, err := http.NewRequest("HEAD", r, nil)
if err != nil {
return errors.New("error creating request: " + r)
}
for headerName, headerValues := range headers {
for _, headerValue := range headerValues {
req.Header.Add(headerName, headerValue)
}
}
response, err := client.Do(req)
if err != nil { if err != nil {
return errors.New("error while checking: " + r) return errors.New("error while checking: " + r)
} }
if response.StatusCode != http.StatusOK { if response.StatusCode != statusCode {
return errors.New("downstream service returned unexpected status: " + string(response.StatusCode)) return errors.New("downstream service returned unexpected status: " + strconv.Itoa(response.StatusCode))
} }
return nil return nil
}) })
} }
// TCPChecker returns a checker that dials addr over TCP, waiting at most
// timeout for the connection. A successful dial is closed straight away and
// counts as healthy; a failed dial is reported as an error naming addr.
func TCPChecker(addr string, timeout time.Duration) health.Checker {
	dial := func() error {
		c, dialErr := net.DialTimeout("tcp", addr, timeout)
		if dialErr == nil {
			// Only reachability matters, so the connection is dropped at once.
			c.Close()
			return nil
		}
		return errors.New("connection to " + addr + " failed")
	}
	return health.CheckFunc(dial)
}

View file

@ -15,11 +15,11 @@ func TestFileChecker(t *testing.T) {
} }
func TestHTTPChecker(t *testing.T) { func TestHTTPChecker(t *testing.T) {
if err := HTTPChecker("https://www.google.cybertron").Check(); err == nil { if err := HTTPChecker("https://www.google.cybertron", 200, 0, nil).Check(); err == nil {
t.Errorf("Google on Cybertron was expected as not exists") t.Errorf("Google on Cybertron was expected as not exists")
} }
if err := HTTPChecker("https://www.google.pt").Check(); err != nil { if err := HTTPChecker("https://www.google.pt", 200, 0, nil).Check(); err != nil {
t.Errorf("Google at Portugal was expected as exists, error:%v", err) t.Errorf("Google at Portugal was expected as exists, error:%v", err)
} }
} }

View file

@ -39,7 +39,7 @@
// //
// The recommended way of registering checks is using a periodic Check. // The recommended way of registering checks is using a periodic Check.
// PeriodicChecks run on a certain schedule and asynchronously update the // PeriodicChecks run on a certain schedule and asynchronously update the
// status of the check. This allows "CheckStatus()" to return without blocking // status of the check. This allows CheckStatus to return without blocking
// on an expensive check. // on an expensive check.
// //
// A trivial example of a check that runs every 5 seconds and shuts down our // A trivial example of a check that runs every 5 seconds and shuts down our

View file

@ -11,10 +11,26 @@ import (
"github.com/docker/distribution/registry/api/errcode" "github.com/docker/distribution/registry/api/errcode"
) )
var ( // A Registry is a collection of checks. Most applications will use the global
mutex sync.RWMutex // registry defined in DefaultRegistry. However, unit tests may need to create
registeredChecks = make(map[string]Checker) // separate registries to isolate themselves from other tests.
) type Registry struct {
mu sync.RWMutex
registeredChecks map[string]Checker
}
// NewRegistry creates a new registry. This isn't necessary for normal use of
// the package, but may be useful for unit tests so individual tests have their
// own set of checks.
func NewRegistry() *Registry {
return &Registry{
registeredChecks: make(map[string]Checker),
}
}
// DefaultRegistry is the default registry where checks are registered. It is
// the registry used by the HTTP handler.
var DefaultRegistry *Registry
// Checker is the interface for a Health Checker // Checker is the interface for a Health Checker
type Checker interface { type Checker interface {
@ -144,11 +160,11 @@ func PeriodicThresholdChecker(check Checker, period time.Duration, threshold int
} }
// CheckStatus returns a map with all the current health check errors // CheckStatus returns a map with all the current health check errors
func CheckStatus() map[string]string { // TODO(stevvooe) this needs a proper type func (registry *Registry) CheckStatus() map[string]string { // TODO(stevvooe) this needs a proper type
mutex.RLock() registry.mu.RLock()
defer mutex.RUnlock() defer registry.mu.RUnlock()
statusKeys := make(map[string]string) statusKeys := make(map[string]string)
for k, v := range registeredChecks { for k, v := range registry.registeredChecks {
err := v.Check() err := v.Check()
if err != nil { if err != nil {
statusKeys[k] = err.Error() statusKeys[k] = err.Error()
@ -158,34 +174,66 @@ func CheckStatus() map[string]string { // TODO(stevvooe) this needs a proper typ
return statusKeys return statusKeys
} }
// Register associates the checker with the provided name. We allow // CheckStatus returns a map with all the current health check errors from the
// overwrites to a specific check status. // default registry.
func Register(name string, check Checker) { func CheckStatus() map[string]string {
mutex.Lock() return DefaultRegistry.CheckStatus()
defer mutex.Unlock() }
_, ok := registeredChecks[name]
// Register associates the checker with the provided name.
func (registry *Registry) Register(name string, check Checker) {
if registry == nil {
registry = DefaultRegistry
}
registry.mu.Lock()
defer registry.mu.Unlock()
_, ok := registry.registeredChecks[name]
if ok { if ok {
panic("Check already exists: " + name) panic("Check already exists: " + name)
} }
registeredChecks[name] = check registry.registeredChecks[name] = check
} }
// RegisterFunc allows the convenience of registering a checker directly // Register associates the checker with the provided name in the default
// from an arbitrary func() error // registry.
func Register(name string, check Checker) {
DefaultRegistry.Register(name, check)
}
// RegisterFunc allows the convenience of registering a checker directly from
// an arbitrary func() error.
func (registry *Registry) RegisterFunc(name string, check func() error) {
registry.Register(name, CheckFunc(check))
}
// RegisterFunc allows the convenience of registering a checker in the default
// registry directly from an arbitrary func() error.
func RegisterFunc(name string, check func() error) { func RegisterFunc(name string, check func() error) {
Register(name, CheckFunc(check)) DefaultRegistry.RegisterFunc(name, check)
} }
// RegisterPeriodicFunc allows the convenience of registering a PeriodicChecker // RegisterPeriodicFunc allows the convenience of registering a PeriodicChecker
// from an arbitrary func() error // from an arbitrary func() error.
func (registry *Registry) RegisterPeriodicFunc(name string, period time.Duration, check CheckFunc) {
registry.Register(name, PeriodicChecker(CheckFunc(check), period))
}
// RegisterPeriodicFunc allows the convenience of registering a PeriodicChecker
// in the default registry from an arbitrary func() error.
func RegisterPeriodicFunc(name string, period time.Duration, check CheckFunc) { func RegisterPeriodicFunc(name string, period time.Duration, check CheckFunc) {
Register(name, PeriodicChecker(CheckFunc(check), period)) DefaultRegistry.RegisterPeriodicFunc(name, period, check)
} }
// RegisterPeriodicThresholdFunc allows the convenience of registering a // RegisterPeriodicThresholdFunc allows the convenience of registering a
// PeriodicChecker from an arbitrary func() error // PeriodicChecker from an arbitrary func() error.
func (registry *Registry) RegisterPeriodicThresholdFunc(name string, period time.Duration, threshold int, check CheckFunc) {
registry.Register(name, PeriodicThresholdChecker(CheckFunc(check), period, threshold))
}
// RegisterPeriodicThresholdFunc allows the convenience of registering a
// PeriodicChecker in the default registry from an arbitrary func() error.
func RegisterPeriodicThresholdFunc(name string, period time.Duration, threshold int, check CheckFunc) { func RegisterPeriodicThresholdFunc(name string, period time.Duration, threshold int, check CheckFunc) {
Register(name, PeriodicThresholdChecker(CheckFunc(check), period, threshold)) DefaultRegistry.RegisterPeriodicThresholdFunc(name, period, threshold, check)
} }
// StatusHandler returns a JSON blob with all the currently registered Health Checks // StatusHandler returns a JSON blob with all the currently registered Health Checks
@ -251,7 +299,8 @@ func statusResponse(w http.ResponseWriter, r *http.Request, status int, checks m
} }
} }
// Registers global /debug/health api endpoint // Registers global /debug/health api endpoint, creates default registry
func init() { func init() {
DefaultRegistry = NewRegistry()
http.HandleFunc("/debug/health", StatusHandler) http.HandleFunc("/debug/health", StatusHandler)
} }

View file

@ -51,7 +51,7 @@ func TestReturns503IfThereAreErrorChecks(t *testing.T) {
// the web application when things aren't so healthy. // the web application when things aren't so healthy.
func TestHealthHandler(t *testing.T) { func TestHealthHandler(t *testing.T) {
// clear out existing checks. // clear out existing checks.
registeredChecks = make(map[string]Checker) DefaultRegistry = NewRegistry()
// protect an http server // protect an http server
handler := http.Handler(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { handler := http.Handler(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {

View file

@ -5,8 +5,8 @@ import (
"net/http/httptest" "net/http/httptest"
"testing" "testing"
"github.com/docker/distribution/context"
"github.com/docker/distribution/registry/auth" "github.com/docker/distribution/registry/auth"
"golang.org/x/net/context"
) )
func TestSillyAccessController(t *testing.T) { func TestSillyAccessController(t *testing.T) {

View file

@ -15,9 +15,9 @@ import (
"testing" "testing"
"time" "time"
"github.com/docker/distribution/context"
"github.com/docker/distribution/registry/auth" "github.com/docker/distribution/registry/auth"
"github.com/docker/libtrust" "github.com/docker/libtrust"
"golang.org/x/net/context"
) )
func makeRootKeys(numKeys int) ([]libtrust.PrivateKey, error) { func makeRootKeys(numKeys int) ([]libtrust.PrivateKey, error) {

View file

@ -19,6 +19,7 @@ import (
"testing" "testing"
"github.com/docker/distribution/configuration" "github.com/docker/distribution/configuration"
"github.com/docker/distribution/context"
"github.com/docker/distribution/digest" "github.com/docker/distribution/digest"
"github.com/docker/distribution/manifest" "github.com/docker/distribution/manifest"
"github.com/docker/distribution/registry/api/errcode" "github.com/docker/distribution/registry/api/errcode"
@ -27,7 +28,6 @@ import (
"github.com/docker/distribution/testutil" "github.com/docker/distribution/testutil"
"github.com/docker/libtrust" "github.com/docker/libtrust"
"github.com/gorilla/handlers" "github.com/gorilla/handlers"
"golang.org/x/net/context"
) )
var headerConfig = http.Header{ var headerConfig = http.Header{

View file

@ -15,6 +15,7 @@ import (
"github.com/docker/distribution/configuration" "github.com/docker/distribution/configuration"
ctxu "github.com/docker/distribution/context" ctxu "github.com/docker/distribution/context"
"github.com/docker/distribution/health" "github.com/docker/distribution/health"
"github.com/docker/distribution/health/checks"
"github.com/docker/distribution/notifications" "github.com/docker/distribution/notifications"
"github.com/docker/distribution/registry/api/errcode" "github.com/docker/distribution/registry/api/errcode"
"github.com/docker/distribution/registry/api/v2" "github.com/docker/distribution/registry/api/v2"
@ -37,6 +38,9 @@ import (
// was specified. // was specified.
const randomSecretSize = 32 const randomSecretSize = 32
// defaultCheckInterval is the default time in between health checks
const defaultCheckInterval = 10 * time.Second
// App is a global registry application object. Shared resources can be placed // App is a global registry application object. Shared resources can be placed
// on this object that will be accessible from all requests. Any writable // on this object that will be accessible from all requests. Any writable
// fields should be protected. // fields should be protected.
@ -230,11 +234,80 @@ func NewApp(ctx context.Context, configuration configuration.Configuration) *App
// process. Because the configuration and app are tightly coupled, // process. Because the configuration and app are tightly coupled,
// implementing this properly will require a refactor. This method may panic // implementing this properly will require a refactor. This method may panic
// if called twice in the same process. // if called twice in the same process.
func (app *App) RegisterHealthChecks() { func (app *App) RegisterHealthChecks(healthRegistries ...*health.Registry) {
health.RegisterPeriodicThresholdFunc("storagedriver_"+app.Config.Storage.Type(), 10*time.Second, 3, func() error { if len(healthRegistries) > 1 {
_, err := app.driver.List(app, "/") // "/" should always exist panic("RegisterHealthChecks called with more than one registry")
return err // any error will be treated as failure }
}) healthRegistry := health.DefaultRegistry
if len(healthRegistries) == 1 {
healthRegistry = healthRegistries[0]
}
if app.Config.Health.StorageDriver.Enabled {
interval := app.Config.Health.StorageDriver.Interval
if interval == 0 {
interval = defaultCheckInterval
}
storageDriverCheck := func() error {
_, err := app.driver.List(app, "/") // "/" should always exist
return err // any error will be treated as failure
}
if app.Config.Health.StorageDriver.Threshold != 0 {
healthRegistry.RegisterPeriodicThresholdFunc("storagedriver_"+app.Config.Storage.Type(), interval, app.Config.Health.StorageDriver.Threshold, storageDriverCheck)
} else {
healthRegistry.RegisterPeriodicFunc("storagedriver_"+app.Config.Storage.Type(), interval, storageDriverCheck)
}
}
for _, fileChecker := range app.Config.Health.FileCheckers {
interval := fileChecker.Interval
if interval == 0 {
interval = defaultCheckInterval
}
ctxu.GetLogger(app).Infof("configuring file health check path=%s, interval=%d", fileChecker.File, interval/time.Second)
healthRegistry.Register(fileChecker.File, health.PeriodicChecker(checks.FileChecker(fileChecker.File), interval))
}
for _, httpChecker := range app.Config.Health.HTTPCheckers {
interval := httpChecker.Interval
if interval == 0 {
interval = defaultCheckInterval
}
statusCode := httpChecker.StatusCode
if statusCode == 0 {
statusCode = 200
}
checker := checks.HTTPChecker(httpChecker.URI, statusCode, httpChecker.Timeout, httpChecker.Headers)
if httpChecker.Threshold != 0 {
ctxu.GetLogger(app).Infof("configuring HTTP health check uri=%s, interval=%d, threshold=%d", httpChecker.URI, interval/time.Second, httpChecker.Threshold)
healthRegistry.Register(httpChecker.URI, health.PeriodicThresholdChecker(checker, interval, httpChecker.Threshold))
} else {
ctxu.GetLogger(app).Infof("configuring HTTP health check uri=%s, interval=%d", httpChecker.URI, interval/time.Second)
healthRegistry.Register(httpChecker.URI, health.PeriodicChecker(checker, interval))
}
}
for _, tcpChecker := range app.Config.Health.TCPCheckers {
interval := tcpChecker.Interval
if interval == 0 {
interval = defaultCheckInterval
}
checker := checks.TCPChecker(tcpChecker.Addr, tcpChecker.Timeout)
if tcpChecker.Threshold != 0 {
ctxu.GetLogger(app).Infof("configuring TCP health check addr=%s, interval=%d, threshold=%d", tcpChecker.Addr, interval/time.Second, tcpChecker.Threshold)
healthRegistry.Register(tcpChecker.Addr, health.PeriodicThresholdChecker(checker, interval, tcpChecker.Threshold))
} else {
ctxu.GetLogger(app).Infof("configuring TCP health check addr=%s, interval=%d", tcpChecker.Addr, interval/time.Second)
healthRegistry.Register(tcpChecker.Addr, health.PeriodicChecker(checker, interval))
}
}
} }
// register a handler with the application, by route name. The handler will be // register a handler with the application, by route name. The handler will be

View file

@ -9,6 +9,7 @@ import (
"testing" "testing"
"github.com/docker/distribution/configuration" "github.com/docker/distribution/configuration"
"github.com/docker/distribution/context"
"github.com/docker/distribution/registry/api/errcode" "github.com/docker/distribution/registry/api/errcode"
"github.com/docker/distribution/registry/api/v2" "github.com/docker/distribution/registry/api/v2"
"github.com/docker/distribution/registry/auth" "github.com/docker/distribution/registry/auth"
@ -16,7 +17,6 @@ import (
"github.com/docker/distribution/registry/storage" "github.com/docker/distribution/registry/storage"
memorycache "github.com/docker/distribution/registry/storage/cache/memory" memorycache "github.com/docker/distribution/registry/storage/cache/memory"
"github.com/docker/distribution/registry/storage/driver/inmemory" "github.com/docker/distribution/registry/storage/driver/inmemory"
"golang.org/x/net/context"
) )
// TestAppDispatcher builds an application with a test dispatcher and ensures // TestAppDispatcher builds an application with a test dispatcher and ensures

View file

@ -0,0 +1,201 @@
package handlers
import (
"io/ioutil"
"net"
"net/http"
"net/http/httptest"
"os"
"testing"
"time"
"github.com/docker/distribution/configuration"
"github.com/docker/distribution/context"
"github.com/docker/distribution/health"
)
// TestFileHealthCheck configures a file health check on a temporary file and
// verifies that the check reports "file exists" while the file is present and
// reports nothing once the file has been removed.
func TestFileHealthCheck(t *testing.T) {
	interval := time.Second

	tmpfile, err := ioutil.TempFile(os.TempDir(), "healthcheck")
	if err != nil {
		t.Fatalf("could not create temporary file: %v", err)
	}
	// Make sure the file does not outlive the test when an assertion fails
	// before the explicit removal below. Deferred first so that it runs after
	// the deferred Close.
	defer os.Remove(tmpfile.Name())
	defer tmpfile.Close()

	config := configuration.Configuration{
		Storage: configuration.Storage{
			"inmemory": configuration.Parameters{},
		},
		Health: configuration.Health{
			FileCheckers: []configuration.FileChecker{
				{
					Interval: interval,
					File:     tmpfile.Name(),
				},
			},
		},
	}

	ctx := context.Background()

	app := NewApp(ctx, config)
	// A private registry keeps this test's checks apart from the default one.
	healthRegistry := health.NewRegistry()
	app.RegisterHealthChecks(healthRegistry)

	// Wait for health check to happen
	time.Sleep(2 * interval)

	status := healthRegistry.CheckStatus()
	if len(status) != 1 {
		t.Fatal("expected 1 item in health check results")
	}
	if status[tmpfile.Name()] != "file exists" {
		t.Fatal(`did not get "file exists" result for health check`)
	}

	// Removing the file should clear the failure on the next check.
	os.Remove(tmpfile.Name())

	time.Sleep(2 * interval)
	if len(healthRegistry.CheckStatus()) != 0 {
		t.Fatal("expected 0 items in health check results")
	}
}
// TestTCPHealthCheck configures a TCP health check against a local listener
// and verifies that the check passes while the listener accepts connections
// and reports a connection failure after the listener is closed.
func TestTCPHealthCheck(t *testing.T) {
	interval := time.Second

	ln, err := net.Listen("tcp", "127.0.0.1:0")
	if err != nil {
		t.Fatalf("could not create listener: %v", err)
	}
	// Release the listener even when an assertion fails before the explicit
	// Close below; a second Close only returns an error, which is ignored.
	defer ln.Close()
	addrStr := ln.Addr().String()

	// Start accepting
	go func() {
		for {
			conn, err := ln.Accept()
			if err != nil {
				// listener was closed
				return
			}
			// Close right away: a defer here would not run until the
			// goroutine returns, keeping every accepted connection open.
			conn.Close()
		}
	}()

	config := configuration.Configuration{
		Storage: configuration.Storage{
			"inmemory": configuration.Parameters{},
		},
		Health: configuration.Health{
			TCPCheckers: []configuration.TCPChecker{
				{
					Interval: interval,
					Addr:     addrStr,
					Timeout:  500 * time.Millisecond,
				},
			},
		},
	}

	ctx := context.Background()

	app := NewApp(ctx, config)
	// A private registry keeps this test's checks apart from the default one.
	healthRegistry := health.NewRegistry()
	app.RegisterHealthChecks(healthRegistry)

	// Wait for health check to happen
	time.Sleep(2 * interval)

	if len(healthRegistry.CheckStatus()) != 0 {
		t.Fatal("expected 0 items in health check results")
	}

	ln.Close()
	time.Sleep(2 * interval)

	// Health check should now fail
	status := healthRegistry.CheckStatus()
	if len(status) != 1 {
		t.Fatal("expected 1 item in health check results")
	}
	if status[addrStr] != "connection to "+addrStr+" failed" {
		t.Fatal(`did not get "connection failed" result for health check`)
	}
}
// TestHTTPHealthCheck configures an HTTP health check with a failure
// threshold against a test server that initially answers 500. It verifies
// that no failure is reported before the threshold is reached, that the
// failure is reported afterwards, and that it clears once the server starts
// answering 200.
func TestHTTPHealthCheck(t *testing.T) {
	interval := time.Second
	threshold := 3

	stopFailing := make(chan struct{})

	checkedServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.Method != "HEAD" {
			// This handler runs on a server goroutine, where Fatalf must not
			// be called; record the failure and reject the request instead.
			t.Errorf("expected HEAD request, got %s", r.Method)
			w.WriteHeader(http.StatusMethodNotAllowed)
			return
		}
		select {
		case <-stopFailing:
			w.WriteHeader(http.StatusOK)
		default:
			w.WriteHeader(http.StatusInternalServerError)
		}
	}))
	// Shut the server down when the test ends, whatever the outcome.
	defer checkedServer.Close()

	config := configuration.Configuration{
		Storage: configuration.Storage{
			"inmemory": configuration.Parameters{},
		},
		Health: configuration.Health{
			HTTPCheckers: []configuration.HTTPChecker{
				{
					Interval:  interval,
					URI:       checkedServer.URL,
					Threshold: threshold,
				},
			},
		},
	}

	ctx := context.Background()

	app := NewApp(ctx, config)
	// A private registry keeps this test's checks apart from the default one.
	healthRegistry := health.NewRegistry()
	app.RegisterHealthChecks(healthRegistry)

	for i := 0; ; i++ {
		time.Sleep(interval)

		status := healthRegistry.CheckStatus()

		if i < threshold-1 {
			// definitely shouldn't have hit the threshold yet
			if len(status) != 0 {
				t.Fatal("expected 0 items in health check results")
			}
			continue
		}
		if i < threshold+1 {
			// right on the threshold - don't expect a failure yet
			continue
		}

		if len(status) != 1 {
			t.Fatal("expected 1 item in health check results")
		}
		if status[checkedServer.URL] != "downstream service returned unexpected status: 500" {
			t.Fatal("did not get expected result for health check")
		}

		break
	}

	// Signal HTTP handler to start returning 200
	close(stopFailing)

	time.Sleep(2 * interval)

	if len(healthRegistry.CheckStatus()) != 0 {
		t.Fatal("expected 0 items in health check results")
	}
}