Merge pull request #901 from aaronlehmann/configurable-health-checks
Add configurable file-existence and HTTP health checks
This commit is contained in:
commit
37d4ad081f
16 changed files with 827 additions and 48 deletions
|
@ -48,3 +48,8 @@ proxy:
|
||||||
remoteurl: https://registry-1.docker.io
|
remoteurl: https://registry-1.docker.io
|
||||||
username: username
|
username: username
|
||||||
password: password
|
password: password
|
||||||
|
health:
|
||||||
|
storagedriver:
|
||||||
|
enabled: true
|
||||||
|
interval: 10s
|
||||||
|
threshold: 3
|
||||||
|
|
|
@ -59,4 +59,8 @@ notifications:
|
||||||
threshold: 10
|
threshold: 10
|
||||||
backoff: 1s
|
backoff: 1s
|
||||||
disabled: true
|
disabled: true
|
||||||
|
health:
|
||||||
|
storagedriver:
|
||||||
|
enabled: true
|
||||||
|
interval: 10s
|
||||||
|
threshold: 3
|
||||||
|
|
|
@ -11,3 +11,8 @@ http:
|
||||||
addr: :5000
|
addr: :5000
|
||||||
headers:
|
headers:
|
||||||
X-Content-Type-Options: [nosniff]
|
X-Content-Type-Options: [nosniff]
|
||||||
|
health:
|
||||||
|
storagedriver:
|
||||||
|
enabled: true
|
||||||
|
interval: 10s
|
||||||
|
threshold: 3
|
||||||
|
|
|
@ -135,6 +135,8 @@ type Configuration struct {
|
||||||
} `yaml:"pool,omitempty"`
|
} `yaml:"pool,omitempty"`
|
||||||
} `yaml:"redis,omitempty"`
|
} `yaml:"redis,omitempty"`
|
||||||
|
|
||||||
|
Health Health `yaml:"health,omitempty"`
|
||||||
|
|
||||||
Proxy Proxy `yaml:"proxy,omitempty"`
|
Proxy Proxy `yaml:"proxy,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -179,6 +181,68 @@ type MailOptions struct {
|
||||||
To []string `yaml:"to,omitempty"`
|
To []string `yaml:"to,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// FileChecker is a type of entry in the health section for checking files.
|
||||||
|
type FileChecker struct {
|
||||||
|
// Interval is the duration in between checks
|
||||||
|
Interval time.Duration `yaml:"interval,omitempty"`
|
||||||
|
// File is the path to check
|
||||||
|
File string `yaml:"file,omitempty"`
|
||||||
|
// Threshold is the number of times a check must fail to trigger an
|
||||||
|
// unhealthy state
|
||||||
|
Threshold int `yaml:"threshold,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// HTTPChecker is a type of entry in the health section for checking HTTP URIs.
//
// Each field maps to the identically named (lowercase) key of an entry in the
// "http" list of the health configuration section.
type HTTPChecker struct {
	// Timeout is the duration to wait before timing out the HTTP request.
	// It is read from the "timeout" key; it must not share the "interval"
	// key with Interval, or the two fields collide during YAML decoding.
	Timeout time.Duration `yaml:"timeout,omitempty"`
	// StatusCode is the expected status code
	StatusCode int `yaml:"statuscode,omitempty"`
	// Interval is the duration in between checks
	Interval time.Duration `yaml:"interval,omitempty"`
	// URI is the HTTP URI to check
	URI string `yaml:"uri,omitempty"`
	// Headers lists static headers that should be added to all requests
	Headers http.Header `yaml:"headers"`
	// Threshold is the number of times a check must fail to trigger an
	// unhealthy state
	Threshold int `yaml:"threshold,omitempty"`
}
|
||||||
|
|
||||||
|
// TCPChecker is a type of entry in the health section for checking TCP servers.
//
// Each field maps to the identically named (lowercase) key of an entry in the
// "tcp" list of the health configuration section.
type TCPChecker struct {
	// Timeout is the duration to wait before timing out the TCP connection.
	// It is read from the "timeout" key; it must not share the "interval"
	// key with Interval, or the two fields collide during YAML decoding.
	Timeout time.Duration `yaml:"timeout,omitempty"`
	// Interval is the duration in between checks
	Interval time.Duration `yaml:"interval,omitempty"`
	// Addr is the TCP address to check
	Addr string `yaml:"addr,omitempty"`
	// Threshold is the number of times a check must fail to trigger an
	// unhealthy state
	Threshold int `yaml:"threshold,omitempty"`
}
|
||||||
|
|
||||||
|
// Health provides the configuration section for health checks.
|
||||||
|
type Health struct {
|
||||||
|
// FileCheckers is a list of paths to check
|
||||||
|
FileCheckers []FileChecker `yaml:"file,omitempty"`
|
||||||
|
// HTTPCheckers is a list of URIs to check
|
||||||
|
HTTPCheckers []HTTPChecker `yaml:"http,omitempty"`
|
||||||
|
// TCPCheckers is a list of TCP addresses to check
|
||||||
|
TCPCheckers []TCPChecker `yaml:"tcp,omitempty"`
|
||||||
|
// StorageDriver configures a health check on the configured storage
|
||||||
|
// driver
|
||||||
|
StorageDriver struct {
|
||||||
|
// Enabled turns on the health check for the storage driver
|
||||||
|
Enabled bool `yaml:"enabled,omitempty"`
|
||||||
|
// Interval is the duration in between checks
|
||||||
|
Interval time.Duration `yaml:"interval,omitempty"`
|
||||||
|
// Threshold is the number of times a check must fail to trigger an
|
||||||
|
// unhealthy state
|
||||||
|
Threshold int `yaml:"threshold,omitempty"`
|
||||||
|
} `yaml:"storagedriver,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
// v0_1Configuration is a Version 0.1 Configuration struct
|
// v0_1Configuration is a Version 0.1 Configuration struct
|
||||||
// This is currently aliased to Configuration, as it is the current version
|
// This is currently aliased to Configuration, as it is the current version
|
||||||
type v0_1Configuration Configuration
|
type v0_1Configuration Configuration
|
||||||
|
|
|
@ -195,6 +195,27 @@ information about each option that appears later in this page.
|
||||||
maxidle: 16
|
maxidle: 16
|
||||||
maxactive: 64
|
maxactive: 64
|
||||||
idletimeout: 300s
|
idletimeout: 300s
|
||||||
|
health:
|
||||||
|
storagedriver:
|
||||||
|
enabled: true
|
||||||
|
interval: 10s
|
||||||
|
threshold: 3
|
||||||
|
file:
|
||||||
|
- file: /path/to/checked/file
|
||||||
|
interval: 10s
|
||||||
|
http:
|
||||||
|
- uri: http://server.to.check/must/return/200
|
||||||
|
headers:
|
||||||
|
Authorization: [Basic QWxhZGRpbjpvcGVuIHNlc2FtZQ==]
|
||||||
|
statuscode: 200
|
||||||
|
timeout: 3s
|
||||||
|
interval: 10s
|
||||||
|
threshold: 3
|
||||||
|
tcp:
|
||||||
|
- addr: redis-server.domain.com:6379
|
||||||
|
timeout: 3s
|
||||||
|
interval: 10s
|
||||||
|
threshold: 3
|
||||||
|
|
||||||
In some instances a configuration option is **optional** but it contains child
|
In some instances a configuration option is **optional** but it contains child
|
||||||
options marked as **required**. This indicates that you can omit the parent with
|
options marked as **required**. This indicates that you can omit the parent with
|
||||||
|
@ -1381,7 +1402,9 @@ The URL to which events should be published.
|
||||||
yes
|
yes
|
||||||
</td>
|
</td>
|
||||||
<td>
|
<td>
|
||||||
Static headers to add to each request.
|
Static headers to add to each request. Each header's name should be a key
|
||||||
|
underneath headers, and each value is a list of payloads for that
|
||||||
|
header name. Note that values must always be lists.
|
||||||
</td>
|
</td>
|
||||||
</tr>
|
</tr>
|
||||||
<tr>
|
<tr>
|
||||||
|
@ -1588,6 +1611,334 @@ Configure the behavior of the Redis connection pool.
|
||||||
</tr>
|
</tr>
|
||||||
</table>
|
</table>
|
||||||
|
|
||||||
|
## health
|
||||||
|
|
||||||
|
health:
|
||||||
|
storagedriver:
|
||||||
|
enabled: true
|
||||||
|
interval: 10s
|
||||||
|
threshold: 3
|
||||||
|
file:
|
||||||
|
- file: /path/to/checked/file
|
||||||
|
interval: 10s
|
||||||
|
http:
|
||||||
|
- uri: http://server.to.check/must/return/200
|
||||||
|
headers:
|
||||||
|
Authorization: [Basic QWxhZGRpbjpvcGVuIHNlc2FtZQ==]
|
||||||
|
statuscode: 200
|
||||||
|
timeout: 3s
|
||||||
|
interval: 10s
|
||||||
|
threshold: 3
|
||||||
|
tcp:
|
||||||
|
- addr: redis-server.domain.com:6379
|
||||||
|
timeout: 3s
|
||||||
|
interval: 10s
|
||||||
|
threshold: 3
|
||||||
|
|
||||||
|
The health option is **optional**. It may contain preferences for a periodic
|
||||||
|
health check on the storage driver's backend storage, and optional periodic
|
||||||
|
checks on local files, HTTP URIs, and/or TCP servers. The results of the health
|
||||||
|
checks are available at /debug/health on the debug HTTP server if the debug
|
||||||
|
HTTP server is enabled (see http section).
|
||||||
|
|
||||||
|
### storagedriver
|
||||||
|
|
||||||
|
storagedriver contains options for a health check on the configured storage
|
||||||
|
driver's backend storage. enabled must be set to true for this health check to
|
||||||
|
be active.
|
||||||
|
|
||||||
|
<table>
|
||||||
|
<tr>
|
||||||
|
<th>Parameter</th>
|
||||||
|
<th>Required</th>
|
||||||
|
<th>Description</th>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
<code>enabled</code>
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
yes
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
"true" to enable the storage driver health check or "false" to disable it.
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
<code>interval</code>
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
no
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
The length of time to wait between repetitions of the check. This field
|
||||||
|
takes a positive integer and an optional suffix indicating the unit of
|
||||||
|
time. Possible units are:
|
||||||
|
<ul>
|
||||||
|
<li><code>ns</code> (nanoseconds)</li>
|
||||||
|
<li><code>us</code> (microseconds)</li>
|
||||||
|
<li><code>ms</code> (milliseconds)</li>
|
||||||
|
<li><code>s</code> (seconds)</li>
|
||||||
|
<li><code>m</code> (minutes)</li>
|
||||||
|
<li><code>h</code> (hours)</li>
|
||||||
|
</ul>
|
||||||
|
If you omit the suffix, the system interprets the value as nanoseconds.
|
||||||
|
The default value is 10 seconds if this field is omitted.
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
<code>threshold</code>
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
no
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
An integer specifying the number of times the check must fail before the
|
||||||
|
check triggers an unhealthy state. If this field is not specified, a
|
||||||
|
single failure will trigger an unhealthy state.
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
</table>
|
||||||
|
|
||||||
|
### file
|
||||||
|
|
||||||
|
file is a list of paths to be periodically checked for the existence of a file.
|
||||||
|
If a file exists at the given path, the health check will fail. This can be
|
||||||
|
used as a way of bringing a registry out of rotation by creating a file.
|
||||||
|
|
||||||
|
<table>
|
||||||
|
<tr>
|
||||||
|
<th>Parameter</th>
|
||||||
|
<th>Required</th>
|
||||||
|
<th>Description</th>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
<code>file</code>
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
yes
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
The path to check for the existence of a file.
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
<code>interval</code>
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
no
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
The length of time to wait between repetitions of the check. This field
|
||||||
|
takes a positive integer and an optional suffix indicating the unit of
|
||||||
|
time. Possible units are:
|
||||||
|
<ul>
|
||||||
|
<li><code>ns</code> (nanoseconds)</li>
|
||||||
|
<li><code>us</code> (microseconds)</li>
|
||||||
|
<li><code>ms</code> (milliseconds)</li>
|
||||||
|
<li><code>s</code> (seconds)</li>
|
||||||
|
<li><code>m</code> (minutes)</li>
|
||||||
|
<li><code>h</code> (hours)</li>
|
||||||
|
</ul>
|
||||||
|
If you omit the suffix, the system interprets the value as nanoseconds.
|
||||||
|
The default value is 10 seconds if this field is omitted.
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
</table>
|
||||||
|
|
||||||
|
### http
|
||||||
|
|
||||||
|
http is a list of HTTP URIs to be periodically checked with HEAD requests. If
|
||||||
|
a HEAD request doesn't complete or returns an unexpected status code, the
|
||||||
|
health check will fail.
|
||||||
|
|
||||||
|
<table>
|
||||||
|
<tr>
|
||||||
|
<th>Parameter</th>
|
||||||
|
<th>Required</th>
|
||||||
|
<th>Description</th>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
<code>uri</code>
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
yes
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
The URI to check.
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
<code>headers</code>
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
no
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
Static headers to add to each request. Each header's name should be a key
|
||||||
|
underneath headers, and each value is a list of payloads for that
|
||||||
|
header name. Note that values must always be lists.
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
<code>statuscode</code>
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
no
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
Expected status code from the HTTP URI. Defaults to 200.
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
<code>timeout</code>
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
no
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
The length of time to wait before timing out the HTTP request. This field
|
||||||
|
takes a positive integer and an optional suffix indicating the unit of
|
||||||
|
time. Possible units are:
|
||||||
|
<ul>
|
||||||
|
<li><code>ns</code> (nanoseconds)</li>
|
||||||
|
<li><code>us</code> (microseconds)</li>
|
||||||
|
<li><code>ms</code> (milliseconds)</li>
|
||||||
|
<li><code>s</code> (seconds)</li>
|
||||||
|
<li><code>m</code> (minutes)</li>
|
||||||
|
<li><code>h</code> (hours)</li>
|
||||||
|
</ul>
|
||||||
|
If you omit the suffix, the system interprets the value as nanoseconds.
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
<code>interval</code>
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
no
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
The length of time to wait between repetitions of the check. This field
|
||||||
|
takes a positive integer and an optional suffix indicating the unit of
|
||||||
|
time. Possible units are:
|
||||||
|
<ul>
|
||||||
|
<li><code>ns</code> (nanoseconds)</li>
|
||||||
|
<li><code>us</code> (microseconds)</li>
|
||||||
|
<li><code>ms</code> (milliseconds)</li>
|
||||||
|
<li><code>s</code> (seconds)</li>
|
||||||
|
<li><code>m</code> (minutes)</li>
|
||||||
|
<li><code>h</code> (hours)</li>
|
||||||
|
</ul>
|
||||||
|
If you omit the suffix, the system interprets the value as nanoseconds.
|
||||||
|
The default value is 10 seconds if this field is omitted.
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
<code>threshold</code>
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
no
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
An integer specifying the number of times the check must fail before the
|
||||||
|
check triggers an unhealthy state. If this field is not specified, a
|
||||||
|
single failure will trigger an unhealthy state.
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
</table>
|
||||||
|
|
||||||
|
### tcp
|
||||||
|
|
||||||
|
tcp is a list of TCP addresses to be periodically checked with connection
|
||||||
|
attempts. The addresses must include port numbers. If a connection attempt
|
||||||
|
fails, the health check will fail.
|
||||||
|
|
||||||
|
<table>
|
||||||
|
<tr>
|
||||||
|
<th>Parameter</th>
|
||||||
|
<th>Required</th>
|
||||||
|
<th>Description</th>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
<code>addr</code>
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
yes
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
The TCP address to connect to, including a port number.
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
<code>timeout</code>
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
no
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
The length of time to wait before timing out the TCP connection. This
|
||||||
|
field takes a positive integer and an optional suffix indicating the unit
|
||||||
|
of time. Possible units are:
|
||||||
|
<ul>
|
||||||
|
<li><code>ns</code> (nanoseconds)</li>
|
||||||
|
<li><code>us</code> (microseconds)</li>
|
||||||
|
<li><code>ms</code> (milliseconds)</li>
|
||||||
|
<li><code>s</code> (seconds)</li>
|
||||||
|
<li><code>m</code> (minutes)</li>
|
||||||
|
<li><code>h</code> (hours)</li>
|
||||||
|
</ul>
|
||||||
|
If you omit the suffix, the system interprets the value as nanoseconds.
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
<code>interval</code>
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
no
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
The length of time to wait between repetitions of the check. This field
|
||||||
|
takes a positive integer and an optional suffix indicating the unit of
|
||||||
|
time. Possible units are:
|
||||||
|
<ul>
|
||||||
|
<li><code>ns</code> (nanoseconds)</li>
|
||||||
|
<li><code>us</code> (microseconds)</li>
|
||||||
|
<li><code>ms</code> (milliseconds)</li>
|
||||||
|
<li><code>s</code> (seconds)</li>
|
||||||
|
<li><code>m</code> (minutes)</li>
|
||||||
|
<li><code>h</code> (hours)</li>
|
||||||
|
</ul>
|
||||||
|
If you omit the suffix, the system interprets the value as nanoseconds.
|
||||||
|
The default value is 10 seconds if this field is omitted.
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
<code>threshold</code>
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
no
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
An integer specifying the number of times the check must fail before the
|
||||||
|
check triggers an unhealthy state. If this field is not specified, a
|
||||||
|
single failure will trigger an unhealthy state.
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
</table>
|
||||||
|
|
||||||
## Example: Development configuration
|
## Example: Development configuration
|
||||||
|
|
||||||
|
|
|
@ -2,13 +2,17 @@ package checks
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"errors"
|
"errors"
|
||||||
"github.com/docker/distribution/health"
|
"net"
|
||||||
"net/http"
|
"net/http"
|
||||||
"os"
|
"os"
|
||||||
|
"strconv"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/docker/distribution/health"
|
||||||
)
|
)
|
||||||
|
|
||||||
// FileChecker checks the existence of a file and returns and error
|
// FileChecker checks the existence of a file and returns an error
|
||||||
// if the file exists, taking the application out of rotation
|
// if the file exists.
|
||||||
func FileChecker(f string) health.Checker {
|
func FileChecker(f string) health.Checker {
|
||||||
return health.CheckFunc(func() error {
|
return health.CheckFunc(func() error {
|
||||||
if _, err := os.Stat(f); err == nil {
|
if _, err := os.Stat(f); err == nil {
|
||||||
|
@ -18,18 +22,41 @@ func FileChecker(f string) health.Checker {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
// HTTPChecker does a HEAD request and verifies if the HTTP status
|
// HTTPChecker does a HEAD request and verifies that the HTTP status code
|
||||||
// code return is a 200, taking the application out of rotation if
|
// returned matches statusCode.
|
||||||
// otherwise
|
func HTTPChecker(r string, statusCode int, timeout time.Duration, headers http.Header) health.Checker {
|
||||||
func HTTPChecker(r string) health.Checker {
|
|
||||||
return health.CheckFunc(func() error {
|
return health.CheckFunc(func() error {
|
||||||
response, err := http.Head(r)
|
client := http.Client{
|
||||||
|
Timeout: timeout,
|
||||||
|
}
|
||||||
|
req, err := http.NewRequest("HEAD", r, nil)
|
||||||
|
if err != nil {
|
||||||
|
return errors.New("error creating request: " + r)
|
||||||
|
}
|
||||||
|
for headerName, headerValues := range headers {
|
||||||
|
for _, headerValue := range headerValues {
|
||||||
|
req.Header.Add(headerName, headerValue)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
response, err := client.Do(req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return errors.New("error while checking: " + r)
|
return errors.New("error while checking: " + r)
|
||||||
}
|
}
|
||||||
if response.StatusCode != http.StatusOK {
|
if response.StatusCode != statusCode {
|
||||||
return errors.New("downstream service returned unexpected status: " + string(response.StatusCode))
|
return errors.New("downstream service returned unexpected status: " + strconv.Itoa(response.StatusCode))
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TCPChecker attempts to open a TCP connection.
|
||||||
|
func TCPChecker(addr string, timeout time.Duration) health.Checker {
|
||||||
|
return health.CheckFunc(func() error {
|
||||||
|
conn, err := net.DialTimeout("tcp", addr, timeout)
|
||||||
|
if err != nil {
|
||||||
|
return errors.New("connection to " + addr + " failed")
|
||||||
|
}
|
||||||
|
conn.Close()
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
|
@ -15,11 +15,11 @@ func TestFileChecker(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestHTTPChecker(t *testing.T) {
|
func TestHTTPChecker(t *testing.T) {
|
||||||
if err := HTTPChecker("https://www.google.cybertron").Check(); err == nil {
|
if err := HTTPChecker("https://www.google.cybertron", 200, 0, nil).Check(); err == nil {
|
||||||
t.Errorf("Google on Cybertron was expected as not exists")
|
t.Errorf("Google on Cybertron was expected as not exists")
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := HTTPChecker("https://www.google.pt").Check(); err != nil {
|
if err := HTTPChecker("https://www.google.pt", 200, 0, nil).Check(); err != nil {
|
||||||
t.Errorf("Google at Portugal was expected as exists, error:%v", err)
|
t.Errorf("Google at Portugal was expected as exists, error:%v", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -39,7 +39,7 @@
|
||||||
//
|
//
|
||||||
// The recommended way of registering checks is using a periodic Check.
|
// The recommended way of registering checks is using a periodic Check.
|
||||||
// PeriodicChecks run on a certain schedule and asynchronously update the
|
// PeriodicChecks run on a certain schedule and asynchronously update the
|
||||||
// status of the check. This allows "CheckStatus()" to return without blocking
|
// status of the check. This allows CheckStatus to return without blocking
|
||||||
// on an expensive check.
|
// on an expensive check.
|
||||||
//
|
//
|
||||||
// A trivial example of a check that runs every 5 seconds and shuts down our
|
// A trivial example of a check that runs every 5 seconds and shuts down our
|
||||||
|
|
|
@ -11,10 +11,26 @@ import (
|
||||||
"github.com/docker/distribution/registry/api/errcode"
|
"github.com/docker/distribution/registry/api/errcode"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
// A Registry is a collection of checks. Most applications will use the global
|
||||||
mutex sync.RWMutex
|
// registry defined in DefaultRegistry. However, unit tests may need to create
|
||||||
registeredChecks = make(map[string]Checker)
|
// separate registries to isolate themselves from other tests.
|
||||||
)
|
type Registry struct {
|
||||||
|
mu sync.RWMutex
|
||||||
|
registeredChecks map[string]Checker
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewRegistry creates a new registry. This isn't necessary for normal use of
|
||||||
|
// the package, but may be useful for unit tests so individual tests have their
|
||||||
|
// own set of checks.
|
||||||
|
func NewRegistry() *Registry {
|
||||||
|
return &Registry{
|
||||||
|
registeredChecks: make(map[string]Checker),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// DefaultRegistry is the default registry where checks are registered. It is
|
||||||
|
// the registry used by the HTTP handler.
|
||||||
|
var DefaultRegistry *Registry
|
||||||
|
|
||||||
// Checker is the interface for a Health Checker
|
// Checker is the interface for a Health Checker
|
||||||
type Checker interface {
|
type Checker interface {
|
||||||
|
@ -144,11 +160,11 @@ func PeriodicThresholdChecker(check Checker, period time.Duration, threshold int
|
||||||
}
|
}
|
||||||
|
|
||||||
// CheckStatus returns a map with all the current health check errors
|
// CheckStatus returns a map with all the current health check errors
|
||||||
func CheckStatus() map[string]string { // TODO(stevvooe) this needs a proper type
|
func (registry *Registry) CheckStatus() map[string]string { // TODO(stevvooe) this needs a proper type
|
||||||
mutex.RLock()
|
registry.mu.RLock()
|
||||||
defer mutex.RUnlock()
|
defer registry.mu.RUnlock()
|
||||||
statusKeys := make(map[string]string)
|
statusKeys := make(map[string]string)
|
||||||
for k, v := range registeredChecks {
|
for k, v := range registry.registeredChecks {
|
||||||
err := v.Check()
|
err := v.Check()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
statusKeys[k] = err.Error()
|
statusKeys[k] = err.Error()
|
||||||
|
@ -158,34 +174,66 @@ func CheckStatus() map[string]string { // TODO(stevvooe) this needs a proper typ
|
||||||
return statusKeys
|
return statusKeys
|
||||||
}
|
}
|
||||||
|
|
||||||
// Register associates the checker with the provided name. We allow
|
// CheckStatus returns a map with all the current health check errors from the
|
||||||
// overwrites to a specific check status.
|
// default registry.
|
||||||
func Register(name string, check Checker) {
|
func CheckStatus() map[string]string {
|
||||||
mutex.Lock()
|
return DefaultRegistry.CheckStatus()
|
||||||
defer mutex.Unlock()
|
}
|
||||||
_, ok := registeredChecks[name]
|
|
||||||
|
// Register associates the checker with the provided name.
|
||||||
|
func (registry *Registry) Register(name string, check Checker) {
|
||||||
|
if registry == nil {
|
||||||
|
registry = DefaultRegistry
|
||||||
|
}
|
||||||
|
registry.mu.Lock()
|
||||||
|
defer registry.mu.Unlock()
|
||||||
|
_, ok := registry.registeredChecks[name]
|
||||||
if ok {
|
if ok {
|
||||||
panic("Check already exists: " + name)
|
panic("Check already exists: " + name)
|
||||||
}
|
}
|
||||||
registeredChecks[name] = check
|
registry.registeredChecks[name] = check
|
||||||
}
|
}
|
||||||
|
|
||||||
// RegisterFunc allows the convenience of registering a checker directly
|
// Register associates the checker with the provided name in the default
|
||||||
// from an arbitrary func() error
|
// registry.
|
||||||
|
func Register(name string, check Checker) {
|
||||||
|
DefaultRegistry.Register(name, check)
|
||||||
|
}
|
||||||
|
|
||||||
|
// RegisterFunc allows the convenience of registering a checker directly from
|
||||||
|
// an arbitrary func() error.
|
||||||
|
func (registry *Registry) RegisterFunc(name string, check func() error) {
|
||||||
|
registry.Register(name, CheckFunc(check))
|
||||||
|
}
|
||||||
|
|
||||||
|
// RegisterFunc allows the convenience of registering a checker in the default
|
||||||
|
// registry directly from an arbitrary func() error.
|
||||||
func RegisterFunc(name string, check func() error) {
|
func RegisterFunc(name string, check func() error) {
|
||||||
Register(name, CheckFunc(check))
|
DefaultRegistry.RegisterFunc(name, check)
|
||||||
}
|
}
|
||||||
|
|
||||||
// RegisterPeriodicFunc allows the convenience of registering a PeriodicChecker
|
// RegisterPeriodicFunc allows the convenience of registering a PeriodicChecker
|
||||||
// from an arbitrary func() error
|
// from an arbitrary func() error.
|
||||||
|
func (registry *Registry) RegisterPeriodicFunc(name string, period time.Duration, check CheckFunc) {
|
||||||
|
registry.Register(name, PeriodicChecker(CheckFunc(check), period))
|
||||||
|
}
|
||||||
|
|
||||||
|
// RegisterPeriodicFunc allows the convenience of registering a PeriodicChecker
|
||||||
|
// in the default registry from an arbitrary func() error.
|
||||||
func RegisterPeriodicFunc(name string, period time.Duration, check CheckFunc) {
|
func RegisterPeriodicFunc(name string, period time.Duration, check CheckFunc) {
|
||||||
Register(name, PeriodicChecker(CheckFunc(check), period))
|
DefaultRegistry.RegisterPeriodicFunc(name, period, check)
|
||||||
}
|
}
|
||||||
|
|
||||||
// RegisterPeriodicThresholdFunc allows the convenience of registering a
|
// RegisterPeriodicThresholdFunc allows the convenience of registering a
|
||||||
// PeriodicChecker from an arbitrary func() error
|
// PeriodicChecker from an arbitrary func() error.
|
||||||
|
func (registry *Registry) RegisterPeriodicThresholdFunc(name string, period time.Duration, threshold int, check CheckFunc) {
|
||||||
|
registry.Register(name, PeriodicThresholdChecker(CheckFunc(check), period, threshold))
|
||||||
|
}
|
||||||
|
|
||||||
|
// RegisterPeriodicThresholdFunc allows the convenience of registering a
|
||||||
|
// PeriodicChecker in the default registry from an arbitrary func() error.
|
||||||
func RegisterPeriodicThresholdFunc(name string, period time.Duration, threshold int, check CheckFunc) {
|
func RegisterPeriodicThresholdFunc(name string, period time.Duration, threshold int, check CheckFunc) {
|
||||||
Register(name, PeriodicThresholdChecker(CheckFunc(check), period, threshold))
|
DefaultRegistry.RegisterPeriodicThresholdFunc(name, period, threshold, check)
|
||||||
}
|
}
|
||||||
|
|
||||||
// StatusHandler returns a JSON blob with all the currently registered Health Checks
|
// StatusHandler returns a JSON blob with all the currently registered Health Checks
|
||||||
|
@ -251,7 +299,8 @@ func statusResponse(w http.ResponseWriter, r *http.Request, status int, checks m
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Registers global /debug/health api endpoint
|
// Registers global /debug/health api endpoint, creates default registry
|
||||||
func init() {
|
func init() {
|
||||||
|
DefaultRegistry = NewRegistry()
|
||||||
http.HandleFunc("/debug/health", StatusHandler)
|
http.HandleFunc("/debug/health", StatusHandler)
|
||||||
}
|
}
|
||||||
|
|
|
@ -51,7 +51,7 @@ func TestReturns503IfThereAreErrorChecks(t *testing.T) {
|
||||||
// the web application when things aren't so healthy.
|
// the web application when things aren't so healthy.
|
||||||
func TestHealthHandler(t *testing.T) {
|
func TestHealthHandler(t *testing.T) {
|
||||||
// clear out existing checks.
|
// clear out existing checks.
|
||||||
registeredChecks = make(map[string]Checker)
|
DefaultRegistry = NewRegistry()
|
||||||
|
|
||||||
// protect an http server
|
// protect an http server
|
||||||
handler := http.Handler(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
handler := http.Handler(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
|
|
@ -5,8 +5,8 @@ import (
|
||||||
"net/http/httptest"
|
"net/http/httptest"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
|
"github.com/docker/distribution/context"
|
||||||
"github.com/docker/distribution/registry/auth"
|
"github.com/docker/distribution/registry/auth"
|
||||||
"golang.org/x/net/context"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestSillyAccessController(t *testing.T) {
|
func TestSillyAccessController(t *testing.T) {
|
||||||
|
|
|
@ -15,9 +15,9 @@ import (
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/docker/distribution/context"
|
||||||
"github.com/docker/distribution/registry/auth"
|
"github.com/docker/distribution/registry/auth"
|
||||||
"github.com/docker/libtrust"
|
"github.com/docker/libtrust"
|
||||||
"golang.org/x/net/context"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func makeRootKeys(numKeys int) ([]libtrust.PrivateKey, error) {
|
func makeRootKeys(numKeys int) ([]libtrust.PrivateKey, error) {
|
||||||
|
|
|
@ -19,6 +19,7 @@ import (
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/docker/distribution/configuration"
|
"github.com/docker/distribution/configuration"
|
||||||
|
"github.com/docker/distribution/context"
|
||||||
"github.com/docker/distribution/digest"
|
"github.com/docker/distribution/digest"
|
||||||
"github.com/docker/distribution/manifest"
|
"github.com/docker/distribution/manifest"
|
||||||
"github.com/docker/distribution/registry/api/errcode"
|
"github.com/docker/distribution/registry/api/errcode"
|
||||||
|
@ -27,7 +28,6 @@ import (
|
||||||
"github.com/docker/distribution/testutil"
|
"github.com/docker/distribution/testutil"
|
||||||
"github.com/docker/libtrust"
|
"github.com/docker/libtrust"
|
||||||
"github.com/gorilla/handlers"
|
"github.com/gorilla/handlers"
|
||||||
"golang.org/x/net/context"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
var headerConfig = http.Header{
|
var headerConfig = http.Header{
|
||||||
|
|
|
@ -15,6 +15,7 @@ import (
|
||||||
"github.com/docker/distribution/configuration"
|
"github.com/docker/distribution/configuration"
|
||||||
ctxu "github.com/docker/distribution/context"
|
ctxu "github.com/docker/distribution/context"
|
||||||
"github.com/docker/distribution/health"
|
"github.com/docker/distribution/health"
|
||||||
|
"github.com/docker/distribution/health/checks"
|
||||||
"github.com/docker/distribution/notifications"
|
"github.com/docker/distribution/notifications"
|
||||||
"github.com/docker/distribution/registry/api/errcode"
|
"github.com/docker/distribution/registry/api/errcode"
|
||||||
"github.com/docker/distribution/registry/api/v2"
|
"github.com/docker/distribution/registry/api/v2"
|
||||||
|
@ -37,6 +38,9 @@ import (
|
||||||
// was specified.
|
// was specified.
|
||||||
const randomSecretSize = 32
|
const randomSecretSize = 32
|
||||||
|
|
||||||
|
// defaultCheckInterval is the default time in between health checks
|
||||||
|
const defaultCheckInterval = 10 * time.Second
|
||||||
|
|
||||||
// App is a global registry application object. Shared resources can be placed
|
// App is a global registry application object. Shared resources can be placed
|
||||||
// on this object that will be accessible from all requests. Any writable
|
// on this object that will be accessible from all requests. Any writable
|
||||||
// fields should be protected.
|
// fields should be protected.
|
||||||
|
@ -230,11 +234,80 @@ func NewApp(ctx context.Context, configuration configuration.Configuration) *App
|
||||||
// process. Because the configuration and app are tightly coupled,
|
// process. Because the configuration and app are tightly coupled,
|
||||||
// implementing this properly will require a refactor. This method may panic
|
// implementing this properly will require a refactor. This method may panic
|
||||||
// if called twice in the same process.
|
// if called twice in the same process.
|
||||||
func (app *App) RegisterHealthChecks() {
|
func (app *App) RegisterHealthChecks(healthRegistries ...*health.Registry) {
|
||||||
health.RegisterPeriodicThresholdFunc("storagedriver_"+app.Config.Storage.Type(), 10*time.Second, 3, func() error {
|
if len(healthRegistries) > 1 {
|
||||||
_, err := app.driver.List(app, "/") // "/" should always exist
|
panic("RegisterHealthChecks called with more than one registry")
|
||||||
return err // any error will be treated as failure
|
}
|
||||||
})
|
healthRegistry := health.DefaultRegistry
|
||||||
|
if len(healthRegistries) == 1 {
|
||||||
|
healthRegistry = healthRegistries[0]
|
||||||
|
}
|
||||||
|
|
||||||
|
if app.Config.Health.StorageDriver.Enabled {
|
||||||
|
interval := app.Config.Health.StorageDriver.Interval
|
||||||
|
if interval == 0 {
|
||||||
|
interval = defaultCheckInterval
|
||||||
|
}
|
||||||
|
|
||||||
|
storageDriverCheck := func() error {
|
||||||
|
_, err := app.driver.List(app, "/") // "/" should always exist
|
||||||
|
return err // any error will be treated as failure
|
||||||
|
}
|
||||||
|
|
||||||
|
if app.Config.Health.StorageDriver.Threshold != 0 {
|
||||||
|
healthRegistry.RegisterPeriodicThresholdFunc("storagedriver_"+app.Config.Storage.Type(), interval, app.Config.Health.StorageDriver.Threshold, storageDriverCheck)
|
||||||
|
} else {
|
||||||
|
healthRegistry.RegisterPeriodicFunc("storagedriver_"+app.Config.Storage.Type(), interval, storageDriverCheck)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, fileChecker := range app.Config.Health.FileCheckers {
|
||||||
|
interval := fileChecker.Interval
|
||||||
|
if interval == 0 {
|
||||||
|
interval = defaultCheckInterval
|
||||||
|
}
|
||||||
|
ctxu.GetLogger(app).Infof("configuring file health check path=%s, interval=%d", fileChecker.File, interval/time.Second)
|
||||||
|
healthRegistry.Register(fileChecker.File, health.PeriodicChecker(checks.FileChecker(fileChecker.File), interval))
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, httpChecker := range app.Config.Health.HTTPCheckers {
|
||||||
|
interval := httpChecker.Interval
|
||||||
|
if interval == 0 {
|
||||||
|
interval = defaultCheckInterval
|
||||||
|
}
|
||||||
|
|
||||||
|
statusCode := httpChecker.StatusCode
|
||||||
|
if statusCode == 0 {
|
||||||
|
statusCode = 200
|
||||||
|
}
|
||||||
|
|
||||||
|
checker := checks.HTTPChecker(httpChecker.URI, statusCode, httpChecker.Timeout, httpChecker.Headers)
|
||||||
|
|
||||||
|
if httpChecker.Threshold != 0 {
|
||||||
|
ctxu.GetLogger(app).Infof("configuring HTTP health check uri=%s, interval=%d, threshold=%d", httpChecker.URI, interval/time.Second, httpChecker.Threshold)
|
||||||
|
healthRegistry.Register(httpChecker.URI, health.PeriodicThresholdChecker(checker, interval, httpChecker.Threshold))
|
||||||
|
} else {
|
||||||
|
ctxu.GetLogger(app).Infof("configuring HTTP health check uri=%s, interval=%d", httpChecker.URI, interval/time.Second)
|
||||||
|
healthRegistry.Register(httpChecker.URI, health.PeriodicChecker(checker, interval))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tcpChecker := range app.Config.Health.TCPCheckers {
|
||||||
|
interval := tcpChecker.Interval
|
||||||
|
if interval == 0 {
|
||||||
|
interval = defaultCheckInterval
|
||||||
|
}
|
||||||
|
|
||||||
|
checker := checks.TCPChecker(tcpChecker.Addr, tcpChecker.Timeout)
|
||||||
|
|
||||||
|
if tcpChecker.Threshold != 0 {
|
||||||
|
ctxu.GetLogger(app).Infof("configuring TCP health check addr=%s, interval=%d, threshold=%d", tcpChecker.Addr, interval/time.Second, tcpChecker.Threshold)
|
||||||
|
healthRegistry.Register(tcpChecker.Addr, health.PeriodicThresholdChecker(checker, interval, tcpChecker.Threshold))
|
||||||
|
} else {
|
||||||
|
ctxu.GetLogger(app).Infof("configuring TCP health check addr=%s, interval=%d", tcpChecker.Addr, interval/time.Second)
|
||||||
|
healthRegistry.Register(tcpChecker.Addr, health.PeriodicChecker(checker, interval))
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// register a handler with the application, by route name. The handler will be
|
// register a handler with the application, by route name. The handler will be
|
||||||
|
|
|
@ -9,6 +9,7 @@ import (
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/docker/distribution/configuration"
|
"github.com/docker/distribution/configuration"
|
||||||
|
"github.com/docker/distribution/context"
|
||||||
"github.com/docker/distribution/registry/api/errcode"
|
"github.com/docker/distribution/registry/api/errcode"
|
||||||
"github.com/docker/distribution/registry/api/v2"
|
"github.com/docker/distribution/registry/api/v2"
|
||||||
"github.com/docker/distribution/registry/auth"
|
"github.com/docker/distribution/registry/auth"
|
||||||
|
@ -16,7 +17,6 @@ import (
|
||||||
"github.com/docker/distribution/registry/storage"
|
"github.com/docker/distribution/registry/storage"
|
||||||
memorycache "github.com/docker/distribution/registry/storage/cache/memory"
|
memorycache "github.com/docker/distribution/registry/storage/cache/memory"
|
||||||
"github.com/docker/distribution/registry/storage/driver/inmemory"
|
"github.com/docker/distribution/registry/storage/driver/inmemory"
|
||||||
"golang.org/x/net/context"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// TestAppDispatcher builds an application with a test dispatcher and ensures
|
// TestAppDispatcher builds an application with a test dispatcher and ensures
|
||||||
|
|
201
registry/handlers/health_test.go
Normal file
201
registry/handlers/health_test.go
Normal file
|
@ -0,0 +1,201 @@
|
||||||
|
package handlers
|
||||||
|
|
||||||
|
import (
|
||||||
|
"io/ioutil"
|
||||||
|
"net"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"os"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/docker/distribution/configuration"
|
||||||
|
"github.com/docker/distribution/context"
|
||||||
|
"github.com/docker/distribution/health"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestFileHealthCheck(t *testing.T) {
|
||||||
|
interval := time.Second
|
||||||
|
|
||||||
|
tmpfile, err := ioutil.TempFile(os.TempDir(), "healthcheck")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("could not create temporary file: %v", err)
|
||||||
|
}
|
||||||
|
defer tmpfile.Close()
|
||||||
|
|
||||||
|
config := configuration.Configuration{
|
||||||
|
Storage: configuration.Storage{
|
||||||
|
"inmemory": configuration.Parameters{},
|
||||||
|
},
|
||||||
|
Health: configuration.Health{
|
||||||
|
FileCheckers: []configuration.FileChecker{
|
||||||
|
{
|
||||||
|
Interval: interval,
|
||||||
|
File: tmpfile.Name(),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx := context.Background()
|
||||||
|
|
||||||
|
app := NewApp(ctx, config)
|
||||||
|
healthRegistry := health.NewRegistry()
|
||||||
|
app.RegisterHealthChecks(healthRegistry)
|
||||||
|
|
||||||
|
// Wait for health check to happen
|
||||||
|
<-time.After(2 * interval)
|
||||||
|
|
||||||
|
status := healthRegistry.CheckStatus()
|
||||||
|
if len(status) != 1 {
|
||||||
|
t.Fatal("expected 1 item in health check results")
|
||||||
|
}
|
||||||
|
if status[tmpfile.Name()] != "file exists" {
|
||||||
|
t.Fatal(`did not get "file exists" result for health check`)
|
||||||
|
}
|
||||||
|
|
||||||
|
os.Remove(tmpfile.Name())
|
||||||
|
|
||||||
|
<-time.After(2 * interval)
|
||||||
|
if len(healthRegistry.CheckStatus()) != 0 {
|
||||||
|
t.Fatal("expected 0 items in health check results")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestTCPHealthCheck(t *testing.T) {
|
||||||
|
interval := time.Second
|
||||||
|
|
||||||
|
ln, err := net.Listen("tcp", "127.0.0.1:0")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("could not create listener: %v", err)
|
||||||
|
}
|
||||||
|
addrStr := ln.Addr().String()
|
||||||
|
|
||||||
|
// Start accepting
|
||||||
|
go func() {
|
||||||
|
for {
|
||||||
|
conn, err := ln.Accept()
|
||||||
|
if err != nil {
|
||||||
|
// listener was closed
|
||||||
|
return
|
||||||
|
}
|
||||||
|
defer conn.Close()
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
config := configuration.Configuration{
|
||||||
|
Storage: configuration.Storage{
|
||||||
|
"inmemory": configuration.Parameters{},
|
||||||
|
},
|
||||||
|
Health: configuration.Health{
|
||||||
|
TCPCheckers: []configuration.TCPChecker{
|
||||||
|
{
|
||||||
|
Interval: interval,
|
||||||
|
Addr: addrStr,
|
||||||
|
Timeout: 500 * time.Millisecond,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx := context.Background()
|
||||||
|
|
||||||
|
app := NewApp(ctx, config)
|
||||||
|
healthRegistry := health.NewRegistry()
|
||||||
|
app.RegisterHealthChecks(healthRegistry)
|
||||||
|
|
||||||
|
// Wait for health check to happen
|
||||||
|
<-time.After(2 * interval)
|
||||||
|
|
||||||
|
if len(healthRegistry.CheckStatus()) != 0 {
|
||||||
|
t.Fatal("expected 0 items in health check results")
|
||||||
|
}
|
||||||
|
|
||||||
|
ln.Close()
|
||||||
|
<-time.After(2 * interval)
|
||||||
|
|
||||||
|
// Health check should now fail
|
||||||
|
status := healthRegistry.CheckStatus()
|
||||||
|
if len(status) != 1 {
|
||||||
|
t.Fatal("expected 1 item in health check results")
|
||||||
|
}
|
||||||
|
if status[addrStr] != "connection to "+addrStr+" failed" {
|
||||||
|
t.Fatal(`did not get "connection failed" result for health check`)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestHTTPHealthCheck(t *testing.T) {
|
||||||
|
interval := time.Second
|
||||||
|
threshold := 3
|
||||||
|
|
||||||
|
stopFailing := make(chan struct{})
|
||||||
|
|
||||||
|
checkedServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if r.Method != "HEAD" {
|
||||||
|
t.Fatalf("expected HEAD request, got %s", r.Method)
|
||||||
|
}
|
||||||
|
select {
|
||||||
|
case <-stopFailing:
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
default:
|
||||||
|
w.WriteHeader(http.StatusInternalServerError)
|
||||||
|
}
|
||||||
|
}))
|
||||||
|
|
||||||
|
config := configuration.Configuration{
|
||||||
|
Storage: configuration.Storage{
|
||||||
|
"inmemory": configuration.Parameters{},
|
||||||
|
},
|
||||||
|
Health: configuration.Health{
|
||||||
|
HTTPCheckers: []configuration.HTTPChecker{
|
||||||
|
{
|
||||||
|
Interval: interval,
|
||||||
|
URI: checkedServer.URL,
|
||||||
|
Threshold: threshold,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx := context.Background()
|
||||||
|
|
||||||
|
app := NewApp(ctx, config)
|
||||||
|
healthRegistry := health.NewRegistry()
|
||||||
|
app.RegisterHealthChecks(healthRegistry)
|
||||||
|
|
||||||
|
for i := 0; ; i++ {
|
||||||
|
<-time.After(interval)
|
||||||
|
|
||||||
|
status := healthRegistry.CheckStatus()
|
||||||
|
|
||||||
|
if i < threshold-1 {
|
||||||
|
// definitely shouldn't have hit the threshold yet
|
||||||
|
if len(status) != 0 {
|
||||||
|
t.Fatal("expected 1 item in health check results")
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if i < threshold+1 {
|
||||||
|
// right on the threshold - don't expect a failure yet
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(status) != 1 {
|
||||||
|
t.Fatal("expected 1 item in health check results")
|
||||||
|
}
|
||||||
|
if status[checkedServer.URL] != "downstream service returned unexpected status: 500" {
|
||||||
|
t.Fatal("did not get expected result for health check")
|
||||||
|
}
|
||||||
|
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
// Signal HTTP handler to start returning 200
|
||||||
|
close(stopFailing)
|
||||||
|
|
||||||
|
<-time.After(2 * interval)
|
||||||
|
|
||||||
|
if len(healthRegistry.CheckStatus()) != 0 {
|
||||||
|
t.Fatal("expected 0 items in health check results")
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in a new issue