From b09b0ffcf9466e924691ef289d83e0d08f25901a Mon Sep 17 00:00:00 2001 From: Aaron Lehmann Date: Tue, 18 Aug 2015 17:19:46 -0700 Subject: [PATCH 1/6] Add configurable file-existence and HTTP health checks Add a section to the config file called "health". Within this section, "filecheckers" and "httpcheckers" list checks to run. Each check specifies a file or URI, a time interval for the check, and a threshold specifying how many times the check must fail to reach an unhealthy state. Document the new options in docs/configuration.md. Add unit testing for both types of checkers. Add an UnregisterAll function in the health package to support the unit tests, and an Unregister function for consistency with Register. Fix a string conversion problem in the health package's HTTP checker. Signed-off-by: Aaron Lehmann --- configuration/configuration.go | 33 +++++ docs/configuration.md | 144 ++++++++++++++++++++++ health/checks/checks.go | 6 +- health/health.go | 14 +++ registry/handlers/app.go | 34 +++++- registry/handlers/health_test.go | 200 +++++++++++++++++++++++++++++++ 6 files changed, 428 insertions(+), 3 deletions(-) create mode 100644 registry/handlers/health_test.go diff --git a/configuration/configuration.go b/configuration/configuration.go index c6554f45..60440e05 100644 --- a/configuration/configuration.go +++ b/configuration/configuration.go @@ -135,6 +135,8 @@ type Configuration struct { } `yaml:"pool,omitempty"` } `yaml:"redis,omitempty"` + Health Health `yaml:"health,omitempty"` + Proxy Proxy `yaml:"proxy,omitempty"` } @@ -179,6 +181,37 @@ type MailOptions struct { To []string `yaml:"to,omitempty"` } +// FileChecker is a type of entry in the checkers section for checking files +type FileChecker struct { + // Interval is the number of seconds in between checks + Interval time.Duration `yaml:"interval,omitempty"` + // File is the path to check + File string `yaml:"file,omitempty"` + // Threshold is the number of times a check must fail to trigger an + // unhealthy state + Threshold int `yaml:"threshold,omitempty"` +} + +// HTTPChecker is a type of entry in the checkers section for checking HTTP +// URIs +type HTTPChecker struct { + // Interval is the number of seconds in between checks + Interval time.Duration `yaml:"interval,omitempty"` + // URI is the HTTP URI to check + URI string `yaml:"uri,omitempty"` + // Threshold is the number of times a check must fail to trigger an + // unhealthy state + Threshold int `yaml:"threshold,omitempty"` +} + +// Health provides the configuration section for health checks. +type Health struct { + // FileChecker is a list of paths to check + FileCheckers []FileChecker `yaml:"file,omitempty"` + // HTTPChecker is a list of URIs to check + HTTPCheckers []HTTPChecker `yaml:"http,omitempty"` +} + // v0_1Configuration is a Version 0.1 Configuration struct // This is currently aliased to Configuration, as it is the current version type v0_1Configuration Configuration diff --git a/docs/configuration.md b/docs/configuration.md index a9017bb5..3e16ca5d 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -195,6 +195,15 @@ information about each option that appears later in this page. maxidle: 16 maxactive: 64 idletimeout: 300s + health: + file: + - file: /path/to/checked/file + interval: 10s + threshold: 3 + http: + - uri: http://server.to.check/must/return/200 + interval: 10s + threshold: 3 In some instances a configuration option is **optional** but it contains child options marked as **required**. This indicates that you can omit the parent with @@ -1588,6 +1597,141 @@ Configure the behavior of the Redis connection pool. +## health + + health: + file: + - file: /path/to/checked/file + interval: 10s + threshold: 3 + http: + - uri: http://server.to.check/must/return/200 + interval: 10s + threshold: 3 + +The health option is **optional**. It may contain lists of file checkers +and/or HTTP checkers. + +### file + +file is a list of paths to be periodically checked for the existence of a file. +If a file exists at the given path, the health check will fail. This can be +used as a way of bringing a registry out of rotation by creating a file. + + + + + + + + + + + + + + + + + + + + + + +
ParameterRequiredDescription
+ file + + yes + +The path to check for the existence of a file. +
+ interval + + no + + The length of time to wait between repetitions of the check. This field + takes a positive integer and an optional suffix indicating the unit of + time. Possible units are: +
    +
  • ns (nanoseconds)
  • +
  • us (microseconds)
  • +
  • ms (milliseconds)
  • +
  • s (seconds)
  • +
  • m (minutes)
  • +
  • h (hours)
  • +
+ If you omit the suffix, the system interprets the value as nanoseconds. + The default value is 10 seconds if this field is omitted. +
+ threshold + + no + + An integer specifying the number of times the check must fail before the + check triggers an unhealthy state. If this filed is not specified, a + single failure will trigger an unhealthy state. +
+ +### http + +http is a list of HTTP URIs to be periodically checked with HEAD requests. If +a HEAD request returns a status code other than 200, the health check will fail. + + + + + + + + + + + + + + + + + + + + + + +
ParameterRequiredDescription
+ uri + + yes + +The URI to check. +
+ interval + + no + + The length of time to wait between repetitions of the check. This field + takes a positive integer and an optional suffix indicating the unit of + time. Possible units are: +
    +
  • ns (nanoseconds)
  • +
  • us (microseconds)
  • +
  • ms (milliseconds)
  • +
  • s (seconds)
  • +
  • m (minutes)
  • +
  • h (hours)
  • +
+ If you omit the suffix, the system interprets the value as nanoseconds. + The default value is 10 seconds if this field is omitted. +
+ threshold + + no + + An integer specifying the number of times the check must fail before the + check triggers an unhealthy state. If this filed is not specified, a + single failure will trigger an unhealthy state. +
## Example: Development configuration diff --git a/health/checks/checks.go b/health/checks/checks.go index 9de14010..89d5f3db 100644 --- a/health/checks/checks.go +++ b/health/checks/checks.go @@ -2,9 +2,11 @@ package checks import ( "errors" - "github.com/docker/distribution/health" "net/http" "os" + "strconv" + + "github.com/docker/distribution/health" ) // FileChecker checks the existence of a file and returns and error @@ -28,7 +30,7 @@ func HTTPChecker(r string) health.Checker { return errors.New("error while checking: " + r) } if response.StatusCode != http.StatusOK { - return errors.New("downstream service returned unexpected status: " + string(response.StatusCode)) + return errors.New("downstream service returned unexpected status: " + strconv.Itoa(response.StatusCode)) } return nil }) diff --git a/health/health.go b/health/health.go index ba954919..dab2794d 100644 --- a/health/health.go +++ b/health/health.go @@ -170,6 +170,20 @@ func Register(name string, check Checker) { registeredChecks[name] = check } +// Unregister removes the named checker. +func Unregister(name string) { + mutex.Lock() + defer mutex.Unlock() + delete(registeredChecks, name) +} + +// UnregisterAll removes all registered checkers. +func UnregisterAll() { + mutex.Lock() + defer mutex.Unlock() + registeredChecks = make(map[string]Checker) +} + // RegisterFunc allows the convenience of registering a checker directly // from an arbitrary func() error func RegisterFunc(name string, check func() error) { diff --git a/registry/handlers/app.go b/registry/handlers/app.go index 7d1f1cf5..8b8543dd 100644 --- a/registry/handlers/app.go +++ b/registry/handlers/app.go @@ -15,6 +15,7 @@ import ( "github.com/docker/distribution/configuration" ctxu "github.com/docker/distribution/context" "github.com/docker/distribution/health" + "github.com/docker/distribution/health/checks" "github.com/docker/distribution/notifications" "github.com/docker/distribution/registry/api/errcode" "github.com/docker/distribution/registry/api/v2" @@ -37,6 +38,9 @@ import ( // was specified. const randomSecretSize = 32 +// defaultCheckInterval is the default time in between health checks +const defaultCheckInterval = 10 * time.Second + // App is a global registry application object. Shared resources can be placed // on this object that will be accessible from all requests. Any writable // fields should be protected. @@ -231,10 +235,38 @@ func NewApp(ctx context.Context, configuration configuration.Configuration) *App // implementing this properly will require a refactor. This method may panic // if called twice in the same process. func (app *App) RegisterHealthChecks() { - health.RegisterPeriodicThresholdFunc("storagedriver_"+app.Config.Storage.Type(), 10*time.Second, 3, func() error { + health.RegisterPeriodicThresholdFunc("storagedriver_"+app.Config.Storage.Type(), defaultCheckInterval, 3, func() error { _, err := app.driver.List(app, "/") // "/" should always exist return err // any error will be treated as failure }) + + for _, fileChecker := range app.Config.Health.FileCheckers { + interval := fileChecker.Interval + if interval == 0 { + interval = defaultCheckInterval + } + if fileChecker.Threshold != 0 { + ctxu.GetLogger(app).Infof("configuring file health check path=%s, interval=%d, threshold=%d", fileChecker.File, interval/time.Second, fileChecker.Threshold) + health.Register(fileChecker.File, health.PeriodicThresholdChecker(checks.FileChecker(fileChecker.File), interval, fileChecker.Threshold)) + } else { + ctxu.GetLogger(app).Infof("configuring file health check path=%s, interval=%d", fileChecker.File, interval/time.Second) + health.Register(fileChecker.File, health.PeriodicChecker(checks.FileChecker(fileChecker.File), interval)) + } + } + + for _, httpChecker := range app.Config.Health.HTTPCheckers { + interval := httpChecker.Interval + if interval == 0 { + interval = defaultCheckInterval + } + if httpChecker.Threshold != 0 { + ctxu.GetLogger(app).Infof("configuring HTTP health check uri=%s, interval=%d, threshold=%d", httpChecker.URI, interval/time.Second, httpChecker.Threshold) + health.Register(httpChecker.URI, health.PeriodicThresholdChecker(checks.HTTPChecker(httpChecker.URI), interval, httpChecker.Threshold)) + } else { + ctxu.GetLogger(app).Infof("configuring HTTP health check uri=%s, interval=%d", httpChecker.URI, interval/time.Second) + health.Register(httpChecker.URI, health.PeriodicChecker(checks.HTTPChecker(httpChecker.URI), interval)) + } + } } // register a handler with the application, by route name. The handler will be diff --git a/registry/handlers/health_test.go b/registry/handlers/health_test.go new file mode 100644 index 00000000..ce5860a8 --- /dev/null +++ b/registry/handlers/health_test.go @@ -0,0 +1,200 @@ +package handlers + +import ( + "encoding/json" + "io/ioutil" + "net/http" + "net/http/httptest" + "os" + "testing" + "time" + + "github.com/docker/distribution/configuration" + "github.com/docker/distribution/health" + "golang.org/x/net/context" +) + +func TestFileHealthCheck(t *testing.T) { + // In case other tests registered checks before this one + health.UnregisterAll() + + interval := time.Second + + tmpfile, err := ioutil.TempFile(os.TempDir(), "healthcheck") + if err != nil { + t.Fatalf("could not create temporary file: %v", err) + } + defer tmpfile.Close() + + config := configuration.Configuration{ + Storage: configuration.Storage{ + "inmemory": configuration.Parameters{}, + }, + Health: configuration.Health{ + FileCheckers: []configuration.FileChecker{ + { + Interval: interval, + File: tmpfile.Name(), + }, + }, + }, + } + + ctx := context.Background() + + app := NewApp(ctx, config) + app.RegisterHealthChecks() + + debugServer := httptest.NewServer(nil) + + // Wait for health check to happen + <-time.After(2 * interval) + + resp, err := http.Get(debugServer.URL + "/debug/health") + if err != nil { + t.Fatalf("error performing HTTP GET: %v", err) + } + body, err := ioutil.ReadAll(resp.Body) + if err != nil { + t.Fatalf("error reading HTTP body: %v", err) + } + resp.Body.Close() + var decoded map[string]string + err = json.Unmarshal(body, &decoded) + if err != nil { + t.Fatalf("error unmarshaling json: %v", err) + } + if len(decoded) != 1 { + t.Fatal("expected 1 item in returned json") + } + if decoded[tmpfile.Name()] != "file exists" { + t.Fatal(`did not get "file exists" result for health check`) + } + + os.Remove(tmpfile.Name()) + + <-time.After(2 * interval) + resp, err = http.Get(debugServer.URL + "/debug/health") + if err != nil { + t.Fatalf("error performing HTTP GET: %v", err) + } + body, err = ioutil.ReadAll(resp.Body) + if err != nil { + t.Fatalf("error reading HTTP body: %v", err) + } + resp.Body.Close() + var decoded2 map[string]string + err = json.Unmarshal(body, &decoded2) + if err != nil { + t.Fatalf("error unmarshaling json: %v", err) + } + if len(decoded2) != 0 { + t.Fatal("expected 0 items in returned json") + } +} + +func TestHTTPHealthCheck(t *testing.T) { + // In case other tests registered checks before this one + health.UnregisterAll() + + interval := time.Second + threshold := 3 + + stopFailing := make(chan struct{}) + + checkedServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Method != "HEAD" { + t.Fatalf("expected HEAD request, got %s", r.Method) + } + select { + case <-stopFailing: + w.WriteHeader(http.StatusOK) + default: + w.WriteHeader(http.StatusInternalServerError) + } + })) + + config := configuration.Configuration{ + Storage: configuration.Storage{ + "inmemory": configuration.Parameters{}, + }, + Health: configuration.Health{ + HTTPCheckers: []configuration.HTTPChecker{ + { + Interval: interval, + URI: checkedServer.URL, + Threshold: threshold, + }, + }, + }, + } + + ctx := context.Background() + + app := NewApp(ctx, config) + app.RegisterHealthChecks() + + debugServer := httptest.NewServer(nil) + + for i := 0; ; i++ { + <-time.After(interval) + + resp, err := http.Get(debugServer.URL + "/debug/health") + if err != nil { + t.Fatalf("error performing HTTP GET: %v", err) + } + body, err := ioutil.ReadAll(resp.Body) + if err != nil { + t.Fatalf("error reading HTTP body: %v", err) + } + resp.Body.Close() + var decoded map[string]string + err = json.Unmarshal(body, &decoded) + if err != nil { + t.Fatalf("error unmarshaling json: %v", err) + } + + if i < threshold-1 { + // definitely shouldn't have hit the threshold yet + if len(decoded) != 0 { + t.Fatal("expected 1 items in returned json") + } + continue + } + if i < threshold+1 { + // right on the threshold - don't expect a failure yet + continue + } + + if len(decoded) != 1 { + t.Fatal("expected 1 item in returned json") + } + if decoded[checkedServer.URL] != "downstream service returned unexpected status: 500" { + t.Fatal("did not get expected result for health check") + } + + break + } + + // Signal HTTP handler to start returning 200 + close(stopFailing) + + <-time.After(2 * interval) + resp, err := http.Get(debugServer.URL + "/debug/health") + if err != nil { + t.Fatalf("error performing HTTP GET: %v", err) + } + body, err := ioutil.ReadAll(resp.Body) + if err != nil { + t.Fatalf("error reading HTTP body: %v", err) + } + resp.Body.Close() + var decoded map[string]string + err = json.Unmarshal(body, &decoded) + if err != nil { + t.Fatalf("error unmarshaling json: %v", err) + } + if len(decoded) != 0 { + t.Fatal("expected 0 items in returned json") + } +} From 216df32510c3fc9417e359a1e1fb2615c8041c5c Mon Sep 17 00:00:00 2001 From: Aaron Lehmann Date: Wed, 19 Aug 2015 14:12:51 -0700 Subject: [PATCH 2/6] Add storagedriver section to health check configuration Add default storagedriver health check to example configuration files with parameters matching the previous hardcoded configuration. Signed-off-by: Aaron Lehmann --- cmd/registry/config-cache.yml | 5 +++ cmd/registry/config-dev.yml | 6 ++- cmd/registry/config-example.yml | 5 +++ configuration/configuration.go | 11 +++++ docs/configuration.md | 76 ++++++++++++++++++++++++++++++++- registry/handlers/app.go | 21 +++++++-- 6 files changed, 117 insertions(+), 7 deletions(-) diff --git a/cmd/registry/config-cache.yml b/cmd/registry/config-cache.yml index 6566130b..7a274ea5 100644 --- a/cmd/registry/config-cache.yml +++ b/cmd/registry/config-cache.yml @@ -48,3 +48,8 @@ proxy: remoteurl: https://registry-1.docker.io username: username password: password +health: + storagedriver: + enabled: true + interval: 10s + threshold: 3 diff --git a/cmd/registry/config-dev.yml b/cmd/registry/config-dev.yml index 729e7fd2..b6438be5 100644 --- a/cmd/registry/config-dev.yml +++ b/cmd/registry/config-dev.yml @@ -59,4 +59,8 @@ notifications: threshold: 10 backoff: 1s disabled: true - +health: + storagedriver: + enabled: true + interval: 10s + threshold: 3 diff --git a/cmd/registry/config-example.yml b/cmd/registry/config-example.yml index ef5994e5..b5700e19 100644 --- a/cmd/registry/config-example.yml +++ b/cmd/registry/config-example.yml @@ -11,3 +11,8 @@ http: addr: :5000 headers: X-Content-Type-Options: [nosniff] +health: + storagedriver: + enabled: true + interval: 10s + threshold: 3 diff --git a/configuration/configuration.go b/configuration/configuration.go index 60440e05..970a6ef4 100644 --- a/configuration/configuration.go +++ b/configuration/configuration.go @@ -210,6 +210,17 @@ type Health struct { FileCheckers []FileChecker `yaml:"file,omitempty"` // HTTPChecker is a list of URIs to check HTTPCheckers []HTTPChecker `yaml:"http,omitempty"` + // StorageDriver configures a health check on the configured storage + // driver + StorageDriver struct { + // Enabled turns on the health check for the storage driver + Enabled bool `yaml:"enabled,omitempty"` + // Interval is the number of seconds in between checks + Interval time.Duration `yaml:"interval,omitempty"` + // Threshold is the number of times a check must fail to trigger an + // unhealthy state + Threshold int `yaml:"threshold,omitempty"` + } `yaml:"storagedriver,omitempty"` } // v0_1Configuration is a Version 0.1 Configuration struct diff --git a/docs/configuration.md b/docs/configuration.md index 3e16ca5d..a0ddc6fd 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -196,6 +196,10 @@ information about each option that appears later in this page. maxactive: 64 idletimeout: 300s health: + storagedriver: + enabled: true + interval: 10s + threshold: 3 file: - file: /path/to/checked/file interval: 10s @@ -1600,6 +1604,10 @@ Configure the behavior of the Redis connection pool. ## health health: + storagedriver: + enabled: true + interval: 10s + threshold: 3 file: - file: /path/to/checked/file interval: 10s @@ -1609,8 +1617,72 @@ Configure the behavior of the Redis connection pool. interval: 10s threshold: 3 -The health option is **optional**. It may contain lists of file checkers -and/or HTTP checkers. +The health option is **optional**. It may contain preferences for a periodic +health check on the storage driver's backend storage, and optional periodic +checks on local files and/or HTTP URIs. The results of the health checks are +available at /debug/health on the debug HTTP server if the debug HTTP server is +enabled (see http section). + +### storagedriver + +storagedriver contains options for a health check on the configured storage +driver's backend storage. enabled must be set to true for this health check to +be active. + + + + + + + + + + + + + + + + + + + + + + +
ParameterRequiredDescription
+ enabled + + yes + +"true" to enable the storage driver health check or "false" to disable it. +
+ interval + + no + + The length of time to wait between repetitions of the check. This field + takes a positive integer and an optional suffix indicating the unit of + time. Possible units are: +
    +
  • ns (nanoseconds)
  • +
  • us (microseconds)
  • +
  • ms (milliseconds)
  • +
  • s (seconds)
  • +
  • m (minutes)
  • +
  • h (hours)
  • +
+ If you omit the suffix, the system interprets the value as nanoseconds. + The default value is 10 seconds if this field is omitted. +
+ threshold + + no + + An integer specifying the number of times the check must fail before the + check triggers an unhealthy state. If this filed is not specified, a + single failure will trigger an unhealthy state. +
### file diff --git a/registry/handlers/app.go b/registry/handlers/app.go index 8b8543dd..9cf6447a 100644 --- a/registry/handlers/app.go +++ b/registry/handlers/app.go @@ -235,10 +235,23 @@ func NewApp(ctx context.Context, configuration configuration.Configuration) *App // implementing this properly will require a refactor. This method may panic // if called twice in the same process. func (app *App) RegisterHealthChecks() { - health.RegisterPeriodicThresholdFunc("storagedriver_"+app.Config.Storage.Type(), defaultCheckInterval, 3, func() error { - _, err := app.driver.List(app, "/") // "/" should always exist - return err // any error will be treated as failure - }) + if app.Config.Health.StorageDriver.Enabled { + interval := app.Config.Health.StorageDriver.Interval + if interval == 0 { + interval = defaultCheckInterval + } + + storageDriverCheck := func() error { + _, err := app.driver.List(app, "/") // "/" should always exist + return err // any error will be treated as failure + } + + if app.Config.Health.StorageDriver.Threshold != 0 { + health.RegisterPeriodicThresholdFunc("storagedriver_"+app.Config.Storage.Type(), interval, app.Config.Health.StorageDriver.Threshold, storageDriverCheck) + } else { + health.RegisterPeriodicFunc("storagedriver_"+app.Config.Storage.Type(), interval, storageDriverCheck) + } + } for _, fileChecker := range app.Config.Health.FileCheckers { interval := fileChecker.Interval From 79959f578a557ebc89a485e96205dd98607a5f0a Mon Sep 17 00:00:00 2001 From: Aaron Lehmann Date: Wed, 19 Aug 2015 14:24:31 -0700 Subject: [PATCH 3/6] Switch tests to import "github.com/docker/distribution/context" Signed-off-by: Aaron Lehmann --- registry/auth/silly/access_test.go | 2 +- registry/auth/token/token_test.go | 2 +- registry/handlers/api_test.go | 2 +- registry/handlers/app_test.go | 2 +- registry/handlers/health_test.go | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/registry/auth/silly/access_test.go b/registry/auth/silly/access_test.go index 8b5ecb80..ff2155b1 100644 --- a/registry/auth/silly/access_test.go +++ b/registry/auth/silly/access_test.go @@ -5,8 +5,8 @@ import ( "net/http/httptest" "testing" + "github.com/docker/distribution/context" "github.com/docker/distribution/registry/auth" - "golang.org/x/net/context" ) func TestSillyAccessController(t *testing.T) { diff --git a/registry/auth/token/token_test.go b/registry/auth/token/token_test.go index 9d84d4ef..119aa738 100644 --- a/registry/auth/token/token_test.go +++ b/registry/auth/token/token_test.go @@ -15,9 +15,9 @@ import ( "testing" "time" + "github.com/docker/distribution/context" "github.com/docker/distribution/registry/auth" "github.com/docker/libtrust" - "golang.org/x/net/context" ) func makeRootKeys(numKeys int) ([]libtrust.PrivateKey, error) { diff --git a/registry/handlers/api_test.go b/registry/handlers/api_test.go index e351cb95..a975bd33 100644 --- a/registry/handlers/api_test.go +++ b/registry/handlers/api_test.go @@ -19,6 +19,7 @@ import ( "testing" "github.com/docker/distribution/configuration" + "github.com/docker/distribution/context" "github.com/docker/distribution/digest" "github.com/docker/distribution/manifest" "github.com/docker/distribution/registry/api/errcode" @@ -27,7 +28,6 @@ import ( "github.com/docker/distribution/testutil" "github.com/docker/libtrust" "github.com/gorilla/handlers" - "golang.org/x/net/context" ) var headerConfig = http.Header{ diff --git a/registry/handlers/app_test.go b/registry/handlers/app_test.go index fbb0b188..0038a97d 100644 --- a/registry/handlers/app_test.go +++ b/registry/handlers/app_test.go @@ -9,6 +9,7 @@ import ( "testing" "github.com/docker/distribution/configuration" + "github.com/docker/distribution/context" "github.com/docker/distribution/registry/api/errcode" "github.com/docker/distribution/registry/api/v2" "github.com/docker/distribution/registry/auth" @@ -16,7 +17,6 @@ import ( "github.com/docker/distribution/registry/storage" memorycache "github.com/docker/distribution/registry/storage/cache/memory" "github.com/docker/distribution/registry/storage/driver/inmemory" - "golang.org/x/net/context" ) // TestAppDispatcher builds an application with a test dispatcher and ensures diff --git a/registry/handlers/health_test.go b/registry/handlers/health_test.go index ce5860a8..38ea9b2f 100644 --- a/registry/handlers/health_test.go +++ b/registry/handlers/health_test.go @@ -10,8 +10,8 @@ import ( "time" "github.com/docker/distribution/configuration" + "github.com/docker/distribution/context" "github.com/docker/distribution/health" - "golang.org/x/net/context" ) func TestFileHealthCheck(t *testing.T) { From b9b9cafa8f2f65432b7b0b47bd44fcaab20a8a72 Mon Sep 17 00:00:00 2001 From: Aaron Lehmann Date: Wed, 19 Aug 2015 15:11:10 -0700 Subject: [PATCH 4/6] Expose a Registry type in health package, so unit tests can stay isolated from each other Update docs. Change health_test.go tests to create their own registries and register the checks there. The tests now call CheckStatus directly instead of polling the HTTP handler, which returns results from the default registry. Signed-off-by: Aaron Lehmann --- health/doc.go | 2 +- health/health.go | 101 +++++++++++++++++++++---------- health/health_test.go | 2 +- registry/handlers/app.go | 22 ++++--- registry/handlers/health_test.go | 100 ++++++------------------------ 5 files changed, 104 insertions(+), 123 deletions(-) diff --git a/health/doc.go b/health/doc.go index 8faa32f7..194b8a56 100644 --- a/health/doc.go +++ b/health/doc.go @@ -39,7 +39,7 @@ // // The recommended way of registering checks is using a periodic Check. // PeriodicChecks run on a certain schedule and asynchronously update the -// status of the check. This allows "CheckStatus()" to return without blocking +// status of the check. This allows CheckStatus to return without blocking // on an expensive check. // // A trivial example of a check that runs every 5 seconds and shuts down our diff --git a/health/health.go b/health/health.go index dab2794d..220282dc 100644 --- a/health/health.go +++ b/health/health.go @@ -11,10 +11,26 @@ import ( "github.com/docker/distribution/registry/api/errcode" ) -var ( - mutex sync.RWMutex - registeredChecks = make(map[string]Checker) -) +// A Registry is a collection of checks. Most applications will use the global +// registry defined in DefaultRegistry. However, unit tests may need to create +// separate registries to isolate themselves from other tests. +type Registry struct { + mu sync.RWMutex + registeredChecks map[string]Checker +} + +// NewRegistry creates a new registry. This isn't necessary for normal use of +// the package, but may be useful for unit tests so individual tests have their +// own set of checks. +func NewRegistry() *Registry { + return &Registry{ + registeredChecks: make(map[string]Checker), + } +} + +// DefaultRegistry is the default registry where checks are registered. It is +// the registry used by the HTTP handler. +var DefaultRegistry *Registry // Checker is the interface for a Health Checker type Checker interface { @@ -144,11 +160,11 @@ func PeriodicThresholdChecker(check Checker, period time.Duration, threshold int } // CheckStatus returns a map with all the current health check errors -func CheckStatus() map[string]string { // TODO(stevvooe) this needs a proper type - mutex.RLock() - defer mutex.RUnlock() +func (registry *Registry) CheckStatus() map[string]string { // TODO(stevvooe) this needs a proper type + registry.mu.RLock() + defer registry.mu.RUnlock() statusKeys := make(map[string]string) - for k, v := range registeredChecks { + for k, v := range registry.registeredChecks { err := v.Check() if err != nil { statusKeys[k] = err.Error() @@ -158,48 +174,66 @@ func CheckStatus() map[string]string { // TODO(stevvooe) this needs a proper typ return statusKeys } -// Register associates the checker with the provided name. We allow -// overwrites to a specific check status. -func Register(name string, check Checker) { - mutex.Lock() - defer mutex.Unlock() - _, ok := registeredChecks[name] +// CheckStatus returns a map with all the current health check errors from the +// default registry. +func CheckStatus() map[string]string { + return DefaultRegistry.CheckStatus() +} + +// Register associates the checker with the provided name. +func (registry *Registry) Register(name string, check Checker) { + if registry == nil { + registry = DefaultRegistry + } + registry.mu.Lock() + defer registry.mu.Unlock() + _, ok := registry.registeredChecks[name] if ok { panic("Check already exists: " + name) } - registeredChecks[name] = check + registry.registeredChecks[name] = check } -// Unregister removes the named checker. -func Unregister(name string) { - mutex.Lock() - defer mutex.Unlock() - delete(registeredChecks, name) +// Register associates the checker with the provided name in the default +// registry. +func Register(name string, check Checker) { + DefaultRegistry.Register(name, check) } -// UnregisterAll removes all registered checkers. -func UnregisterAll() { - mutex.Lock() - defer mutex.Unlock() - registeredChecks = make(map[string]Checker) +// RegisterFunc allows the convenience of registering a checker directly from +// an arbitrary func() error. +func (registry *Registry) RegisterFunc(name string, check func() error) { + registry.Register(name, CheckFunc(check)) } -// RegisterFunc allows the convenience of registering a checker directly -// from an arbitrary func() error +// RegisterFunc allows the convenience of registering a checker in the default +// registry directly from an arbitrary func() error. func RegisterFunc(name string, check func() error) { - Register(name, CheckFunc(check)) + DefaultRegistry.RegisterFunc(name, check) } // RegisterPeriodicFunc allows the convenience of registering a PeriodicChecker -// from an arbitrary func() error +// from an arbitrary func() error. +func (registry *Registry) RegisterPeriodicFunc(name string, period time.Duration, check CheckFunc) { + registry.Register(name, PeriodicChecker(CheckFunc(check), period)) +} + +// RegisterPeriodicFunc allows the convenience of registering a PeriodicChecker +// in the default registry from an arbitrary func() error. func RegisterPeriodicFunc(name string, period time.Duration, check CheckFunc) { - Register(name, PeriodicChecker(CheckFunc(check), period)) + DefaultRegistry.RegisterPeriodicFunc(name, period, check) } // RegisterPeriodicThresholdFunc allows the convenience of registering a -// PeriodicChecker from an arbitrary func() error +// PeriodicChecker from an arbitrary func() error. +func (registry *Registry) RegisterPeriodicThresholdFunc(name string, period time.Duration, threshold int, check CheckFunc) { + registry.Register(name, PeriodicThresholdChecker(CheckFunc(check), period, threshold)) +} + +// RegisterPeriodicThresholdFunc allows the convenience of registering a +// PeriodicChecker in the default registry from an arbitrary func() error. func RegisterPeriodicThresholdFunc(name string, period time.Duration, threshold int, check CheckFunc) { - Register(name, PeriodicThresholdChecker(CheckFunc(check), period, threshold)) + DefaultRegistry.RegisterPeriodicThresholdFunc(name, period, threshold, check) } // StatusHandler returns a JSON blob with all the currently registered Health Checks @@ -265,7 +299,8 @@ func statusResponse(w http.ResponseWriter, r *http.Request, status int, checks m } } -// Registers global /debug/health api endpoint +// Registers global /debug/health api endpoint, creates default registry func init() { + DefaultRegistry = NewRegistry() http.HandleFunc("/debug/health", StatusHandler) } diff --git a/health/health_test.go b/health/health_test.go index 3228cb80..766fe159 100644 --- a/health/health_test.go +++ b/health/health_test.go @@ -51,7 +51,7 @@ func TestReturns503IfThereAreErrorChecks(t *testing.T) { // the web application when things aren't so healthy. func TestHealthHandler(t *testing.T) { // clear out existing checks. - registeredChecks = make(map[string]Checker) + DefaultRegistry = NewRegistry() // protect an http server handler := http.Handler(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { diff --git a/registry/handlers/app.go b/registry/handlers/app.go index 9cf6447a..91f4e1a3 100644 --- a/registry/handlers/app.go +++ b/registry/handlers/app.go @@ -234,7 +234,15 @@ func NewApp(ctx context.Context, configuration configuration.Configuration) *App // process. Because the configuration and app are tightly coupled, // implementing this properly will require a refactor. This method may panic // if called twice in the same process. -func (app *App) RegisterHealthChecks() { +func (app *App) RegisterHealthChecks(healthRegistries ...*health.Registry) { + if len(healthRegistries) > 1 { + panic("RegisterHealthChecks called with more than one registry") + } + healthRegistry := health.DefaultRegistry + if len(healthRegistries) == 1 { + healthRegistry = healthRegistries[0] + } + if app.Config.Health.StorageDriver.Enabled { interval := app.Config.Health.StorageDriver.Interval if interval == 0 { @@ -247,9 +255,9 @@ func (app *App) RegisterHealthChecks() { } if app.Config.Health.StorageDriver.Threshold != 0 { - health.RegisterPeriodicThresholdFunc("storagedriver_"+app.Config.Storage.Type(), interval, app.Config.Health.StorageDriver.Threshold, storageDriverCheck) + healthRegistry.RegisterPeriodicThresholdFunc("storagedriver_"+app.Config.Storage.Type(), interval, app.Config.Health.StorageDriver.Threshold, storageDriverCheck) } else { - health.RegisterPeriodicFunc("storagedriver_"+app.Config.Storage.Type(), interval, storageDriverCheck) + healthRegistry.RegisterPeriodicFunc("storagedriver_"+app.Config.Storage.Type(), interval, storageDriverCheck) } } @@ -260,10 +268,10 @@ func (app *App) RegisterHealthChecks() { } if fileChecker.Threshold != 0 { ctxu.GetLogger(app).Infof("configuring file health check path=%s, interval=%d, threshold=%d", fileChecker.File, interval/time.Second, fileChecker.Threshold) - health.Register(fileChecker.File, health.PeriodicThresholdChecker(checks.FileChecker(fileChecker.File), interval, fileChecker.Threshold)) + healthRegistry.Register(fileChecker.File, health.PeriodicThresholdChecker(checks.FileChecker(fileChecker.File), interval, fileChecker.Threshold)) } else { ctxu.GetLogger(app).Infof("configuring file health check path=%s, interval=%d", fileChecker.File, interval/time.Second) - health.Register(fileChecker.File, health.PeriodicChecker(checks.FileChecker(fileChecker.File), interval)) + healthRegistry.Register(fileChecker.File, health.PeriodicChecker(checks.FileChecker(fileChecker.File), interval)) } } @@ -274,10 +282,10 @@ func (app *App) RegisterHealthChecks() { } if httpChecker.Threshold != 0 { ctxu.GetLogger(app).Infof("configuring HTTP health check uri=%s, interval=%d, threshold=%d", httpChecker.URI, interval/time.Second, httpChecker.Threshold) - health.Register(httpChecker.URI, health.PeriodicThresholdChecker(checks.HTTPChecker(httpChecker.URI), interval, httpChecker.Threshold)) + healthRegistry.Register(httpChecker.URI, health.PeriodicThresholdChecker(checks.HTTPChecker(httpChecker.URI), interval, httpChecker.Threshold)) } else { ctxu.GetLogger(app).Infof("configuring HTTP health check uri=%s, interval=%d", httpChecker.URI, interval/time.Second) - health.Register(httpChecker.URI, health.PeriodicChecker(checks.HTTPChecker(httpChecker.URI), interval)) + healthRegistry.Register(httpChecker.URI, health.PeriodicChecker(checks.HTTPChecker(httpChecker.URI), interval)) } } } diff --git a/registry/handlers/health_test.go b/registry/handlers/health_test.go index 38ea9b2f..de2b71cc 100644 --- a/registry/handlers/health_test.go +++ b/registry/handlers/health_test.go @@ -1,7 +1,6 @@ package handlers import ( - "encoding/json" "io/ioutil" "net/http" "net/http/httptest" @@ -15,9 +14,6 @@ import ( ) func TestFileHealthCheck(t *testing.T) { - // In case other tests registered checks before this one - health.UnregisterAll() - interval := time.Second tmpfile, err := ioutil.TempFile(os.TempDir(), "healthcheck") @@ -43,60 +39,29 @@ func TestFileHealthCheck(t *testing.T) { ctx := context.Background() app := NewApp(ctx, config) - app.RegisterHealthChecks() - - debugServer := httptest.NewServer(nil) + healthRegistry := health.NewRegistry() + app.RegisterHealthChecks(healthRegistry) // Wait for health check to happen <-time.After(2 * interval) - resp, err := http.Get(debugServer.URL + "/debug/health") - if err != nil { - t.Fatalf("error performing HTTP GET: %v", err) + status := healthRegistry.CheckStatus() + if len(status) != 1 { + t.Fatal("expected 1 item in health check results") } - body, err := ioutil.ReadAll(resp.Body) - if err != nil { - t.Fatalf("error reading HTTP body: %v", err) - } - resp.Body.Close() - var decoded map[string]string - err = json.Unmarshal(body, &decoded) - if err != nil { - t.Fatalf("error unmarshaling json: %v", err) - } - if len(decoded) != 1 { - t.Fatal("expected 1 item in returned json") - } - if decoded[tmpfile.Name()] != "file exists" { + if status[tmpfile.Name()] != "file exists" { t.Fatal(`did not get "file exists" result for health check`) } os.Remove(tmpfile.Name()) <-time.After(2 * interval) - resp, err = http.Get(debugServer.URL + "/debug/health") - if err != nil { - t.Fatalf("error performing HTTP GET: %v", err) - } - body, err = ioutil.ReadAll(resp.Body) - if err != nil { - t.Fatalf("error reading HTTP body: %v", err) - } - resp.Body.Close() - var decoded2 map[string]string - err = json.Unmarshal(body, &decoded2) - if err != nil { - t.Fatalf("error unmarshaling json: %v", err) - } - if len(decoded2) != 0 { - t.Fatal("expected 0 items in returned json") + if len(healthRegistry.CheckStatus()) != 0 { + t.Fatal("expected 0 items in health check results") } } func TestHTTPHealthCheck(t *testing.T) { - // In case other tests registered checks before this one - health.UnregisterAll() - interval := time.Second threshold := 3 @@ -132,32 +97,18 @@ func TestHTTPHealthCheck(t *testing.T) { ctx := context.Background() app := NewApp(ctx, config) - app.RegisterHealthChecks() - - debugServer := httptest.NewServer(nil) + healthRegistry := health.NewRegistry() + app.RegisterHealthChecks(healthRegistry) for i := 0; ; i++ { <-time.After(interval) - resp, err := http.Get(debugServer.URL + "/debug/health") - if err != nil { - t.Fatalf("error performing HTTP GET: %v", err) - } - body, err := ioutil.ReadAll(resp.Body) - if err != nil { - t.Fatalf("error reading HTTP body: %v", err) - } - resp.Body.Close() - var decoded map[string]string - err = json.Unmarshal(body, &decoded) - if err != nil { - t.Fatalf("error unmarshaling json: %v", err) - } + status := healthRegistry.CheckStatus() if i < threshold-1 { // definitely shouldn't have hit the threshold yet - if len(decoded) != 0 { - t.Fatal("expected 1 items in returned json") + if len(status) != 0 { + t.Fatal("expected 1 item in health check results") } continue } @@ -166,10 +117,10 @@ func TestHTTPHealthCheck(t *testing.T) { continue } - if len(decoded) != 1 { - t.Fatal("expected 1 item in returned json") + if len(status) != 1 { + t.Fatal("expected 1 item in health check results") } - if decoded[checkedServer.URL] != "downstream service returned unexpected status: 500" { + if status[checkedServer.URL] != "downstream service returned unexpected status: 500" { t.Fatal("did not get expected result for health check") } @@ -180,21 +131,8 @@ func TestHTTPHealthCheck(t *testing.T) { close(stopFailing) <-time.After(2 * interval) - resp, err := http.Get(debugServer.URL + "/debug/health") - if err != nil { - t.Fatalf("error performing HTTP GET: %v", err) - } - body, err := ioutil.ReadAll(resp.Body) - if err != nil { - t.Fatalf("error reading HTTP body: %v", err) - } - resp.Body.Close() - var decoded map[string]string - err = json.Unmarshal(body, &decoded) - if err != nil { - t.Fatalf("error unmarshaling json: %v", err) - } - if len(decoded) != 0 { - t.Fatal("expected 0 items in returned json") + + if len(healthRegistry.CheckStatus()) != 0 { + t.Fatal("expected 0 items in health check results") } } From e8f088fea63797e190909a8bd0ce05ad95798a4e Mon Sep 17 00:00:00 2001 From: Aaron Lehmann Date: Wed, 19 Aug 2015 17:57:18 -0700 Subject: [PATCH 5/6] Add a TCP health checker Also, add timeout and status code parameters to the HTTP checker, and remove the threshold parameter for the file checker. Signed-off-by: Aaron Lehmann --- configuration/configuration.go | 34 +++++-- docs/configuration.md | 154 +++++++++++++++++++++++++++---- health/checks/checks.go | 32 +++++-- health/checks/checks_test.go | 4 +- registry/handlers/app.go | 38 ++++++-- registry/handlers/health_test.go | 63 +++++++++++++ 6 files changed, 279 insertions(+), 46 deletions(-) diff --git a/configuration/configuration.go b/configuration/configuration.go index 970a6ef4..b9685741 100644 --- a/configuration/configuration.go +++ b/configuration/configuration.go @@ -181,9 +181,9 @@ type MailOptions struct { To []string `yaml:"to,omitempty"` } -// FileChecker is a type of entry in the checkers section for checking files +// FileChecker is a type of entry in the health section for checking files. type FileChecker struct { - // Interval is the number of seconds in between checks + // Interval is the duration in between checks Interval time.Duration `yaml:"interval,omitempty"` // File is the path to check File string `yaml:"file,omitempty"` @@ -192,10 +192,13 @@ type FileChecker struct { Threshold int `yaml:"threshold,omitempty"` } -// HTTPChecker is a type of entry in the checkers section for checking HTTP -// URIs +// HTTPChecker is a type of entry in the health section for checking HTTP URIs. type HTTPChecker struct { - // Interval is the number of seconds in between checks + // Timeout is the duration to wait before timing out the HTTP request + Timeout time.Duration `yaml:"interval,omitempty"` + // StatusCode is the expected status code + StatusCode int + // Interval is the duration in between checks Interval time.Duration `yaml:"interval,omitempty"` // URI is the HTTP URI to check URI string `yaml:"uri,omitempty"` @@ -204,18 +207,33 @@ type HTTPChecker struct { Threshold int `yaml:"threshold,omitempty"` } +// TCPChecker is a type of entry in the health section for checking TCP servers. +type TCPChecker struct { + // Timeout is the duration to wait before timing out the TCP connection + Timeout time.Duration `yaml:"interval,omitempty"` + // Interval is the duration in between checks + Interval time.Duration `yaml:"interval,omitempty"` + // Addr is the TCP address to check + Addr string `yaml:"addr,omitempty"` + // Threshold is the number of times a check must fail to trigger an + // unhealthy state + Threshold int `yaml:"threshold,omitempty"` +} + // Health provides the configuration section for health checks. type Health struct { - // FileChecker is a list of paths to check + // FileCheckers is a list of paths to check FileCheckers []FileChecker `yaml:"file,omitempty"` - // HTTPChecker is a list of URIs to check + // HTTPCheckers is a list of URIs to check HTTPCheckers []HTTPChecker `yaml:"http,omitempty"` + // TCPCheckers is a list of URIs to check + TCPCheckers []TCPChecker `yaml:"tcp,omitempty"` // StorageDriver configures a health check on the configured storage // driver StorageDriver struct { // Enabled turns on the health check for the storage driver Enabled bool `yaml:"enabled,omitempty"` - // Interval is the number of seconds in between checks + // Interval is the duration in between checks Interval time.Duration `yaml:"interval,omitempty"` // Threshold is the number of times a check must fail to trigger an // unhealthy state diff --git a/docs/configuration.md b/docs/configuration.md index a0ddc6fd..3e4bacc8 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -203,9 +203,15 @@ information about each option that appears later in this page. file: - file: /path/to/checked/file interval: 10s - threshold: 3 http: - uri: http://server.to.check/must/return/200 + statuscode: 200 + timeout: 3s + interval: 10s + threshold: 3 + tcp: + - addr: redis-server.domain.com:6379 + timeout: 3s interval: 10s threshold: 3 @@ -1611,17 +1617,23 @@ Configure the behavior of the Redis connection pool. file: - file: /path/to/checked/file interval: 10s - threshold: 3 http: - uri: http://server.to.check/must/return/200 + statuscode: 200 + timeout: 3s + interval: 10s + threshold: 3 + tcp: + - addr: redis-server.domain.com:6379 + timeout: 3s interval: 10s threshold: 3 The health option is **optional**. It may contain preferences for a periodic health check on the storage driver's backend storage, and optional periodic -checks on local files and/or HTTP URIs. The results of the health checks are -available at /debug/health on the debug HTTP server if the debug HTTP server is -enabled (see http section). +checks on local files, HTTP URIs, and/or TCP servers. The results of the health +checks are available at /debug/health on the debug HTTP server if the debug +HTTP server is enabled (see http section). ### storagedriver @@ -1730,25 +1742,13 @@ The path to check for the existence of a file. The default value is 10 seconds if this field is omitted. - - - threshold - - - no - - - An integer specifying the number of times the check must fail before the - check triggers an unhealthy state. If this filed is not specified, a - single failure will trigger an unhealthy state. - - ### http http is a list of HTTP URIs to be periodically checked with HEAD requests. If -a HEAD request returns a status code other than 200, the health check will fail. +a HEAD request doesn't complete or returns an unexpected status code, the +health check will fail. @@ -1767,6 +1767,122 @@ a HEAD request returns a status code other than 200, the health check will fail. The URI to check. + + + + + + + + + + + + + + + + + + + + +
+ statuscode + + no + +Expected status code from the HTTP URI. Defaults to 200. +
+ timeout + + no + + The length of time to wait before timing out the HTTP request. This field + takes a positive integer and an optional suffix indicating the unit of + time. Possible units are: +
    +
  • ns (nanoseconds)
  • +
  • us (microseconds)
  • +
  • ms (milliseconds)
  • +
  • s (seconds)
  • +
  • m (minutes)
  • +
  • h (hours)
  • +
+ If you omit the suffix, the system interprets the value as nanoseconds. +
+ interval + + no + + The length of time to wait between repetitions of the check. This field + takes a positive integer and an optional suffix indicating the unit of + time. Possible units are: +
    +
  • ns (nanoseconds)
  • +
  • us (microseconds)
  • +
  • ms (milliseconds)
  • +
  • s (seconds)
  • +
  • m (minutes)
  • +
  • h (hours)
  • +
+ If you omit the suffix, the system interprets the value as nanoseconds. + The default value is 10 seconds if this field is omitted. +
+ threshold + + no + + An integer specifying the number of times the check must fail before the + check triggers an unhealthy state. If this filed is not specified, a + single failure will trigger an unhealthy state. +
+ +### tcp + +tcp is a list of TCP addresses to be periodically checked with connection +attempts. The addresses must include port numbers. If a connection attempt +fails, the health check will fail. + + + + + + + + + + + + + + + + + @@ -1619,6 +1623,8 @@ Configure the behavior of the Redis connection pool. interval: 10s http: - uri: http://server.to.check/must/return/200 + headers: + Authorization: [Basic QWxhZGRpbjpvcGVuIHNlc2FtZQ==] statuscode: 200 timeout: 3s interval: 10s @@ -1766,6 +1772,19 @@ health check will fail. + + + + +
ParameterRequiredDescription
+ addr + + yes + +The TCP address to connect to, including a port number. +
+ timeout + + no + + The length of time to wait before timing out the TCP connection. This + field takes a positive integer and an optional suffix indicating the unit + of time. Possible units are: +
    +
  • ns (nanoseconds)
  • +
  • us (microseconds)
  • +
  • ms (milliseconds)
  • +
  • s (seconds)
  • +
  • m (minutes)
  • +
  • h (hours)
  • +
+ If you omit the suffix, the system interprets the value as nanoseconds. +
interval diff --git a/health/checks/checks.go b/health/checks/checks.go index 89d5f3db..86e914b1 100644 --- a/health/checks/checks.go +++ b/health/checks/checks.go @@ -2,15 +2,17 @@ package checks import ( "errors" + "net" "net/http" "os" "strconv" + "time" "github.com/docker/distribution/health" ) -// FileChecker checks the existence of a file and returns and error -// if the file exists, taking the application out of rotation +// FileChecker checks the existence of a file and returns an error +// if the file exists. func FileChecker(f string) health.Checker { return health.CheckFunc(func() error { if _, err := os.Stat(f); err == nil { @@ -20,18 +22,32 @@ func FileChecker(f string) health.Checker { }) } -// HTTPChecker does a HEAD request and verifies if the HTTP status -// code return is a 200, taking the application out of rotation if -// otherwise -func HTTPChecker(r string) health.Checker { +// HTTPChecker does a HEAD request and verifies that the HTTP status code +// returned matches statusCode. +func HTTPChecker(r string, statusCode int, timeout time.Duration) health.Checker { return health.CheckFunc(func() error { - response, err := http.Head(r) + client := http.Client{ + Timeout: timeout, + } + response, err := client.Head(r) if err != nil { return errors.New("error while checking: " + r) } - if response.StatusCode != http.StatusOK { + if response.StatusCode != statusCode { return errors.New("downstream service returned unexpected status: " + strconv.Itoa(response.StatusCode)) } return nil }) } + +// TCPChecker attempts to open a TCP connection. +func TCPChecker(addr string, timeout time.Duration) health.Checker { + return health.CheckFunc(func() error { + conn, err := net.DialTimeout("tcp", addr, timeout) + if err != nil { + return errors.New("connection to " + addr + " failed") + } + conn.Close() + return nil + }) +} diff --git a/health/checks/checks_test.go b/health/checks/checks_test.go index 4e49d118..8ba24d33 100644 --- a/health/checks/checks_test.go +++ b/health/checks/checks_test.go @@ -15,11 +15,11 @@ func TestFileChecker(t *testing.T) { } func TestHTTPChecker(t *testing.T) { - if err := HTTPChecker("https://www.google.cybertron").Check(); err == nil { + if err := HTTPChecker("https://www.google.cybertron", 200, 0).Check(); err == nil { t.Errorf("Google on Cybertron was expected as not exists") } - if err := HTTPChecker("https://www.google.pt").Check(); err != nil { + if err := HTTPChecker("https://www.google.pt", 200, 0).Check(); err != nil { t.Errorf("Google at Portugal was expected as exists, error:%v", err) } } diff --git a/registry/handlers/app.go b/registry/handlers/app.go index 91f4e1a3..24f43f37 100644 --- a/registry/handlers/app.go +++ b/registry/handlers/app.go @@ -266,13 +266,8 @@ func (app *App) RegisterHealthChecks(healthRegistries ...*health.Registry) { if interval == 0 { interval = defaultCheckInterval } - if fileChecker.Threshold != 0 { - ctxu.GetLogger(app).Infof("configuring file health check path=%s, interval=%d, threshold=%d", fileChecker.File, interval/time.Second, fileChecker.Threshold) - healthRegistry.Register(fileChecker.File, health.PeriodicThresholdChecker(checks.FileChecker(fileChecker.File), interval, fileChecker.Threshold)) - } else { - ctxu.GetLogger(app).Infof("configuring file health check path=%s, interval=%d", fileChecker.File, interval/time.Second) - healthRegistry.Register(fileChecker.File, health.PeriodicChecker(checks.FileChecker(fileChecker.File), interval)) - } + ctxu.GetLogger(app).Infof("configuring file health check path=%s, interval=%d", fileChecker.File, interval/time.Second) + healthRegistry.Register(fileChecker.File, health.PeriodicChecker(checks.FileChecker(fileChecker.File), interval)) } for _, httpChecker := range app.Config.Health.HTTPCheckers { @@ -280,12 +275,37 @@ func (app *App) RegisterHealthChecks(healthRegistries ...*health.Registry) { if interval == 0 { interval = defaultCheckInterval } + + statusCode := httpChecker.StatusCode + if statusCode == 0 { + statusCode = 200 + } + + checker := checks.HTTPChecker(httpChecker.URI, statusCode, httpChecker.Timeout) + if httpChecker.Threshold != 0 { ctxu.GetLogger(app).Infof("configuring HTTP health check uri=%s, interval=%d, threshold=%d", httpChecker.URI, interval/time.Second, httpChecker.Threshold) - healthRegistry.Register(httpChecker.URI, health.PeriodicThresholdChecker(checks.HTTPChecker(httpChecker.URI), interval, httpChecker.Threshold)) + healthRegistry.Register(httpChecker.URI, health.PeriodicThresholdChecker(checker, interval, httpChecker.Threshold)) } else { ctxu.GetLogger(app).Infof("configuring HTTP health check uri=%s, interval=%d", httpChecker.URI, interval/time.Second) - healthRegistry.Register(httpChecker.URI, health.PeriodicChecker(checks.HTTPChecker(httpChecker.URI), interval)) + healthRegistry.Register(httpChecker.URI, health.PeriodicChecker(checker, interval)) + } + } + + for _, tcpChecker := range app.Config.Health.TCPCheckers { + interval := tcpChecker.Interval + if interval == 0 { + interval = defaultCheckInterval + } + + checker := checks.TCPChecker(tcpChecker.Addr, tcpChecker.Timeout) + + if tcpChecker.Threshold != 0 { + ctxu.GetLogger(app).Infof("configuring TCP health check addr=%s, interval=%d, threshold=%d", tcpChecker.Addr, interval/time.Second, tcpChecker.Threshold) + healthRegistry.Register(tcpChecker.Addr, health.PeriodicThresholdChecker(checker, interval, tcpChecker.Threshold)) + } else { + ctxu.GetLogger(app).Infof("configuring TCP health check addr=%s, interval=%d", tcpChecker.Addr, interval/time.Second) + healthRegistry.Register(tcpChecker.Addr, health.PeriodicChecker(checker, interval)) } } } diff --git a/registry/handlers/health_test.go b/registry/handlers/health_test.go index de2b71cc..bb460b47 100644 --- a/registry/handlers/health_test.go +++ b/registry/handlers/health_test.go @@ -2,6 +2,7 @@ package handlers import ( "io/ioutil" + "net" "net/http" "net/http/httptest" "os" @@ -61,6 +62,68 @@ func TestFileHealthCheck(t *testing.T) { } } +func TestTCPHealthCheck(t *testing.T) { + interval := time.Second + + ln, err := net.Listen("tcp", "127.0.0.1:0") + if err != nil { + t.Fatalf("could not create listener: %v", err) + } + addrStr := ln.Addr().String() + + // Start accepting + go func() { + for { + conn, err := ln.Accept() + if err != nil { + // listener was closed + return + } + defer conn.Close() + } + }() + + config := configuration.Configuration{ + Storage: configuration.Storage{ + "inmemory": configuration.Parameters{}, + }, + Health: configuration.Health{ + TCPCheckers: []configuration.TCPChecker{ + { + Interval: interval, + Addr: addrStr, + Timeout: 500 * time.Millisecond, + }, + }, + }, + } + + ctx := context.Background() + + app := NewApp(ctx, config) + healthRegistry := health.NewRegistry() + app.RegisterHealthChecks(healthRegistry) + + // Wait for health check to happen + <-time.After(2 * interval) + + if len(healthRegistry.CheckStatus()) != 0 { + t.Fatal("expected 0 items in health check results") + } + + ln.Close() + <-time.After(2 * interval) + + // Health check should now fail + status := healthRegistry.CheckStatus() + if len(status) != 1 { + t.Fatal("expected 1 item in health check results") + } + if status[addrStr] != "connection to "+addrStr+" failed" { + t.Fatal(`did not get "connection failed" result for health check`) + } +} + func TestHTTPHealthCheck(t *testing.T) { interval := time.Second threshold := 3 From b67aab2f604a2c1531602ce694a129f74c0dd843 Mon Sep 17 00:00:00 2001 From: Aaron Lehmann Date: Wed, 19 Aug 2015 18:23:58 -0700 Subject: [PATCH 6/6] Add headers parameter for HTTP checker Signed-off-by: Aaron Lehmann --- configuration/configuration.go | 2 ++ docs/configuration.md | 21 ++++++++++++++++++++- health/checks/checks.go | 13 +++++++++++-- health/checks/checks_test.go | 4 ++-- registry/handlers/app.go | 2 +- 5 files changed, 36 insertions(+), 6 deletions(-) diff --git a/configuration/configuration.go b/configuration/configuration.go index b9685741..1b3519d5 100644 --- a/configuration/configuration.go +++ b/configuration/configuration.go @@ -202,6 +202,8 @@ type HTTPChecker struct { Interval time.Duration `yaml:"interval,omitempty"` // URI is the HTTP URI to check URI string `yaml:"uri,omitempty"` + // Headers lists static headers that should be added to all requests + Headers http.Header `yaml:"headers"` // Threshold is the number of times a check must fail to trigger an // unhealthy state Threshold int `yaml:"threshold,omitempty"` diff --git a/docs/configuration.md b/docs/configuration.md index 3e4bacc8..3563ef51 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -205,6 +205,8 @@ information about each option that appears later in this page. interval: 10s http: - uri: http://server.to.check/must/return/200 + headers: + Authorization: [Basic QWxhZGRpbjpvcGVuIHNlc2FtZQ==] statuscode: 200 timeout: 3s interval: 10s @@ -1400,7 +1402,9 @@ The URL to which events should be published. yes - Static headers to add to each request. + Static headers to add to each request. Each header's name should be a key + underneath headers, and each value is a list of payloads for that + header name. Note that values must always be lists.
The URI to check.
+ headers + + no + + Static headers to add to each request. Each header's name should be a key + underneath headers, and each value is a list of payloads for that + header name. Note that values must always be lists. +
diff --git a/health/checks/checks.go b/health/checks/checks.go index 86e914b1..e3c3b08d 100644 --- a/health/checks/checks.go +++ b/health/checks/checks.go @@ -24,12 +24,21 @@ func FileChecker(f string) health.Checker { // HTTPChecker does a HEAD request and verifies that the HTTP status code // returned matches statusCode. -func HTTPChecker(r string, statusCode int, timeout time.Duration) health.Checker { +func HTTPChecker(r string, statusCode int, timeout time.Duration, headers http.Header) health.Checker { return health.CheckFunc(func() error { client := http.Client{ Timeout: timeout, } - response, err := client.Head(r) + req, err := http.NewRequest("HEAD", r, nil) + if err != nil { + return errors.New("error creating request: " + r) + } + for headerName, headerValues := range headers { + for _, headerValue := range headerValues { + req.Header.Add(headerName, headerValue) + } + } + response, err := client.Do(req) if err != nil { return errors.New("error while checking: " + r) } diff --git a/health/checks/checks_test.go b/health/checks/checks_test.go index 8ba24d33..6b6dd14f 100644 --- a/health/checks/checks_test.go +++ b/health/checks/checks_test.go @@ -15,11 +15,11 @@ func TestFileChecker(t *testing.T) { } func TestHTTPChecker(t *testing.T) { - if err := HTTPChecker("https://www.google.cybertron", 200, 0).Check(); err == nil { + if err := HTTPChecker("https://www.google.cybertron", 200, 0, nil).Check(); err == nil { t.Errorf("Google on Cybertron was expected as not exists") } - if err := HTTPChecker("https://www.google.pt", 200, 0).Check(); err != nil { + if err := HTTPChecker("https://www.google.pt", 200, 0, nil).Check(); err != nil { t.Errorf("Google at Portugal was expected as exists, error:%v", err) } } diff --git a/registry/handlers/app.go b/registry/handlers/app.go index 24f43f37..b1e46b02 100644 --- a/registry/handlers/app.go +++ b/registry/handlers/app.go @@ -281,7 +281,7 @@ func (app *App) RegisterHealthChecks(healthRegistries ...*health.Registry) { statusCode = 200 } - checker := checks.HTTPChecker(httpChecker.URI, statusCode, httpChecker.Timeout) + checker := checks.HTTPChecker(httpChecker.URI, statusCode, httpChecker.Timeout, httpChecker.Headers) if httpChecker.Threshold != 0 { ctxu.GetLogger(app).Infof("configuring HTTP health check uri=%s, interval=%d, threshold=%d", httpChecker.URI, interval/time.Second, httpChecker.Threshold)