diff --git a/cmd/registry/config-cache.yml b/cmd/registry/config-cache.yml index 6566130b..7a274ea5 100644 --- a/cmd/registry/config-cache.yml +++ b/cmd/registry/config-cache.yml @@ -48,3 +48,8 @@ proxy: remoteurl: https://registry-1.docker.io username: username password: password +health: + storagedriver: + enabled: true + interval: 10s + threshold: 3 diff --git a/cmd/registry/config-dev.yml b/cmd/registry/config-dev.yml index 729e7fd2..b6438be5 100644 --- a/cmd/registry/config-dev.yml +++ b/cmd/registry/config-dev.yml @@ -59,4 +59,8 @@ notifications: threshold: 10 backoff: 1s disabled: true - +health: + storagedriver: + enabled: true + interval: 10s + threshold: 3 diff --git a/cmd/registry/config-example.yml b/cmd/registry/config-example.yml index ef5994e5..b5700e19 100644 --- a/cmd/registry/config-example.yml +++ b/cmd/registry/config-example.yml @@ -11,3 +11,8 @@ http: addr: :5000 headers: X-Content-Type-Options: [nosniff] +health: + storagedriver: + enabled: true + interval: 10s + threshold: 3 diff --git a/configuration/configuration.go b/configuration/configuration.go index 60440e05..970a6ef4 100644 --- a/configuration/configuration.go +++ b/configuration/configuration.go @@ -210,6 +210,17 @@ type Health struct { FileCheckers []FileChecker `yaml:"file,omitempty"` // HTTPChecker is a list of URIs to check HTTPCheckers []HTTPChecker `yaml:"http,omitempty"` + // StorageDriver configures a health check on the configured storage + // driver + StorageDriver struct { + // Enabled turns on the health check for the storage driver + Enabled bool `yaml:"enabled,omitempty"` + // Interval is the number of seconds in between checks + Interval time.Duration `yaml:"interval,omitempty"` + // Threshold is the number of times a check must fail to trigger an + // unhealthy state + Threshold int `yaml:"threshold,omitempty"` + } `yaml:"storagedriver,omitempty"` } // v0_1Configuration is a Version 0.1 Configuration struct diff --git a/docs/configuration.md b/docs/configuration.md 
index 3e16ca5d..a0ddc6fd 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -196,6 +196,10 @@ information about each option that appears later in this page. maxactive: 64 idletimeout: 300s health: + storagedriver: + enabled: true + interval: 10s + threshold: 3 file: - file: /path/to/checked/file interval: 10s @@ -1600,6 +1604,10 @@ Configure the behavior of the Redis connection pool. ## health health: + storagedriver: + enabled: true + interval: 10s + threshold: 3 file: - file: /path/to/checked/file interval: 10s @@ -1609,8 +1617,72 @@ Configure the behavior of the Redis connection pool. interval: 10s threshold: 3 -The health option is **optional**. It may contain lists of file checkers -and/or HTTP checkers. +The health option is **optional**. It may contain preferences for a periodic +health check on the storage driver's backend storage, and optional periodic +checks on local files and/or HTTP URIs. The results of the health checks are +available at /debug/health on the debug HTTP server if the debug HTTP server is +enabled (see http section). + +### storagedriver + +storagedriver contains options for a health check on the configured storage +driver's backend storage. enabled must be set to true for this health check to +be active. + + + + + + + + + + + + + + + + + + + + + + +
ParameterRequiredDescription
+ enabled + + yes + +"true" to enable the storage driver health check or "false" to disable it. +
+ interval + + no + + The length of time to wait between repetitions of the check. This field + takes a positive integer and an optional suffix indicating the unit of + time. Possible units are: +
    +
  • ns (nanoseconds)
  • +
  • us (microseconds)
  • +
  • ms (milliseconds)
  • +
  • s (seconds)
  • +
  • m (minutes)
  • +
  • h (hours)
  • +
+ If you omit the suffix, the system interprets the value as nanoseconds. + The default value is 10 seconds if this field is omitted. +
+ threshold + + no + + An integer specifying the number of times the check must fail before the + check triggers an unhealthy state. If this field is not specified, a + single failure will trigger an unhealthy state. +
### file diff --git a/registry/handlers/app.go b/registry/handlers/app.go index 8b8543dd..9cf6447a 100644 --- a/registry/handlers/app.go +++ b/registry/handlers/app.go @@ -235,10 +235,23 @@ func NewApp(ctx context.Context, configuration configuration.Configuration) *App // implementing this properly will require a refactor. This method may panic // if called twice in the same process. func (app *App) RegisterHealthChecks() { - health.RegisterPeriodicThresholdFunc("storagedriver_"+app.Config.Storage.Type(), defaultCheckInterval, 3, func() error { - _, err := app.driver.List(app, "/") // "/" should always exist - return err // any error will be treated as failure - }) + if app.Config.Health.StorageDriver.Enabled { + interval := app.Config.Health.StorageDriver.Interval + if interval == 0 { + interval = defaultCheckInterval + } + + storageDriverCheck := func() error { + _, err := app.driver.List(app, "/") // "/" should always exist + return err // any error will be treated as failure + } + + if app.Config.Health.StorageDriver.Threshold != 0 { + health.RegisterPeriodicThresholdFunc("storagedriver_"+app.Config.Storage.Type(), interval, app.Config.Health.StorageDriver.Threshold, storageDriverCheck) + } else { + health.RegisterPeriodicFunc("storagedriver_"+app.Config.Storage.Type(), interval, storageDriverCheck) + } + } for _, fileChecker := range app.Config.Health.FileCheckers { interval := fileChecker.Interval