From fe7a60ee0ef1fb8abc1bb50eb23a5b21a044ba7a Mon Sep 17 00:00:00 2001
From: Ludovic Fernandez
Date: Sat, 9 Feb 2019 05:40:45 +0100
Subject: [PATCH] Uses a jittered exponential backoff (#794)

---
 Gopkg.lock                                    |   9 ++
 acme/api/api.go                               |  53 +++---
 challenge/resolver/solver_manager.go          |  35 ++--
 vendor/github.com/cenkalti/backoff/LICENSE    |  20 +++
 vendor/github.com/cenkalti/backoff/backoff.go |  66 ++++++++
 vendor/github.com/cenkalti/backoff/context.go |  63 ++++++++
 .../cenkalti/backoff/exponential.go           | 153 ++++++++++++++++++
 vendor/github.com/cenkalti/backoff/retry.go   |  82 ++++++++++
 vendor/github.com/cenkalti/backoff/ticker.go  |  82 ++++++++++
 vendor/github.com/cenkalti/backoff/tries.go   |  35 ++++
 10 files changed, 569 insertions(+), 29 deletions(-)
 create mode 100644 vendor/github.com/cenkalti/backoff/LICENSE
 create mode 100644 vendor/github.com/cenkalti/backoff/backoff.go
 create mode 100644 vendor/github.com/cenkalti/backoff/context.go
 create mode 100644 vendor/github.com/cenkalti/backoff/exponential.go
 create mode 100644 vendor/github.com/cenkalti/backoff/retry.go
 create mode 100644 vendor/github.com/cenkalti/backoff/ticker.go
 create mode 100644 vendor/github.com/cenkalti/backoff/tries.go

diff --git a/Gopkg.lock b/Gopkg.lock
index b8527d04..68e449d6 100644
--- a/Gopkg.lock
+++ b/Gopkg.lock
@@ -127,6 +127,14 @@
   revision = "8b15f938ed215522a37275106e847f6f0be85fe8"
   version = "v1.15.23"
 
+[[projects]]
+  digest = "1:cdee563173093e5ae7ab2a19c298e0904129719e1919a3c532b7bb0c3398b818"
+  name = "github.com/cenkalti/backoff"
+  packages = ["."]
+  pruneopts = "NUT"
+  revision = "1e4cf3da559842a91afcb6ea6141451e6c30c618"
+  version = "v2.1.1"
+
 [[projects]]
   digest = "1:03cfacdc6bfd46007c15786c1ece3fa074f89e5193a292f0f26d9e98c99c7cc2"
   name = "github.com/cloudflare/cloudflare-go"
@@ -667,6 +675,7 @@
     "github.com/aws/aws-sdk-go/aws/session",
     "github.com/aws/aws-sdk-go/service/lightsail",
     "github.com/aws/aws-sdk-go/service/route53",
+    "github.com/cenkalti/backoff",
     "github.com/cloudflare/cloudflare-go",
     "github.com/cpu/goacmedns",
     "github.com/decker502/dnspod-go",
diff --git a/acme/api/api.go b/acme/api/api.go
index 14b18f52..a62e49e4 100644
--- a/acme/api/api.go
+++ b/acme/api/api.go
@@ -2,12 +2,15 @@ package api
 
 import (
 	"bytes"
+	"context"
 	"crypto"
 	"encoding/json"
 	"errors"
 	"fmt"
 	"net/http"
+	"time"
 
+	"github.com/cenkalti/backoff"
 	"github.com/xenolf/lego/acme"
 	"github.com/xenolf/lego/acme/api/internal/nonces"
 	"github.com/xenolf/lego/acme/api/internal/secure"
@@ -64,34 +67,46 @@ func (a *Core) post(uri string, reqBody, response interface{}) (*http.Response,
 		return nil, errors.New("failed to marshal message")
 	}
 
-	return a.retrievablePost(uri, content, response, 0)
+	return a.retrievablePost(uri, content, response)
 }
 
 // postAsGet performs an HTTP POST ("POST-as-GET") request.
 // https://tools.ietf.org/html/draft-ietf-acme-acme-16#section-6.3
 func (a *Core) postAsGet(uri string, response interface{}) (*http.Response, error) {
-	return a.retrievablePost(uri, []byte{}, response, 0)
+	return a.retrievablePost(uri, []byte{}, response)
 }
 
-func (a *Core) retrievablePost(uri string, content []byte, response interface{}, retry int) (*http.Response, error) {
-	resp, err := a.signedPost(uri, content, response)
-	if err != nil {
-		// during tests, 5 retries allow to support ~50% of bad nonce.
-		if retry >= 5 {
-			log.Infof("too many retry on a nonce error, retry count: %d", retry)
-			return resp, err
-		}
-		switch err.(type) {
-		// Retry once if the nonce was invalidated
-		case *acme.NonceError:
-			log.Infof("nonce error retry: %s", err)
-			resp, err = a.retrievablePost(uri, content, response, retry+1)
-			if err != nil {
-				return resp, err
+func (a *Core) retrievablePost(uri string, content []byte, response interface{}) (*http.Response, error) {
+	// During tests, these settings allow ~90% of bad-nonce errors to be retried with a minimal number of attempts.
+	bo := backoff.NewExponentialBackOff()
+	bo.InitialInterval = 200 * time.Millisecond
+	bo.MaxInterval = 5 * time.Second
+	bo.MaxElapsedTime = 20 * time.Second
+
+	ctx, cancel := context.WithCancel(context.Background())
+
+	var resp *http.Response
+	operation := func() error {
+		var err error
+		resp, err = a.signedPost(uri, content, response)
+		if err != nil {
+			switch err.(type) {
+			// Retry if the nonce was invalidated
+			case *acme.NonceError:
+				log.Infof("nonce error retry: %s", err)
+				return err
+			default:
+				cancel()
+				return err
 			}
-		default:
-			return resp, err
 		}
+
+		return nil
+	}
+
+	err := backoff.Retry(operation, backoff.WithContext(bo, ctx))
+	if err != nil {
+		return nil, err
 	}
 
 	return resp, nil
diff --git a/challenge/resolver/solver_manager.go b/challenge/resolver/solver_manager.go
index 55faf77a..840e19d8 100644
--- a/challenge/resolver/solver_manager.go
+++ b/challenge/resolver/solver_manager.go
@@ -1,12 +1,14 @@
 package resolver
 
 import (
+	"context"
 	"errors"
 	"fmt"
 	"sort"
 	"strconv"
 	"time"
 
+	"github.com/cenkalti/backoff"
 	"github.com/xenolf/lego/acme"
 	"github.com/xenolf/lego/acme/api"
 	"github.com/xenolf/lego/challenge"
@@ -90,16 +92,35 @@ func validate(core *api.Core, domain string, chlg acme.Challenge) error {
 		return nil
 	}
 
+	ra, err := strconv.Atoi(chlng.RetryAfter)
+	if err != nil {
+		// The ACME server MUST return a Retry-After.
+		// If it doesn't, we'll just poll hard.
+		// Boulder does not implement the ability to retry challenges or the Retry-After header.
+		// https://github.com/letsencrypt/boulder/blob/master/docs/acme-divergences.md#section-82
+		ra = 5
+	}
+	initialInterval := time.Duration(ra) * time.Second
+
+	bo := backoff.NewExponentialBackOff()
+	bo.InitialInterval = initialInterval
+	bo.MaxInterval = 10 * initialInterval
+	bo.MaxElapsedTime = 100 * initialInterval
+
+	ctx, cancel := context.WithCancel(context.Background())
+
 	// After the path is sent, the ACME server will access our server.
 	// Repeatedly check the server for an updated status on our request.
-	for {
+	operation := func() error {
 		authz, err := core.Authorizations.Get(chlng.AuthorizationURL)
 		if err != nil {
+			cancel()
 			return err
 		}
 
 		valid, err := checkAuthorizationStatus(authz)
 		if err != nil {
+			cancel()
 			return err
 		}
 
@@ -108,16 +129,10 @@ func validate(core *api.Core, domain string, chlg acme.Challenge) error {
 			return nil
 		}
 
-		ra, err := strconv.Atoi(chlng.RetryAfter)
-		if err != nil {
-			// The ACME server MUST return a Retry-After.
-			// If it doesn't, we'll just poll hard.
-			// Boulder does not implement the ability to retry challenges or the Retry-After header.
-			// https://github.com/letsencrypt/boulder/blob/master/docs/acme-divergences.md#section-82
-			ra = 5
-		}
-		time.Sleep(time.Duration(ra) * time.Second)
+		return errors.New("the server didn't respond to our request")
 	}
+
+	return backoff.Retry(operation, backoff.WithContext(bo, ctx))
 }
 
 func checkChallengeStatus(chlng acme.ExtendedChallenge) (bool, error) {
diff --git a/vendor/github.com/cenkalti/backoff/LICENSE b/vendor/github.com/cenkalti/backoff/LICENSE
new file mode 100644
index 00000000..89b81799
--- /dev/null
+++ b/vendor/github.com/cenkalti/backoff/LICENSE
@@ -0,0 +1,20 @@
+The MIT License (MIT)
+
+Copyright (c) 2014 Cenk Altı
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software is furnished to do so,
+subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/vendor/github.com/cenkalti/backoff/backoff.go b/vendor/github.com/cenkalti/backoff/backoff.go
new file mode 100644
index 00000000..3676ee40
--- /dev/null
+++ b/vendor/github.com/cenkalti/backoff/backoff.go
@@ -0,0 +1,66 @@
+// Package backoff implements backoff algorithms for retrying operations.
+//
+// Use the Retry function for retrying operations that may fail.
+// If Retry does not meet your needs,
+// copy/paste the function into your project and modify as you wish.
+//
+// There is also a Ticker type, similar to time.Ticker.
+// You can use it if you need to work with channels.
+//
+// See Examples section below for usage examples.
+package backoff
+
+import "time"
+
+// BackOff is a backoff policy for retrying an operation.
+type BackOff interface {
+	// NextBackOff returns the duration to wait before retrying the operation,
+	// or backoff.Stop to indicate that no more retries should be made.
+	//
+	// Example usage:
+	//
+	// 	duration := backoff.NextBackOff()
+	// 	if duration == backoff.Stop {
+	// 		// Do not retry operation.
+	// 	} else {
+	// 		// Sleep for duration and retry operation.
+	// 	}
+	//
+	NextBackOff() time.Duration
+
+	// Reset to initial state.
+	Reset()
+}
+
+// Stop is the duration returned by NextBackOff() to indicate that no more retries should be made.
+const Stop time.Duration = -1
+
+// ZeroBackOff is a fixed backoff policy whose backoff time is always zero,
+// meaning that the operation is retried immediately without waiting, indefinitely.
+type ZeroBackOff struct{}
+
+func (b *ZeroBackOff) Reset() {}
+
+func (b *ZeroBackOff) NextBackOff() time.Duration { return 0 }
+
+// StopBackOff is a fixed backoff policy that always returns backoff.Stop for
+// NextBackOff(), meaning that the operation should never be retried.
+type StopBackOff struct{}
+
+func (b *StopBackOff) Reset() {}
+
+func (b *StopBackOff) NextBackOff() time.Duration { return Stop }
+
+// ConstantBackOff is a backoff policy that always returns the same backoff delay.
+// This is in contrast to an exponential backoff policy,
+// which returns a delay that grows longer as you call NextBackOff() over and over again.
+type ConstantBackOff struct {
+	Interval time.Duration
+}
+
+func (b *ConstantBackOff) Reset()                     {}
+func (b *ConstantBackOff) NextBackOff() time.Duration { return b.Interval }
+
+func NewConstantBackOff(d time.Duration) *ConstantBackOff {
+	return &ConstantBackOff{Interval: d}
+}
diff --git a/vendor/github.com/cenkalti/backoff/context.go b/vendor/github.com/cenkalti/backoff/context.go
new file mode 100644
index 00000000..7706faa2
--- /dev/null
+++ b/vendor/github.com/cenkalti/backoff/context.go
@@ -0,0 +1,63 @@
+package backoff
+
+import (
+	"context"
+	"time"
+)
+
+// BackOffContext is a backoff policy that stops retrying after the context
+// is canceled.
+type BackOffContext interface {
+	BackOff
+	Context() context.Context
+}
+
+type backOffContext struct {
+	BackOff
+	ctx context.Context
+}
+
+// WithContext returns a BackOffContext with context ctx
+//
+// ctx must not be nil
+func WithContext(b BackOff, ctx context.Context) BackOffContext {
+	if ctx == nil {
+		panic("nil context")
+	}
+
+	if b, ok := b.(*backOffContext); ok {
+		return &backOffContext{
+			BackOff: b.BackOff,
+			ctx:     ctx,
+		}
+	}
+
+	return &backOffContext{
+		BackOff: b,
+		ctx:     ctx,
+	}
+}
+
+func ensureContext(b BackOff) BackOffContext {
+	if cb, ok := b.(BackOffContext); ok {
+		return cb
+	}
+	return WithContext(b, context.Background())
+}
+
+func (b *backOffContext) Context() context.Context {
+	return b.ctx
+}
+
+func (b *backOffContext) NextBackOff() time.Duration {
+	select {
+	case <-b.ctx.Done():
+		return Stop
+	default:
+	}
+	next := b.BackOff.NextBackOff()
+	if deadline, ok := b.ctx.Deadline(); ok && deadline.Sub(time.Now()) < next {
+		return Stop
+	}
+	return next
+}
diff --git a/vendor/github.com/cenkalti/backoff/exponential.go b/vendor/github.com/cenkalti/backoff/exponential.go
new file mode 100644
index 00000000..a031a659
--- /dev/null
+++ b/vendor/github.com/cenkalti/backoff/exponential.go
@@ -0,0 +1,153 @@
+package backoff
+
+import (
+	"math/rand"
+	"time"
+)
+
+/*
+ExponentialBackOff is a backoff implementation that increases the backoff
+period for each retry attempt using a randomization function that grows exponentially.
+
+NextBackOff() is calculated using the following formula:
+
+ randomized interval =
+     RetryInterval * (random value in range [1 - RandomizationFactor, 1 + RandomizationFactor])
+
+In other words NextBackOff() will range between the randomization factor
+percentage below and above the retry interval.
+
+For example, given the following parameters:
+
+ RetryInterval = 2
+ RandomizationFactor = 0.5
+ Multiplier = 2
+
+the actual backoff period used in the next retry attempt will range between 1 and 3 seconds,
+multiplied by the exponential, that is, between 2 and 6 seconds.
+
+Note: MaxInterval caps the RetryInterval and not the randomized interval.
+
+If the time elapsed since an ExponentialBackOff instance is created goes past the
+MaxElapsedTime, then the method NextBackOff() starts returning backoff.Stop.
+
+The elapsed time can be reset by calling Reset().
+
+Example: Given the following default arguments, and assuming we go over the
+MaxElapsedTime on the 10th try, the sequence for 10 tries will be:
+
+ Request #  RetryInterval (seconds)  Randomized Interval (seconds)
+
+  1          0.5                     [0.25,   0.75]
+  2          0.75                    [0.375,  1.125]
+  3          1.125                   [0.562,  1.687]
+  4          1.687                   [0.8435, 2.53]
+  5          2.53                    [1.265,  3.795]
+  6          3.795                   [1.897,  5.692]
+  7          5.692                   [2.846,  8.538]
+  8          8.538                   [4.269, 12.807]
+  9         12.807                   [6.403, 19.210]
+ 10         19.210                   backoff.Stop
+
+Note: Implementation is not thread-safe.
+*/
+type ExponentialBackOff struct {
+	InitialInterval     time.Duration
+	RandomizationFactor float64
+	Multiplier          float64
+	MaxInterval         time.Duration
+	// After MaxElapsedTime the ExponentialBackOff stops.
+	// It never stops if MaxElapsedTime == 0.
+	MaxElapsedTime time.Duration
+	Clock          Clock
+
+	currentInterval time.Duration
+	startTime       time.Time
+}
+
+// Clock is an interface that returns current time for BackOff.
+type Clock interface {
+	Now() time.Time
+}
+
+// Default values for ExponentialBackOff.
+const (
+	DefaultInitialInterval     = 500 * time.Millisecond
+	DefaultRandomizationFactor = 0.5
+	DefaultMultiplier          = 1.5
+	DefaultMaxInterval         = 60 * time.Second
+	DefaultMaxElapsedTime      = 15 * time.Minute
+)
+
+// NewExponentialBackOff creates an instance of ExponentialBackOff using default values.
+func NewExponentialBackOff() *ExponentialBackOff {
+	b := &ExponentialBackOff{
+		InitialInterval:     DefaultInitialInterval,
+		RandomizationFactor: DefaultRandomizationFactor,
+		Multiplier:          DefaultMultiplier,
+		MaxInterval:         DefaultMaxInterval,
+		MaxElapsedTime:      DefaultMaxElapsedTime,
+		Clock:               SystemClock,
+	}
+	b.Reset()
+	return b
+}
+
+type systemClock struct{}
+
+func (t systemClock) Now() time.Time {
+	return time.Now()
+}
+
+// SystemClock implements Clock interface that uses time.Now().
+var SystemClock = systemClock{}
+
+// Reset the interval back to the initial retry interval and restart the timer.
+func (b *ExponentialBackOff) Reset() {
+	b.currentInterval = b.InitialInterval
+	b.startTime = b.Clock.Now()
+}
+
+// NextBackOff calculates the next backoff interval using the formula:
+// 	Randomized interval = RetryInterval +/- (RandomizationFactor * RetryInterval)
+func (b *ExponentialBackOff) NextBackOff() time.Duration {
+	// Make sure we have not gone over the maximum elapsed time.
+	if b.MaxElapsedTime != 0 && b.GetElapsedTime() > b.MaxElapsedTime {
+		return Stop
+	}
+	defer b.incrementCurrentInterval()
+	return getRandomValueFromInterval(b.RandomizationFactor, rand.Float64(), b.currentInterval)
+}
+
+// GetElapsedTime returns the elapsed time since an ExponentialBackOff instance
+// was created; it is reset when Reset() is called.
+//
+// The elapsed time is computed using time.Now().UnixNano(). It is
+// safe to call even while the backoff policy is used by a running
+// ticker.
+func (b *ExponentialBackOff) GetElapsedTime() time.Duration {
+	return b.Clock.Now().Sub(b.startTime)
+}
+
+// Increments the current interval by multiplying it by the multiplier.
+func (b *ExponentialBackOff) incrementCurrentInterval() {
+	// Check for overflow; if overflow is detected, set the current interval to the max interval.
+	if float64(b.currentInterval) >= float64(b.MaxInterval)/b.Multiplier {
+		b.currentInterval = b.MaxInterval
+	} else {
+		b.currentInterval = time.Duration(float64(b.currentInterval) * b.Multiplier)
+	}
+}
+
+// Returns a random value from the following interval:
+// 	[currentInterval - randomizationFactor * currentInterval, currentInterval + randomizationFactor * currentInterval].
+func getRandomValueFromInterval(randomizationFactor, random float64, currentInterval time.Duration) time.Duration {
+	var delta = randomizationFactor * float64(currentInterval)
+	var minInterval = float64(currentInterval) - delta
+	var maxInterval = float64(currentInterval) + delta
+
+	// Get a random value from the range [minInterval, maxInterval].
+	// The formula used below has a +1 because if the minInterval is 1 and the maxInterval is 3 then
+	// we want a 33% chance for selecting either 1, 2 or 3.
+	return time.Duration(minInterval + (random * (maxInterval - minInterval + 1)))
+}
diff --git a/vendor/github.com/cenkalti/backoff/retry.go b/vendor/github.com/cenkalti/backoff/retry.go
new file mode 100644
index 00000000..e936a506
--- /dev/null
+++ b/vendor/github.com/cenkalti/backoff/retry.go
@@ -0,0 +1,82 @@
+package backoff
+
+import "time"
+
+// An Operation is executed by Retry() or RetryNotify().
+// The operation will be retried using a backoff policy if it returns an error.
+type Operation func() error
+
+// Notify is a notify-on-error function. It receives an operation error and
+// backoff delay if the operation failed (with an error).
+//
+// NOTE that if the backoff policy states that no more retries should be made,
+// the notify function isn't called.
+type Notify func(error, time.Duration)
+
+// Retry the operation o until it does not return error or BackOff stops.
+// o is guaranteed to be run at least once.
+//
+// If o returns a *PermanentError, the operation is not retried, and the
+// wrapped error is returned.
+//
+// Retry sleeps the goroutine for the duration returned by BackOff after a
+// failed operation returns.
+func Retry(o Operation, b BackOff) error { return RetryNotify(o, b, nil) }
+
+// RetryNotify calls notify function with the error and wait duration
+// for each failed attempt before sleep.
+func RetryNotify(operation Operation, b BackOff, notify Notify) error {
+	var err error
+	var next time.Duration
+	var t *time.Timer
+
+	cb := ensureContext(b)
+
+	b.Reset()
+	for {
+		if err = operation(); err == nil {
+			return nil
+		}
+
+		if permanent, ok := err.(*PermanentError); ok {
+			return permanent.Err
+		}
+
+		if next = cb.NextBackOff(); next == Stop {
+			return err
+		}
+
+		if notify != nil {
+			notify(err, next)
+		}
+
+		if t == nil {
+			t = time.NewTimer(next)
+			defer t.Stop()
+		} else {
+			t.Reset(next)
+		}
+
+		select {
+		case <-cb.Context().Done():
+			return err
+		case <-t.C:
+		}
+	}
+}
+
+// PermanentError signals that the operation should not be retried.
+type PermanentError struct {
+	Err error
+}
+
+func (e *PermanentError) Error() string {
+	return e.Err.Error()
+}
+
+// Permanent wraps the given err in a *PermanentError.
+func Permanent(err error) *PermanentError {
+	return &PermanentError{
+		Err: err,
+	}
+}
diff --git a/vendor/github.com/cenkalti/backoff/ticker.go b/vendor/github.com/cenkalti/backoff/ticker.go
new file mode 100644
index 00000000..e41084b0
--- /dev/null
+++ b/vendor/github.com/cenkalti/backoff/ticker.go
@@ -0,0 +1,82 @@
+package backoff
+
+import (
+	"sync"
+	"time"
+)
+
+// Ticker holds a channel that delivers `ticks' of a clock at times reported by a BackOff.
+//
+// Ticks will continue to arrive when the previous operation is still running,
+// so operations that take a while to fail could run in quick succession.
+type Ticker struct {
+	C        <-chan time.Time
+	c        chan time.Time
+	b        BackOffContext
+	stop     chan struct{}
+	stopOnce sync.Once
+}
+
+// NewTicker returns a new Ticker containing a channel that will send
+// the time at times specified by the BackOff argument. Ticker is
+// guaranteed to tick at least once. The channel is closed when the Stop
+// method is called or the BackOff stops. It is not safe to manipulate the
+// provided backoff policy (notably calling NextBackOff or Reset)
+// while the ticker is running.
+func NewTicker(b BackOff) *Ticker {
+	c := make(chan time.Time)
+	t := &Ticker{
+		C:    c,
+		c:    c,
+		b:    ensureContext(b),
+		stop: make(chan struct{}),
+	}
+	t.b.Reset()
+	go t.run()
+	return t
+}
+
+// Stop turns off a ticker. After Stop, no more ticks will be sent.
+func (t *Ticker) Stop() {
+	t.stopOnce.Do(func() { close(t.stop) })
+}
+
+func (t *Ticker) run() {
+	c := t.c
+	defer close(c)
+
+	// Ticker is guaranteed to tick at least once.
+	afterC := t.send(time.Now())
+
+	for {
+		if afterC == nil {
+			return
+		}
+
+		select {
+		case tick := <-afterC:
+			afterC = t.send(tick)
+		case <-t.stop:
+			t.c = nil // Prevent future ticks from being sent to the channel.
+			return
+		case <-t.b.Context().Done():
+			return
+		}
+	}
+}
+
+func (t *Ticker) send(tick time.Time) <-chan time.Time {
+	select {
+	case t.c <- tick:
+	case <-t.stop:
+		return nil
+	}
+
+	next := t.b.NextBackOff()
+	if next == Stop {
+		t.Stop()
+		return nil
+	}
+
+	return time.After(next)
+}
diff --git a/vendor/github.com/cenkalti/backoff/tries.go b/vendor/github.com/cenkalti/backoff/tries.go
new file mode 100644
index 00000000..cfeefd9b
--- /dev/null
+++ b/vendor/github.com/cenkalti/backoff/tries.go
@@ -0,0 +1,35 @@
+package backoff
+
+import "time"
+
+/*
+WithMaxRetries creates a wrapper around another BackOff, which will
+return Stop if NextBackOff() has been called too many times since
+the last time Reset() was called.
+
+Note: Implementation is not thread-safe.
+*/
+func WithMaxRetries(b BackOff, max uint64) BackOff {
+	return &backOffTries{delegate: b, maxTries: max}
+}
+
+type backOffTries struct {
+	delegate BackOff
+	maxTries uint64
+	numTries uint64
+}
+
+func (b *backOffTries) NextBackOff() time.Duration {
+	if b.maxTries > 0 {
+		if b.maxTries <= b.numTries {
+			return Stop
+		}
+		b.numTries++
+	}
+	return b.delegate.NextBackOff()
+}
+
+func (b *backOffTries) Reset() {
+	b.numTries = 0
+	b.delegate.Reset()
+}
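
The api.go change above follows the standard cenkalti/backoff pattern: classify errors inside the operation, return retryable ones so Retry backs off and tries again, and cancel the context for everything else so Retry gives up. Below is a minimal, self-contained sketch of that pattern with the same tuning the patch uses; nonceError and doRequest are hypothetical stand-ins for acme.NonceError and a.signedPost, not part of the patch.

package main

import (
	"context"
	"fmt"
	"time"

	"github.com/cenkalti/backoff"
)

// nonceError is a hypothetical stand-in for acme.NonceError, the one error
// class worth retrying with a fresh nonce.
type nonceError struct{ msg string }

func (e *nonceError) Error() string { return e.msg }

func main() {
	// Same tuning as the patch: first retry after ~200ms, intervals capped at
	// 5s, the whole operation abandoned after 20s.
	bo := backoff.NewExponentialBackOff()
	bo.InitialInterval = 200 * time.Millisecond
	bo.MaxInterval = 5 * time.Second
	bo.MaxElapsedTime = 20 * time.Second

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	attempt := 0
	doRequest := func() error { // hypothetical request: fails twice with a bad nonce
		attempt++
		if attempt < 3 {
			return &nonceError{"bad nonce"}
		}
		return nil
	}

	operation := func() error {
		if err := doRequest(); err != nil {
			switch err.(type) {
			case *nonceError:
				return err // retryable: Retry backs off and calls us again
			default:
				cancel() // non-retryable: a canceled context makes NextBackOff return Stop
				return err
			}
		}
		return nil
	}

	err := backoff.Retry(operation, backoff.WithContext(bo, ctx))
	fmt.Println("attempts:", attempt, "err:", err)
}

The vendored retry.go also offers backoff.Permanent(err) as a lighter way to mark an error non-retryable; the patch opts for context cancellation, which Retry treats the same way.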
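The solver_manager.go change replaces the old sleep loop with the same Retry primitive, seeding the intervals from the server's Retry-After value. A minimal sketch under the patch's 10x/100x multipliers; the retryAfter value and the pending-then-valid status sequence are assumptions for illustration:

package main

import (
	"errors"
	"fmt"
	"strconv"
	"time"

	"github.com/cenkalti/backoff"
)

func main() {
	retryAfter := "1" // hypothetical Retry-After header value, in seconds
	ra, err := strconv.Atoi(retryAfter)
	if err != nil {
		// No usable Retry-After (e.g. Boulder doesn't send one): poll at ~5s.
		ra = 5
	}
	initialInterval := time.Duration(ra) * time.Second

	bo := backoff.NewExponentialBackOff()
	bo.InitialInterval = initialInterval
	bo.MaxInterval = 10 * initialInterval     // never wait more than 10x the server's hint
	bo.MaxElapsedTime = 100 * initialInterval // give up entirely after ~100x

	polls := 0
	operation := func() error {
		polls++
		if polls < 3 { // pretend the authorization stays pending for two polls
			return errors.New("the server didn't respond to our request")
		}
		return nil // authorization became valid
	}

	err = backoff.Retry(operation, bo)
	fmt.Println("polls:", polls, "err:", err)
}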
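The jitter itself lives in exponential.go: each NextBackOff() draws uniformly from [currentInterval - RandomizationFactor*currentInterval, currentInterval + RandomizationFactor*currentInterval], then multiplies currentInterval by Multiplier, capped at MaxInterval. A quick way to watch the defaults (500ms initial interval, factor 0.5, multiplier 1.5) in action; the output varies from run to run, which is the point of the jitter:

package main

import (
	"fmt"
	"time"

	"github.com/cenkalti/backoff"
)

func main() {
	bo := backoff.NewExponentialBackOff()
	bo.InitialInterval = 500 * time.Millisecond
	bo.MaxElapsedTime = 0 // 0 disables the elapsed-time cutoff, so NextBackOff never returns Stop

	// Successive draws land in [0.25s, 0.75s], [0.375s, 1.125s], [0.562s, 1.687s], ...
	for i := 1; i <= 5; i++ {
		fmt.Printf("attempt %d: wait %v\n", i, bo.NextBackOff())
	}
}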
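retry.go's RetryNotify and Permanent are worth knowing even though the patch only calls Retry: Notify observes each failed attempt together with the upcoming delay, and Permanent short-circuits retrying entirely (Retry returns the wrapped error without consulting the policy). A sketch with a hypothetical flaky function:

package main

import (
	"errors"
	"fmt"
	"time"

	"github.com/cenkalti/backoff"
)

func main() {
	attempt := 0
	flaky := func() error { // hypothetical: fails once, then hits a permanent error
		attempt++
		if attempt == 1 {
			return errors.New("transient")
		}
		return backoff.Permanent(errors.New("bad credentials")) // do not retry this
	}

	notify := func(err error, wait time.Duration) {
		fmt.Printf("attempt failed (%v); next try in %v\n", err, wait)
	}

	err := backoff.RetryNotify(flaky, backoff.NewExponentialBackOff(), notify)
	fmt.Println("final:", err) // the unwrapped "bad credentials" error
}

Note that notify is not called for the permanent failure: RetryNotify returns before computing another delay.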
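Finally, the vendor drop ships two pieces the patch doesn't use yet: Ticker, for channel-based retry loops, and WithMaxRetries, which caps by attempt count instead of elapsed time. A small combined sketch; work is a hypothetical operation that never succeeds, so the loop ends when the capped policy returns Stop and the channel closes:

package main

import (
	"errors"
	"fmt"

	"github.com/cenkalti/backoff"
)

func work() error { return errors.New("still failing") }

func main() {
	// Stop after 3 retries; the ticker is guaranteed to tick at least once,
	// so work runs 4 times in total here.
	bo := backoff.WithMaxRetries(backoff.NewExponentialBackOff(), 3)

	ticker := backoff.NewTicker(bo)
	defer ticker.Stop()

	var err error
	for range ticker.C { // the channel closes once the policy returns Stop
		if err = work(); err != nil {
			continue // the next tick arrives after the next backoff interval
		}
		ticker.Stop()
		break
	}
	fmt.Println("err:", err)
}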