Implement standard backoff strategy

This commit is contained in:
Wesley Graham 2020-02-20 17:45:23 -08:00 committed by David Cowden
parent f9779d0bed
commit 8d4356733e
3 changed files with 90 additions and 38 deletions

View file

@ -181,9 +181,13 @@ func (h *Handler) GetChallenge(w http.ResponseWriter, r *http.Request) {
if err != nil { if err != nil {
api.WriteError(w, err) api.WriteError(w, err)
} else if ch.Retry.Active { } else if ch.Retry.Active {
retryAfter := int(ch.Retry.Backoffs) * (10 - ch.Retry.Called) retryAfter, err := h.Auth.BackoffChallenge(prov, acc.GetID(), chID, acc.GetKey())
w.Header().Add("Retry-After", string(retryAfter)) if err != nil {
api.WriteError(w, err)
} else {
w.Header().Add("Retry-After", retryAfter.String())
api.WriteProcessing(w, ch) api.WriteProcessing(w, ch)
}
} else { } else {
getLink := h.Auth.GetLink getLink := h.Auth.GetLink
w.Header().Add("Link", link(getLink(acme.AuthzLink, acme.URLSafeProvisionerName(prov), true, ch.GetAuthzID()), "up")) w.Header().Add("Link", link(getLink(acme.AuthzLink, acme.URLSafeProvisionerName(prov), true, ch.GetAuthzID()), "up"))

View file

@ -6,7 +6,6 @@ import (
"crypto/x509" "crypto/x509"
"encoding/base64" "encoding/base64"
"math" "math"
"math/rand"
"net" "net"
"net/http" "net/http"
"net/url" "net/url"
@ -21,6 +20,7 @@ import (
// Interface is the acme authority interface. // Interface is the acme authority interface.
type Interface interface { type Interface interface {
BackoffChallenge(provisioner.Interface, string, string, *jose.JSONWebKey) (time.Duration, error)
DeactivateAccount(provisioner.Interface, string) (*Account, error) DeactivateAccount(provisioner.Interface, string) (*Account, error)
FinalizeOrder(provisioner.Interface, string, string, *x509.CertificateRequest) (*Order, error) FinalizeOrder(provisioner.Interface, string, string, *x509.CertificateRequest) (*Order, error)
GetAccount(provisioner.Interface, string) (*Account, error) GetAccount(provisioner.Interface, string) (*Account, error)
@ -295,8 +295,7 @@ func (a *Authority) ValidateChallenge(p provisioner.Interface, accID, chID strin
if ch.getStatus() == StatusInvalid { if ch.getStatus() == StatusInvalid {
return ch.toACME(a.db, a.dir, p) return ch.toACME(a.db, a.dir, p)
} }
duration := time.Duration(ch.getRetry().Backoffs + math.Mod(rand.Float64(), 5)) time.Sleep(ch.getBackoff())
time.Sleep(duration*time.Second)
} }
return ch.toACME(a.db, a.dir, p) return ch.toACME(a.db, a.dir, p)
} }
@ -312,3 +311,24 @@ func (a *Authority) GetCertificate(accID, certID string) ([]byte, error) {
} }
return cert.toACME(a.db, a.dir) return cert.toACME(a.db, a.dir)
} }
// BackoffChallenge returns the total time to wait until the next sequence of validation attempts completes
func (a *Authority) BackoffChallenge(p provisioner.Interface, accID, chID string, jwk *jose.JSONWebKey) (time.Duration, error) {
ch, err := getChallenge(a.db, chID)
if err != nil {
return -1, err
}
if accID != ch.getAccountID() {
return -1, UnauthorizedErr(errors.New("account does not own challenge"))
}
remCalls := ch.getRetry().MaxAttempts - math.Mod(ch.getRetry().Called, ch.getRetry().MaxAttempts)
totBackoff := 0*time.Second
for i:=0; i < int(remCalls); i++ {
clone := ch.clone()
clone.Retry.Called += float64(i)
totBackoff += clone.getBackoff()
}
return totBackoff, nil
}

View file

@ -11,6 +11,8 @@ import (
"encoding/json" "encoding/json"
"fmt" "fmt"
"io/ioutil" "io/ioutil"
"math"
"math/rand"
"net" "net"
"net/http" "net/http"
"strings" "strings"
@ -38,8 +40,12 @@ type Challenge struct {
} }
type Retry struct { type Retry struct {
Called int `json:"id"` Called float64 `json:"id"`
Backoffs float64 `json:"backoffs"` BaseDelay time.Duration `json:"basedelay"`
MaxDelay time.Duration `json:"maxdelay"`
Multiplier float64 `json:"multiplier"`
Jitter float64 `json:"jitter"`
MaxAttempts float64 `json:"maxattempts"`
Active bool `json:"active"` Active bool `json:"active"`
Mux sync.Mutex `json:"mux"` Mux sync.Mutex `json:"mux"`
} }
@ -85,6 +91,7 @@ type challenge interface {
getAuthzID() string getAuthzID() string
getToken() string getToken() string
getRetry() *Retry getRetry() *Retry
getBackoff() time.Duration
clone() *baseChallenge clone() *baseChallenge
getAccountID() string getAccountID() string
getValidated() time.Time getValidated() time.Time
@ -131,7 +138,7 @@ func newBaseChallenge(accountID, authzID string) (*baseChallenge, error) {
Status: StatusPending, Status: StatusPending,
Token: token, Token: token,
Created: clock.Now(), Created: clock.Now(),
Retry: &Retry{Called:0, Backoffs:1, Active:false}, Retry: &Retry{Called:0, BaseDelay:1, MaxDelay: 30, Jitter: 1, MaxAttempts:10, Active:false},
}, nil }, nil
} }
@ -190,6 +197,25 @@ func (bc *baseChallenge) getError() *AError {
return bc.Error return bc.Error
} }
// getBackoff returns the backoff time of the baseChallenge.
func (bc *baseChallenge) getBackoff() time.Duration {
if bc.Retry.Called == 0 {
return bc.Retry.BaseDelay
}
backoff, max := float64(bc.Retry.BaseDelay), float64(bc.Retry.MaxDelay)
backoff *= math.Pow(bc.Retry.Multiplier, bc.Retry.Called)
if backoff > max {
backoff = max
}
// Introduce Jitter to ensure that clustered requests wont operate in unison
backoff *= 1 + bc.Retry.Jitter*(rand.Float64()*2-1)
if backoff < 0 {
return 0
}
return time.Duration(backoff)
}
// toACME converts the internal Challenge type into the public acmeChallenge // toACME converts the internal Challenge type into the public acmeChallenge
// type for presentation in the ACME protocol. // type for presentation in the ACME protocol.
func (bc *baseChallenge) toACME(db nosql.DB, dir *directory, p provisioner.Interface) (*Challenge, error) { func (bc *baseChallenge) toACME(db nosql.DB, dir *directory, p provisioner.Interface) (*Challenge, error) {
@ -341,14 +367,14 @@ func (hc *http01Challenge) validate(db nosql.DB, jwk *jose.JSONWebKey, vo valida
resp, err := vo.httpGet(url) resp, err := vo.httpGet(url)
if err != nil { if err != nil {
if err = hc.storeError(db, ConnectionErr(errors.Wrapf(err, if err = hc.iterateRetry(db, ConnectionErr(errors.Wrapf(err,
"error doing http GET for url %s", url))); err != nil { "error doing http GET for url %s", url))); err != nil {
return nil, err return nil, err
} }
return hc, nil return hc, nil
} }
if resp.StatusCode >= 400 { if resp.StatusCode >= 400 {
if err = hc.storeError(db, if err = hc.iterateRetry(db,
ConnectionErr(errors.Errorf("error doing http GET for url %s with status code %d", ConnectionErr(errors.Errorf("error doing http GET for url %s with status code %d",
url, resp.StatusCode))); err != nil { url, resp.StatusCode))); err != nil {
return nil, err return nil, err
@ -366,12 +392,16 @@ func (hc *http01Challenge) validate(db nosql.DB, jwk *jose.JSONWebKey, vo valida
expected, err := KeyAuthorization(hc.Token, jwk) expected, err := KeyAuthorization(hc.Token, jwk)
if err != nil { if err != nil {
// TODO retry?
return nil, err return nil, err
} }
if keyAuth != expected { if keyAuth != expected {
rejectedErr := RejectedIdentifierErr(errors.Errorf("keyAuthorization does not match; "+ // TODO should we just bail here, this is a "successful" failure.
"expected %s, but got %s", expected, keyAuth)) if err = hc.iterateRetry(db, RejectedIdentifierErr(errors.Errorf("keyAuthorization does not match; "+
if err = hc.updateRetry(db, rejectedErr); err != nil { "expected %s, but got %s", expected, keyAuth))); err != nil {
// TODO what's the difference between returning nil with an error, and returning a retry-
// incremented challenge? I think we should probably only hard error if there are no more
// retries left.
return nil, err return nil, err
} }
return hc, err return hc, err
@ -383,6 +413,7 @@ func (hc *http01Challenge) validate(db nosql.DB, jwk *jose.JSONWebKey, vo valida
upd.Error = nil upd.Error = nil
upd.Validated = clock.Now() upd.Validated = clock.Now()
upd.Retry.Active = false upd.Retry.Active = false
upd.Retry.Called = 0
if err := upd.save(db, hc); err != nil { if err := upd.save(db, hc); err != nil {
return nil, err return nil, err
@ -426,7 +457,7 @@ func (tc *tlsALPN01Challenge) validate(db nosql.DB, jwk *jose.JSONWebKey, vo val
conn, err := vo.tlsDial("tcp", hostPort, config) conn, err := vo.tlsDial("tcp", hostPort, config)
if err != nil { if err != nil {
if err = tc.storeError(db, if err = tc.iterateRetry(db,
ConnectionErr(errors.Wrapf(err, "error doing TLS dial for %s", hostPort))); err != nil { ConnectionErr(errors.Wrapf(err, "error doing TLS dial for %s", hostPort))); err != nil {
return nil, err return nil, err
} }
@ -438,7 +469,7 @@ func (tc *tlsALPN01Challenge) validate(db nosql.DB, jwk *jose.JSONWebKey, vo val
certs := cs.PeerCertificates certs := cs.PeerCertificates
if len(certs) == 0 { if len(certs) == 0 {
if err = tc.storeError(db, if err = tc.iterateRetry(db,
RejectedIdentifierErr(errors.Errorf("%s challenge for %s resulted in no certificates", RejectedIdentifierErr(errors.Errorf("%s challenge for %s resulted in no certificates",
tc.Type, tc.Value))); err != nil { tc.Type, tc.Value))); err != nil {
return nil, err return nil, err
@ -447,8 +478,7 @@ func (tc *tlsALPN01Challenge) validate(db nosql.DB, jwk *jose.JSONWebKey, vo val
} }
if !cs.NegotiatedProtocolIsMutual || cs.NegotiatedProtocol != "acme-tls/1" { if !cs.NegotiatedProtocolIsMutual || cs.NegotiatedProtocol != "acme-tls/1" {
if err = tc.storeError(db, if err = tc.iterateRetry(db, RejectedIdentifierErr(errors.Errorf("cannot negotiate ALPN acme-tls/1 protocol for "+
RejectedIdentifierErr(errors.Errorf("cannot negotiate ALPN acme-tls/1 protocol for "+
"tls-alpn-01 challenge"))); err != nil { "tls-alpn-01 challenge"))); err != nil {
return nil, err return nil, err
} }
@ -458,7 +488,7 @@ func (tc *tlsALPN01Challenge) validate(db nosql.DB, jwk *jose.JSONWebKey, vo val
leafCert := certs[0] leafCert := certs[0]
if len(leafCert.DNSNames) != 1 || !strings.EqualFold(leafCert.DNSNames[0], tc.Value) { if len(leafCert.DNSNames) != 1 || !strings.EqualFold(leafCert.DNSNames[0], tc.Value) {
if err = tc.storeError(db, if err = tc.iterateRetry(db,
RejectedIdentifierErr(errors.Errorf("incorrect certificate for tls-alpn-01 challenge: "+ RejectedIdentifierErr(errors.Errorf("incorrect certificate for tls-alpn-01 challenge: "+
"leaf certificate must contain a single DNS name, %v", tc.Value))); err != nil { "leaf certificate must contain a single DNS name, %v", tc.Value))); err != nil {
return nil, err return nil, err
@ -479,7 +509,7 @@ func (tc *tlsALPN01Challenge) validate(db nosql.DB, jwk *jose.JSONWebKey, vo val
for _, ext := range leafCert.Extensions { for _, ext := range leafCert.Extensions {
if idPeAcmeIdentifier.Equal(ext.Id) { if idPeAcmeIdentifier.Equal(ext.Id) {
if !ext.Critical { if !ext.Critical {
if err = tc.storeError(db, if err = tc.iterateRetry(db,
RejectedIdentifierErr(errors.Errorf("incorrect certificate for tls-alpn-01 challenge: " + RejectedIdentifierErr(errors.Errorf("incorrect certificate for tls-alpn-01 challenge: " +
"acmeValidationV1 extension not critical"))); err != nil { "acmeValidationV1 extension not critical"))); err != nil {
return nil, err return nil, err
@ -491,7 +521,7 @@ func (tc *tlsALPN01Challenge) validate(db nosql.DB, jwk *jose.JSONWebKey, vo val
rest, err := asn1.Unmarshal(ext.Value, &extValue) rest, err := asn1.Unmarshal(ext.Value, &extValue)
if err != nil || len(rest) > 0 || len(hashedKeyAuth) != len(extValue) { if err != nil || len(rest) > 0 || len(hashedKeyAuth) != len(extValue) {
if err = tc.storeError(db, if err = tc.iterateRetry(db,
RejectedIdentifierErr(errors.Errorf("incorrect certificate for tls-alpn-01 challenge: "+ RejectedIdentifierErr(errors.Errorf("incorrect certificate for tls-alpn-01 challenge: "+
"malformed acmeValidationV1 extension value"))); err != nil { "malformed acmeValidationV1 extension value"))); err != nil {
return nil, err return nil, err
@ -500,7 +530,7 @@ func (tc *tlsALPN01Challenge) validate(db nosql.DB, jwk *jose.JSONWebKey, vo val
} }
if subtle.ConstantTimeCompare(hashedKeyAuth[:], extValue) != 1 { if subtle.ConstantTimeCompare(hashedKeyAuth[:], extValue) != 1 {
if err = tc.storeError(db, if err = tc.iterateRetry(db,
RejectedIdentifierErr(errors.Errorf("incorrect certificate for tls-alpn-01 challenge: "+ RejectedIdentifierErr(errors.Errorf("incorrect certificate for tls-alpn-01 challenge: "+
"expected acmeValidationV1 extension value %s for this challenge but got %s", "expected acmeValidationV1 extension value %s for this challenge but got %s",
hex.EncodeToString(hashedKeyAuth[:]), hex.EncodeToString(extValue)))); err != nil { hex.EncodeToString(hashedKeyAuth[:]), hex.EncodeToString(extValue)))); err != nil {
@ -513,6 +543,7 @@ func (tc *tlsALPN01Challenge) validate(db nosql.DB, jwk *jose.JSONWebKey, vo val
upd.Status = StatusValid upd.Status = StatusValid
upd.Error = nil upd.Error = nil
upd.Validated = clock.Now() upd.Validated = clock.Now()
upd.Retry.Active = false
if err := upd.save(db, tc); err != nil { if err := upd.save(db, tc); err != nil {
return nil, err return nil, err
@ -526,7 +557,8 @@ func (tc *tlsALPN01Challenge) validate(db nosql.DB, jwk *jose.JSONWebKey, vo val
} }
if foundIDPeAcmeIdentifierV1Obsolete { if foundIDPeAcmeIdentifierV1Obsolete {
if err = tc.storeError(db, // TODO this isn't likely going to change without user action, this is probably a good case for a hard failure...
if err = tc.iterateRetry(db,
RejectedIdentifierErr(errors.Errorf("incorrect certificate for tls-alpn-01 challenge: "+ RejectedIdentifierErr(errors.Errorf("incorrect certificate for tls-alpn-01 challenge: "+
"obsolete id-pe-acmeIdentifier in acmeValidationV1 extension"))); err != nil { "obsolete id-pe-acmeIdentifier in acmeValidationV1 extension"))); err != nil {
return nil, err return nil, err
@ -534,7 +566,7 @@ func (tc *tlsALPN01Challenge) validate(db nosql.DB, jwk *jose.JSONWebKey, vo val
return tc, nil return tc, nil
} }
if err = tc.storeError(db, if err = tc.iterateRetry(db,
RejectedIdentifierErr(errors.Errorf("incorrect certificate for tls-alpn-01 challenge: "+ RejectedIdentifierErr(errors.Errorf("incorrect certificate for tls-alpn-01 challenge: "+
"missing acmeValidationV1 extension"))); err != nil { "missing acmeValidationV1 extension"))); err != nil {
return nil, err return nil, err
@ -593,7 +625,7 @@ func (dc *dns01Challenge) validate(db nosql.DB, jwk *jose.JSONWebKey, vo validat
if err != nil { if err != nil {
dnsErr := DNSErr(errors.Wrapf(err, "error looking up TXT "+ dnsErr := DNSErr(errors.Wrapf(err, "error looking up TXT "+
"records for domain %s", domain)) "records for domain %s", domain))
if err = dc.updateRetry(db, dnsErr); err != nil { if err = dc.iterateRetry(db, dnsErr); err != nil {
return nil, err return nil, err
} }
return dc, nil return dc, nil
@ -615,10 +647,7 @@ func (dc *dns01Challenge) validate(db nosql.DB, jwk *jose.JSONWebKey, vo validat
if !found { if !found {
rejectedErr := RejectedIdentifierErr(errors.Errorf("keyAuthorization "+ rejectedErr := RejectedIdentifierErr(errors.Errorf("keyAuthorization "+
"does not match; expected %s, but got %s", expectedKeyAuth, txtRecords)) "does not match; expected %s, but got %s", expectedKeyAuth, txtRecords))
upd := dc.clone() if err = dc.iterateRetry(db, rejectedErr); err != nil {
upd.Error = rejectedErr.ToACME()
upd.Error.Subproblems = append(upd.Error.Subproblems, rejectedErr)
if err = upd.save(db, dc); err != nil {
return nil, err return nil, err
} }
return dc, nil return dc, nil
@ -629,6 +658,7 @@ func (dc *dns01Challenge) validate(db nosql.DB, jwk *jose.JSONWebKey, vo validat
upd.Error = nil upd.Error = nil
upd.Validated = time.Now().UTC() upd.Validated = time.Now().UTC()
upd.Retry.Active = false upd.Retry.Active = false
upd.Retry.Called = 0
if err := upd.save(db, dc); err != nil { if err := upd.save(db, dc); err != nil {
return nil, err return nil, err
@ -651,18 +681,16 @@ func getChallenge(db nosql.DB, id string) (challenge, error) {
return ch, nil return ch, nil
} }
// updateRetry updates a challenge's retry and error objects upon a failed validation attempt // iterateRetry iterates a challenge's retry and error objects upon a failed validation attempt
func (bc *baseChallenge) updateRetry(db nosql.DB, error *Error) error { func (bc *baseChallenge) iterateRetry(db nosql.DB, error *Error) error {
upd := bc.clone() upd := bc.clone()
upd.Error = error.ToACME() upd.Error = error.ToACME()
upd.Error.Subproblems = append(upd.Error.Subproblems, error) upd.Error.Subproblems = append(upd.Error.Subproblems, error)
upd.Retry.Called ++ upd.Retry.Called ++
upd.Retry.Active = true upd.Retry.Active = true
if upd.Retry.Called >= 10 { if math.Mod(upd.Retry.Called , upd.Retry.MaxAttempts) == 0 {
upd.Status = StatusInvalid upd.Status = StatusInvalid
upd.Retry.Backoffs *= 2
upd.Retry.Active = false upd.Retry.Active = false
upd.Retry.Called = 0
} }
if err := upd.save(db, bc); err != nil { if err := upd.save(db, bc); err != nil {
return err return err