diff --git a/acme/api/handler.go b/acme/api/handler.go index 31e19b7b..728244d0 100644 --- a/acme/api/handler.go +++ b/acme/api/handler.go @@ -148,51 +148,90 @@ func (h *Handler) GetAuthz(w http.ResponseWriter, r *http.Request) { api.JSON(w, authz) } -// GetChallenge ACME api for retrieving a Challenge. +// ACME api for retrieving the Challenge resource. +// +// Potential Challenges are requested by the client when creating an order. +// Once the client knows the appropriate validation resources are provisioned, +// it makes a POST-as-GET request to this endpoint in order to initiate the +// validation flow. +// +// The validation state machine describes the flow for a challenge. +// +// https://tools.ietf.org/html/rfc8555#section-7.1.6 +// +// Once a validation attempt has completed without error, the challenge's +// status is updated depending on the result (valid|invalid) of the server's +// validation attempt. Once this is the case, a challenge cannot be reset. +// +// If a challenge cannot be completed because no suitable data can be +// acquired the server (whilst communicating retry information) and the +// client (whilst respecting the information from the server) may request +// retries of the validation. +// +// https://tools.ietf.org/html/rfc8555#section-8.2 +// +// Retry status is communicated using the error field and by sending a +// Retry-After header back to the client. +// +// The request body is challenge-specific. The current challenges (http-01, +// dns-01, tls-alpn-01) simply expect an empty object ("{}") in the payload +// of the JWT sent by the client. We don't gain anything by stricly enforcing +// nonexistence of unknown attributes, or, in these three cases, enforcing +// an empty payload. And the spec also says to just ignore it: +// +// > The server MUST ignore any fields in the response object +// > that are not specified as response fields for this type of challenge. +// +// https://tools.ietf.org/html/rfc8555#section-7.5.1 +// func (h *Handler) GetChallenge(w http.ResponseWriter, r *http.Request) { prov, err := provisionerFromContext(r) if err != nil { api.WriteError(w, err) return } + acc, err := accountFromContext(r) if err != nil { api.WriteError(w, err) return } - // Just verify that the payload was set, since we're not strictly adhering - // to ACME V2 spec for reasons specified below. + + // Just verify that the payload was set since the client is required + // to send _something_. _, err = payloadFromContext(r) if err != nil { api.WriteError(w, err) return } - // NOTE: We should be checking that the request is either a POST-as-GET, or - // that the payload is an empty JSON block ({}). However, older ACME clients - // still send a vestigial body (rather than an empty JSON block) and - // strict enforcement would render these clients broken. For the time being - // we'll just ignore the body. var ( - ch *acme.Challenge + ch *acme.Challenge chID = chi.URLParam(r, "chID") ) ch, err = h.Auth.ValidateChallenge(prov, acc.GetID(), chID, acc.GetKey()) if err != nil { api.WriteError(w, err) - } else if ch.Retry.Active { - retryAfter, err := h.Auth.BackoffChallenge(prov, acc.GetID(), chID, acc.GetKey()) - if err != nil { - api.WriteError(w, err) - } else { - w.Header().Add("Retry-After", retryAfter.String()) - api.WriteProcessing(w, ch) - } - } else { + } + + switch ch.Status { + case acme.StatusPending: + panic("validation attempt did not move challenge to the processing state") + // When a transient error occurs, the challenge will not be progressed to the `invalid` state. + // Add a Retry-After header to indicate that the client should check again in the future. + case acme.StatusProcessing: + w.Header().Add("Retry-After", ch.RetryAfter) + w.Header().Add("Cache-Control", "no-cache") + api.JSON(w, ch) + case acme.StatusInvalid: + api.JSON(w, ch) + case acme.StatusValid: getLink := h.Auth.GetLink w.Header().Add("Link", link(getLink(acme.AuthzLink, acme.URLSafeProvisionerName(prov), true, ch.GetAuthzID()), "up")) w.Header().Set("Location", getLink(acme.ChallengeLink, acme.URLSafeProvisionerName(prov), true, ch.GetID())) api.JSON(w, ch) + default: + panic("unexpected challenge state" + ch.Status) } } diff --git a/acme/authority.go b/acme/authority.go index b58dc147..63998acf 100644 --- a/acme/authority.go +++ b/acme/authority.go @@ -5,10 +5,12 @@ import ( "crypto/tls" "crypto/x509" "encoding/base64" - "math" + "log" "net" "net/http" "net/url" + "os" + "strconv" "time" "github.com/pkg/errors" @@ -20,7 +22,6 @@ import ( // Interface is the acme authority interface. type Interface interface { - BackoffChallenge(provisioner.Interface, string, string, *jose.JSONWebKey) (time.Duration, error) DeactivateAccount(provisioner.Interface, string) (*Account, error) FinalizeOrder(provisioner.Interface, string, string, *x509.CertificateRequest) (*Order, error) GetAccount(provisioner.Interface, string) (*Account, error) @@ -56,8 +57,24 @@ var ( orderTable = []byte("acme_orders") ordersByAccountIDTable = []byte("acme_account_orders_index") certTable = []byte("acme_certs") + ordinal int ) +// Ordinal is used during challenge retries to indicate ownership. +func init() { + ordstr := os.Getenv("STEP_CA_ORDINAL"); + if ordstr == "" { + ordinal = 0 + } else { + ord, err := strconv.Atoi(ordstr) + if err != nil { + log.Fatal("Unrecognized ordinal ingeter value.") + panic(nil) + } + ordinal = ord + } +} + // NewAuthority returns a new Authority that implements the ACME interface. func NewAuthority(db nosql.DB, dns, prefix string, signAuth SignAuthority) (*Authority, error) { if _, ok := db.(*database.SimpleDB); !ok { @@ -256,50 +273,192 @@ func (a *Authority) GetAuthz(p provisioner.Interface, accID, authzID string) (*A return az.toACME(a.db, a.dir, p) } -// ValidateChallenge attempts to validate the challenge. +// The challenge validation state machine looks like: +// +// * https://tools.ietf.org/html/rfc8555#section-7.1.6 +// +// While in the processing state, the server may retry as it sees fit. The challenge validation strategy +// needs to be rather specific in order for retries to work in a replicated, crash-proof deployment. +// In general, the goal is to allow requests to hit arbitrary instances of step-ca while managing retry +// responsibility such that multiple instances agree on an owner. Additionally, when a deployment of the +// CA is in progress, the ownership should be carried forward and new, updated (or in general, restarted), +// instances should pick back up where the crashed instance left off. +// +// The steps are: +// +// 1. Upon incoming request to the challenge endpoint, take ownership of the retry responsibility. +// (a) Set Retry.Owner to this instance's ordinal (STEP_CA_ORDINAL). +// (b) Set Retry.NumAttempts to 0 and Retry.MaxAttempts to the desired max. +// (c) Set Challenge.Status to "processing" +// (d) Set retry_after to a time (retryInterval) in the future. +// 2. Perform the validation attempt. +// 3. If the validation attempt results in a challenge that is still processing, schedule a retry. +// +// It's possible that another request to re-attempt the challenge comes in while a retry attempt is +// pending from a previous request. In general, these old attempts will see that Retry.NextAttempt +// is in the future and drop their task. But this also might have happened on another instance, etc. +// +// 4. When the retry timer fires, check to make sure the retry should still process. +// (a) Refresh the challenge from the DB. +// (a) Check that Retry.Owner is equal to this instance's ordinal. +// (b) Check that Retry.NextAttempt is in the past. +// 5. If the retry will commence, immediately update Retry.NextAttempt and save the challenge. +// +// Finally, if this instance is terminated, retries need to be reschedule when the instance restarts. This +// is handled in the acme provisioner (authority/provisioner/acme.go) initialization. +// +// Note: the default ordinal does not need to be changed unless step-ca is running in a replicated scenario. +// func (a *Authority) ValidateChallenge(p provisioner.Interface, accID, chID string, jwk *jose.JSONWebKey) (*Challenge, error) { ch, err := getChallenge(a.db, chID) + + // Validate the challenge belongs to the account owned by the requester. if err != nil { return nil, err } if accID != ch.getAccountID() { return nil, UnauthorizedErr(errors.New("account does not own challenge")) } - retry := ch.getRetry() - if retry.Active { - return ch.toACME(a.db, a.dir, p) - } - retry.Mux.Lock() - defer retry.Mux.Unlock() + // Take ownership of the challenge status and retry state. The values must be reset. + up := ch.clone() + up.Status = StatusProcessing + up.Retry = &Retry { + Owner: ordinal, + ProvisionerID: p.GetID(), + NumAttempts: 0, + MaxAttempts: 10, + NextAttempt: time.Now().Add(retryInterval).UTC().Format(time.RFC3339), + + } + err = up.save(a.db, ch) + if err != nil { + return nil, Wrap(err, "error saving challenge") + } + ch = up + + v, err := a.validate(ch, jwk) + // An error here is non-recoverable. Recoverable errors are set on the challenge object + // and should not be returned directly. + if err != nil { + return nil, Wrap(err, "error attempting challenge validation") + } + err = v.save(a.db, ch) + if err != nil { + return nil, Wrap(err, "error saving challenge") + } + ch = v + + switch ch.getStatus() { + case StatusValid, StatusInvalid: + break + case StatusProcessing: + if ch.getRetry().Active() { + time.AfterFunc(retryInterval, func() { + a.RetryChallenge(ch.getID()) + }) + } + default: + panic("post-validation challenge in unexpected state" + ch.getStatus()) + } + return ch.toACME(a.dir, p) +} + +// The challenge validation process is specific to the type of challenge (dns-01, http-01, tls-alpn-01). +// But, we still pass generic "options" to the polymorphic validate call. +func (a *Authority) validate(ch challenge, jwk *jose.JSONWebKey) (challenge, error) { client := http.Client{ Timeout: time.Duration(30 * time.Second), } dialer := &net.Dialer{ Timeout: 30 * time.Second, } - for ch.getRetry().Active { - ch, err = ch.validate(a.db, jwk, validateOptions{ - httpGet: client.Get, - lookupTxt: net.LookupTXT, - tlsDial: func(network, addr string, config *tls.Config) (*tls.Conn, error) { - return tls.DialWithDialer(dialer, network, addr, config) - }, - }) - if err != nil { - return nil, Wrap(err, "error attempting challenge validation") - } - if ch.getStatus() == StatusValid { - break - } - if ch.getStatus() == StatusInvalid { - return ch.toACME(a.db, a.dir, p) - } - time.Sleep(ch.getBackoff()) - } - return ch.toACME(a.db, a.dir, p) + return ch.validate(jwk, validateOptions{ + httpGet: client.Get, + lookupTxt: net.LookupTXT, + tlsDial: func(network, addr string, config *tls.Config) (*tls.Conn, error) { + return tls.DialWithDialer(dialer, network, addr, config) + }, + }) } + +const retryInterval = 12 * time.Second + +// see: ValidateChallenge +func (a *Authority) RetryChallenge(chID string) { + ch, err := getChallenge(a.db, chID) + if err != nil { + return + } + switch ch.getStatus() { + case StatusPending: + panic("pending challenges must first be moved to the processing state") + case StatusInvalid, StatusValid: + return + case StatusProcessing: + break + default: + panic("unknown challenge state: " + ch.getStatus()) + } + + // When retrying, check to make sure the ordinal has not changed. + // Make sure there are still retries left. + // Then check to make sure Retry.NextAttempt is in the past. + retry := ch.getRetry() + switch { + case retry.Owner != ordinal: + return + case !retry.Active(): + return + } + t, err := time.Parse(time.RFC3339, retry.NextAttempt) + now := time.Now().UTC() + switch { + case err != nil: + return + case t.Before(now): + return + } + + // Update the db so that other retries simply drop when their timer fires. + up := ch.clone() + up.Retry.NextAttempt = now.Add(retryInterval).UTC().Format(time.RFC3339) + up.Retry.NumAttempts += 1 + err = up.save(a.db, ch) + if err != nil { + return + } + ch = up + + p, err := a.LoadProvisionerByID(retry.ProvisionerID) + acc, err := a.GetAccount(p, ch.getAccountID()) + + v, err := a.validate(up, acc.Key) + if err != nil { + return + } + err = v.save(a.db, ch) + if err != nil { + return + } + ch = v + + switch ch.getStatus() { + case StatusValid, StatusInvalid: + break + case StatusProcessing: + if ch.getRetry().Active() { + time.AfterFunc(retryInterval, func() { + a.RetryChallenge(ch.getID()) + }) + } + default: + panic("post-validation challenge in unexpected state " + ch.getStatus()) + } +} + + // GetCertificate retrieves the Certificate by ID. func (a *Authority) GetCertificate(accID, certID string) ([]byte, error) { cert, err := getCert(a.db, certID) @@ -312,23 +471,3 @@ func (a *Authority) GetCertificate(accID, certID string) ([]byte, error) { return cert.toACME(a.db, a.dir) } -// BackoffChallenge returns the total time to wait until the next sequence of validation attempts completes -func (a *Authority) BackoffChallenge(p provisioner.Interface, accID, chID string, jwk *jose.JSONWebKey) (time.Duration, error) { - ch, err := getChallenge(a.db, chID) - if err != nil { - return -1, err - } - if accID != ch.getAccountID() { - return -1, UnauthorizedErr(errors.New("account does not own challenge")) - } - - remCalls := ch.getRetry().MaxAttempts - math.Mod(ch.getRetry().Called, ch.getRetry().MaxAttempts) - totBackoff := 0 * time.Second - for i := 0; i < int(remCalls); i++ { - clone := ch.clone() - clone.Retry.Called += float64(i) - totBackoff += clone.getBackoff() - } - - return totBackoff, nil -} diff --git a/acme/authz.go b/acme/authz.go index cdcb15e5..789dcab2 100644 --- a/acme/authz.go +++ b/acme/authz.go @@ -148,7 +148,7 @@ func (ba *baseAuthz) toACME(db nosql.DB, dir *directory, p provisioner.Interface if err != nil { return nil, err } - chs[i], err = ch.toACME(db, dir, p) + chs[i], err = ch.toACME(dir, p) if err != nil { return nil, err } diff --git a/acme/challenge.go b/acme/challenge.go index 9b47c7a6..031dcc7c 100644 --- a/acme/challenge.go +++ b/acme/challenge.go @@ -11,12 +11,9 @@ import ( "encoding/json" "fmt" "io/ioutil" - "math" - "math/rand" "net" "net/http" "strings" - "sync" "time" "github.com/pkg/errors" @@ -34,22 +31,11 @@ type Challenge struct { Validated string `json:"validated,omitempty"` URL string `json:"url"` Error *AError `json:"error,omitempty"` - Retry *Retry `json:"retry"` + RetryAfter string `json:"retry_after,omitempty"` ID string `json:"-"` AuthzID string `json:"-"` } -type Retry struct { - Called float64 `json:"id"` - BaseDelay time.Duration `json:"basedelay"` - MaxDelay time.Duration `json:"maxdelay"` - Multiplier float64 `json:"multiplier"` - Jitter float64 `json:"jitter"` - MaxAttempts float64 `json:"maxattempts"` - Active bool `json:"active"` - Mux sync.Mutex `json:"mux"` -} - // ToLog enables response logging. func (c *Challenge) ToLog() (interface{}, error) { b, err := json.Marshal(c) @@ -82,7 +68,7 @@ type validateOptions struct { // challenge is the interface ACME challenege types must implement. type challenge interface { save(db nosql.DB, swap challenge) error - validate(nosql.DB, *jose.JSONWebKey, validateOptions) (challenge, error) + validate(*jose.JSONWebKey, validateOptions) (challenge, error) getType() string getError() *AError getValue() string @@ -91,18 +77,18 @@ type challenge interface { getAuthzID() string getToken() string getRetry() *Retry - getBackoff() time.Duration clone() *baseChallenge getAccountID() string getValidated() time.Time getCreated() time.Time - toACME(nosql.DB, *directory, provisioner.Interface) (*Challenge, error) + toACME(*directory, provisioner.Interface) (*Challenge, error) } // ChallengeOptions is the type used to created a new Challenge. type ChallengeOptions struct { AccountID string AuthzID string + ProvisionerID string Identifier Identifier } @@ -115,10 +101,10 @@ type baseChallenge struct { Status string `json:"status"` Token string `json:"token"` Value string `json:"value"` - Validated time.Time `json:"validated"` Created time.Time `json:"created"` + Validated time.Time `json:"validated"` Error *AError `json:"error"` - Retry *Retry `json:"retry"` + Retry *Retry `json:"retry"` } func newBaseChallenge(accountID, authzID string) (*baseChallenge, error) { @@ -138,7 +124,6 @@ func newBaseChallenge(accountID, authzID string) (*baseChallenge, error) { Status: StatusPending, Token: token, Created: clock.Now(), - Retry: &Retry{Called:0, BaseDelay:1, MaxDelay: 30, Jitter: 1, MaxAttempts:10, Active:false}, }, nil } @@ -197,28 +182,9 @@ func (bc *baseChallenge) getError() *AError { return bc.Error } -// getBackoff returns the backoff time of the baseChallenge. -func (bc *baseChallenge) getBackoff() time.Duration { - if bc.Retry.Called == 0 { - return bc.Retry.BaseDelay - } - - backoff, max := float64(bc.Retry.BaseDelay), float64(bc.Retry.MaxDelay) - backoff *= math.Pow(bc.Retry.Multiplier, bc.Retry.Called) - if backoff > max { - backoff = max - } - // Introduce Jitter to ensure that clustered requests wont operate in unison - backoff *= 1 + bc.Retry.Jitter*(rand.Float64()*2-1) - if backoff < 0 { - return 0 - } - return time.Duration(backoff) -} - // toACME converts the internal Challenge type into the public acmeChallenge // type for presentation in the ACME protocol. -func (bc *baseChallenge) toACME(db nosql.DB, dir *directory, p provisioner.Interface) (*Challenge, error) { +func (bc *baseChallenge) toACME(dir *directory, p provisioner.Interface) (*Challenge, error) { ac := &Challenge{ Type: bc.getType(), Status: bc.getStatus(), @@ -233,8 +199,8 @@ func (bc *baseChallenge) toACME(db nosql.DB, dir *directory, p provisioner.Inter if bc.Error != nil { ac.Error = bc.Error } - if bc.Retry != nil { - ac.Retry = bc.Retry + if bc.Retry != nil && bc.Status == StatusProcessing { + ac.RetryAfter = bc.Retry.NextAttempt } return ac, nil } @@ -274,10 +240,12 @@ func (bc *baseChallenge) save(db nosql.DB, old challenge) error { func (bc *baseChallenge) clone() *baseChallenge { u := *bc + r := *bc.Retry + u.Retry = &r return &u } -func (bc *baseChallenge) validate(db nosql.DB, jwk *jose.JSONWebKey, vo validateOptions) (challenge, error) { +func (bc *baseChallenge) validate(jwk *jose.JSONWebKey, vo validateOptions) (challenge, error) { return nil, ServerInternalErr(errors.New("unimplemented")) } @@ -323,6 +291,21 @@ func unmarshalChallenge(data []byte) (challenge, error) { } } +// Challenge retry information is internally relevant and needs to be stored in the DB, but should not be part +// of the public challenge API apart from the Retry-After header. +type Retry struct { + Owner int `json:"owner"` + ProvisionerID string `json:"provisionerid"` + NumAttempts int `json:"numattempts"` + MaxAttempts int `json:"maxattempts"` + NextAttempt string `json:"nextattempt"` +} + +func (r *Retry) Active() bool { + return r.NumAttempts < r.MaxAttempts +} + + // http01Challenge represents an http-01 acme challenge. type http01Challenge struct { *baseChallenge @@ -347,78 +330,61 @@ func newHTTP01Challenge(db nosql.DB, ops ChallengeOptions) (challenge, error) { // Validate attempts to validate the challenge. If the challenge has been // satisfactorily validated, the 'status' and 'validated' attributes are // updated. -func (hc *http01Challenge) validate(db nosql.DB, jwk *jose.JSONWebKey, vo validateOptions) (challenge, error) { +func (hc *http01Challenge) validate(jwk *jose.JSONWebKey, vo validateOptions) (challenge, error) { // If already valid or invalid then return without performing validation. - if hc.getStatus() == StatusValid { + switch hc.getStatus() { + case StatusPending: + panic("pending challenges must first be moved to the processing state") + case StatusProcessing: + break + case StatusValid, StatusInvalid: return hc, nil + default: + panic("unknown challenge state: " + hc.getStatus()) } - if hc.getStatus() == StatusInvalid { - // TODO: Resolve segfault on upd.save - upd := hc.clone() - upd.Status = StatusPending - if err := upd.save(db, hc); err != nil { - fmt.Printf("Error in save: %s\n\n\n", err) - return nil, err - } - fmt.Print("Through Save\n\n") - return hc, nil - } - url := fmt.Sprintf("http://%s/.well-known/acme-challenge/%s", hc.Value, hc.Token) + up := &http01Challenge{hc.baseChallenge.clone()} + + url := fmt.Sprintf("http://%s/.well-known/acme-challenge/%s", hc.Value, hc.Token) resp, err := vo.httpGet(url) if err != nil { - if err = hc.iterateRetry(db, ConnectionErr(errors.Wrapf(err, - "error doing http GET for url %s", url))); err != nil { - return nil, err - } - return hc, nil - } - if resp.StatusCode >= 400 { - if err = hc.iterateRetry(db, - ConnectionErr(errors.Errorf("error doing http GET for url %s with status code %d", - url, resp.StatusCode))); err != nil { - return nil, err - } - return hc, nil + e := errors.Wrapf(err, "error doing http GET for url %s", url) + up.Error = ConnectionErr(e).ToACME() + return up, nil } defer resp.Body.Close() + if resp.StatusCode >= 400 { + e := errors.Errorf("error doing http GET for url %s with status code %d", url, resp.StatusCode) + up.Error = ConnectionErr(e).ToACME() + return up, nil + } + body, err := ioutil.ReadAll(resp.Body) if err != nil { - return nil, ServerInternalErr(errors.Wrapf(err, "error reading "+ - "response body for url %s", url)) + e := errors.Wrapf(err, "error reading response body for url %s", url) + up.Error = ServerInternalErr(e).ToACME() + return up, nil } - keyAuth := strings.Trim(string(body), "\r\n") + keyAuth := strings.Trim(string(body), "\r\n") expected, err := KeyAuthorization(hc.Token, jwk) if err != nil { - // TODO retry? return nil, err } if keyAuth != expected { - // TODO should we just bail here, this is a "successful" failure. - if err = hc.iterateRetry(db, RejectedIdentifierErr(errors.Errorf("keyAuthorization does not match; "+ - "expected %s, but got %s", expected, keyAuth))); err != nil { - // TODO what's the difference between returning nil with an error, and returning a retry- - // incremented challenge? I think we should probably only hard error if there are no more - // retries left. - return nil, err - } - return hc, err + // add base challenge fail validation + e := errors.Errorf("keyAuthorization does not match; expected %s, but got %s", expected, keyAuth) + up.Error = RejectedIdentifierErr(e).ToACME() + up.Status = StatusInvalid + return up, nil } - // Update and store the challenge. - upd := &http01Challenge{hc.baseChallenge.clone()} - upd.Status = StatusValid - upd.Error = nil - upd.Validated = clock.Now() - upd.Retry.Active = false - upd.Retry.Called = 0 - - if err := upd.save(db, hc); err != nil { - return nil, err - } - return upd, nil + up.Status = StatusValid + up.Validated = clock.Now() + up.Error = nil + up.Retry = nil + return up, nil } type tlsALPN01Challenge struct { @@ -434,34 +400,39 @@ func newTLSALPN01Challenge(db nosql.DB, ops ChallengeOptions) (challenge, error) bc.Type = "tls-alpn-01" bc.Value = ops.Identifier.Value - hc := &tlsALPN01Challenge{bc} - if err := hc.save(db, nil); err != nil { + tc := &tlsALPN01Challenge{bc} + if err := tc.save(db, nil); err != nil { return nil, err } - return hc, nil + return tc, nil } -func (tc *tlsALPN01Challenge) validate(db nosql.DB, jwk *jose.JSONWebKey, vo validateOptions) (challenge, error) { +func (tc *tlsALPN01Challenge) validate(jwk *jose.JSONWebKey, vo validateOptions) (challenge, error) { // If already valid or invalid then return without performing validation. - if tc.getStatus() == StatusValid || tc.getStatus() == StatusInvalid { + switch tc.getStatus() { + case StatusPending: + panic("pending challenges must first be moved to the processing state") + case StatusProcessing: + break + case StatusValid, StatusInvalid: return tc, nil + default: + panic("unknown challenge state: " + tc.getStatus()) } + up := &tlsALPN01Challenge{tc.baseChallenge.clone()} + config := &tls.Config{ NextProtos: []string{"acme-tls/1"}, ServerName: tc.Value, InsecureSkipVerify: true, // we expect a self-signed challenge certificate } - hostPort := net.JoinHostPort(tc.Value, "443") - conn, err := vo.tlsDial("tcp", hostPort, config) if err != nil { - if err = tc.iterateRetry(db, - ConnectionErr(errors.Wrapf(err, "error doing TLS dial for %s", hostPort))); err != nil { - return nil, err - } - return tc, nil + e := errors.Wrapf(err, "error doing TLS dial for %s", hostPort) + up.Error = ConnectionErr(e).ToACME() + return up, nil } defer conn.Close() @@ -469,31 +440,22 @@ func (tc *tlsALPN01Challenge) validate(db nosql.DB, jwk *jose.JSONWebKey, vo val certs := cs.PeerCertificates if len(certs) == 0 { - if err = tc.iterateRetry(db, - RejectedIdentifierErr(errors.Errorf("%s challenge for %s resulted in no certificates", - tc.Type, tc.Value))); err != nil { - return nil, err - } - return tc, nil + e := errors.Errorf("%s challenge for %s resulted in no certificates", tc.Type, tc.Value) + up.Error = RejectedIdentifierErr(e).ToACME() + return up, nil } - if !cs.NegotiatedProtocolIsMutual || cs.NegotiatedProtocol != "acme-tls/1" { - if err = tc.iterateRetry(db, RejectedIdentifierErr(errors.Errorf("cannot negotiate ALPN acme-tls/1 protocol for "+ - "tls-alpn-01 challenge"))); err != nil { - return nil, err - } - return tc, nil + e := errors.Errorf("cannot negotiate ALPN acme-tls/1 protocol for tls-alpn-01 challenge") + up.Error = RejectedIdentifierErr(e).ToACME() + return up, nil } leafCert := certs[0] - if len(leafCert.DNSNames) != 1 || !strings.EqualFold(leafCert.DNSNames[0], tc.Value) { - if err = tc.iterateRetry(db, - RejectedIdentifierErr(errors.Errorf("incorrect certificate for tls-alpn-01 challenge: "+ - "leaf certificate must contain a single DNS name, %v", tc.Value))); err != nil { - return nil, err - } - return tc, nil + e := errors.Errorf("incorrect certificate for tls-alpn-01 challenge: " + + "leaf certificate must contain a single DNS name, %v", tc.Value) + up.Error = RejectedIdentifierErr(e).ToACME() + return up, nil } idPeAcmeIdentifier := asn1.ObjectIdentifier{1, 3, 6, 1, 5, 5, 7, 1, 31} @@ -509,46 +471,37 @@ func (tc *tlsALPN01Challenge) validate(db nosql.DB, jwk *jose.JSONWebKey, vo val for _, ext := range leafCert.Extensions { if idPeAcmeIdentifier.Equal(ext.Id) { if !ext.Critical { - if err = tc.iterateRetry(db, - RejectedIdentifierErr(errors.Errorf("incorrect certificate for tls-alpn-01 challenge: " + - "acmeValidationV1 extension not critical"))); err != nil { - return nil, err - } - return tc, nil + e := errors.Errorf("incorrect certificate for tls-alpn-01 challenge: " + + "acmeValidationV1 extension not critical") + up.Error = IncorrectResponseErr(e).ToACME() + return up, nil } var extValue []byte rest, err := asn1.Unmarshal(ext.Value, &extValue) if err != nil || len(rest) > 0 || len(hashedKeyAuth) != len(extValue) { - if err = tc.iterateRetry(db, - RejectedIdentifierErr(errors.Errorf("incorrect certificate for tls-alpn-01 challenge: "+ - "malformed acmeValidationV1 extension value"))); err != nil { - return nil, err - } - return tc, nil + e := errors.Errorf("incorrect certificate for tls-alpn-01 challenge: " + + "malformed acmeValidationV1 extension value") + up.Error = IncorrectResponseErr(e).ToACME() + return up, nil } if subtle.ConstantTimeCompare(hashedKeyAuth[:], extValue) != 1 { - if err = tc.iterateRetry(db, - RejectedIdentifierErr(errors.Errorf("incorrect certificate for tls-alpn-01 challenge: "+ + e := errors.Errorf("incorrect certificate for tls-alpn-01 challenge: " + "expected acmeValidationV1 extension value %s for this challenge but got %s", - hex.EncodeToString(hashedKeyAuth[:]), hex.EncodeToString(extValue)))); err != nil { - return nil, err - } - return tc, nil + hex.EncodeToString(hashedKeyAuth[:]), hex.EncodeToString(extValue)) + up.Error = IncorrectResponseErr(e).ToACME() + // There is an appropriate value, but it doesn't match. + up.Status = StatusInvalid + return up, nil } - upd := &tlsALPN01Challenge{tc.baseChallenge.clone()} - upd.Status = StatusValid - upd.Error = nil - upd.Validated = clock.Now() - upd.Retry.Active = false - - if err := upd.save(db, tc); err != nil { - return nil, err - } - return upd, nil + up.Validated = clock.Now() + up.Status = StatusValid + up.Error = nil + up.Retry = nil + return up, nil } if idPeAcmeIdentifierV1Obsolete.Equal(ext.Id) { @@ -557,20 +510,15 @@ func (tc *tlsALPN01Challenge) validate(db nosql.DB, jwk *jose.JSONWebKey, vo val } if foundIDPeAcmeIdentifierV1Obsolete { - // TODO this isn't likely going to change without user action, this is probably a good case for a hard failure... - if err = tc.iterateRetry(db, - RejectedIdentifierErr(errors.Errorf("incorrect certificate for tls-alpn-01 challenge: "+ - "obsolete id-pe-acmeIdentifier in acmeValidationV1 extension"))); err != nil { - return nil, err - } - return tc, nil + e := errors.Errorf("incorrect certificate for tls-alpn-01 challenge: " + + "obsolete id-pe-acmeIdentifier in acmeValidationV1 extension") + up.Error = IncorrectResponseErr(e).ToACME() + return up, nil } - if err = tc.iterateRetry(db, - RejectedIdentifierErr(errors.Errorf("incorrect certificate for tls-alpn-01 challenge: "+ - "missing acmeValidationV1 extension"))); err != nil { - return nil, err - } + e := errors.Errorf("incorrect certificate for tls-alpn-01 challenge: "+ + "missing acmeValidationV1 extension") + up.Error = IncorrectResponseErr(e).ToACME() return tc, nil } @@ -595,39 +543,35 @@ func newDNS01Challenge(db nosql.DB, ops ChallengeOptions) (challenge, error) { return dc, nil } -// KeyAuthorization creates the ACME key authorization value from a token -// and a jwk. -func KeyAuthorization(token string, jwk *jose.JSONWebKey) (string, error) { - thumbprint, err := jwk.Thumbprint(crypto.SHA256) - if err != nil { - return "", ServerInternalErr(errors.Wrap(err, "error generating JWK thumbprint")) - } - encPrint := base64.RawURLEncoding.EncodeToString(thumbprint) - return fmt.Sprintf("%s.%s", token, encPrint), nil -} - // validate attempts to validate the challenge. If the challenge has been // satisfactorily validated, the 'status' and 'validated' attributes are // updated. -func (dc *dns01Challenge) validate(db nosql.DB, jwk *jose.JSONWebKey, vo validateOptions) (challenge, error) { +func (dc *dns01Challenge) validate(jwk *jose.JSONWebKey, vo validateOptions) (challenge, error) { // If already valid or invalid then return without performing validation. - if dc.getStatus() == StatusValid || dc.getStatus() == StatusInvalid { + switch dc.getStatus() { + case StatusPending: + panic("pending challenges must first be moved to the processing state") + case StatusProcessing: + break + case StatusValid, StatusInvalid: return dc, nil + default: + panic("unknown challenge state: " + dc.getStatus()) } + up := &dns01Challenge{dc.baseChallenge.clone()} + // Normalize domain for wildcard DNS names // This is done to avoid making TXT lookups for domains like // _acme-challenge.*.example.com // Instead perform txt lookup for _acme-challenge.example.com domain := strings.TrimPrefix(dc.Value, "*.") + record := "_acme-challenge." + domain - txtRecords, err := vo.lookupTxt("_acme-challenge." + domain) + txtRecords, err := vo.lookupTxt(record) if err != nil { - dnsErr := DNSErr(errors.Wrapf(err, "error looking up TXT "+ - "records for domain %s", domain)) - if err = dc.iterateRetry(db, dnsErr); err != nil { - return nil, err - } + e := errors.Wrapf(err, "error looking up TXT records for domain %s", domain) + up.Error = DNSErr(e).ToACME() return dc, nil } @@ -637,33 +581,39 @@ func (dc *dns01Challenge) validate(db nosql.DB, jwk *jose.JSONWebKey, vo validat } h := sha256.Sum256([]byte(expectedKeyAuth)) expected := base64.RawURLEncoding.EncodeToString(h[:]) - var found bool + + if len(txtRecords) == 0 { + e := errors.Errorf("no TXT record found at '%s'", record) + up.Error = DNSErr(e).ToACME() + return up, nil + } + for _, r := range txtRecords { if r == expected { - found = true - break + up.Validated = time.Now().UTC() + up.Status = StatusValid + up.Error = nil + up.Retry = nil + return up, nil } } - if !found { - rejectedErr := RejectedIdentifierErr(errors.Errorf("keyAuthorization "+ - "does not match; expected %s, but got %s", expectedKeyAuth, txtRecords)) - if err = dc.iterateRetry(db, rejectedErr); err != nil { - return nil, err - } - return dc, nil - } - // Update and store the challenge. - upd := &dns01Challenge{dc.baseChallenge.clone()} - upd.Status = StatusValid - upd.Error = nil - upd.Validated = time.Now().UTC() - upd.Retry.Active = false - upd.Retry.Called = 0 - if err := upd.save(db, dc); err != nil { - return nil, err + up.Status = StatusInvalid + e := errors.Errorf("keyAuthorization does not match; expected %s, but got %s", + expectedKeyAuth, txtRecords) + up.Error = IncorrectResponseErr(e).ToACME() + return up, nil +} + +// KeyAuthorization creates the ACME key authorization value from a token +// and a jwk. +func KeyAuthorization(token string, jwk *jose.JSONWebKey) (string, *Error) { + thumbprint, err := jwk.Thumbprint(crypto.SHA256) + if err != nil { + return "", ServerInternalErr(errors.Wrap(err, "error generating JWK thumbprint")) } - return upd, nil + encPrint := base64.RawURLEncoding.EncodeToString(thumbprint) + return fmt.Sprintf("%s.%s", token, encPrint), nil } // getChallenge retrieves and unmarshals an ACME challenge type from the database. @@ -681,19 +631,3 @@ func getChallenge(db nosql.DB, id string) (challenge, error) { return ch, nil } -// iterateRetry iterates a challenge's retry and error objects upon a failed validation attempt -func (bc *baseChallenge) iterateRetry(db nosql.DB, error *Error) error { - upd := bc.clone() - upd.Error = error.ToACME() - upd.Error.Subproblems = append(upd.Error.Subproblems, error) - upd.Retry.Called ++ - upd.Retry.Active = true - if math.Mod(upd.Retry.Called , upd.Retry.MaxAttempts) == 0 { - upd.Status = StatusInvalid - upd.Retry.Active = false - } - if err := upd.save(db, bc); err != nil { - return err - } - return nil -} diff --git a/acme/common.go b/acme/common.go index ecdd371a..8b6b2e82 100644 --- a/acme/common.go +++ b/acme/common.go @@ -29,6 +29,8 @@ var ( StatusInvalid = "invalid" // StatusPending -- pending; e.g. an Order that is not ready to be finalized. StatusPending = "pending" + // processing -- e.g. a Challenge that is in the process of being validated. + StatusProcessing = "processing" // StatusDeactivated -- deactivated; e.g. for an Account that is not longer valid. StatusDeactivated = "deactivated" // StatusReady -- ready; e.g. for an Order that is ready to be finalized. diff --git a/api/utils.go b/api/utils.go index 154023d0..0d87a065 100644 --- a/api/utils.go +++ b/api/utils.go @@ -52,11 +52,6 @@ func JSON(w http.ResponseWriter, v interface{}) { JSONStatus(w, v, http.StatusOK) } -// JSON writes the passed value into the http.ResponseWriter. -func WriteProcessing(w http.ResponseWriter, v interface{}) { - JSONStatus(w, v, http.StatusProcessing) -} - // JSONStatus writes the given value into the http.ResponseWriter and the // given status is written as the status code of the response. func JSONStatus(w http.ResponseWriter, v interface{}, status int) {