forked from TrueCloudLab/certificates
9af4dd3692
Section 8.2 of RFC 8555 explains how retries apply to the validation process. However, much is left up to the implementer. Add retries every 12 seconds for 2 minutes after a client requests a validation. The challenge status remains "processing" indefinitely until a distinct conclusion is reached. This allows a client to continually re-request a validation by sending a post-get to the challenge resource until the process fails or succeeds. Challenges in the processing state include information about why a validation did not complete in the error field. The server also includes a Retry-After header to help clients and servers coordinate. Retries are inherently stateful because they're part of the public API. When running step-ca in a highly available setup with replicas, care must be taken to maintain a persistent identifier for each instance "slot". In kubernetes, this implies a *stateful set*.
473 lines
14 KiB
Go
473 lines
14 KiB
Go
package acme
|
|
|
|
import (
|
|
"crypto"
|
|
"crypto/tls"
|
|
"crypto/x509"
|
|
"encoding/base64"
|
|
"log"
|
|
"net"
|
|
"net/http"
|
|
"net/url"
|
|
"os"
|
|
"strconv"
|
|
"time"
|
|
|
|
"github.com/pkg/errors"
|
|
"github.com/smallstep/certificates/authority/provisioner"
|
|
database "github.com/smallstep/certificates/db"
|
|
"github.com/smallstep/cli/jose"
|
|
"github.com/smallstep/nosql"
|
|
)
|
|
|
|
// Interface is the acme authority interface.
|
|
type Interface interface {
|
|
DeactivateAccount(provisioner.Interface, string) (*Account, error)
|
|
FinalizeOrder(provisioner.Interface, string, string, *x509.CertificateRequest) (*Order, error)
|
|
GetAccount(provisioner.Interface, string) (*Account, error)
|
|
GetAccountByKey(provisioner.Interface, *jose.JSONWebKey) (*Account, error)
|
|
GetAuthz(provisioner.Interface, string, string) (*Authz, error)
|
|
GetCertificate(string, string) ([]byte, error)
|
|
GetDirectory(provisioner.Interface) *Directory
|
|
GetLink(Link, string, bool, ...string) string
|
|
GetOrder(provisioner.Interface, string, string) (*Order, error)
|
|
GetOrdersByAccount(provisioner.Interface, string) ([]string, error)
|
|
LoadProvisionerByID(string) (provisioner.Interface, error)
|
|
NewAccount(provisioner.Interface, AccountOptions) (*Account, error)
|
|
NewNonce() (string, error)
|
|
NewOrder(provisioner.Interface, OrderOptions) (*Order, error)
|
|
UpdateAccount(provisioner.Interface, string, []string) (*Account, error)
|
|
UseNonce(string) error
|
|
ValidateChallenge(provisioner.Interface, string, string, *jose.JSONWebKey) (*Challenge, error)
|
|
}
|
|
|
|
// Authority is the layer that handles all ACME interactions.
|
|
type Authority struct {
|
|
db nosql.DB
|
|
dir *directory
|
|
signAuth SignAuthority
|
|
}
|
|
|
|
var (
|
|
accountTable = []byte("acme_accounts")
|
|
accountByKeyIDTable = []byte("acme_keyID_accountID_index")
|
|
authzTable = []byte("acme_authzs")
|
|
challengeTable = []byte("acme_challenges")
|
|
nonceTable = []byte("nonces")
|
|
orderTable = []byte("acme_orders")
|
|
ordersByAccountIDTable = []byte("acme_account_orders_index")
|
|
certTable = []byte("acme_certs")
|
|
ordinal int
|
|
)
|
|
|
|
// Ordinal is used during challenge retries to indicate ownership.
|
|
func init() {
|
|
ordstr := os.Getenv("STEP_CA_ORDINAL");
|
|
if ordstr == "" {
|
|
ordinal = 0
|
|
} else {
|
|
ord, err := strconv.Atoi(ordstr)
|
|
if err != nil {
|
|
log.Fatal("Unrecognized ordinal ingeter value.")
|
|
panic(nil)
|
|
}
|
|
ordinal = ord
|
|
}
|
|
}
|
|
|
|
// NewAuthority returns a new Authority that implements the ACME interface.
|
|
func NewAuthority(db nosql.DB, dns, prefix string, signAuth SignAuthority) (*Authority, error) {
|
|
if _, ok := db.(*database.SimpleDB); !ok {
|
|
// If it's not a SimpleDB then go ahead and bootstrap the DB with the
|
|
// necessary ACME tables. SimpleDB should ONLY be used for testing.
|
|
tables := [][]byte{accountTable, accountByKeyIDTable, authzTable,
|
|
challengeTable, nonceTable, orderTable, ordersByAccountIDTable,
|
|
certTable}
|
|
for _, b := range tables {
|
|
if err := db.CreateTable(b); err != nil {
|
|
return nil, errors.Wrapf(err, "error creating table %s",
|
|
string(b))
|
|
}
|
|
}
|
|
}
|
|
return &Authority{
|
|
db: db, dir: newDirectory(dns, prefix), signAuth: signAuth,
|
|
}, nil
|
|
}
|
|
|
|
// GetLink returns the requested link from the directory.
|
|
func (a *Authority) GetLink(typ Link, provID string, abs bool, inputs ...string) string {
|
|
return a.dir.getLink(typ, provID, abs, inputs...)
|
|
}
|
|
|
|
// GetDirectory returns the ACME directory object.
|
|
func (a *Authority) GetDirectory(p provisioner.Interface) *Directory {
|
|
name := url.PathEscape(p.GetName())
|
|
return &Directory{
|
|
NewNonce: a.dir.getLink(NewNonceLink, name, true),
|
|
NewAccount: a.dir.getLink(NewAccountLink, name, true),
|
|
NewOrder: a.dir.getLink(NewOrderLink, name, true),
|
|
RevokeCert: a.dir.getLink(RevokeCertLink, name, true),
|
|
KeyChange: a.dir.getLink(KeyChangeLink, name, true),
|
|
}
|
|
}
|
|
|
|
// LoadProvisionerByID calls out to the SignAuthority interface to load a
|
|
// provisioner by ID.
|
|
func (a *Authority) LoadProvisionerByID(id string) (provisioner.Interface, error) {
|
|
return a.signAuth.LoadProvisionerByID(id)
|
|
}
|
|
|
|
// NewNonce generates, stores, and returns a new ACME nonce.
|
|
func (a *Authority) NewNonce() (string, error) {
|
|
n, err := newNonce(a.db)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
return n.ID, nil
|
|
}
|
|
|
|
// UseNonce consumes the given nonce if it is valid, returns error otherwise.
|
|
func (a *Authority) UseNonce(nonce string) error {
|
|
return useNonce(a.db, nonce)
|
|
}
|
|
|
|
// NewAccount creates, stores, and returns a new ACME account.
|
|
func (a *Authority) NewAccount(p provisioner.Interface, ao AccountOptions) (*Account, error) {
|
|
acc, err := newAccount(a.db, ao)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return acc.toACME(a.db, a.dir, p)
|
|
}
|
|
|
|
// UpdateAccount updates an ACME account.
|
|
func (a *Authority) UpdateAccount(p provisioner.Interface, id string, contact []string) (*Account, error) {
|
|
acc, err := getAccountByID(a.db, id)
|
|
if err != nil {
|
|
return nil, ServerInternalErr(err)
|
|
}
|
|
if acc, err = acc.update(a.db, contact); err != nil {
|
|
return nil, err
|
|
}
|
|
return acc.toACME(a.db, a.dir, p)
|
|
}
|
|
|
|
// GetAccount returns an ACME account.
|
|
func (a *Authority) GetAccount(p provisioner.Interface, id string) (*Account, error) {
|
|
acc, err := getAccountByID(a.db, id)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return acc.toACME(a.db, a.dir, p)
|
|
}
|
|
|
|
// DeactivateAccount deactivates an ACME account.
|
|
func (a *Authority) DeactivateAccount(p provisioner.Interface, id string) (*Account, error) {
|
|
acc, err := getAccountByID(a.db, id)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if acc, err = acc.deactivate(a.db); err != nil {
|
|
return nil, err
|
|
}
|
|
return acc.toACME(a.db, a.dir, p)
|
|
}
|
|
|
|
func keyToID(jwk *jose.JSONWebKey) (string, error) {
|
|
kid, err := jwk.Thumbprint(crypto.SHA256)
|
|
if err != nil {
|
|
return "", ServerInternalErr(errors.Wrap(err, "error generating jwk thumbprint"))
|
|
}
|
|
return base64.RawURLEncoding.EncodeToString(kid), nil
|
|
}
|
|
|
|
// GetAccountByKey returns the ACME associated with the jwk id.
|
|
func (a *Authority) GetAccountByKey(p provisioner.Interface, jwk *jose.JSONWebKey) (*Account, error) {
|
|
kid, err := keyToID(jwk)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
acc, err := getAccountByKeyID(a.db, kid)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return acc.toACME(a.db, a.dir, p)
|
|
}
|
|
|
|
// GetOrder returns an ACME order.
|
|
func (a *Authority) GetOrder(p provisioner.Interface, accID, orderID string) (*Order, error) {
|
|
o, err := getOrder(a.db, orderID)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if accID != o.AccountID {
|
|
return nil, UnauthorizedErr(errors.New("account does not own order"))
|
|
}
|
|
if o, err = o.updateStatus(a.db); err != nil {
|
|
return nil, err
|
|
}
|
|
return o.toACME(a.db, a.dir, p)
|
|
}
|
|
|
|
// GetOrdersByAccount returns the list of order urls owned by the account.
|
|
func (a *Authority) GetOrdersByAccount(p provisioner.Interface, id string) ([]string, error) {
|
|
oids, err := getOrderIDsByAccount(a.db, id)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var ret = []string{}
|
|
for _, oid := range oids {
|
|
o, err := getOrder(a.db, oid)
|
|
if err != nil {
|
|
return nil, ServerInternalErr(err)
|
|
}
|
|
if o.Status == StatusInvalid {
|
|
continue
|
|
}
|
|
ret = append(ret, a.dir.getLink(OrderLink, URLSafeProvisionerName(p), true, o.ID))
|
|
}
|
|
return ret, nil
|
|
}
|
|
|
|
// NewOrder generates, stores, and returns a new ACME order.
|
|
func (a *Authority) NewOrder(p provisioner.Interface, ops OrderOptions) (*Order, error) {
|
|
order, err := newOrder(a.db, ops)
|
|
if err != nil {
|
|
return nil, Wrap(err, "error creating order")
|
|
}
|
|
return order.toACME(a.db, a.dir, p)
|
|
}
|
|
|
|
// FinalizeOrder attempts to finalize an order and generate a new certificate.
|
|
func (a *Authority) FinalizeOrder(p provisioner.Interface, accID, orderID string, csr *x509.CertificateRequest) (*Order, error) {
|
|
o, err := getOrder(a.db, orderID)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if accID != o.AccountID {
|
|
return nil, UnauthorizedErr(errors.New("account does not own order"))
|
|
}
|
|
o, err = o.finalize(a.db, csr, a.signAuth, p)
|
|
if err != nil {
|
|
return nil, Wrap(err, "error finalizing order")
|
|
}
|
|
return o.toACME(a.db, a.dir, p)
|
|
}
|
|
|
|
// GetAuthz retrieves and attempts to update the status on an ACME authz
|
|
// before returning.
|
|
func (a *Authority) GetAuthz(p provisioner.Interface, accID, authzID string) (*Authz, error) {
|
|
az, err := getAuthz(a.db, authzID)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if accID != az.getAccountID() {
|
|
return nil, UnauthorizedErr(errors.New("account does not own authz"))
|
|
}
|
|
az, err = az.updateStatus(a.db)
|
|
if err != nil {
|
|
return nil, Wrap(err, "error updating authz status")
|
|
}
|
|
return az.toACME(a.db, a.dir, p)
|
|
}
|
|
|
|
// The challenge validation state machine looks like:
|
|
//
|
|
// * https://tools.ietf.org/html/rfc8555#section-7.1.6
|
|
//
|
|
// While in the processing state, the server may retry as it sees fit. The challenge validation strategy
|
|
// needs to be rather specific in order for retries to work in a replicated, crash-proof deployment.
|
|
// In general, the goal is to allow requests to hit arbitrary instances of step-ca while managing retry
|
|
// responsibility such that multiple instances agree on an owner. Additionally, when a deployment of the
|
|
// CA is in progress, the ownership should be carried forward and new, updated (or in general, restarted),
|
|
// instances should pick back up where the crashed instance left off.
|
|
//
|
|
// The steps are:
|
|
//
|
|
// 1. Upon incoming request to the challenge endpoint, take ownership of the retry responsibility.
|
|
// (a) Set Retry.Owner to this instance's ordinal (STEP_CA_ORDINAL).
|
|
// (b) Set Retry.NumAttempts to 0 and Retry.MaxAttempts to the desired max.
|
|
// (c) Set Challenge.Status to "processing"
|
|
// (d) Set retry_after to a time (retryInterval) in the future.
|
|
// 2. Perform the validation attempt.
|
|
// 3. If the validation attempt results in a challenge that is still processing, schedule a retry.
|
|
//
|
|
// It's possible that another request to re-attempt the challenge comes in while a retry attempt is
|
|
// pending from a previous request. In general, these old attempts will see that Retry.NextAttempt
|
|
// is in the future and drop their task. But this also might have happened on another instance, etc.
|
|
//
|
|
// 4. When the retry timer fires, check to make sure the retry should still process.
|
|
// (a) Refresh the challenge from the DB.
|
|
// (a) Check that Retry.Owner is equal to this instance's ordinal.
|
|
// (b) Check that Retry.NextAttempt is in the past.
|
|
// 5. If the retry will commence, immediately update Retry.NextAttempt and save the challenge.
|
|
//
|
|
// Finally, if this instance is terminated, retries need to be reschedule when the instance restarts. This
|
|
// is handled in the acme provisioner (authority/provisioner/acme.go) initialization.
|
|
//
|
|
// Note: the default ordinal does not need to be changed unless step-ca is running in a replicated scenario.
|
|
//
|
|
func (a *Authority) ValidateChallenge(p provisioner.Interface, accID, chID string, jwk *jose.JSONWebKey) (*Challenge, error) {
|
|
ch, err := getChallenge(a.db, chID)
|
|
|
|
// Validate the challenge belongs to the account owned by the requester.
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if accID != ch.getAccountID() {
|
|
return nil, UnauthorizedErr(errors.New("account does not own challenge"))
|
|
}
|
|
|
|
// Take ownership of the challenge status and retry state. The values must be reset.
|
|
up := ch.clone()
|
|
up.Status = StatusProcessing
|
|
up.Retry = &Retry {
|
|
Owner: ordinal,
|
|
ProvisionerID: p.GetID(),
|
|
NumAttempts: 0,
|
|
MaxAttempts: 10,
|
|
NextAttempt: time.Now().Add(retryInterval).UTC().Format(time.RFC3339),
|
|
|
|
}
|
|
err = up.save(a.db, ch)
|
|
if err != nil {
|
|
return nil, Wrap(err, "error saving challenge")
|
|
}
|
|
ch = up
|
|
|
|
v, err := a.validate(ch, jwk)
|
|
// An error here is non-recoverable. Recoverable errors are set on the challenge object
|
|
// and should not be returned directly.
|
|
if err != nil {
|
|
return nil, Wrap(err, "error attempting challenge validation")
|
|
}
|
|
err = v.save(a.db, ch)
|
|
if err != nil {
|
|
return nil, Wrap(err, "error saving challenge")
|
|
}
|
|
ch = v
|
|
|
|
switch ch.getStatus() {
|
|
case StatusValid, StatusInvalid:
|
|
break
|
|
case StatusProcessing:
|
|
if ch.getRetry().Active() {
|
|
time.AfterFunc(retryInterval, func() {
|
|
a.RetryChallenge(ch.getID())
|
|
})
|
|
}
|
|
default:
|
|
panic("post-validation challenge in unexpected state" + ch.getStatus())
|
|
}
|
|
return ch.toACME(a.dir, p)
|
|
}
|
|
|
|
// The challenge validation process is specific to the type of challenge (dns-01, http-01, tls-alpn-01).
|
|
// But, we still pass generic "options" to the polymorphic validate call.
|
|
func (a *Authority) validate(ch challenge, jwk *jose.JSONWebKey) (challenge, error) {
|
|
client := http.Client{
|
|
Timeout: time.Duration(30 * time.Second),
|
|
}
|
|
dialer := &net.Dialer{
|
|
Timeout: 30 * time.Second,
|
|
}
|
|
return ch.validate(jwk, validateOptions{
|
|
httpGet: client.Get,
|
|
lookupTxt: net.LookupTXT,
|
|
tlsDial: func(network, addr string, config *tls.Config) (*tls.Conn, error) {
|
|
return tls.DialWithDialer(dialer, network, addr, config)
|
|
},
|
|
})
|
|
}
|
|
|
|
|
|
const retryInterval = 12 * time.Second
|
|
|
|
// see: ValidateChallenge
|
|
func (a *Authority) RetryChallenge(chID string) {
|
|
ch, err := getChallenge(a.db, chID)
|
|
if err != nil {
|
|
return
|
|
}
|
|
switch ch.getStatus() {
|
|
case StatusPending:
|
|
panic("pending challenges must first be moved to the processing state")
|
|
case StatusInvalid, StatusValid:
|
|
return
|
|
case StatusProcessing:
|
|
break
|
|
default:
|
|
panic("unknown challenge state: " + ch.getStatus())
|
|
}
|
|
|
|
// When retrying, check to make sure the ordinal has not changed.
|
|
// Make sure there are still retries left.
|
|
// Then check to make sure Retry.NextAttempt is in the past.
|
|
retry := ch.getRetry()
|
|
switch {
|
|
case retry.Owner != ordinal:
|
|
return
|
|
case !retry.Active():
|
|
return
|
|
}
|
|
t, err := time.Parse(time.RFC3339, retry.NextAttempt)
|
|
now := time.Now().UTC()
|
|
switch {
|
|
case err != nil:
|
|
return
|
|
case t.Before(now):
|
|
return
|
|
}
|
|
|
|
// Update the db so that other retries simply drop when their timer fires.
|
|
up := ch.clone()
|
|
up.Retry.NextAttempt = now.Add(retryInterval).UTC().Format(time.RFC3339)
|
|
up.Retry.NumAttempts += 1
|
|
err = up.save(a.db, ch)
|
|
if err != nil {
|
|
return
|
|
}
|
|
ch = up
|
|
|
|
p, err := a.LoadProvisionerByID(retry.ProvisionerID)
|
|
acc, err := a.GetAccount(p, ch.getAccountID())
|
|
|
|
v, err := a.validate(up, acc.Key)
|
|
if err != nil {
|
|
return
|
|
}
|
|
err = v.save(a.db, ch)
|
|
if err != nil {
|
|
return
|
|
}
|
|
ch = v
|
|
|
|
switch ch.getStatus() {
|
|
case StatusValid, StatusInvalid:
|
|
break
|
|
case StatusProcessing:
|
|
if ch.getRetry().Active() {
|
|
time.AfterFunc(retryInterval, func() {
|
|
a.RetryChallenge(ch.getID())
|
|
})
|
|
}
|
|
default:
|
|
panic("post-validation challenge in unexpected state " + ch.getStatus())
|
|
}
|
|
}
|
|
|
|
|
|
// GetCertificate retrieves the Certificate by ID.
|
|
func (a *Authority) GetCertificate(accID, certID string) ([]byte, error) {
|
|
cert, err := getCert(a.db, certID)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if accID != cert.AccountID {
|
|
return nil, UnauthorizedErr(errors.New("account does not own certificate"))
|
|
}
|
|
return cert.toACME(a.db, a.dir)
|
|
}
|
|
|