After several experiments at SoundCloud we found that the current minimum read timeout of 10ms is too low. A single request against a slow/unavailable authoritative server can cause all TCP connections to get closed. We record a 50th percentile forward/proxy latency of <5ms, and a 99th percentile latency of 60ms. Using a minimum timeout of 200ms seems to be a fair trade-off between avoiding unnecessary high connection churn and reacting to upstream failures in a timely manner. This change also renames hcDuration to hcInterval to reflect its usage, and removes the duplicated timeout constant to make code comprehension easier.
112 lines
2.6 KiB
Go
112 lines
2.6 KiB
Go
package forward
|
|
|
|
import (
|
|
"crypto/tls"
|
|
"runtime"
|
|
"sync/atomic"
|
|
"time"
|
|
|
|
"github.com/coredns/coredns/plugin/pkg/up"
|
|
|
|
"github.com/miekg/dns"
|
|
)
|
|
|
|
// Proxy defines an upstream host.
type Proxy struct {
	// avgRtt holds a round-trip-time estimate for this upstream as an
	// int64 duration in nanoseconds; seeded to maxTimeout/2 in NewProxy.
	// NOTE(review): keeping this 64-bit field first in the struct gives it
	// the 8-byte alignment sync/atomic requires on 32-bit platforms —
	// confirm atomic access elsewhere before reordering fields.
	avgRtt int64
	// fails counts failed exchanges against this upstream; read
	// atomically in Down to decide whether the host is considered down.
	fails uint32

	addr string // address of the upstream host this proxy forwards to

	// client is used for health checking only; rebuilt by SetTLSConfig.
	client *dns.Client

	// Connection caching
	expire    time.Duration // connection expiry duration (see SetExpire)
	transport *transport    // caches and dials upstream connections

	// health checking
	probe *up.Probe // drives the periodic health-check probe (see Healthcheck)
}
|
|
|
|
// NewProxy returns a new proxy.
|
|
func NewProxy(addr string, tlsConfig *tls.Config) *Proxy {
|
|
p := &Proxy{
|
|
addr: addr,
|
|
fails: 0,
|
|
probe: up.New(),
|
|
transport: newTransport(addr, tlsConfig),
|
|
avgRtt: int64(maxTimeout / 2),
|
|
}
|
|
p.client = dnsClient(tlsConfig)
|
|
runtime.SetFinalizer(p, (*Proxy).finalizer)
|
|
return p
|
|
}
|
|
|
|
// Addr returns the address to forward to.
|
|
func (p *Proxy) Addr() (addr string) { return p.addr }
|
|
|
|
// dnsClient returns a client used for health checking.
|
|
func dnsClient(tlsConfig *tls.Config) *dns.Client {
|
|
c := new(dns.Client)
|
|
c.Net = "udp"
|
|
// TODO(miek): this should be half of hcDuration?
|
|
c.ReadTimeout = 1 * time.Second
|
|
c.WriteTimeout = 1 * time.Second
|
|
|
|
if tlsConfig != nil {
|
|
c.Net = "tcp-tls"
|
|
c.TLSConfig = tlsConfig
|
|
}
|
|
return c
|
|
}
|
|
|
|
// SetTLSConfig sets the TLS config in the lower p.transport and in the healthchecking client.
|
|
func (p *Proxy) SetTLSConfig(cfg *tls.Config) {
|
|
p.transport.SetTLSConfig(cfg)
|
|
p.client = dnsClient(cfg)
|
|
}
|
|
|
|
// IsTLS returns true if proxy uses tls.
|
|
func (p *Proxy) IsTLS() bool { return p.transport.tlsConfig != nil }
|
|
|
|
// SetExpire sets the expire duration in the lower p.transport.
|
|
func (p *Proxy) SetExpire(expire time.Duration) { p.transport.SetExpire(expire) }
|
|
|
|
// Dial connects to the host in p with the configured transport.
|
|
func (p *Proxy) Dial(proto string) (*dns.Conn, bool, error) { return p.transport.Dial(proto) }
|
|
|
|
// Yield returns the connection to the pool.
|
|
func (p *Proxy) Yield(c *dns.Conn) { p.transport.Yield(c) }
|
|
|
|
// Healthcheck kicks off a round of health checks for this proxy; the
// actual check is performed by p.Check via the probe.
func (p *Proxy) Healthcheck() { p.probe.Do(p.Check) }
|
|
|
|
// Down returns true if this proxy is down, i.e. has *more* fails than maxfails.
|
|
func (p *Proxy) Down(maxfails uint32) bool {
|
|
if maxfails == 0 {
|
|
return false
|
|
}
|
|
|
|
fails := atomic.LoadUint32(&p.fails)
|
|
return fails > maxfails
|
|
}
|
|
|
|
// close stops the health checking goroutine.
|
|
func (p *Proxy) close() {
|
|
p.probe.Stop()
|
|
}
|
|
|
|
func (p *Proxy) finalizer() {
|
|
p.transport.Stop()
|
|
}
|
|
|
|
// start starts the proxy's healthchecking.
|
|
func (p *Proxy) start(duration time.Duration) {
|
|
p.probe.Start(duration)
|
|
p.transport.Start()
|
|
}
|
|
|
|
const (
	// maxTimeout caps the dynamic read timeout; NewProxy seeds the RTT
	// estimate (avgRtt) to half of this value.
	maxTimeout = 2 * time.Second
	// minTimeout is the floor for the dynamic read timeout. The earlier
	// 10ms floor proved too low in production — one slow/unavailable
	// upstream could get all TCP connections closed — so 200ms trades
	// connection churn against timely failure detection.
	minTimeout = 200 * time.Millisecond
	// hcInterval is the interval between health checks (formerly named
	// hcDuration; renamed to reflect its usage).
	hcInterval = 500 * time.Millisecond
)
|