Plugin/Forward - autotune the dialTimeout for connection (#1852)
* - implement an auto-tunable dialTimeout for fallback. * - fix gofmt * - factorized timeout computation with readTimeout / updated readme / * - fix comment
This commit is contained in:
parent
6f865a9de7
commit
70c957d885
5 changed files with 84 additions and 52 deletions
|
@ -82,6 +82,10 @@ forward FROM TO... {
|
||||||
Also note the TLS config is "global" for the whole forwarding proxy if you need a different
|
Also note the TLS config is "global" for the whole forwarding proxy if you need a different
|
||||||
`tls-name` for different upstreams you're out of luck.
|
`tls-name` for different upstreams you're out of luck.
|
||||||
|
|
||||||
|
On each endpoint, the timeouts of the communication are set by default and automatically tuned depending early results.
|
||||||
|
- dialTimeout by default is 30 sec, and can decrease automatically down to 100ms
|
||||||
|
- readTimeout by default is 2 sec, and can decrease automatically down to 10ms
|
||||||
|
|
||||||
## Metrics
|
## Metrics
|
||||||
|
|
||||||
If monitoring is enabled (via the *prometheus* directive) then the following metric are exported:
|
If monitoring is enabled (via the *prometheus* directive) then the following metric are exported:
|
||||||
|
|
|
@ -16,21 +16,65 @@ import (
|
||||||
"github.com/miekg/dns"
|
"github.com/miekg/dns"
|
||||||
)
|
)
|
||||||
|
|
||||||
func (p *Proxy) readTimeout() time.Duration {
|
// limitTimeout is a utility function to auto-tune timeout values
|
||||||
rtt := time.Duration(atomic.LoadInt64(&p.avgRtt))
|
// average observed time is moved towards the last observed delay moderated by a weight
|
||||||
|
// next timeout to use will be the double of the computed average, limited by min and max frame.
|
||||||
|
func limitTimeout(currentAvg *int64, minValue time.Duration, maxValue time.Duration) time.Duration {
|
||||||
|
rt := time.Duration(atomic.LoadInt64(currentAvg))
|
||||||
|
if rt < minValue {
|
||||||
|
return minValue
|
||||||
|
}
|
||||||
|
if rt < maxValue/2 {
|
||||||
|
return 2 * rt
|
||||||
|
}
|
||||||
|
return maxValue
|
||||||
|
}
|
||||||
|
|
||||||
if rtt < minTimeout {
|
func averageTimeout(currentAvg *int64, observedDuration time.Duration, weight int64) {
|
||||||
return minTimeout
|
dt := time.Duration(atomic.LoadInt64(currentAvg))
|
||||||
|
atomic.AddInt64(currentAvg, int64(observedDuration-dt)/weight)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *transport) dialTimeout() time.Duration {
|
||||||
|
return limitTimeout(&t.avgDialTime, minDialTimeout, maxDialTimeout)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *transport) updateDialTimeout(newDialTime time.Duration) {
|
||||||
|
averageTimeout(&t.avgDialTime, newDialTime, cumulativeAvgWeight)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Dial dials the address configured in transport, potentially reusing a connection or creating a new one.
|
||||||
|
func (t *transport) Dial(proto string) (*dns.Conn, bool, error) {
|
||||||
|
// If tls has been configured; use it.
|
||||||
|
if t.tlsConfig != nil {
|
||||||
|
proto = "tcp-tls"
|
||||||
}
|
}
|
||||||
if rtt < maxTimeout/2 {
|
|
||||||
return 2 * rtt
|
t.dial <- proto
|
||||||
|
c := <-t.ret
|
||||||
|
|
||||||
|
if c != nil {
|
||||||
|
return c, true, nil
|
||||||
}
|
}
|
||||||
return maxTimeout
|
|
||||||
|
reqTime := time.Now()
|
||||||
|
timeout := t.dialTimeout()
|
||||||
|
if proto == "tcp-tls" {
|
||||||
|
conn, err := dns.DialTimeoutWithTLS("tcp", t.addr, t.tlsConfig, timeout)
|
||||||
|
t.updateDialTimeout(time.Since(reqTime))
|
||||||
|
return conn, false, err
|
||||||
|
}
|
||||||
|
conn, err := dns.DialTimeout(proto, t.addr, timeout)
|
||||||
|
t.updateDialTimeout(time.Since(reqTime))
|
||||||
|
return conn, false, err
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *Proxy) readTimeout() time.Duration {
|
||||||
|
return limitTimeout(&p.avgRtt, minTimeout, maxTimeout)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *Proxy) updateRtt(newRtt time.Duration) {
|
func (p *Proxy) updateRtt(newRtt time.Duration) {
|
||||||
rtt := time.Duration(atomic.LoadInt64(&p.avgRtt))
|
averageTimeout(&p.avgRtt, newRtt, cumulativeAvgWeight)
|
||||||
atomic.AddInt64(&p.avgRtt, int64((newRtt-rtt)/rttCount))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Connect selects an upstream, sends the request and waits for a response.
|
// Connect selects an upstream, sends the request and waits for a response.
|
||||||
|
@ -92,4 +136,4 @@ func (p *Proxy) Connect(ctx context.Context, state request.Request, forceTCP, me
|
||||||
return ret, nil
|
return ret, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
const rttCount = 4
|
const cumulativeAvgWeight = 4
|
||||||
|
|
|
@ -17,6 +17,7 @@ type persistConn struct {
|
||||||
|
|
||||||
// transport hold the persistent cache.
|
// transport hold the persistent cache.
|
||||||
type transport struct {
|
type transport struct {
|
||||||
|
avgDialTime int64 // kind of average time of dial time
|
||||||
conns map[string][]*persistConn // Buckets for udp, tcp and tcp-tls.
|
conns map[string][]*persistConn // Buckets for udp, tcp and tcp-tls.
|
||||||
expire time.Duration // After this duration a connection is expired.
|
expire time.Duration // After this duration a connection is expired.
|
||||||
addr string
|
addr string
|
||||||
|
@ -30,6 +31,7 @@ type transport struct {
|
||||||
|
|
||||||
func newTransport(addr string, tlsConfig *tls.Config) *transport {
|
func newTransport(addr string, tlsConfig *tls.Config) *transport {
|
||||||
t := &transport{
|
t := &transport{
|
||||||
|
avgDialTime: int64(defaultDialTimeout / 2),
|
||||||
conns: make(map[string][]*persistConn),
|
conns: make(map[string][]*persistConn),
|
||||||
expire: defaultExpire,
|
expire: defaultExpire,
|
||||||
addr: addr,
|
addr: addr,
|
||||||
|
@ -141,28 +143,6 @@ func (t *transport) cleanup(all bool) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Dial dials the address configured in transport, potentially reusing a connection or creating a new one.
|
|
||||||
func (t *transport) Dial(proto string) (*dns.Conn, bool, error) {
|
|
||||||
// If tls has been configured; use it.
|
|
||||||
if t.tlsConfig != nil {
|
|
||||||
proto = "tcp-tls"
|
|
||||||
}
|
|
||||||
|
|
||||||
t.dial <- proto
|
|
||||||
c := <-t.ret
|
|
||||||
|
|
||||||
if c != nil {
|
|
||||||
return c, true, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
if proto == "tcp-tls" {
|
|
||||||
conn, err := dns.DialTimeoutWithTLS("tcp", t.addr, t.tlsConfig, dialTimeout)
|
|
||||||
return conn, false, err
|
|
||||||
}
|
|
||||||
conn, err := dns.DialTimeout(proto, t.addr, dialTimeout)
|
|
||||||
return conn, false, err
|
|
||||||
}
|
|
||||||
|
|
||||||
// Yield return the connection to transport for reuse.
|
// Yield return the connection to transport for reuse.
|
||||||
func (t *transport) Yield(c *dns.Conn) { t.yield <- c }
|
func (t *transport) Yield(c *dns.Conn) { t.yield <- c }
|
||||||
|
|
||||||
|
@ -178,4 +158,9 @@ func (t *transport) SetExpire(expire time.Duration) { t.expire = expire }
|
||||||
// SetTLSConfig sets the TLS config in transport.
|
// SetTLSConfig sets the TLS config in transport.
|
||||||
func (t *transport) SetTLSConfig(cfg *tls.Config) { t.tlsConfig = cfg }
|
func (t *transport) SetTLSConfig(cfg *tls.Config) { t.tlsConfig = cfg }
|
||||||
|
|
||||||
const defaultExpire = 10 * time.Second
|
const (
|
||||||
|
defaultExpire = 10 * time.Second
|
||||||
|
minDialTimeout = 100 * time.Millisecond
|
||||||
|
maxDialTimeout = 30 * time.Second
|
||||||
|
defaultDialTimeout = 30 * time.Second
|
||||||
|
)
|
||||||
|
|
|
@ -140,9 +140,9 @@ func TestCleanupAll(t *testing.T) {
|
||||||
|
|
||||||
tr := newTransport(s.Addr, nil /* no TLS */)
|
tr := newTransport(s.Addr, nil /* no TLS */)
|
||||||
|
|
||||||
c1, _ := dns.DialTimeout("udp", tr.addr, dialTimeout)
|
c1, _ := dns.DialTimeout("udp", tr.addr, defaultDialTimeout)
|
||||||
c2, _ := dns.DialTimeout("udp", tr.addr, dialTimeout)
|
c2, _ := dns.DialTimeout("udp", tr.addr, defaultDialTimeout)
|
||||||
c3, _ := dns.DialTimeout("udp", tr.addr, dialTimeout)
|
c3, _ := dns.DialTimeout("udp", tr.addr, defaultDialTimeout)
|
||||||
|
|
||||||
tr.conns["udp"] = []*persistConn{
|
tr.conns["udp"] = []*persistConn{
|
||||||
{c1, time.Now()},
|
{c1, time.Now()},
|
||||||
|
|
|
@ -106,7 +106,6 @@ func (p *Proxy) start(duration time.Duration) {
|
||||||
}
|
}
|
||||||
|
|
||||||
const (
|
const (
|
||||||
dialTimeout = 4 * time.Second
|
|
||||||
timeout = 2 * time.Second
|
timeout = 2 * time.Second
|
||||||
maxTimeout = 2 * time.Second
|
maxTimeout = 2 * time.Second
|
||||||
minTimeout = 10 * time.Millisecond
|
minTimeout = 10 * time.Millisecond
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue