plugin/cache: Add refresh mode setting to serve_stale (#5131)
This PR adds an optional REFRESH_MODE parameter on the serve_stale configuration directive of the cache plugin, which verifies that the upstream is still unavailable before returning stale entries. Signed-off-by: Antoine Tollenaere <atollena@gmail.com>
This commit is contained in:
parent
c3572fdb30
commit
66f2ac7568
6 changed files with 170 additions and 25 deletions
9
plugin/cache/README.md
vendored
9
plugin/cache/README.md
vendored
|
@ -37,7 +37,7 @@ cache [TTL] [ZONES...] {
|
||||||
success CAPACITY [TTL] [MINTTL]
|
success CAPACITY [TTL] [MINTTL]
|
||||||
denial CAPACITY [TTL] [MINTTL]
|
denial CAPACITY [TTL] [MINTTL]
|
||||||
prefetch AMOUNT [[DURATION] [PERCENTAGE%]]
|
prefetch AMOUNT [[DURATION] [PERCENTAGE%]]
|
||||||
serve_stale [DURATION]
|
serve_stale [DURATION] [REFRESH_MODE]
|
||||||
}
|
}
|
||||||
~~~
|
~~~
|
||||||
|
|
||||||
|
@ -57,7 +57,12 @@ cache [TTL] [ZONES...] {
|
||||||
* `serve_stale`, when serve\_stale is set, cache always will serve an expired entry to a client if there is one
|
* `serve_stale`, when serve\_stale is set, cache always will serve an expired entry to a client if there is one
|
||||||
available. When this happens, cache will attempt to refresh the cache entry after sending the expired cache
|
available. When this happens, cache will attempt to refresh the cache entry after sending the expired cache
|
||||||
entry to the client. The responses have a TTL of 0. **DURATION** is how far back to consider
|
entry to the client. The responses have a TTL of 0. **DURATION** is how far back to consider
|
||||||
stale responses as fresh. The default duration is 1h.
|
stale responses as fresh. The default duration is 1h. **REFRESH_MODE** controls when the attempt to refresh
|
||||||
|
the cache happens. `verified` will first verify that an entry is still unavailable from the source before sending
|
||||||
|
the stale response to the client. `immediate` will immediately send the expired response to the client before
|
||||||
|
checking to see if the entry is available from the source. **REFRESH_MODE** defaults to `immediate`. Setting this
|
||||||
|
value to `verified` can lead to increased latency when serving stale responses, but will prevent stale entries
|
||||||
|
from ever being served if an updated response can be retrieved from the source.
|
||||||
|
|
||||||
## Capacity and Eviction
|
## Capacity and Eviction
|
||||||
|
|
||||||
|
|
29
plugin/cache/cache.go
vendored
29
plugin/cache/cache.go
vendored
|
@ -38,7 +38,9 @@ type Cache struct {
|
||||||
duration time.Duration
|
duration time.Duration
|
||||||
percentage int
|
percentage int
|
||||||
|
|
||||||
|
// Stale serve
|
||||||
staleUpTo time.Duration
|
staleUpTo time.Duration
|
||||||
|
verifyStale bool
|
||||||
|
|
||||||
// Testing.
|
// Testing.
|
||||||
now func() time.Time
|
now func() time.Time
|
||||||
|
@ -227,6 +229,33 @@ func (w *ResponseWriter) Write(buf []byte) (int, error) {
|
||||||
return n, err
|
return n, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// verifyStaleResponseWriter is a response writer that only writes messages if they should replace a
|
||||||
|
// stale cache entry, and otherwise discards them.
|
||||||
|
type verifyStaleResponseWriter struct {
|
||||||
|
*ResponseWriter
|
||||||
|
refreshed bool // set to true if the last WriteMsg wrote to ResponseWriter, false otherwise.
|
||||||
|
}
|
||||||
|
|
||||||
|
// newVerifyStaleResponseWriter returns a ResponseWriter to be used when verifying stale cache
|
||||||
|
// entries. It only forward writes if an entry was successfully refreshed according to RFC8767,
|
||||||
|
// section 4 (response is NoError or NXDomain), and ignores any other response.
|
||||||
|
func newVerifyStaleResponseWriter(w *ResponseWriter) *verifyStaleResponseWriter {
|
||||||
|
return &verifyStaleResponseWriter{
|
||||||
|
w,
|
||||||
|
false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// WriteMsg implements the dns.ResponseWriter interface.
|
||||||
|
func (w *verifyStaleResponseWriter) WriteMsg(res *dns.Msg) error {
|
||||||
|
w.refreshed = false
|
||||||
|
if res.Rcode == dns.RcodeSuccess || res.Rcode == dns.RcodeNameError {
|
||||||
|
w.refreshed = true
|
||||||
|
return w.ResponseWriter.WriteMsg(res) // stores to the cache and send to client
|
||||||
|
}
|
||||||
|
return nil // else discard
|
||||||
|
}
|
||||||
|
|
||||||
const (
|
const (
|
||||||
maxTTL = dnsutil.MaximumDefaulTTL
|
maxTTL = dnsutil.MaximumDefaulTTL
|
||||||
minTTL = dnsutil.MinimalDefaultTTL
|
minTTL = dnsutil.MinimalDefaultTTL
|
||||||
|
|
90
plugin/cache/cache_test.go
vendored
90
plugin/cache/cache_test.go
vendored
|
@ -266,7 +266,7 @@ func TestServeFromStaleCache(t *testing.T) {
|
||||||
req.SetQuestion("cached.org.", dns.TypeA)
|
req.SetQuestion("cached.org.", dns.TypeA)
|
||||||
ctx := context.TODO()
|
ctx := context.TODO()
|
||||||
|
|
||||||
// Cache example.org.
|
// Cache cached.org. with 60s TTL
|
||||||
rec := dnstest.NewRecorder(&test.ResponseWriter{})
|
rec := dnstest.NewRecorder(&test.ResponseWriter{})
|
||||||
c.staleUpTo = 1 * time.Hour
|
c.staleUpTo = 1 * time.Hour
|
||||||
c.ServeDNS(ctx, rec, req)
|
c.ServeDNS(ctx, rec, req)
|
||||||
|
@ -304,6 +304,80 @@ func TestServeFromStaleCache(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestServeFromStaleCacheFetchVerify(t *testing.T) {
|
||||||
|
c := New()
|
||||||
|
c.Next = ttlBackend(120)
|
||||||
|
|
||||||
|
req := new(dns.Msg)
|
||||||
|
req.SetQuestion("cached.org.", dns.TypeA)
|
||||||
|
ctx := context.TODO()
|
||||||
|
|
||||||
|
// Cache cached.org. with 120s TTL
|
||||||
|
rec := dnstest.NewRecorder(&test.ResponseWriter{})
|
||||||
|
c.staleUpTo = 1 * time.Hour
|
||||||
|
c.verifyStale = true
|
||||||
|
c.ServeDNS(ctx, rec, req)
|
||||||
|
if c.pcache.Len() != 1 {
|
||||||
|
t.Fatalf("Msg with > 0 TTL should have been cached")
|
||||||
|
}
|
||||||
|
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
upstreamRCode int
|
||||||
|
upstreamTtl int
|
||||||
|
futureMinutes int
|
||||||
|
expectedRCode int
|
||||||
|
expectedTtl int
|
||||||
|
}{
|
||||||
|
// After 1 minutes of initial TTL, we should see a cached response
|
||||||
|
{"cached.org.", dns.RcodeSuccess, 200, 1, dns.RcodeSuccess, 60}, // ttl = 120 - 60 -- not refreshed
|
||||||
|
|
||||||
|
// After the 2 more minutes, we should see upstream responses because upstream is available
|
||||||
|
{"cached.org.", dns.RcodeSuccess, 200, 3, dns.RcodeSuccess, 200},
|
||||||
|
|
||||||
|
// After the TTL expired, if the server fails we should get the cached entry
|
||||||
|
{"cached.org.", dns.RcodeServerFailure, 200, 7, dns.RcodeSuccess, 0},
|
||||||
|
|
||||||
|
// After 1 more minutes, if the server serves nxdomain we should see them (despite being within the serve stale period)
|
||||||
|
{"cached.org.", dns.RcodeNameError, 150, 8, dns.RcodeNameError, 150},
|
||||||
|
}
|
||||||
|
|
||||||
|
for i, tt := range tests {
|
||||||
|
rec := dnstest.NewRecorder(&test.ResponseWriter{})
|
||||||
|
c.now = func() time.Time { return time.Now().Add(time.Duration(tt.futureMinutes) * time.Minute) }
|
||||||
|
|
||||||
|
if tt.upstreamRCode == dns.RcodeSuccess {
|
||||||
|
c.Next = ttlBackend(tt.upstreamTtl)
|
||||||
|
} else if tt.upstreamRCode == dns.RcodeServerFailure {
|
||||||
|
// Make upstream fail, should now rely on cache during the c.staleUpTo period
|
||||||
|
c.Next = servFailBackend(tt.upstreamTtl)
|
||||||
|
} else if tt.upstreamRCode == dns.RcodeNameError {
|
||||||
|
c.Next = nxDomainBackend(tt.upstreamTtl)
|
||||||
|
} else {
|
||||||
|
t.Fatal("upstream code not implemented")
|
||||||
|
}
|
||||||
|
|
||||||
|
r := req.Copy()
|
||||||
|
r.SetQuestion(tt.name, dns.TypeA)
|
||||||
|
ret, _ := c.ServeDNS(ctx, rec, r)
|
||||||
|
if ret != tt.expectedRCode {
|
||||||
|
t.Errorf("Test %d: expected rcode=%v, got rcode=%v", i, tt.expectedRCode, ret)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if ret == dns.RcodeSuccess {
|
||||||
|
recTtl := rec.Msg.Answer[0].Header().Ttl
|
||||||
|
if tt.expectedTtl != int(recTtl) {
|
||||||
|
t.Errorf("Test %d: expected TTL=%d, got TTL=%d", i, tt.expectedTtl, recTtl)
|
||||||
|
}
|
||||||
|
} else if ret == dns.RcodeNameError {
|
||||||
|
soaTtl := rec.Msg.Ns[0].Header().Ttl
|
||||||
|
if tt.expectedTtl != int(soaTtl) {
|
||||||
|
t.Errorf("Test %d: expected TTL=%d, got TTL=%d", i, tt.expectedTtl, soaTtl)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestNegativeStaleMaskingPositiveCache(t *testing.T) {
|
func TestNegativeStaleMaskingPositiveCache(t *testing.T) {
|
||||||
c := New()
|
c := New()
|
||||||
c.staleUpTo = time.Minute * 10
|
c.staleUpTo = time.Minute * 10
|
||||||
|
@ -454,6 +528,20 @@ func ttlBackend(ttl int) plugin.Handler {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func servFailBackend(ttl int) plugin.Handler {
|
||||||
|
return plugin.HandlerFunc(func(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (int, error) {
|
||||||
|
m := new(dns.Msg)
|
||||||
|
m.SetReply(r)
|
||||||
|
m.Response, m.RecursionAvailable = true, true
|
||||||
|
|
||||||
|
m.Ns = []dns.RR{test.SOA(fmt.Sprintf("example.org. %d IN SOA sns.dns.icann.org. noc.dns.icann.org. 2016082540 7200 3600 1209600 3600", ttl))}
|
||||||
|
|
||||||
|
m.MsgHdr.Rcode = dns.RcodeServerFailure
|
||||||
|
w.WriteMsg(m)
|
||||||
|
return dns.RcodeServerFailure, nil
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
func TestComputeTTL(t *testing.T) {
|
func TestComputeTTL(t *testing.T) {
|
||||||
tests := []struct {
|
tests := []struct {
|
||||||
msgTTL time.Duration
|
msgTTL time.Duration
|
||||||
|
|
20
plugin/cache/handler.go
vendored
20
plugin/cache/handler.go
vendored
|
@ -35,19 +35,29 @@ func (c *Cache) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg)
|
||||||
|
|
||||||
ttl := 0
|
ttl := 0
|
||||||
i := c.getIgnoreTTL(now, state, server)
|
i := c.getIgnoreTTL(now, state, server)
|
||||||
if i != nil {
|
|
||||||
ttl = i.ttl(now)
|
|
||||||
}
|
|
||||||
if i == nil {
|
if i == nil {
|
||||||
crr := &ResponseWriter{ResponseWriter: w, Cache: c, state: state, server: server, do: do}
|
crr := &ResponseWriter{ResponseWriter: w, Cache: c, state: state, server: server, do: do}
|
||||||
return c.doRefresh(ctx, state, crr)
|
return c.doRefresh(ctx, state, crr)
|
||||||
}
|
}
|
||||||
|
ttl = i.ttl(now)
|
||||||
if ttl < 0 {
|
if ttl < 0 {
|
||||||
servedStale.WithLabelValues(server, c.zonesMetricLabel).Inc()
|
// serve stale behavior
|
||||||
|
if c.verifyStale {
|
||||||
|
crr := &ResponseWriter{ResponseWriter: w, Cache: c, state: state, server: server, do: do}
|
||||||
|
cw := newVerifyStaleResponseWriter(crr)
|
||||||
|
ret, err := c.doRefresh(ctx, state, cw)
|
||||||
|
if cw.refreshed {
|
||||||
|
return ret, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Adjust the time to get a 0 TTL in the reply built from a stale item.
|
// Adjust the time to get a 0 TTL in the reply built from a stale item.
|
||||||
now = now.Add(time.Duration(ttl) * time.Second)
|
now = now.Add(time.Duration(ttl) * time.Second)
|
||||||
|
if !c.verifyStale {
|
||||||
cw := newPrefetchResponseWriter(server, state, c)
|
cw := newPrefetchResponseWriter(server, state, c)
|
||||||
go c.doPrefetch(ctx, state, cw, i, now)
|
go c.doPrefetch(ctx, state, cw, i, now)
|
||||||
|
}
|
||||||
|
servedStale.WithLabelValues(server, c.zonesMetricLabel).Inc()
|
||||||
} else if c.shouldPrefetch(i, now) {
|
} else if c.shouldPrefetch(i, now) {
|
||||||
cw := newPrefetchResponseWriter(server, state, c)
|
cw := newPrefetchResponseWriter(server, state, c)
|
||||||
go c.doPrefetch(ctx, state, cw, i, now)
|
go c.doPrefetch(ctx, state, cw, i, now)
|
||||||
|
@ -70,7 +80,7 @@ func (c *Cache) doPrefetch(ctx context.Context, state request.Request, cw *Respo
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *Cache) doRefresh(ctx context.Context, state request.Request, cw *ResponseWriter) (int, error) {
|
func (c *Cache) doRefresh(ctx context.Context, state request.Request, cw dns.ResponseWriter) (int, error) {
|
||||||
if !state.Do() {
|
if !state.Do() {
|
||||||
setDo(state.Req)
|
setDo(state.Req)
|
||||||
}
|
}
|
||||||
|
|
12
plugin/cache/setup.go
vendored
12
plugin/cache/setup.go
vendored
|
@ -166,11 +166,11 @@ func cacheParse(c *caddy.Controller) (*Cache, error) {
|
||||||
|
|
||||||
case "serve_stale":
|
case "serve_stale":
|
||||||
args := c.RemainingArgs()
|
args := c.RemainingArgs()
|
||||||
if len(args) > 1 {
|
if len(args) > 2 {
|
||||||
return nil, c.ArgErr()
|
return nil, c.ArgErr()
|
||||||
}
|
}
|
||||||
ca.staleUpTo = 1 * time.Hour
|
ca.staleUpTo = 1 * time.Hour
|
||||||
if len(args) == 1 {
|
if len(args) > 0 {
|
||||||
d, err := time.ParseDuration(args[0])
|
d, err := time.ParseDuration(args[0])
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
|
@ -180,6 +180,14 @@ func cacheParse(c *caddy.Controller) (*Cache, error) {
|
||||||
}
|
}
|
||||||
ca.staleUpTo = d
|
ca.staleUpTo = d
|
||||||
}
|
}
|
||||||
|
ca.verifyStale = false
|
||||||
|
if len(args) > 1 {
|
||||||
|
mode := strings.ToLower(args[1])
|
||||||
|
if mode != "immediate" && mode != "verify" {
|
||||||
|
return nil, fmt.Errorf("invalid value for serve_stale refresh mode: %s", mode)
|
||||||
|
}
|
||||||
|
ca.verifyStale = mode == "verify"
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
return nil, c.ArgErr()
|
return nil, c.ArgErr()
|
||||||
}
|
}
|
||||||
|
|
23
plugin/cache/setup_test.go
vendored
23
plugin/cache/setup_test.go
vendored
|
@ -120,17 +120,22 @@ func TestServeStale(t *testing.T) {
|
||||||
input string
|
input string
|
||||||
shouldErr bool
|
shouldErr bool
|
||||||
staleUpTo time.Duration
|
staleUpTo time.Duration
|
||||||
|
verifyStale bool
|
||||||
}{
|
}{
|
||||||
{"serve_stale", false, 1 * time.Hour},
|
{"serve_stale", false, 1 * time.Hour, false},
|
||||||
{"serve_stale 20m", false, 20 * time.Minute},
|
{"serve_stale 20m", false, 20 * time.Minute, false},
|
||||||
{"serve_stale 1h20m", false, 80 * time.Minute},
|
{"serve_stale 1h20m", false, 80 * time.Minute, false},
|
||||||
{"serve_stale 0m", false, 0},
|
{"serve_stale 0m", false, 0, false},
|
||||||
{"serve_stale 0", false, 0},
|
{"serve_stale 0", false, 0, false},
|
||||||
|
{"serve_stale 0 verify", false, 0, true},
|
||||||
|
{"serve_stale 0 immediate", false, 0, false},
|
||||||
|
{"serve_stale 0 VERIFY", false, 0, true},
|
||||||
// fails
|
// fails
|
||||||
{"serve_stale 20", true, 0},
|
{"serve_stale 20", true, 0, false},
|
||||||
{"serve_stale -20m", true, 0},
|
{"serve_stale -20m", true, 0, false},
|
||||||
{"serve_stale aa", true, 0},
|
{"serve_stale aa", true, 0, false},
|
||||||
{"serve_stale 1m nono", true, 0},
|
{"serve_stale 1m nono", true, 0, false},
|
||||||
|
{"serve_stale 0 after nono", true, 0, false},
|
||||||
}
|
}
|
||||||
for i, test := range tests {
|
for i, test := range tests {
|
||||||
c := caddy.NewTestController("dns", fmt.Sprintf("cache {\n%s\n}", test.input))
|
c := caddy.NewTestController("dns", fmt.Sprintf("cache {\n%s\n}", test.input))
|
||||||
|
|
Loading…
Add table
Reference in a new issue