plugin/cache: Add refresh mode setting to serve_stale (#5131)

This PR adds an optional REFRESH_MODE parameter on the serve_stale configuration directive of the
cache plugin, which verifies that the upstream is still unavailable before returning stale entries.

Signed-off-by: Antoine Tollenaere <atollena@gmail.com>
This commit is contained in:
Antoine Tollenaere 2022-05-02 19:16:33 +02:00 committed by GitHub
parent c3572fdb30
commit 66f2ac7568
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 170 additions and 25 deletions

View file

@ -37,7 +37,7 @@ cache [TTL] [ZONES...] {
success CAPACITY [TTL] [MINTTL] success CAPACITY [TTL] [MINTTL]
denial CAPACITY [TTL] [MINTTL] denial CAPACITY [TTL] [MINTTL]
prefetch AMOUNT [[DURATION] [PERCENTAGE%]] prefetch AMOUNT [[DURATION] [PERCENTAGE%]]
serve_stale [DURATION] serve_stale [DURATION] [REFRESH_MODE]
} }
~~~ ~~~
@ -57,7 +57,12 @@ cache [TTL] [ZONES...] {
* `serve_stale`, when serve\_stale is set, cache always will serve an expired entry to a client if there is one * `serve_stale`, when serve\_stale is set, cache always will serve an expired entry to a client if there is one
available. When this happens, cache will attempt to refresh the cache entry after sending the expired cache available. When this happens, cache will attempt to refresh the cache entry after sending the expired cache
entry to the client. The responses have a TTL of 0. **DURATION** is how far back to consider entry to the client. The responses have a TTL of 0. **DURATION** is how far back to consider
stale responses as fresh. The default duration is 1h. stale responses as fresh. The default duration is 1h. **REFRESH_MODE** controls when the attempt to refresh
the cache happens. `verified` will first verify that an entry is still unavailable from the source before sending
the stale response to the client. `immediate` will immediately send the expired response to the client before
checking to see if the entry is available from the source. **REFRESH_MODE** defaults to `immediate`. Setting this
value to `verified` can lead to increased latency when serving stale responses, but will prevent stale entries
from ever being served if an updated response can be retrieved from the source.
## Capacity and Eviction ## Capacity and Eviction

29
plugin/cache/cache.go vendored
View file

@ -38,7 +38,9 @@ type Cache struct {
duration time.Duration duration time.Duration
percentage int percentage int
// Stale serve
staleUpTo time.Duration staleUpTo time.Duration
verifyStale bool
// Testing. // Testing.
now func() time.Time now func() time.Time
@ -227,6 +229,33 @@ func (w *ResponseWriter) Write(buf []byte) (int, error) {
return n, err return n, err
} }
// verifyStaleResponseWriter is a response writer that only writes messages if they should replace a
// stale cache entry, and otherwise discards them.
type verifyStaleResponseWriter struct {
*ResponseWriter
refreshed bool // set to true if the last WriteMsg wrote to ResponseWriter, false otherwise.
}
// newVerifyStaleResponseWriter returns a ResponseWriter to be used when verifying stale cache
// entries. It only forward writes if an entry was successfully refreshed according to RFC8767,
// section 4 (response is NoError or NXDomain), and ignores any other response.
func newVerifyStaleResponseWriter(w *ResponseWriter) *verifyStaleResponseWriter {
return &verifyStaleResponseWriter{
w,
false,
}
}
// WriteMsg implements the dns.ResponseWriter interface.
func (w *verifyStaleResponseWriter) WriteMsg(res *dns.Msg) error {
w.refreshed = false
if res.Rcode == dns.RcodeSuccess || res.Rcode == dns.RcodeNameError {
w.refreshed = true
return w.ResponseWriter.WriteMsg(res) // stores to the cache and send to client
}
return nil // else discard
}
const ( const (
maxTTL = dnsutil.MaximumDefaulTTL maxTTL = dnsutil.MaximumDefaulTTL
minTTL = dnsutil.MinimalDefaultTTL minTTL = dnsutil.MinimalDefaultTTL

View file

@ -266,7 +266,7 @@ func TestServeFromStaleCache(t *testing.T) {
req.SetQuestion("cached.org.", dns.TypeA) req.SetQuestion("cached.org.", dns.TypeA)
ctx := context.TODO() ctx := context.TODO()
// Cache example.org. // Cache cached.org. with 60s TTL
rec := dnstest.NewRecorder(&test.ResponseWriter{}) rec := dnstest.NewRecorder(&test.ResponseWriter{})
c.staleUpTo = 1 * time.Hour c.staleUpTo = 1 * time.Hour
c.ServeDNS(ctx, rec, req) c.ServeDNS(ctx, rec, req)
@ -304,6 +304,80 @@ func TestServeFromStaleCache(t *testing.T) {
} }
} }
func TestServeFromStaleCacheFetchVerify(t *testing.T) {
c := New()
c.Next = ttlBackend(120)
req := new(dns.Msg)
req.SetQuestion("cached.org.", dns.TypeA)
ctx := context.TODO()
// Cache cached.org. with 120s TTL
rec := dnstest.NewRecorder(&test.ResponseWriter{})
c.staleUpTo = 1 * time.Hour
c.verifyStale = true
c.ServeDNS(ctx, rec, req)
if c.pcache.Len() != 1 {
t.Fatalf("Msg with > 0 TTL should have been cached")
}
tests := []struct {
name string
upstreamRCode int
upstreamTtl int
futureMinutes int
expectedRCode int
expectedTtl int
}{
// After 1 minutes of initial TTL, we should see a cached response
{"cached.org.", dns.RcodeSuccess, 200, 1, dns.RcodeSuccess, 60}, // ttl = 120 - 60 -- not refreshed
// After the 2 more minutes, we should see upstream responses because upstream is available
{"cached.org.", dns.RcodeSuccess, 200, 3, dns.RcodeSuccess, 200},
// After the TTL expired, if the server fails we should get the cached entry
{"cached.org.", dns.RcodeServerFailure, 200, 7, dns.RcodeSuccess, 0},
// After 1 more minutes, if the server serves nxdomain we should see them (despite being within the serve stale period)
{"cached.org.", dns.RcodeNameError, 150, 8, dns.RcodeNameError, 150},
}
for i, tt := range tests {
rec := dnstest.NewRecorder(&test.ResponseWriter{})
c.now = func() time.Time { return time.Now().Add(time.Duration(tt.futureMinutes) * time.Minute) }
if tt.upstreamRCode == dns.RcodeSuccess {
c.Next = ttlBackend(tt.upstreamTtl)
} else if tt.upstreamRCode == dns.RcodeServerFailure {
// Make upstream fail, should now rely on cache during the c.staleUpTo period
c.Next = servFailBackend(tt.upstreamTtl)
} else if tt.upstreamRCode == dns.RcodeNameError {
c.Next = nxDomainBackend(tt.upstreamTtl)
} else {
t.Fatal("upstream code not implemented")
}
r := req.Copy()
r.SetQuestion(tt.name, dns.TypeA)
ret, _ := c.ServeDNS(ctx, rec, r)
if ret != tt.expectedRCode {
t.Errorf("Test %d: expected rcode=%v, got rcode=%v", i, tt.expectedRCode, ret)
continue
}
if ret == dns.RcodeSuccess {
recTtl := rec.Msg.Answer[0].Header().Ttl
if tt.expectedTtl != int(recTtl) {
t.Errorf("Test %d: expected TTL=%d, got TTL=%d", i, tt.expectedTtl, recTtl)
}
} else if ret == dns.RcodeNameError {
soaTtl := rec.Msg.Ns[0].Header().Ttl
if tt.expectedTtl != int(soaTtl) {
t.Errorf("Test %d: expected TTL=%d, got TTL=%d", i, tt.expectedTtl, soaTtl)
}
}
}
}
func TestNegativeStaleMaskingPositiveCache(t *testing.T) { func TestNegativeStaleMaskingPositiveCache(t *testing.T) {
c := New() c := New()
c.staleUpTo = time.Minute * 10 c.staleUpTo = time.Minute * 10
@ -454,6 +528,20 @@ func ttlBackend(ttl int) plugin.Handler {
}) })
} }
func servFailBackend(ttl int) plugin.Handler {
return plugin.HandlerFunc(func(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (int, error) {
m := new(dns.Msg)
m.SetReply(r)
m.Response, m.RecursionAvailable = true, true
m.Ns = []dns.RR{test.SOA(fmt.Sprintf("example.org. %d IN SOA sns.dns.icann.org. noc.dns.icann.org. 2016082540 7200 3600 1209600 3600", ttl))}
m.MsgHdr.Rcode = dns.RcodeServerFailure
w.WriteMsg(m)
return dns.RcodeServerFailure, nil
})
}
func TestComputeTTL(t *testing.T) { func TestComputeTTL(t *testing.T) {
tests := []struct { tests := []struct {
msgTTL time.Duration msgTTL time.Duration

View file

@ -35,19 +35,29 @@ func (c *Cache) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg)
ttl := 0 ttl := 0
i := c.getIgnoreTTL(now, state, server) i := c.getIgnoreTTL(now, state, server)
if i != nil {
ttl = i.ttl(now)
}
if i == nil { if i == nil {
crr := &ResponseWriter{ResponseWriter: w, Cache: c, state: state, server: server, do: do} crr := &ResponseWriter{ResponseWriter: w, Cache: c, state: state, server: server, do: do}
return c.doRefresh(ctx, state, crr) return c.doRefresh(ctx, state, crr)
} }
ttl = i.ttl(now)
if ttl < 0 { if ttl < 0 {
servedStale.WithLabelValues(server, c.zonesMetricLabel).Inc() // serve stale behavior
if c.verifyStale {
crr := &ResponseWriter{ResponseWriter: w, Cache: c, state: state, server: server, do: do}
cw := newVerifyStaleResponseWriter(crr)
ret, err := c.doRefresh(ctx, state, cw)
if cw.refreshed {
return ret, err
}
}
// Adjust the time to get a 0 TTL in the reply built from a stale item. // Adjust the time to get a 0 TTL in the reply built from a stale item.
now = now.Add(time.Duration(ttl) * time.Second) now = now.Add(time.Duration(ttl) * time.Second)
if !c.verifyStale {
cw := newPrefetchResponseWriter(server, state, c) cw := newPrefetchResponseWriter(server, state, c)
go c.doPrefetch(ctx, state, cw, i, now) go c.doPrefetch(ctx, state, cw, i, now)
}
servedStale.WithLabelValues(server, c.zonesMetricLabel).Inc()
} else if c.shouldPrefetch(i, now) { } else if c.shouldPrefetch(i, now) {
cw := newPrefetchResponseWriter(server, state, c) cw := newPrefetchResponseWriter(server, state, c)
go c.doPrefetch(ctx, state, cw, i, now) go c.doPrefetch(ctx, state, cw, i, now)
@ -70,7 +80,7 @@ func (c *Cache) doPrefetch(ctx context.Context, state request.Request, cw *Respo
} }
} }
func (c *Cache) doRefresh(ctx context.Context, state request.Request, cw *ResponseWriter) (int, error) { func (c *Cache) doRefresh(ctx context.Context, state request.Request, cw dns.ResponseWriter) (int, error) {
if !state.Do() { if !state.Do() {
setDo(state.Req) setDo(state.Req)
} }

12
plugin/cache/setup.go vendored
View file

@ -166,11 +166,11 @@ func cacheParse(c *caddy.Controller) (*Cache, error) {
case "serve_stale": case "serve_stale":
args := c.RemainingArgs() args := c.RemainingArgs()
if len(args) > 1 { if len(args) > 2 {
return nil, c.ArgErr() return nil, c.ArgErr()
} }
ca.staleUpTo = 1 * time.Hour ca.staleUpTo = 1 * time.Hour
if len(args) == 1 { if len(args) > 0 {
d, err := time.ParseDuration(args[0]) d, err := time.ParseDuration(args[0])
if err != nil { if err != nil {
return nil, err return nil, err
@ -180,6 +180,14 @@ func cacheParse(c *caddy.Controller) (*Cache, error) {
} }
ca.staleUpTo = d ca.staleUpTo = d
} }
ca.verifyStale = false
if len(args) > 1 {
mode := strings.ToLower(args[1])
if mode != "immediate" && mode != "verify" {
return nil, fmt.Errorf("invalid value for serve_stale refresh mode: %s", mode)
}
ca.verifyStale = mode == "verify"
}
default: default:
return nil, c.ArgErr() return nil, c.ArgErr()
} }

View file

@ -120,17 +120,22 @@ func TestServeStale(t *testing.T) {
input string input string
shouldErr bool shouldErr bool
staleUpTo time.Duration staleUpTo time.Duration
verifyStale bool
}{ }{
{"serve_stale", false, 1 * time.Hour}, {"serve_stale", false, 1 * time.Hour, false},
{"serve_stale 20m", false, 20 * time.Minute}, {"serve_stale 20m", false, 20 * time.Minute, false},
{"serve_stale 1h20m", false, 80 * time.Minute}, {"serve_stale 1h20m", false, 80 * time.Minute, false},
{"serve_stale 0m", false, 0}, {"serve_stale 0m", false, 0, false},
{"serve_stale 0", false, 0}, {"serve_stale 0", false, 0, false},
{"serve_stale 0 verify", false, 0, true},
{"serve_stale 0 immediate", false, 0, false},
{"serve_stale 0 VERIFY", false, 0, true},
// fails // fails
{"serve_stale 20", true, 0}, {"serve_stale 20", true, 0, false},
{"serve_stale -20m", true, 0}, {"serve_stale -20m", true, 0, false},
{"serve_stale aa", true, 0}, {"serve_stale aa", true, 0, false},
{"serve_stale 1m nono", true, 0}, {"serve_stale 1m nono", true, 0, false},
{"serve_stale 0 after nono", true, 0, false},
} }
for i, test := range tests { for i, test := range tests {
c := caddy.NewTestController("dns", fmt.Sprintf("cache {\n%s\n}", test.input)) c := caddy.NewTestController("dns", fmt.Sprintf("cache {\n%s\n}", test.input))