plugin/cache: Add refresh mode setting to serve_stale (#5131)
This PR adds an optional REFRESH_MODE parameter on the serve_stale configuration directive of the cache plugin, which verifies that the upstream is still unavailable before returning stale entries. Signed-off-by: Antoine Tollenaere <atollena@gmail.com>
This commit is contained in:
parent
c3572fdb30
commit
66f2ac7568
6 changed files with 170 additions and 25 deletions
9
plugin/cache/README.md
vendored
9
plugin/cache/README.md
vendored
|
@ -37,7 +37,7 @@ cache [TTL] [ZONES...] {
|
|||
success CAPACITY [TTL] [MINTTL]
|
||||
denial CAPACITY [TTL] [MINTTL]
|
||||
prefetch AMOUNT [[DURATION] [PERCENTAGE%]]
|
||||
serve_stale [DURATION]
|
||||
serve_stale [DURATION] [REFRESH_MODE]
|
||||
}
|
||||
~~~
|
||||
|
||||
|
@ -57,7 +57,12 @@ cache [TTL] [ZONES...] {
|
|||
* `serve_stale`, when serve\_stale is set, cache always will serve an expired entry to a client if there is one
|
||||
available. When this happens, cache will attempt to refresh the cache entry after sending the expired cache
|
||||
entry to the client. The responses have a TTL of 0. **DURATION** is how far back to consider
|
||||
stale responses as fresh. The default duration is 1h.
|
||||
stale responses as fresh. The default duration is 1h. **REFRESH_MODE** controls when the attempt to refresh
|
||||
the cache happens. `verified` will first verify that an entry is still unavailable from the source before sending
|
||||
the stale response to the client. `immediate` will immediately send the expired response to the client before
|
||||
checking to see if the entry is available from the source. **REFRESH_MODE** defaults to `immediate`. Setting this
|
||||
value to `verified` can lead to increased latency when serving stale responses, but will prevent stale entries
|
||||
from ever being served if an updated response can be retrieved from the source.
|
||||
|
||||
## Capacity and Eviction
|
||||
|
||||
|
|
31
plugin/cache/cache.go
vendored
31
plugin/cache/cache.go
vendored
|
@ -38,7 +38,9 @@ type Cache struct {
|
|||
duration time.Duration
|
||||
percentage int
|
||||
|
||||
staleUpTo time.Duration
|
||||
// Stale serve
|
||||
staleUpTo time.Duration
|
||||
verifyStale bool
|
||||
|
||||
// Testing.
|
||||
now func() time.Time
|
||||
|
@ -227,6 +229,33 @@ func (w *ResponseWriter) Write(buf []byte) (int, error) {
|
|||
return n, err
|
||||
}
|
||||
|
||||
// verifyStaleResponseWriter is a response writer that only writes messages if they should replace a
|
||||
// stale cache entry, and otherwise discards them.
|
||||
type verifyStaleResponseWriter struct {
|
||||
*ResponseWriter
|
||||
refreshed bool // set to true if the last WriteMsg wrote to ResponseWriter, false otherwise.
|
||||
}
|
||||
|
||||
// newVerifyStaleResponseWriter returns a ResponseWriter to be used when verifying stale cache
|
||||
// entries. It only forward writes if an entry was successfully refreshed according to RFC8767,
|
||||
// section 4 (response is NoError or NXDomain), and ignores any other response.
|
||||
func newVerifyStaleResponseWriter(w *ResponseWriter) *verifyStaleResponseWriter {
|
||||
return &verifyStaleResponseWriter{
|
||||
w,
|
||||
false,
|
||||
}
|
||||
}
|
||||
|
||||
// WriteMsg implements the dns.ResponseWriter interface.
|
||||
func (w *verifyStaleResponseWriter) WriteMsg(res *dns.Msg) error {
|
||||
w.refreshed = false
|
||||
if res.Rcode == dns.RcodeSuccess || res.Rcode == dns.RcodeNameError {
|
||||
w.refreshed = true
|
||||
return w.ResponseWriter.WriteMsg(res) // stores to the cache and send to client
|
||||
}
|
||||
return nil // else discard
|
||||
}
|
||||
|
||||
const (
|
||||
maxTTL = dnsutil.MaximumDefaulTTL
|
||||
minTTL = dnsutil.MinimalDefaultTTL
|
||||
|
|
90
plugin/cache/cache_test.go
vendored
90
plugin/cache/cache_test.go
vendored
|
@ -266,7 +266,7 @@ func TestServeFromStaleCache(t *testing.T) {
|
|||
req.SetQuestion("cached.org.", dns.TypeA)
|
||||
ctx := context.TODO()
|
||||
|
||||
// Cache example.org.
|
||||
// Cache cached.org. with 60s TTL
|
||||
rec := dnstest.NewRecorder(&test.ResponseWriter{})
|
||||
c.staleUpTo = 1 * time.Hour
|
||||
c.ServeDNS(ctx, rec, req)
|
||||
|
@ -304,6 +304,80 @@ func TestServeFromStaleCache(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestServeFromStaleCacheFetchVerify(t *testing.T) {
|
||||
c := New()
|
||||
c.Next = ttlBackend(120)
|
||||
|
||||
req := new(dns.Msg)
|
||||
req.SetQuestion("cached.org.", dns.TypeA)
|
||||
ctx := context.TODO()
|
||||
|
||||
// Cache cached.org. with 120s TTL
|
||||
rec := dnstest.NewRecorder(&test.ResponseWriter{})
|
||||
c.staleUpTo = 1 * time.Hour
|
||||
c.verifyStale = true
|
||||
c.ServeDNS(ctx, rec, req)
|
||||
if c.pcache.Len() != 1 {
|
||||
t.Fatalf("Msg with > 0 TTL should have been cached")
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
upstreamRCode int
|
||||
upstreamTtl int
|
||||
futureMinutes int
|
||||
expectedRCode int
|
||||
expectedTtl int
|
||||
}{
|
||||
// After 1 minutes of initial TTL, we should see a cached response
|
||||
{"cached.org.", dns.RcodeSuccess, 200, 1, dns.RcodeSuccess, 60}, // ttl = 120 - 60 -- not refreshed
|
||||
|
||||
// After the 2 more minutes, we should see upstream responses because upstream is available
|
||||
{"cached.org.", dns.RcodeSuccess, 200, 3, dns.RcodeSuccess, 200},
|
||||
|
||||
// After the TTL expired, if the server fails we should get the cached entry
|
||||
{"cached.org.", dns.RcodeServerFailure, 200, 7, dns.RcodeSuccess, 0},
|
||||
|
||||
// After 1 more minutes, if the server serves nxdomain we should see them (despite being within the serve stale period)
|
||||
{"cached.org.", dns.RcodeNameError, 150, 8, dns.RcodeNameError, 150},
|
||||
}
|
||||
|
||||
for i, tt := range tests {
|
||||
rec := dnstest.NewRecorder(&test.ResponseWriter{})
|
||||
c.now = func() time.Time { return time.Now().Add(time.Duration(tt.futureMinutes) * time.Minute) }
|
||||
|
||||
if tt.upstreamRCode == dns.RcodeSuccess {
|
||||
c.Next = ttlBackend(tt.upstreamTtl)
|
||||
} else if tt.upstreamRCode == dns.RcodeServerFailure {
|
||||
// Make upstream fail, should now rely on cache during the c.staleUpTo period
|
||||
c.Next = servFailBackend(tt.upstreamTtl)
|
||||
} else if tt.upstreamRCode == dns.RcodeNameError {
|
||||
c.Next = nxDomainBackend(tt.upstreamTtl)
|
||||
} else {
|
||||
t.Fatal("upstream code not implemented")
|
||||
}
|
||||
|
||||
r := req.Copy()
|
||||
r.SetQuestion(tt.name, dns.TypeA)
|
||||
ret, _ := c.ServeDNS(ctx, rec, r)
|
||||
if ret != tt.expectedRCode {
|
||||
t.Errorf("Test %d: expected rcode=%v, got rcode=%v", i, tt.expectedRCode, ret)
|
||||
continue
|
||||
}
|
||||
if ret == dns.RcodeSuccess {
|
||||
recTtl := rec.Msg.Answer[0].Header().Ttl
|
||||
if tt.expectedTtl != int(recTtl) {
|
||||
t.Errorf("Test %d: expected TTL=%d, got TTL=%d", i, tt.expectedTtl, recTtl)
|
||||
}
|
||||
} else if ret == dns.RcodeNameError {
|
||||
soaTtl := rec.Msg.Ns[0].Header().Ttl
|
||||
if tt.expectedTtl != int(soaTtl) {
|
||||
t.Errorf("Test %d: expected TTL=%d, got TTL=%d", i, tt.expectedTtl, soaTtl)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestNegativeStaleMaskingPositiveCache(t *testing.T) {
|
||||
c := New()
|
||||
c.staleUpTo = time.Minute * 10
|
||||
|
@ -454,6 +528,20 @@ func ttlBackend(ttl int) plugin.Handler {
|
|||
})
|
||||
}
|
||||
|
||||
func servFailBackend(ttl int) plugin.Handler {
|
||||
return plugin.HandlerFunc(func(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (int, error) {
|
||||
m := new(dns.Msg)
|
||||
m.SetReply(r)
|
||||
m.Response, m.RecursionAvailable = true, true
|
||||
|
||||
m.Ns = []dns.RR{test.SOA(fmt.Sprintf("example.org. %d IN SOA sns.dns.icann.org. noc.dns.icann.org. 2016082540 7200 3600 1209600 3600", ttl))}
|
||||
|
||||
m.MsgHdr.Rcode = dns.RcodeServerFailure
|
||||
w.WriteMsg(m)
|
||||
return dns.RcodeServerFailure, nil
|
||||
})
|
||||
}
|
||||
|
||||
func TestComputeTTL(t *testing.T) {
|
||||
tests := []struct {
|
||||
msgTTL time.Duration
|
||||
|
|
24
plugin/cache/handler.go
vendored
24
plugin/cache/handler.go
vendored
|
@ -35,19 +35,29 @@ func (c *Cache) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg)
|
|||
|
||||
ttl := 0
|
||||
i := c.getIgnoreTTL(now, state, server)
|
||||
if i != nil {
|
||||
ttl = i.ttl(now)
|
||||
}
|
||||
if i == nil {
|
||||
crr := &ResponseWriter{ResponseWriter: w, Cache: c, state: state, server: server, do: do}
|
||||
return c.doRefresh(ctx, state, crr)
|
||||
}
|
||||
ttl = i.ttl(now)
|
||||
if ttl < 0 {
|
||||
servedStale.WithLabelValues(server, c.zonesMetricLabel).Inc()
|
||||
// serve stale behavior
|
||||
if c.verifyStale {
|
||||
crr := &ResponseWriter{ResponseWriter: w, Cache: c, state: state, server: server, do: do}
|
||||
cw := newVerifyStaleResponseWriter(crr)
|
||||
ret, err := c.doRefresh(ctx, state, cw)
|
||||
if cw.refreshed {
|
||||
return ret, err
|
||||
}
|
||||
}
|
||||
|
||||
// Adjust the time to get a 0 TTL in the reply built from a stale item.
|
||||
now = now.Add(time.Duration(ttl) * time.Second)
|
||||
cw := newPrefetchResponseWriter(server, state, c)
|
||||
go c.doPrefetch(ctx, state, cw, i, now)
|
||||
if !c.verifyStale {
|
||||
cw := newPrefetchResponseWriter(server, state, c)
|
||||
go c.doPrefetch(ctx, state, cw, i, now)
|
||||
}
|
||||
servedStale.WithLabelValues(server, c.zonesMetricLabel).Inc()
|
||||
} else if c.shouldPrefetch(i, now) {
|
||||
cw := newPrefetchResponseWriter(server, state, c)
|
||||
go c.doPrefetch(ctx, state, cw, i, now)
|
||||
|
@ -70,7 +80,7 @@ func (c *Cache) doPrefetch(ctx context.Context, state request.Request, cw *Respo
|
|||
}
|
||||
}
|
||||
|
||||
func (c *Cache) doRefresh(ctx context.Context, state request.Request, cw *ResponseWriter) (int, error) {
|
||||
func (c *Cache) doRefresh(ctx context.Context, state request.Request, cw dns.ResponseWriter) (int, error) {
|
||||
if !state.Do() {
|
||||
setDo(state.Req)
|
||||
}
|
||||
|
|
12
plugin/cache/setup.go
vendored
12
plugin/cache/setup.go
vendored
|
@ -166,11 +166,11 @@ func cacheParse(c *caddy.Controller) (*Cache, error) {
|
|||
|
||||
case "serve_stale":
|
||||
args := c.RemainingArgs()
|
||||
if len(args) > 1 {
|
||||
if len(args) > 2 {
|
||||
return nil, c.ArgErr()
|
||||
}
|
||||
ca.staleUpTo = 1 * time.Hour
|
||||
if len(args) == 1 {
|
||||
if len(args) > 0 {
|
||||
d, err := time.ParseDuration(args[0])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
@ -180,6 +180,14 @@ func cacheParse(c *caddy.Controller) (*Cache, error) {
|
|||
}
|
||||
ca.staleUpTo = d
|
||||
}
|
||||
ca.verifyStale = false
|
||||
if len(args) > 1 {
|
||||
mode := strings.ToLower(args[1])
|
||||
if mode != "immediate" && mode != "verify" {
|
||||
return nil, fmt.Errorf("invalid value for serve_stale refresh mode: %s", mode)
|
||||
}
|
||||
ca.verifyStale = mode == "verify"
|
||||
}
|
||||
default:
|
||||
return nil, c.ArgErr()
|
||||
}
|
||||
|
|
29
plugin/cache/setup_test.go
vendored
29
plugin/cache/setup_test.go
vendored
|
@ -117,20 +117,25 @@ func TestSetup(t *testing.T) {
|
|||
|
||||
func TestServeStale(t *testing.T) {
|
||||
tests := []struct {
|
||||
input string
|
||||
shouldErr bool
|
||||
staleUpTo time.Duration
|
||||
input string
|
||||
shouldErr bool
|
||||
staleUpTo time.Duration
|
||||
verifyStale bool
|
||||
}{
|
||||
{"serve_stale", false, 1 * time.Hour},
|
||||
{"serve_stale 20m", false, 20 * time.Minute},
|
||||
{"serve_stale 1h20m", false, 80 * time.Minute},
|
||||
{"serve_stale 0m", false, 0},
|
||||
{"serve_stale 0", false, 0},
|
||||
{"serve_stale", false, 1 * time.Hour, false},
|
||||
{"serve_stale 20m", false, 20 * time.Minute, false},
|
||||
{"serve_stale 1h20m", false, 80 * time.Minute, false},
|
||||
{"serve_stale 0m", false, 0, false},
|
||||
{"serve_stale 0", false, 0, false},
|
||||
{"serve_stale 0 verify", false, 0, true},
|
||||
{"serve_stale 0 immediate", false, 0, false},
|
||||
{"serve_stale 0 VERIFY", false, 0, true},
|
||||
// fails
|
||||
{"serve_stale 20", true, 0},
|
||||
{"serve_stale -20m", true, 0},
|
||||
{"serve_stale aa", true, 0},
|
||||
{"serve_stale 1m nono", true, 0},
|
||||
{"serve_stale 20", true, 0, false},
|
||||
{"serve_stale -20m", true, 0, false},
|
||||
{"serve_stale aa", true, 0, false},
|
||||
{"serve_stale 1m nono", true, 0, false},
|
||||
{"serve_stale 0 after nono", true, 0, false},
|
||||
}
|
||||
for i, test := range tests {
|
||||
c := caddy.NewTestController("dns", fmt.Sprintf("cache {\n%s\n}", test.input))
|
||||
|
|
Loading…
Add table
Reference in a new issue