From b4df2d0d4c2d2014b113f9f8caf1f1b2ed6dc192 Mon Sep 17 00:00:00 2001 From: Gonzalo Paniagua Javier Date: Fri, 29 Nov 2019 11:17:50 -0400 Subject: [PATCH] Add a serve_stale option for plugin/cache (#3468) Automatically submitted. --- plugin/cache/README.md | 6 ++++ plugin/cache/cache.go | 2 ++ plugin/cache/cache_test.go | 54 ++++++++++++++++++++++++++++++-- plugin/cache/handler.go | 63 +++++++++++++++++++++++++++++++------- plugin/cache/setup.go | 19 +++++++++++- plugin/cache/setup_test.go | 37 ++++++++++++++++++++++ 6 files changed, 166 insertions(+), 15 deletions(-) diff --git a/plugin/cache/README.md b/plugin/cache/README.md index 2958f90c9..14636e861 100644 --- a/plugin/cache/README.md +++ b/plugin/cache/README.md @@ -34,6 +34,7 @@ cache [TTL] [ZONES...] { success CAPACITY [TTL] [MINTTL] denial CAPACITY [TTL] [MINTTL] prefetch AMOUNT [[DURATION] [PERCENTAGE%]] + serve_stale [DURATION] } ~~~ @@ -50,6 +51,10 @@ cache [TTL] [ZONES...] { **DURATION** defaults to 1m. Prefetching will happen when the TTL drops below **PERCENTAGE**, which defaults to `10%`, or latest 1 second before TTL expiration. Values should be in the range `[10%, 90%]`. Note the percent sign is mandatory. **PERCENTAGE** is treated as an `int`. +* `serve_stale`, when serve\_stale is set, cache will always serve an expired entry to a client if there is one + available. When this happens, cache will attempt to refresh the cache entry after sending the expired cache + entry to the client. The responses have a TTL of 0. **DURATION** is how far back to consider + stale responses as fresh. The default duration is 1h. ## Capacity and Eviction @@ -69,6 +74,7 @@ If monitoring is enabled (via the *prometheus* plugin) then the following metrics are exported: * `coredns_cache_size{server, type}` - Total elements in the cache by cache type. * `coredns_cache_hits_total{server, type}` - Counter of cache hits by cache type. * `coredns_cache_misses_total{server}` - Counter of cache misses. * `coredns_cache_drops_total{server}` - Counter of dropped messages. 
+* `coredns_cache_served_stale_total{server}` - Counter of requests served from stale cache entries. Cache types are either "denial" or "success". `Server` is the server handling the request, see the metrics plugin for documentation. diff --git a/plugin/cache/cache.go b/plugin/cache/cache.go index 43a40c409..6b50c51cc 100644 --- a/plugin/cache/cache.go +++ b/plugin/cache/cache.go @@ -36,6 +36,8 @@ type Cache struct { duration time.Duration percentage int + staleUpTo time.Duration + // Testing. now func() time.Time } diff --git a/plugin/cache/cache_test.go b/plugin/cache/cache_test.go index b32353372..138458c8f 100644 --- a/plugin/cache/cache_test.go +++ b/plugin/cache/cache_test.go @@ -2,10 +2,12 @@ package cache import ( "context" + "fmt" "testing" "time" "github.com/coredns/coredns/plugin" + "github.com/coredns/coredns/plugin/pkg/dnstest" "github.com/coredns/coredns/plugin/pkg/response" "github.com/coredns/coredns/plugin/test" "github.com/coredns/coredns/request" @@ -233,7 +235,7 @@ func TestCacheZeroTTL(t *testing.T) { c := New() c.minpttl = 0 c.minnttl = 0 - c.Next = zeroTTLBackend() + c.Next = ttlBackend(0) req := new(dns.Msg) req.SetQuestion("example.org.", dns.TypeA) @@ -248,6 +250,52 @@ func TestCacheZeroTTL(t *testing.T) { } } +func TestServeFromStaleCache(t *testing.T) { + c := New() + c.Next = ttlBackend(60) + + req := new(dns.Msg) + req.SetQuestion("cached.org.", dns.TypeA) + ctx := context.TODO() + + // Cache cached.org. + rec := dnstest.NewRecorder(&test.ResponseWriter{}) + c.staleUpTo = 1 * time.Hour + c.ServeDNS(ctx, rec, req) + if c.pcache.Len() != 1 { + t.Fatalf("Msg with > 0 TTL should have been cached") + } + + // No more backend resolutions, just from cache if available. + c.Next = plugin.HandlerFunc(func(context.Context, dns.ResponseWriter, *dns.Msg) (int, error) { + return 255, nil // Below, a 255 means we tried querying upstream. 
+ }) + + tests := []struct { + name string + futureMinutes int + expectedResult int + }{ + {"cached.org.", 30, 0}, + {"cached.org.", 60, 0}, + {"cached.org.", 70, 255}, + + {"notcached.org.", 30, 255}, + {"notcached.org.", 60, 255}, + {"notcached.org.", 70, 255}, + } + + for i, tt := range tests { + rec := dnstest.NewRecorder(&test.ResponseWriter{}) + c.now = func() time.Time { return time.Now().Add(time.Duration(tt.futureMinutes) * time.Minute) } + r := req.Copy() + r.SetQuestion(tt.name, dns.TypeA) + if ret, _ := c.ServeDNS(ctx, rec, r); ret != tt.expectedResult { + t.Errorf("Test %d: expecting %v; got %v", i, tt.expectedResult, ret) + } + } +} + func BenchmarkCacheResponse(b *testing.B) { c := New() c.prefetch = 1 @@ -286,13 +334,13 @@ func BackendHandler() plugin.Handler { }) } -func zeroTTLBackend() plugin.Handler { +func ttlBackend(ttl int) plugin.Handler { return plugin.HandlerFunc(func(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (int, error) { m := new(dns.Msg) m.SetReply(r) m.Response, m.RecursionAvailable = true, true - m.Answer = []dns.RR{test.A("example.org. 0 IN A 127.0.0.53")} + m.Answer = []dns.RR{test.A(fmt.Sprintf("example.org. 
%d IN A 127.0.0.53", ttl))} w.WriteMsg(m) return dns.RcodeSuccess, nil }) diff --git a/plugin/cache/handler.go b/plugin/cache/handler.go index 4dc29167a..905a98ef4 100644 --- a/plugin/cache/handler.go +++ b/plugin/cache/handler.go @@ -26,19 +26,32 @@ func (c *Cache) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) server := metrics.WithServer(ctx) - i, found := c.get(now, state, server) - if i != nil && found { - resp := i.toMsg(r, now) - w.WriteMsg(resp) - - if c.shouldPrefetch(i, now) { - go c.doPrefetch(ctx, state, server, i, now) - } - return dns.RcodeSuccess, nil + ttl := 0 + i := c.getIgnoreTTL(now, state, server) + if i != nil { + ttl = i.ttl(now) } + if i == nil || -ttl >= int(c.staleUpTo.Seconds()) { + crr := &ResponseWriter{ResponseWriter: w, Cache: c, state: state, server: server} + return plugin.NextOrFailure(c.Name(), c.Next, ctx, crr, r) + } + if ttl < 0 { + servedStale.WithLabelValues(server).Inc() + // Adjust the time to get a 0 TTL in the reply built from a stale item. + now = now.Add(time.Duration(ttl) * time.Second) + go func() { + r := r.Copy() + crr := &ResponseWriter{Cache: c, state: state, server: server, prefetch: true, remoteAddr: w.LocalAddr()} + plugin.NextOrFailure(c.Name(), c.Next, ctx, crr, r) + }() + } + resp := i.toMsg(r, now) + w.WriteMsg(resp) - crr := &ResponseWriter{ResponseWriter: w, Cache: c, state: state, server: server} - return plugin.NextOrFailure(c.Name(), c.Next, ctx, crr, r) + if c.shouldPrefetch(i, now) { + go c.doPrefetch(ctx, state, server, i, now) + } + return dns.RcodeSuccess, nil } func (c *Cache) doPrefetch(ctx context.Context, state request.Request, server string, i *item, now time.Time) { @@ -83,6 +96,27 @@ func (c *Cache) get(now time.Time, state request.Request, server string) (*item, return nil, false } +// getIgnoreTTL unconditionally returns an item if it exists in the cache. 
+func (c *Cache) getIgnoreTTL(now time.Time, state request.Request, server string) *item { + k := hash(state.Name(), state.QType(), state.Do()) + + if i, ok := c.ncache.Get(k); ok { + ttl := i.(*item).ttl(now) + if ttl > 0 || (c.staleUpTo > 0 && -ttl < int(c.staleUpTo.Seconds())) { + cacheHits.WithLabelValues(server, Denial).Inc() + } + return i.(*item) + } + if i, ok := c.pcache.Get(k); ok { + ttl := i.(*item).ttl(now) + if ttl > 0 || (c.staleUpTo > 0 && -ttl < int(c.staleUpTo.Seconds())) { + cacheHits.WithLabelValues(server, Success).Inc() + } + return i.(*item) + } + return nil +} + func (c *Cache) exists(state request.Request) *item { k := hash(state.Name(), state.QType(), state.Do()) if i, ok := c.ncache.Get(k); ok { @@ -129,4 +163,11 @@ var ( Name: "drops_total", Help: "The number responses that are not cached, because the reply is malformed.", }, []string{"server"}) + + servedStale = prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: plugin.Namespace, + Subsystem: "cache", + Name: "served_stale_total", + Help: "The number of requests served from stale cache entries.", + }, []string{"server"}) ) diff --git a/plugin/cache/setup.go b/plugin/cache/setup.go index e4ca494db..62c5c9d2c 100644 --- a/plugin/cache/setup.go +++ b/plugin/cache/setup.go @@ -1,6 +1,7 @@ package cache import ( + "errors" "fmt" "strconv" "time" @@ -31,7 +32,7 @@ func setup(c *caddy.Controller) error { c.OnStartup(func() error { metrics.MustRegister(c, cacheSize, cacheHits, cacheMisses, - cachePrefetches, cacheDrops) + cachePrefetches, cacheDrops, servedStale) return nil }) @@ -176,6 +177,22 @@ func cacheParse(c *caddy.Controller) (*Cache, error) { ca.percentage = num } + case "serve_stale": + args := c.RemainingArgs() + if len(args) > 1 { + return nil, c.ArgErr() + } + ca.staleUpTo = 1 * time.Hour + if len(args) == 1 { + d, err := time.ParseDuration(args[0]) + if err != nil { + return nil, err + } + if d < 0 { + return nil, errors.New("invalid negative duration for serve_stale") 
+ } + ca.staleUpTo = d + } default: return nil, c.ArgErr() } diff --git a/plugin/cache/setup_test.go b/plugin/cache/setup_test.go index 975520d31..6352bcadb 100644 --- a/plugin/cache/setup_test.go +++ b/plugin/cache/setup_test.go @@ -1,6 +1,7 @@ package cache import ( + "fmt" "testing" "time" @@ -113,3 +114,39 @@ func TestSetup(t *testing.T) { } } } + +func TestServeStale(t *testing.T) { + tests := []struct { + input string + shouldErr bool + staleUpTo time.Duration + }{ + {"serve_stale", false, 1 * time.Hour}, + {"serve_stale 20m", false, 20 * time.Minute}, + {"serve_stale 1h20m", false, 80 * time.Minute}, + {"serve_stale 0m", false, 0}, + {"serve_stale 0", false, 0}, + // fails + {"serve_stale 20", true, 0}, + {"serve_stale -20m", true, 0}, + {"serve_stale aa", true, 0}, + {"serve_stale 1m nono", true, 0}, + } + for i, test := range tests { + c := caddy.NewTestController("dns", fmt.Sprintf("cache {\n%s\n}", test.input)) + ca, err := cacheParse(c) + if test.shouldErr && err == nil { + t.Errorf("Test %v: Expected error but found nil", i) + continue + } else if !test.shouldErr && err != nil { + t.Errorf("Test %v: Expected no error but found error: %v", i, err) + continue + } + if test.shouldErr && err != nil { + continue + } + if ca.staleUpTo != test.staleUpTo { + t.Errorf("Test %v: Expected stale %v but found: %v", i, test.staleUpTo, ca.staleUpTo) + } + } +}