Add a serve_stale option for plugin/cache (#3468)

Automatically submitted.
This commit is contained in:
Gonzalo Paniagua Javier 2019-11-29 11:17:50 -04:00 committed by corbot[bot]
parent 24176a97e6
commit b4df2d0d4c
6 changed files with 166 additions and 15 deletions

View file

@ -34,6 +34,7 @@ cache [TTL] [ZONES...] {
success CAPACITY [TTL] [MINTTL]
denial CAPACITY [TTL] [MINTTL]
prefetch AMOUNT [[DURATION] [PERCENTAGE%]]
serve_stale [DURATION]
}
~~~
@ -50,6 +51,10 @@ cache [TTL] [ZONES...] {
**DURATION** defaults to 1m. Prefetching will happen when the TTL drops below **PERCENTAGE**,
which defaults to `10%`, or latest 1 second before TTL expiration. Values should be in the range `[10%, 90%]`.
Note the percent sign is mandatory. **PERCENTAGE** is treated as an `int`.
* `serve_stale`, when serve\_stale is set, cache always will serve an expired entry to a client if there is one
available. When this happens, cache will attempt to refresh the cache entry after sending the expired cache
entry to the client. The responses have a TTL of 0. **DURATION** is how far back to consider
stale responses as fresh. The default duration is 1h.
## Capacity and Eviction
@ -69,6 +74,7 @@ If monitoring is enabled (via the *prometheus* plugin) then the following metric
* `coredns_cache_hits_total{server, type}` - Counter of cache hits by cache type.
* `coredns_cache_misses_total{server}` - Counter of cache misses.
* `coredns_cache_drops_total{server}` - Counter of dropped messages.
* `coredns_cache_served_stale_total{server}` - Counter of requests served from stale cache entries.
Cache types are either "denial" or "success". `Server` is the server handling the request, see the
metrics plugin for documentation.

View file

@ -36,6 +36,8 @@ type Cache struct {
duration time.Duration
percentage int
staleUpTo time.Duration
// Testing.
now func() time.Time
}

View file

@ -2,10 +2,12 @@ package cache
import (
"context"
"fmt"
"testing"
"time"
"github.com/coredns/coredns/plugin"
"github.com/coredns/coredns/plugin/pkg/dnstest"
"github.com/coredns/coredns/plugin/pkg/response"
"github.com/coredns/coredns/plugin/test"
"github.com/coredns/coredns/request"
@ -233,7 +235,7 @@ func TestCacheZeroTTL(t *testing.T) {
c := New()
c.minpttl = 0
c.minnttl = 0
c.Next = zeroTTLBackend()
c.Next = ttlBackend(0)
req := new(dns.Msg)
req.SetQuestion("example.org.", dns.TypeA)
@ -248,6 +250,52 @@ func TestCacheZeroTTL(t *testing.T) {
}
}
func TestServeFromStaleCache(t *testing.T) {
c := New()
c.Next = ttlBackend(60)
req := new(dns.Msg)
req.SetQuestion("cached.org.", dns.TypeA)
ctx := context.TODO()
// Cache example.org.
rec := dnstest.NewRecorder(&test.ResponseWriter{})
c.staleUpTo = 1 * time.Hour
c.ServeDNS(ctx, rec, req)
if c.pcache.Len() != 1 {
t.Fatalf("Msg with > 0 TTL should have been cached")
}
// No more backend resolutions, just from cache if available.
c.Next = plugin.HandlerFunc(func(context.Context, dns.ResponseWriter, *dns.Msg) (int, error) {
return 255, nil // Below, a 255 means we tried querying upstream.
})
tests := []struct {
name string
futureMinutes int
expectedResult int
}{
{"cached.org.", 30, 0},
{"cached.org.", 60, 0},
{"cached.org.", 70, 255},
{"notcached.org.", 30, 255},
{"notcached.org.", 60, 255},
{"notcached.org.", 70, 255},
}
for i, tt := range tests {
rec := dnstest.NewRecorder(&test.ResponseWriter{})
c.now = func() time.Time { return time.Now().Add(time.Duration(tt.futureMinutes) * time.Minute) }
r := req.Copy()
r.SetQuestion(tt.name, dns.TypeA)
if ret, _ := c.ServeDNS(ctx, rec, r); ret != tt.expectedResult {
t.Errorf("Test %d: expecting %v; got %v", i, tt.expectedResult, ret)
}
}
}
func BenchmarkCacheResponse(b *testing.B) {
c := New()
c.prefetch = 1
@ -286,13 +334,13 @@ func BackendHandler() plugin.Handler {
})
}
func zeroTTLBackend() plugin.Handler {
func ttlBackend(ttl int) plugin.Handler {
return plugin.HandlerFunc(func(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (int, error) {
m := new(dns.Msg)
m.SetReply(r)
m.Response, m.RecursionAvailable = true, true
m.Answer = []dns.RR{test.A("example.org. 0 IN A 127.0.0.53")}
m.Answer = []dns.RR{test.A(fmt.Sprintf("example.org. %d IN A 127.0.0.53", ttl))}
w.WriteMsg(m)
return dns.RcodeSuccess, nil
})

View file

@ -26,8 +26,25 @@ func (c *Cache) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg)
server := metrics.WithServer(ctx)
i, found := c.get(now, state, server)
if i != nil && found {
ttl := 0
i := c.getIgnoreTTL(now, state, server)
if i != nil {
ttl = i.ttl(now)
}
if i == nil || -ttl >= int(c.staleUpTo.Seconds()) {
crr := &ResponseWriter{ResponseWriter: w, Cache: c, state: state, server: server}
return plugin.NextOrFailure(c.Name(), c.Next, ctx, crr, r)
}
if ttl < 0 {
servedStale.WithLabelValues(server).Inc()
// Adjust the time to get a 0 TTL in the reply built from a stale item.
now = now.Add(time.Duration(ttl) * time.Second)
go func() {
r := r.Copy()
crr := &ResponseWriter{Cache: c, state: state, server: server, prefetch: true, remoteAddr: w.LocalAddr()}
plugin.NextOrFailure(c.Name(), c.Next, ctx, crr, r)
}()
}
resp := i.toMsg(r, now)
w.WriteMsg(resp)
@ -35,10 +52,6 @@ func (c *Cache) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg)
go c.doPrefetch(ctx, state, server, i, now)
}
return dns.RcodeSuccess, nil
}
crr := &ResponseWriter{ResponseWriter: w, Cache: c, state: state, server: server}
return plugin.NextOrFailure(c.Name(), c.Next, ctx, crr, r)
}
func (c *Cache) doPrefetch(ctx context.Context, state request.Request, server string, i *item, now time.Time) {
@ -83,6 +96,27 @@ func (c *Cache) get(now time.Time, state request.Request, server string) (*item,
return nil, false
}
// getIgnoreTTL unconditionally returns an item if it exists in the cache.
func (c *Cache) getIgnoreTTL(now time.Time, state request.Request, server string) *item {
k := hash(state.Name(), state.QType(), state.Do())
if i, ok := c.ncache.Get(k); ok {
ttl := i.(*item).ttl(now)
if ttl > 0 || (c.staleUpTo > 0 && -ttl < int(c.staleUpTo.Seconds())) {
cacheHits.WithLabelValues(server, Denial).Inc()
}
return i.(*item)
}
if i, ok := c.pcache.Get(k); ok {
ttl := i.(*item).ttl(now)
if ttl > 0 || (c.staleUpTo > 0 && -ttl < int(c.staleUpTo.Seconds())) {
cacheHits.WithLabelValues(server, Success).Inc()
}
return i.(*item)
}
return nil
}
func (c *Cache) exists(state request.Request) *item {
k := hash(state.Name(), state.QType(), state.Do())
if i, ok := c.ncache.Get(k); ok {
@ -129,4 +163,11 @@ var (
Name: "drops_total",
Help: "The number responses that are not cached, because the reply is malformed.",
}, []string{"server"})
servedStale = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: plugin.Namespace,
Subsystem: "cache",
Name: "served_stale_total",
Help: "The number of requests served from stale cache entries.",
}, []string{"server"})
)

19
plugin/cache/setup.go vendored
View file

@ -1,6 +1,7 @@
package cache
import (
"errors"
"fmt"
"strconv"
"time"
@ -31,7 +32,7 @@ func setup(c *caddy.Controller) error {
c.OnStartup(func() error {
metrics.MustRegister(c,
cacheSize, cacheHits, cacheMisses,
cachePrefetches, cacheDrops)
cachePrefetches, cacheDrops, servedStale)
return nil
})
@ -176,6 +177,22 @@ func cacheParse(c *caddy.Controller) (*Cache, error) {
ca.percentage = num
}
case "serve_stale":
args := c.RemainingArgs()
if len(args) > 1 {
return nil, c.ArgErr()
}
ca.staleUpTo = 1 * time.Hour
if len(args) == 1 {
d, err := time.ParseDuration(args[0])
if err != nil {
return nil, err
}
if d < 0 {
return nil, errors.New("invalid negative duration for serve_stale")
}
ca.staleUpTo = d
}
default:
return nil, c.ArgErr()
}

View file

@ -1,6 +1,7 @@
package cache
import (
"fmt"
"testing"
"time"
@ -113,3 +114,39 @@ func TestSetup(t *testing.T) {
}
}
}
func TestServeStale(t *testing.T) {
tests := []struct {
input string
shouldErr bool
staleUpTo time.Duration
}{
{"serve_stale", false, 1 * time.Hour},
{"serve_stale 20m", false, 20 * time.Minute},
{"serve_stale 1h20m", false, 80 * time.Minute},
{"serve_stale 0m", false, 0},
{"serve_stale 0", false, 0},
// fails
{"serve_stale 20", true, 0},
{"serve_stale -20m", true, 0},
{"serve_stale aa", true, 0},
{"serve_stale 1m nono", true, 0},
}
for i, test := range tests {
c := caddy.NewTestController("dns", fmt.Sprintf("cache {\n%s\n}", test.input))
ca, err := cacheParse(c)
if test.shouldErr && err == nil {
t.Errorf("Test %v: Expected error but found nil", i)
continue
} else if !test.shouldErr && err != nil {
t.Errorf("Test %v: Expected no error but found error: %v", i, err)
continue
}
if test.shouldErr && err != nil {
continue
}
if ca.staleUpTo != test.staleUpTo {
t.Errorf("Test %v: Expected stale %v but found: %v", i, test.staleUpTo, ca.staleUpTo)
}
}
}