Add cache

This commits adds rudimentary support for a cache directory, enabled by
default. The cache directory is created if it does not exist. The cache
is used if there's anything in it, newly created snapshot and index
files are written to the cache automatically.
This commit is contained in:
Alexander Neumann 2017-06-10 13:10:08 +02:00
parent 5ace41471e
commit 9be24a1c9f
14 changed files with 1020 additions and 3 deletions

152
internal/cache/backend.go vendored Normal file
View file

@ -0,0 +1,152 @@
package cache
import (
"context"
"io"
"github.com/restic/restic/internal/debug"
"github.com/restic/restic/internal/restic"
)
// Backend wraps a restic.Backend and adds a cache.
type Backend struct {
restic.Backend
restic.Cache
}
// ensure cachedBackend implements restic.Backend
var _ restic.Backend = &Backend{}
// Remove deletes a file from the backend and the cache if it has been cached.
func (b *Backend) Remove(ctx context.Context, h restic.Handle) error {
debug.Log("cache Remove(%v)", h)
err := b.Backend.Remove(ctx, h)
if err != nil {
return err
}
return b.Cache.Remove(h)
}
type teeReader struct {
rd io.Reader
wr io.Writer
err error
}
func (t *teeReader) Read(p []byte) (n int, err error) {
n, err = t.rd.Read(p)
if t.err == nil && n > 0 {
_, t.err = t.wr.Write(p[:n])
}
return n, err
}
var autoCacheTypes = map[restic.FileType]struct{}{
restic.IndexFile: struct{}{},
restic.SnapshotFile: struct{}{},
}
// Save stores a new file is the backend and the cache.
func (b *Backend) Save(ctx context.Context, h restic.Handle, rd io.Reader) (err error) {
debug.Log("cache Save(%v)", h)
if _, ok := autoCacheTypes[h.Type]; !ok {
return b.Backend.Save(ctx, h, rd)
}
wr, err := b.Cache.SaveWriter(h)
if err != nil {
debug.Log("unable to save object to cache: %v", err)
return b.Backend.Save(ctx, h, rd)
}
tr := &teeReader{rd: rd, wr: wr}
err = b.Backend.Save(ctx, h, tr)
if err != nil {
wr.Close()
b.Cache.Remove(h)
return err
}
err = wr.Close()
if err != nil {
debug.Log("cache writer returned error: %v", err)
b.Cache.Remove(h)
}
return nil
}
var autoCacheFiles = map[restic.FileType]bool{
restic.IndexFile: true,
restic.SnapshotFile: true,
}
// Load loads a file from the cache or the backend.
func (b *Backend) Load(ctx context.Context, h restic.Handle, length int, offset int64) (io.ReadCloser, error) {
if b.Cache.Has(h) {
debug.Log("Load(%v, %v, %v) from cache", h, length, offset)
return b.Cache.Load(h, length, offset)
}
debug.Log("Load(%v, %v, %v) delegated to backend", h, length, offset)
rd, err := b.Backend.Load(ctx, h, length, offset)
if err != nil {
if b.Backend.IsNotExist(err) {
// try to remove from the cache, ignore errors
_ = b.Cache.Remove(h)
}
return nil, err
}
// only cache complete files
if offset != 0 || length != 0 {
debug.Log("won't store partial file %v", h)
return rd, err
}
if _, ok := autoCacheFiles[h.Type]; !ok {
debug.Log("wrong type for auto store %v", h)
return rd, nil
}
debug.Log("auto-store %v in the cache", h)
// cache the file, then return cached copy
if err = b.Cache.Save(h, rd); err != nil {
return nil, err
}
if err = rd.Close(); err != nil {
// try to remove from the cache, ignore errors
_ = b.Cache.Remove(h)
return nil, err
}
// load from the cache and save in the backend
return b.Cache.Load(h, 0, 0)
}
// Stat tests whether the backend has a file. If it does not exist but still
// exists in the cache, it is removed from the cache.
func (b *Backend) Stat(ctx context.Context, h restic.Handle) (restic.FileInfo, error) {
debug.Log("cache Stat(%v)", h)
fi, err := b.Backend.Stat(ctx, h)
if err != nil {
if b.Backend.IsNotExist(err) {
// try to remove from the cache, ignore errors
_ = b.Cache.Remove(h)
}
return fi, err
}
return fi, err
}
// IsNotExist returns true if the error is caused by a non-existing file.
func (b *Backend) IsNotExist(err error) bool {
return b.Backend.IsNotExist(err)
}

114
internal/cache/backend_test.go vendored Normal file
View file

@ -0,0 +1,114 @@
package cache
import (
"bytes"
"context"
"math/rand"
"testing"
"github.com/restic/restic/internal/backend"
"github.com/restic/restic/internal/backend/mem"
"github.com/restic/restic/internal/restic"
"github.com/restic/restic/internal/test"
)
func loadAndCompare(t testing.TB, be restic.Backend, h restic.Handle, data []byte) {
buf, err := backend.LoadAll(context.TODO(), be, h)
if err != nil {
t.Fatal(err)
}
if len(buf) != len(data) {
t.Fatalf("wrong number of bytes read, want %v, got %v", len(data), len(buf))
}
if !bytes.Equal(buf, data) {
t.Fatalf("wrong data returned, want:\n %02x\ngot:\n %02x", data[:16], buf[:16])
}
}
func save(t testing.TB, be restic.Backend, h restic.Handle, data []byte) {
err := be.Save(context.TODO(), h, bytes.NewReader(data))
if err != nil {
t.Fatal(err)
}
}
func remove(t testing.TB, be restic.Backend, h restic.Handle) {
err := be.Remove(context.TODO(), h)
if err != nil {
t.Fatal(err)
}
}
func randomData(n int) (restic.Handle, []byte) {
data := test.Random(rand.Int(), n)
id := restic.Hash(data)
copy(id[:], data)
h := restic.Handle{
Type: restic.IndexFile,
Name: id.String(),
}
return h, data
}
func TestBackend(t *testing.T) {
be := mem.New()
c, cleanup := TestNewCache(t)
defer cleanup()
wbe := c.Wrap(be)
h, data := randomData(5234142)
// save directly in backend
save(t, be, h, data)
if c.Has(h) {
t.Errorf("cache has file too early")
}
// load data via cache
loadAndCompare(t, wbe, h, data)
if !c.Has(h) {
t.Errorf("cache dosen't have file after load")
}
// remove via cache
remove(t, wbe, h)
if c.Has(h) {
t.Errorf("cache has file after remove")
}
// save via cache
save(t, wbe, h, data)
if !c.Has(h) {
t.Errorf("cache dosen't have file after load")
}
// load data directly from backend
loadAndCompare(t, be, h, data)
// load data via cache
loadAndCompare(t, be, h, data)
// remove directly
remove(t, be, h)
if !c.Has(h) {
t.Errorf("file not in cache any more")
}
// run stat
_, err := wbe.Stat(context.TODO(), h)
if err == nil {
t.Errorf("expected error for removed file not found, got nil")
}
if !wbe.IsNotExist(err) {
t.Errorf("Stat() returned error that does not match IsNotExist(): %v", err)
}
if c.Has(h) {
t.Errorf("removed file still in cache after stat")
}
}

122
internal/cache/cache.go vendored Normal file
View file

@ -0,0 +1,122 @@
package cache
import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strconv"
"github.com/pkg/errors"
"github.com/restic/restic/internal/debug"
"github.com/restic/restic/internal/fs"
"github.com/restic/restic/internal/restic"
)
// Cache manages a local cache.
type Cache struct {
Path string
}
const dirMode = 0700
const fileMode = 0600
func readVersion(dir string) (v uint, err error) {
buf, err := ioutil.ReadFile(filepath.Join(dir, "version"))
if os.IsNotExist(err) {
return 0, nil
}
if err != nil {
return 0, errors.Wrap(err, "ReadFile")
}
ver, err := strconv.ParseUint(string(buf), 10, 32)
if err != nil {
return 0, errors.Wrap(err, "ParseUint")
}
return uint(ver), nil
}
const cacheVersion = 1
// ensure Cache implements restic.Cache
var _ restic.Cache = &Cache{}
var cacheLayoutPaths = map[restic.FileType]string{
restic.DataFile: "data",
restic.SnapshotFile: "snapshots",
restic.IndexFile: "index",
}
// New returns a new cache for the repo ID at dir. If dir is the empty string,
// the default cache location (according to the XDG standard) is used.
func New(id string, dir string) (c *Cache, err error) {
if dir == "" {
dir, err = getXDGCacheDir()
if err != nil {
return nil, err
}
}
cachedir := filepath.Join(dir, id)
debug.Log("using cache dir %v", cachedir)
v, err := readVersion(cachedir)
if err != nil {
return nil, err
}
if v > cacheVersion {
return nil, errors.New("cache version is newer")
}
// create the repo cache dir if it does not exist yet
if err = fs.MkdirAll(cachedir, dirMode); err != nil {
return nil, err
}
if v < cacheVersion {
err = ioutil.WriteFile(filepath.Join(cachedir, "version"), []byte(fmt.Sprintf("%d", cacheVersion)), 0644)
if err != nil {
return nil, errors.Wrap(err, "WriteFile")
}
}
for _, p := range cacheLayoutPaths {
if err = fs.MkdirAll(filepath.Join(cachedir, p), dirMode); err != nil {
return nil, err
}
}
c = &Cache{
Path: cachedir,
}
return c, nil
}
// errNoSuchFile is returned when a file is not cached.
type errNoSuchFile struct {
Type string
Name string
}
func (e errNoSuchFile) Error() string {
return fmt.Sprintf("file %v (%v) is not cached", e.Name, e.Type)
}
// IsNotExist returns true if the error was caused by a non-existing file.
func (c *Cache) IsNotExist(err error) bool {
_, ok := errors.Cause(err).(errNoSuchFile)
return ok
}
// Wrap returns a backend with a cache.
func (c *Cache) Wrap(be restic.Backend) restic.Backend {
return &Backend{
Backend: be,
Cache: c,
}
}

49
internal/cache/dir.go vendored Normal file
View file

@ -0,0 +1,49 @@
package cache
import (
"os"
"path/filepath"
"github.com/pkg/errors"
"github.com/restic/restic/internal/debug"
"github.com/restic/restic/internal/fs"
)
// getXDGCacheDir returns the cache directory according to XDG basedir spec, see
// http://standards.freedesktop.org/basedir-spec/basedir-spec-latest.html
func getXDGCacheDir() (string, error) {
xdgcache := os.Getenv("XDG_CACHE_HOME")
home := os.Getenv("HOME")
if xdgcache == "" && home == "" {
return "", errors.New("unable to locate cache directory (XDG_CACHE_HOME and HOME unset)")
}
cachedir := ""
if xdgcache != "" {
cachedir = filepath.Join(xdgcache, "restic")
} else if home != "" {
cachedir = filepath.Join(home, ".cache", "restic")
}
fi, err := fs.Stat(cachedir)
if os.IsNotExist(errors.Cause(err)) {
err = fs.MkdirAll(cachedir, 0700)
if err != nil {
return "", errors.Wrap(err, "MkdirAll")
}
fi, err = fs.Stat(cachedir)
debug.Log("create cache dir %v", cachedir)
}
if err != nil {
return "", errors.Wrap(err, "Stat")
}
if !fi.IsDir() {
return "", errors.Errorf("cache dir %v is not a directory", cachedir)
}
return cachedir, nil
}

194
internal/cache/file.go vendored Normal file
View file

@ -0,0 +1,194 @@
package cache
import (
"io"
"os"
"path/filepath"
"github.com/pkg/errors"
"github.com/restic/restic/internal/debug"
"github.com/restic/restic/internal/fs"
"github.com/restic/restic/internal/restic"
)
func (c *Cache) filename(h restic.Handle) string {
if len(h.Name) < 2 {
panic("Name is empty or too short")
}
subdir := h.Name[:2]
return filepath.Join(c.Path, cacheLayoutPaths[h.Type], subdir, h.Name)
}
func (c *Cache) canBeCached(t restic.FileType) bool {
if c == nil {
return false
}
if _, ok := cacheLayoutPaths[t]; !ok {
return false
}
return true
}
type readCloser struct {
io.Reader
io.Closer
}
// Load returns a reader that yields the contents of the file with the
// given handle. rd must be closed after use. If an error is returned, the
// ReadCloser is nil.
func (c *Cache) Load(h restic.Handle, length int, offset int64) (io.ReadCloser, error) {
debug.Log("Load from cache: %v", h)
if !c.canBeCached(h.Type) {
return nil, errors.New("cannot be cached")
}
f, err := fs.Open(c.filename(h))
if err != nil {
return nil, errors.Wrap(err, "Open")
}
if offset > 0 {
if _, err = f.Seek(offset, io.SeekStart); err != nil {
f.Close()
return nil, err
}
}
rd := readCloser{Reader: f, Closer: f}
if length > 0 {
rd.Reader = io.LimitReader(f, int64(length))
}
return rd, nil
}
// SaveWriter returns a writer for the cache object h. It must be closed after writing is finished.
func (c *Cache) SaveWriter(h restic.Handle) (io.WriteCloser, error) {
debug.Log("Save to cache: %v", h)
if !c.canBeCached(h.Type) {
return nil, errors.New("cannot be cached")
}
p := c.filename(h)
err := fs.MkdirAll(filepath.Dir(p), 0700)
if err != nil {
return nil, errors.Wrap(err, "MkdirAll")
}
f, err := fs.OpenFile(p, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0400)
if err != nil {
return nil, errors.Wrap(err, "Create")
}
return f, err
}
// Save saves a file in the cache.
func (c *Cache) Save(h restic.Handle, rd io.Reader) error {
debug.Log("Save to cache: %v", h)
if rd == nil {
return errors.New("Save() called with nil reader")
}
f, err := c.SaveWriter(h)
if err != nil {
return err
}
if _, err = io.Copy(f, rd); err != nil {
_ = f.Close()
_ = c.Remove(h)
return errors.Wrap(err, "Copy")
}
if err = f.Close(); err != nil {
return errors.Wrap(err, "Close")
}
return nil
}
// Remove deletes a file. When the file is not cache, no error is returned.
func (c *Cache) Remove(h restic.Handle) error {
if !c.Has(h) {
return nil
}
return fs.Remove(c.filename(h))
}
// Clear removes all files of type t from the cache that are not contained in
// the set valid.
func (c *Cache) Clear(t restic.FileType, valid restic.IDSet) error {
debug.Log("Clearing cache for %v: %v valid files", t, len(valid))
if !c.canBeCached(t) {
return nil
}
list, err := c.list(t)
if err != nil {
return err
}
for id := range list {
if valid.Has(id) {
continue
}
if err = fs.Remove(c.filename(restic.Handle{Type: t, Name: id.String()})); err != nil {
return err
}
}
return nil
}
func isFile(fi os.FileInfo) bool {
return fi.Mode()&(os.ModeType|os.ModeCharDevice) == 0
}
// list returns a list of all files of type T in the cache.
func (c *Cache) list(t restic.FileType) (restic.IDSet, error) {
if !c.canBeCached(t) {
return nil, errors.New("cannot be cached")
}
list := restic.NewIDSet()
dir := filepath.Join(c.Path, cacheLayoutPaths[t])
err := filepath.Walk(dir, func(name string, fi os.FileInfo, err error) error {
if err != nil {
return errors.Wrap(err, "Walk")
}
if !isFile(fi) {
return nil
}
id, err := restic.ParseID(filepath.Base(name))
if err != nil {
return nil
}
list.Insert(id)
return nil
})
return list, err
}
// Has returns true if the file is cached.
func (c *Cache) Has(h restic.Handle) bool {
if !c.canBeCached(h.Type) {
return false
}
_, err := fs.Stat(c.filename(h))
if err == nil {
return true
}
return false
}

259
internal/cache/file_test.go vendored Normal file
View file

@ -0,0 +1,259 @@
package cache
import (
"bytes"
"fmt"
"io"
"io/ioutil"
"math/rand"
"testing"
"time"
"github.com/restic/restic/internal/restic"
"github.com/restic/restic/internal/test"
)
func generateRandomFiles(t testing.TB, tpe restic.FileType, c *Cache) restic.IDSet {
ids := restic.NewIDSet()
for i := 0; i < rand.Intn(15)+10; i++ {
buf := test.Random(rand.Int(), 1<<19)
id := restic.Hash(buf)
h := restic.Handle{Type: tpe, Name: id.String()}
if c.Has(h) {
t.Errorf("index %v present before save", id)
}
err := c.Save(h, bytes.NewReader(buf))
if err != nil {
t.Fatal(err)
}
ids.Insert(id)
}
return ids
}
// randomID returns a random ID from s.
func randomID(s restic.IDSet) restic.ID {
for id := range s {
return id
}
panic("set is empty")
}
func load(t testing.TB, c *Cache, h restic.Handle) []byte {
rd, err := c.Load(h, 0, 0)
if err != nil {
t.Fatal(err)
}
if rd == nil {
t.Fatalf("Load() returned nil reader")
}
buf, err := ioutil.ReadAll(rd)
if err != nil {
t.Fatal(err)
}
if err = rd.Close(); err != nil {
t.Fatal(err)
}
return buf
}
func listFiles(t testing.TB, c *Cache, tpe restic.FileType) restic.IDSet {
list, err := c.list(tpe)
if err != nil {
t.Errorf("listing failed: %v", err)
}
return list
}
func clearFiles(t testing.TB, c *Cache, tpe restic.FileType, valid restic.IDSet) {
if err := c.Clear(tpe, valid); err != nil {
t.Error(err)
}
}
func TestFiles(t *testing.T) {
seed := time.Now().Unix()
t.Logf("seed is %v", seed)
rand.Seed(seed)
c, cleanup := TestNewCache(t)
defer cleanup()
var tests = []restic.FileType{
restic.SnapshotFile,
restic.DataFile,
restic.IndexFile,
}
for _, tpe := range tests {
t.Run(fmt.Sprintf("%v", tpe), func(t *testing.T) {
ids := generateRandomFiles(t, tpe, c)
id := randomID(ids)
h := restic.Handle{Type: tpe, Name: id.String()}
id2 := restic.Hash(load(t, c, h))
if !id.Equal(id2) {
t.Errorf("wrong data returned, want %v, got %v", id.Str(), id2.Str())
}
if !c.Has(h) {
t.Errorf("cache thinks index %v isn't present", id.Str())
}
list := listFiles(t, c, tpe)
if !ids.Equals(list) {
t.Errorf("wrong list of index IDs returned, want:\n %v\ngot:\n %v", ids, list)
}
clearFiles(t, c, tpe, restic.NewIDSet(id))
list2 := listFiles(t, c, tpe)
ids.Delete(id)
want := restic.NewIDSet(id)
if !list2.Equals(want) {
t.Errorf("ClearIndexes removed indexes, want:\n %v\ngot:\n %v", list2, want)
}
clearFiles(t, c, tpe, restic.NewIDSet())
want = restic.NewIDSet()
list3 := listFiles(t, c, tpe)
if !list3.Equals(want) {
t.Errorf("ClearIndexes returned a wrong list, want:\n %v\ngot:\n %v", want, list3)
}
})
}
}
func TestFileSaveWriter(t *testing.T) {
seed := time.Now().Unix()
t.Logf("seed is %v", seed)
rand.Seed(seed)
c, cleanup := TestNewCache(t)
defer cleanup()
// save about 5 MiB of data in the cache
data := test.Random(rand.Int(), 5234142)
id := restic.ID{}
copy(id[:], data)
h := restic.Handle{
Type: restic.DataFile,
Name: id.String(),
}
wr, err := c.SaveWriter(h)
if err != nil {
t.Fatal(err)
}
n, err := io.Copy(wr, bytes.NewReader(data))
if err != nil {
t.Fatal(err)
}
if n != int64(len(data)) {
t.Fatalf("wrong number of bytes written, want %v, got %v", len(data), n)
}
if err = wr.Close(); err != nil {
t.Fatal(err)
}
rd, err := c.Load(h, 0, 0)
if err != nil {
t.Fatal(err)
}
buf, err := ioutil.ReadAll(rd)
if err != nil {
t.Fatal(err)
}
if len(buf) != len(data) {
t.Fatalf("wrong number of bytes read, want %v, got %v", len(data), len(buf))
}
if !bytes.Equal(buf, data) {
t.Fatalf("wrong data returned, want:\n %02x\ngot:\n %02x", data[:16], buf[:16])
}
if err = rd.Close(); err != nil {
t.Fatal(err)
}
}
func TestFileLoad(t *testing.T) {
seed := time.Now().Unix()
t.Logf("seed is %v", seed)
rand.Seed(seed)
c, cleanup := TestNewCache(t)
defer cleanup()
// save about 5 MiB of data in the cache
data := test.Random(rand.Int(), 5234142)
id := restic.ID{}
copy(id[:], data)
h := restic.Handle{
Type: restic.DataFile,
Name: id.String(),
}
if err := c.Save(h, bytes.NewReader(data)); err != nil {
t.Fatalf("Save() returned error: %v", err)
}
var tests = []struct {
offset int64
length int
}{
{0, 0},
{5, 0},
{32*1024 + 5, 0},
{0, 123},
{0, 64*1024 + 234},
{100, 5234142},
}
for _, test := range tests {
t.Run(fmt.Sprintf("%v/%v", test.length, test.offset), func(t *testing.T) {
rd, err := c.Load(h, test.length, test.offset)
if err != nil {
t.Fatal(err)
}
buf, err := ioutil.ReadAll(rd)
if err != nil {
t.Fatal(err)
}
if err = rd.Close(); err != nil {
t.Fatal(err)
}
o := int(test.offset)
l := test.length
if test.length == 0 {
l = len(data) - o
}
if l > len(data)-o {
l = len(data) - o
}
if len(buf) != l {
t.Fatalf("wrong number of bytes returned: want %d, got %d", l, len(buf))
}
if !bytes.Equal(buf, data[o:o+l]) {
t.Fatalf("wrong data returned, want:\n %02x\ngot:\n %02x", data[o:o+16], buf[:16])
}
})
}
}

20
internal/cache/testing.go vendored Normal file
View file

@ -0,0 +1,20 @@
package cache
import (
"testing"
"github.com/restic/restic/internal/restic"
"github.com/restic/restic/internal/test"
)
// TestNewCache returns a cache in a temporary directory which is removed when
// cleanup is called.
func TestNewCache(t testing.TB) (*Cache, func()) {
dir, cleanup := test.TempDir(t)
t.Logf("created new cache at %v", dir)
cache, err := New(restic.NewRandomID().String(), dir)
if err != nil {
t.Fatal(err)
}
return cache, cleanup
}