Merge pull request #3522 from greatroar/dump-lru
Use LRU cache in restic dump
This commit is contained in:
commit
a5e103a212
10 changed files with 141 additions and 92 deletions
10
changelog/unreleased/pull-3508
Normal file
10
changelog/unreleased/pull-3508
Normal file
|
@ -0,0 +1,10 @@
|
|||
Enhancement: Cache blobs read by the dump command
|
||||
|
||||
When dumping a file using the `restic dump` command, restic did not cache blobs
|
||||
in any way, so even repeated reads of the same blob were loaded from the
|
||||
repository again and again, slowing down the dump.
|
||||
|
||||
Now, the caching mechanism already used by the `fuse` command is also used by
|
||||
the `dump` command. This makes dumping much faster, especially for sparse files.
|
||||
|
||||
https://github.com/restic/restic/pull/3508
|
|
@ -95,7 +95,8 @@ func printFromTree(ctx context.Context, tree *restic.Tree, repo restic.Repositor
|
|||
if node.Name == pathComponents[0] {
|
||||
switch {
|
||||
case l == 1 && dump.IsFile(node):
|
||||
return dump.GetNodeData(ctx, os.Stdout, repo, node)
|
||||
cache := dump.NewCache()
|
||||
return dump.WriteNodeData(ctx, os.Stdout, repo, node, cache)
|
||||
case l > 1 && dump.IsDir(node):
|
||||
subtree, err := repo.LoadTree(ctx, *node.Subtree)
|
||||
if err != nil {
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
package fuse
|
||||
package bloblru
|
||||
|
||||
import (
|
||||
"sync"
|
||||
|
@ -10,12 +10,12 @@ import (
|
|||
)
|
||||
|
||||
// Crude estimate of the overhead per blob: a SHA-256, a linked list node
|
||||
// and some pointers. See comment in blobCache.add.
|
||||
const cacheOverhead = len(restic.ID{}) + 64
|
||||
// and some pointers. See comment in Cache.add.
|
||||
const overhead = len(restic.ID{}) + 64
|
||||
|
||||
// A blobCache is a fixed-size cache of blob contents.
|
||||
// A Cache is a fixed-size LRU cache of blob contents.
|
||||
// It is safe for concurrent access.
|
||||
type blobCache struct {
|
||||
type Cache struct {
|
||||
mu sync.Mutex
|
||||
c *simplelru.LRU
|
||||
|
||||
|
@ -23,16 +23,16 @@ type blobCache struct {
|
|||
}
|
||||
|
||||
// Construct a blob cache that stores at most size bytes worth of blobs.
|
||||
func newBlobCache(size int) *blobCache {
|
||||
c := &blobCache{
|
||||
func New(size int) *Cache {
|
||||
c := &Cache{
|
||||
free: size,
|
||||
size: size,
|
||||
}
|
||||
|
||||
// NewLRU wants us to specify some max. number of entries, else it errors.
|
||||
// The actual maximum will be smaller than size/cacheOverhead, because we
|
||||
// The actual maximum will be smaller than size/overhead, because we
|
||||
// evict entries (RemoveOldest in add) to maintain our size bound.
|
||||
maxEntries := size / cacheOverhead
|
||||
maxEntries := size / overhead
|
||||
lru, err := simplelru.NewLRU(maxEntries, c.evict)
|
||||
if err != nil {
|
||||
panic(err) // Can only be maxEntries <= 0.
|
||||
|
@ -42,10 +42,12 @@ func newBlobCache(size int) *blobCache {
|
|||
return c
|
||||
}
|
||||
|
||||
func (c *blobCache) add(id restic.ID, blob []byte) {
|
||||
debug.Log("blobCache: add %v", id)
|
||||
// Add adds key id with value blob to c.
|
||||
// It may return an evicted buffer for reuse.
|
||||
func (c *Cache) Add(id restic.ID, blob []byte) (old []byte) {
|
||||
debug.Log("bloblru.Cache: add %v", id)
|
||||
|
||||
size := len(blob) + cacheOverhead
|
||||
size := len(blob) + overhead
|
||||
if size > c.size {
|
||||
return
|
||||
}
|
||||
|
@ -59,29 +61,36 @@ func (c *blobCache) add(id restic.ID, blob []byte) {
|
|||
return
|
||||
}
|
||||
|
||||
// This loop takes at most min(maxEntries, maxchunksize/cacheOverhead)
|
||||
// This loop takes at most min(maxEntries, maxchunksize/overhead)
|
||||
// iterations.
|
||||
for size > c.free {
|
||||
c.c.RemoveOldest()
|
||||
_, val, _ := c.c.RemoveOldest()
|
||||
b := val.([]byte)
|
||||
if len(b) > len(old) {
|
||||
// We can only return one buffer, so pick the largest.
|
||||
old = b
|
||||
}
|
||||
}
|
||||
|
||||
c.c.Add(key, blob)
|
||||
c.free -= size
|
||||
|
||||
return old
|
||||
}
|
||||
|
||||
func (c *blobCache) get(id restic.ID) ([]byte, bool) {
|
||||
func (c *Cache) Get(id restic.ID) ([]byte, bool) {
|
||||
c.mu.Lock()
|
||||
value, ok := c.c.Get(id)
|
||||
c.mu.Unlock()
|
||||
|
||||
debug.Log("blobCache: get %v, hit %v", id, ok)
|
||||
debug.Log("bloblru.Cache: get %v, hit %v", id, ok)
|
||||
|
||||
blob, ok := value.([]byte)
|
||||
return blob, ok
|
||||
}
|
||||
|
||||
func (c *blobCache) evict(key, value interface{}) {
|
||||
func (c *Cache) evict(key, value interface{}) {
|
||||
blob := value.([]byte)
|
||||
debug.Log("blobCache: evict %v, %d bytes", key, len(blob))
|
||||
c.free += len(blob) + cacheOverhead
|
||||
debug.Log("bloblru.Cache: evict %v, %d bytes", key, len(blob))
|
||||
c.free += len(blob) + overhead
|
||||
}
|
50
internal/bloblru/cache_test.go
Normal file
50
internal/bloblru/cache_test.go
Normal file
|
@ -0,0 +1,50 @@
|
|||
package bloblru
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/restic/restic/internal/restic"
|
||||
rtest "github.com/restic/restic/internal/test"
|
||||
)
|
||||
|
||||
func TestCache(t *testing.T) {
|
||||
var id1, id2, id3 restic.ID
|
||||
id1[0] = 1
|
||||
id2[0] = 2
|
||||
id3[0] = 3
|
||||
|
||||
const (
|
||||
kiB = 1 << 10
|
||||
cacheSize = 64*kiB + 3*overhead
|
||||
)
|
||||
|
||||
c := New(cacheSize)
|
||||
|
||||
addAndCheck := func(id restic.ID, exp []byte) {
|
||||
c.Add(id, exp)
|
||||
blob, ok := c.Get(id)
|
||||
rtest.Assert(t, ok, "blob %v added but not found in cache", id)
|
||||
rtest.Equals(t, &exp[0], &blob[0])
|
||||
rtest.Equals(t, exp, blob)
|
||||
}
|
||||
|
||||
addAndCheck(id1, make([]byte, 32*kiB))
|
||||
addAndCheck(id2, make([]byte, 30*kiB))
|
||||
addAndCheck(id3, make([]byte, 10*kiB))
|
||||
|
||||
_, ok := c.Get(id2)
|
||||
rtest.Assert(t, ok, "blob %v not present", id2)
|
||||
_, ok = c.Get(id1)
|
||||
rtest.Assert(t, !ok, "blob %v present, but should have been evicted", id1)
|
||||
|
||||
c.Add(id1, make([]byte, 1+c.size))
|
||||
_, ok = c.Get(id1)
|
||||
rtest.Assert(t, !ok, "blob %v too large but still added to cache")
|
||||
|
||||
c.c.Remove(id1)
|
||||
c.c.Remove(id3)
|
||||
c.c.Remove(id2)
|
||||
|
||||
rtest.Equals(t, cacheSize, c.size)
|
||||
rtest.Equals(t, cacheSize, c.free)
|
||||
}
|
|
@ -5,6 +5,7 @@ import (
|
|||
"io"
|
||||
"path"
|
||||
|
||||
"github.com/restic/restic/internal/bloblru"
|
||||
"github.com/restic/restic/internal/errors"
|
||||
"github.com/restic/restic/internal/restic"
|
||||
"github.com/restic/restic/internal/walker"
|
||||
|
@ -20,7 +21,11 @@ type dumper interface {
|
|||
// It will loop over all nodes in the tree and dump them recursively.
|
||||
type WriteDump func(ctx context.Context, repo restic.Repository, tree *restic.Tree, rootPath string, dst io.Writer) error
|
||||
|
||||
func writeDump(ctx context.Context, repo restic.Repository, tree *restic.Tree, rootPath string, dmp dumper, dst io.Writer) error {
|
||||
func NewCache() *bloblru.Cache {
|
||||
return bloblru.New(64 << 20)
|
||||
}
|
||||
|
||||
func writeDump(ctx context.Context, repo restic.Repository, tree *restic.Tree, rootPath string, dmp dumper) error {
|
||||
for _, rootNode := range tree.Nodes {
|
||||
rootNode.Path = rootPath
|
||||
err := dumpTree(ctx, repo, rootNode, rootPath, dmp)
|
||||
|
@ -71,20 +76,24 @@ func dumpTree(ctx context.Context, repo restic.Repository, rootNode *restic.Node
|
|||
return err
|
||||
}
|
||||
|
||||
// GetNodeData will write the contents of the node to the given output.
|
||||
func GetNodeData(ctx context.Context, output io.Writer, repo restic.Repository, node *restic.Node) error {
|
||||
// WriteNodeData writes the contents of the node to the given Writer.
|
||||
func WriteNodeData(ctx context.Context, w io.Writer, repo restic.Repository, node *restic.Node, cache *bloblru.Cache) error {
|
||||
var (
|
||||
buf []byte
|
||||
err error
|
||||
)
|
||||
for _, id := range node.Content {
|
||||
buf, err = repo.LoadBlob(ctx, restic.DataBlob, id, buf)
|
||||
if err != nil {
|
||||
return err
|
||||
blob, ok := cache.Get(id)
|
||||
if !ok {
|
||||
blob, err = repo.LoadBlob(ctx, restic.DataBlob, id, buf)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
buf = cache.Add(id, blob) // Reuse evicted buffer.
|
||||
}
|
||||
|
||||
_, err = output.Write(buf)
|
||||
if err != nil {
|
||||
if _, err := w.Write(blob); err != nil {
|
||||
return errors.Wrap(err, "Write")
|
||||
}
|
||||
}
|
||||
|
|
|
@ -8,25 +8,29 @@ import (
|
|||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/restic/restic/internal/bloblru"
|
||||
"github.com/restic/restic/internal/errors"
|
||||
"github.com/restic/restic/internal/restic"
|
||||
)
|
||||
|
||||
type tarDumper struct {
|
||||
w *tar.Writer
|
||||
cache *bloblru.Cache
|
||||
w *tar.Writer
|
||||
}
|
||||
|
||||
// Statically ensure that tarDumper implements dumper.
|
||||
var _ dumper = tarDumper{}
|
||||
var _ dumper = &tarDumper{}
|
||||
|
||||
// WriteTar will write the contents of the given tree, encoded as a tar to the given destination.
|
||||
func WriteTar(ctx context.Context, repo restic.Repository, tree *restic.Tree, rootPath string, dst io.Writer) error {
|
||||
dmp := tarDumper{w: tar.NewWriter(dst)}
|
||||
|
||||
return writeDump(ctx, repo, tree, rootPath, dmp, dst)
|
||||
dmp := &tarDumper{
|
||||
cache: NewCache(),
|
||||
w: tar.NewWriter(dst),
|
||||
}
|
||||
return writeDump(ctx, repo, tree, rootPath, dmp)
|
||||
}
|
||||
|
||||
func (dmp tarDumper) Close() error {
|
||||
func (dmp *tarDumper) Close() error {
|
||||
return dmp.w.Close()
|
||||
}
|
||||
|
||||
|
@ -39,7 +43,7 @@ const (
|
|||
cISVTX = 0o1000 // Save text (sticky bit)
|
||||
)
|
||||
|
||||
func (dmp tarDumper) dumpNode(ctx context.Context, node *restic.Node, repo restic.Repository) error {
|
||||
func (dmp *tarDumper) dumpNode(ctx context.Context, node *restic.Node, repo restic.Repository) error {
|
||||
relPath, err := filepath.Rel("/", node.Path)
|
||||
if err != nil {
|
||||
return err
|
||||
|
@ -90,7 +94,7 @@ func (dmp tarDumper) dumpNode(ctx context.Context, node *restic.Node, repo resti
|
|||
return errors.Wrap(err, "TarHeader")
|
||||
}
|
||||
|
||||
return GetNodeData(ctx, dmp.w, repo, node)
|
||||
return WriteNodeData(ctx, dmp.w, repo, node, dmp.cache)
|
||||
}
|
||||
|
||||
func parseXattrs(xattrs []restic.ExtendedAttribute) map[string]string {
|
||||
|
|
|
@ -6,29 +6,33 @@ import (
|
|||
"io"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/restic/restic/internal/bloblru"
|
||||
"github.com/restic/restic/internal/errors"
|
||||
"github.com/restic/restic/internal/restic"
|
||||
)
|
||||
|
||||
type zipDumper struct {
|
||||
w *zip.Writer
|
||||
cache *bloblru.Cache
|
||||
w *zip.Writer
|
||||
}
|
||||
|
||||
// Statically ensure that zipDumper implements dumper.
|
||||
var _ dumper = zipDumper{}
|
||||
var _ dumper = &zipDumper{}
|
||||
|
||||
// WriteZip will write the contents of the given tree, encoded as a zip to the given destination.
|
||||
func WriteZip(ctx context.Context, repo restic.Repository, tree *restic.Tree, rootPath string, dst io.Writer) error {
|
||||
dmp := zipDumper{w: zip.NewWriter(dst)}
|
||||
|
||||
return writeDump(ctx, repo, tree, rootPath, dmp, dst)
|
||||
dmp := &zipDumper{
|
||||
cache: NewCache(),
|
||||
w: zip.NewWriter(dst),
|
||||
}
|
||||
return writeDump(ctx, repo, tree, rootPath, dmp)
|
||||
}
|
||||
|
||||
func (dmp zipDumper) Close() error {
|
||||
func (dmp *zipDumper) Close() error {
|
||||
return dmp.w.Close()
|
||||
}
|
||||
|
||||
func (dmp zipDumper) dumpNode(ctx context.Context, node *restic.Node, repo restic.Repository) error {
|
||||
func (dmp *zipDumper) dumpNode(ctx context.Context, node *restic.Node, repo restic.Repository) error {
|
||||
relPath, err := filepath.Rel("/", node.Path)
|
||||
if err != nil {
|
||||
return err
|
||||
|
@ -58,5 +62,5 @@ func (dmp zipDumper) dumpNode(ctx context.Context, node *restic.Node, repo resti
|
|||
return nil
|
||||
}
|
||||
|
||||
return GetNodeData(ctx, w, repo, node)
|
||||
return WriteNodeData(ctx, w, repo, node, dmp.cache)
|
||||
}
|
||||
|
|
|
@ -96,7 +96,7 @@ func (f *file) Open(ctx context.Context, req *fuse.OpenRequest, resp *fuse.OpenR
|
|||
|
||||
func (f *openFile) getBlobAt(ctx context.Context, i int) (blob []byte, err error) {
|
||||
|
||||
blob, ok := f.root.blobCache.get(f.node.Content[i])
|
||||
blob, ok := f.root.blobCache.Get(f.node.Content[i])
|
||||
if ok {
|
||||
return blob, nil
|
||||
}
|
||||
|
@ -107,7 +107,7 @@ func (f *openFile) getBlobAt(ctx context.Context, i int) (blob []byte, err error
|
|||
return nil, err
|
||||
}
|
||||
|
||||
f.root.blobCache.add(f.node.Content[i], blob)
|
||||
f.root.blobCache.Add(f.node.Content[i], blob)
|
||||
|
||||
return blob, nil
|
||||
}
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
//go:build darwin || freebsd || linux
|
||||
// +build darwin freebsd linux
|
||||
|
||||
package fuse
|
||||
|
@ -10,6 +11,7 @@ import (
|
|||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/restic/restic/internal/bloblru"
|
||||
"github.com/restic/restic/internal/repository"
|
||||
"github.com/restic/restic/internal/restic"
|
||||
|
||||
|
@ -19,48 +21,6 @@ import (
|
|||
rtest "github.com/restic/restic/internal/test"
|
||||
)
|
||||
|
||||
func TestCache(t *testing.T) {
|
||||
var id1, id2, id3 restic.ID
|
||||
id1[0] = 1
|
||||
id2[0] = 2
|
||||
id3[0] = 3
|
||||
|
||||
const (
|
||||
kiB = 1 << 10
|
||||
cacheSize = 64*kiB + 3*cacheOverhead
|
||||
)
|
||||
|
||||
c := newBlobCache(cacheSize)
|
||||
|
||||
addAndCheck := func(id restic.ID, exp []byte) {
|
||||
c.add(id, exp)
|
||||
blob, ok := c.get(id)
|
||||
rtest.Assert(t, ok, "blob %v added but not found in cache", id)
|
||||
rtest.Equals(t, &exp[0], &blob[0])
|
||||
rtest.Equals(t, exp, blob)
|
||||
}
|
||||
|
||||
addAndCheck(id1, make([]byte, 32*kiB))
|
||||
addAndCheck(id2, make([]byte, 30*kiB))
|
||||
addAndCheck(id3, make([]byte, 10*kiB))
|
||||
|
||||
_, ok := c.get(id2)
|
||||
rtest.Assert(t, ok, "blob %v not present", id2)
|
||||
_, ok = c.get(id1)
|
||||
rtest.Assert(t, !ok, "blob %v present, but should have been evicted", id1)
|
||||
|
||||
c.add(id1, make([]byte, 1+c.size))
|
||||
_, ok = c.get(id1)
|
||||
rtest.Assert(t, !ok, "blob %v too large but still added to cache")
|
||||
|
||||
c.c.Remove(id1)
|
||||
c.c.Remove(id3)
|
||||
c.c.Remove(id2)
|
||||
|
||||
rtest.Equals(t, cacheSize, c.size)
|
||||
rtest.Equals(t, cacheSize, c.free)
|
||||
}
|
||||
|
||||
func testRead(t testing.TB, f fs.Handle, offset, length int, data []byte) {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
@ -156,7 +116,7 @@ func TestFuseFile(t *testing.T) {
|
|||
Size: filesize,
|
||||
Content: content,
|
||||
}
|
||||
root := &Root{repo: repo, blobCache: newBlobCache(blobCacheSize)}
|
||||
root := &Root{repo: repo, blobCache: bloblru.New(blobCacheSize)}
|
||||
|
||||
inode := fs.GenerateDynamicInode(1, "foo")
|
||||
f, err := newFile(context.TODO(), root, inode, node)
|
||||
|
@ -191,7 +151,7 @@ func TestFuseDir(t *testing.T) {
|
|||
repo, cleanup := repository.TestRepository(t)
|
||||
defer cleanup()
|
||||
|
||||
root := &Root{repo: repo, blobCache: newBlobCache(blobCacheSize)}
|
||||
root := &Root{repo: repo, blobCache: bloblru.New(blobCacheSize)}
|
||||
|
||||
node := &restic.Node{
|
||||
Mode: 0755,
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
//go:build darwin || freebsd || linux
|
||||
// +build darwin freebsd linux
|
||||
|
||||
package fuse
|
||||
|
@ -6,6 +7,7 @@ import (
|
|||
"os"
|
||||
"time"
|
||||
|
||||
"github.com/restic/restic/internal/bloblru"
|
||||
"github.com/restic/restic/internal/debug"
|
||||
"github.com/restic/restic/internal/restic"
|
||||
|
||||
|
@ -27,7 +29,7 @@ type Root struct {
|
|||
cfg Config
|
||||
inode uint64
|
||||
snapshots restic.Snapshots
|
||||
blobCache *blobCache
|
||||
blobCache *bloblru.Cache
|
||||
|
||||
snCount int
|
||||
lastCheck time.Time
|
||||
|
@ -54,7 +56,7 @@ func NewRoot(repo restic.Repository, cfg Config) *Root {
|
|||
repo: repo,
|
||||
inode: rootInode,
|
||||
cfg: cfg,
|
||||
blobCache: newBlobCache(blobCacheSize),
|
||||
blobCache: bloblru.New(blobCacheSize),
|
||||
}
|
||||
|
||||
if !cfg.OwnerIsRoot {
|
||||
|
|
Loading…
Reference in a new issue