Optimize FUSE - make command restic mount faster and consume less memory

- Add Open() functionality to dir
- only access index for blobs when file is read
- Implement NodeOpener and put one-time file stuff there
- Add comment about locking as suggested by bazil.org/fuse

=> Thanks at Michael Eischer for suggesting the last two improvements
This commit is contained in:
Alexander Weiss 2020-06-14 13:47:13 +02:00 committed by Michael Eischer
parent be54ceff66
commit f8316948d1
4 changed files with 109 additions and 67 deletions

View file

@ -0,0 +1,8 @@
Enhancement: Optimize `restic mount`
We've optimized the FUSE implementation used within restic.
`restic mount` is now more responsive and uses less memory.
https://github.com/restic/restic/issues/1680
https://github.com/restic/restic/pull/2587
https://github.com/restic/restic/pull/2787

View file

@ -5,6 +5,7 @@ package fuse
import (
"os"
"path/filepath"
"sync"
"bazil.org/fuse"
"bazil.org/fuse/fs"
@ -24,6 +25,7 @@ type dir struct {
inode uint64
parentInode uint64
node *restic.Node
m sync.Mutex
}
func cleanupNodeName(name string) string {
@ -32,20 +34,10 @@ func cleanupNodeName(name string) string {
func newDir(ctx context.Context, root *Root, inode, parentInode uint64, node *restic.Node) (*dir, error) {
debug.Log("new dir for %v (%v)", node.Name, node.Subtree)
tree, err := root.repo.LoadTree(ctx, *node.Subtree)
if err != nil {
debug.Log(" error loading tree %v: %v", node.Subtree, err)
return nil, err
}
items := make(map[string]*restic.Node)
for _, node := range tree.Nodes {
items[cleanupNodeName(node.Name)] = node
}
return &dir{
root: root,
node: node,
items: items,
inode: inode,
parentInode: parentInode,
}, nil
@ -72,24 +64,6 @@ func replaceSpecialNodes(ctx context.Context, repo restic.Repository, node *rest
func newDirFromSnapshot(ctx context.Context, root *Root, inode uint64, snapshot *restic.Snapshot) (*dir, error) {
debug.Log("new dir for snapshot %v (%v)", snapshot.ID(), snapshot.Tree)
tree, err := root.repo.LoadTree(ctx, *snapshot.Tree)
if err != nil {
debug.Log(" loadTree(%v) failed: %v", snapshot.ID(), err)
return nil, err
}
items := make(map[string]*restic.Node)
for _, n := range tree.Nodes {
nodes, err := replaceSpecialNodes(ctx, root.repo, n)
if err != nil {
debug.Log(" replaceSpecialNodes(%v) failed: %v", n, err)
return nil, err
}
for _, node := range nodes {
items[cleanupNodeName(node.Name)] = node
}
}
return &dir{
root: root,
node: &restic.Node{
@ -97,12 +71,42 @@ func newDirFromSnapshot(ctx context.Context, root *Root, inode uint64, snapshot
ModTime: snapshot.Time,
ChangeTime: snapshot.Time,
Mode: os.ModeDir | 0555,
Subtree: snapshot.Tree,
},
items: items,
inode: inode,
}, nil
}
func (d *dir) open(ctx context.Context) error {
d.m.Lock()
defer d.m.Unlock()
if d.items != nil {
return nil
}
debug.Log("open dir %v (%v)", d.node.Name, d.node.Subtree)
tree, err := d.root.repo.LoadTree(ctx, *d.node.Subtree)
if err != nil {
debug.Log(" error loading tree %v: %v", d.node.Subtree, err)
return err
}
items := make(map[string]*restic.Node)
for _, n := range tree.Nodes {
nodes, err := replaceSpecialNodes(ctx, d.root.repo, n)
if err != nil {
debug.Log(" replaceSpecialNodes(%v) failed: %v", n, err)
return err
}
for _, node := range nodes {
items[cleanupNodeName(node.Name)] = node
}
}
d.items = items
return nil
}
func (d *dir) Attr(ctx context.Context, a *fuse.Attr) error {
debug.Log("Attr()")
a.Inode = d.inode
@ -133,6 +137,10 @@ func (d *dir) calcNumberOfLinks() uint32 {
func (d *dir) ReadDirAll(ctx context.Context) ([]fuse.Dirent, error) {
debug.Log("ReadDirAll()")
err := d.open(ctx)
if err != nil {
return nil, err
}
ret := make([]fuse.Dirent, 0, len(d.items)+2)
ret = append(ret, fuse.Dirent{
@ -171,6 +179,12 @@ func (d *dir) ReadDirAll(ctx context.Context) ([]fuse.Dirent, error) {
func (d *dir) Lookup(ctx context.Context, name string) (fs.Node, error) {
debug.Log("Lookup(%v)", name)
err := d.open(ctx)
if err != nil {
return nil, err
}
node, ok := d.items[name]
if !ok {
debug.Log(" Lookup(%v) -> not found", name)

View file

@ -5,11 +5,10 @@ package fuse
import (
"sort"
"github.com/restic/restic/internal/debug"
"github.com/restic/restic/internal/errors"
"github.com/restic/restic/internal/restic"
"github.com/restic/restic/internal/debug"
"bazil.org/fuse"
"bazil.org/fuse/fs"
"golang.org/x/net/context"
@ -18,43 +17,30 @@ import (
// The default block size to report in stat
const blockSize = 512
// Statically ensure that *file implements the given interface
var _ = fs.HandleReader(&file{})
// Statically ensure that *file and *openFile implement the given interfaces
var _ = fs.HandleReader(&openFile{})
var _ = fs.NodeListxattrer(&file{})
var _ = fs.NodeGetxattrer(&file{})
var _ = fs.NodeOpener(&file{})
type file struct {
root *Root
node *restic.Node
inode uint64
}
type openFile struct {
file
// cumsize[i] holds the cumulative size of blobs[:i].
cumsize []uint64
}
func newFile(ctx context.Context, root *Root, inode uint64, node *restic.Node) (fusefile *file, err error) {
debug.Log("create new file for %v with %d blobs", node.Name, len(node.Content))
var bytes uint64
cumsize := make([]uint64, 1+len(node.Content))
for i, id := range node.Content {
size, found := root.repo.LookupBlobSize(id, restic.DataBlob)
if !found {
return nil, errors.Errorf("id %v not found in repository", id)
}
bytes += uint64(size)
cumsize[i+1] = bytes
}
if bytes != node.Size {
debug.Log("sizes do not match: node.Size %v != size %v, using real size", node.Size, bytes)
node.Size = bytes
}
return &file{
inode: inode,
root: root,
node: node,
cumsize: cumsize,
}, nil
}
@ -79,8 +65,36 @@ func (f *file) Attr(ctx context.Context, a *fuse.Attr) error {
}
func (f *file) getBlobAt(ctx context.Context, i int) (blob []byte, err error) {
debug.Log("getBlobAt(%v, %v)", f.node.Name, i)
func (f *file) Open(ctx context.Context, req *fuse.OpenRequest, resp *fuse.OpenResponse) (fs.Handle, error) {
debug.Log("open file %v with %d blobs", f.node.Name, len(f.node.Content))
var bytes uint64
cumsize := make([]uint64, 1+len(f.node.Content))
for i, id := range f.node.Content {
size, found := f.root.repo.LookupBlobSize(id, restic.DataBlob)
if !found {
return nil, errors.Errorf("id %v not found in repository", id)
}
bytes += uint64(size)
cumsize[i+1] = bytes
}
var of = openFile{file: *f}
if bytes != f.node.Size {
debug.Log("sizes do not match: node.Size %v != size %v, using real size", f.node.Size, bytes)
// Make a copy of the node with correct size
nodenew := *f.node
nodenew.Size = bytes
of.file.node = &nodenew
}
of.cumsize = cumsize
return &of, nil
}
func (f *openFile) getBlobAt(ctx context.Context, i int) (blob []byte, err error) {
blob, ok := f.root.blobCache.get(f.node.Content[i])
if ok {
@ -98,18 +112,12 @@ func (f *file) getBlobAt(ctx context.Context, i int) (blob []byte, err error) {
return blob, nil
}
func (f *file) Read(ctx context.Context, req *fuse.ReadRequest, resp *fuse.ReadResponse) error {
func (f *openFile) Read(ctx context.Context, req *fuse.ReadRequest, resp *fuse.ReadResponse) error {
debug.Log("Read(%v, %v, %v), file size %v", f.node.Name, req.Size, req.Offset, f.node.Size)
offset := uint64(req.Offset)
if offset > f.node.Size {
debug.Log("Read(%v): offset is greater than file size: %v > %v",
f.node.Name, req.Offset, f.node.Size)
// return no data
resp.Data = resp.Data[:0]
return nil
}
// as stated in https://godoc.org/bazil.org/fuse/fs#HandleReader there
// is no need to check if offset > size
// handle special case: file is empty
if f.node.Size == 0 {
@ -126,6 +134,15 @@ func (f *file) Read(ctx context.Context, req *fuse.ReadRequest, resp *fuse.ReadR
dst := resp.Data[0:req.Size]
readBytes := 0
remainingBytes := req.Size
// The documentation of bazil/fuse actually says that synchronization is
// required (see https://godoc.org/bazil.org/fuse#hdr-Service_Methods):
//
// Multiple goroutines may call service methods simultaneously;
// the methods being called are responsible for appropriate synchronization.
//
// However, no lock needed here as getBlobAt can be called conurrently
// (blobCache has it's own locking)
for i := startContent; remainingBytes > 0 && i < len(f.cumsize)-1; i++ {
blob, err := f.getBlobAt(ctx, i)
if err != nil {

View file

@ -62,7 +62,7 @@ func TestCache(t *testing.T) {
rtest.Equals(t, cacheSize, c.free)
}
func testRead(t testing.TB, f *file, offset, length int, data []byte) {
func testRead(t testing.TB, f fs.Handle, offset, length int, data []byte) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
@ -73,7 +73,8 @@ func testRead(t testing.TB, f *file, offset, length int, data []byte) {
resp := &fuse.ReadResponse{
Data: data,
}
rtest.OK(t, f.Read(ctx, req, resp))
fr := f.(fs.HandleReader)
rtest.OK(t, fr.Read(ctx, req, resp))
}
func firstSnapshotID(t testing.TB, repo restic.Repository) (first restic.ID) {
@ -156,11 +157,13 @@ func TestFuseFile(t *testing.T) {
Size: filesize,
Content: content,
}
root := NewRoot(repo, Config{})
root := &Root{repo: repo, blobCache: newBlobCache(blobCacheSize)}
inode := fs.GenerateDynamicInode(1, "foo")
f, err := newFile(context.TODO(), root, inode, node)
rtest.OK(t, err)
of, err := f.Open(context.TODO(), nil, nil)
rtest.OK(t, err)
attr := fuse.Attr{}
rtest.OK(t, f.Attr(ctx, &attr))
@ -178,7 +181,7 @@ func TestFuseFile(t *testing.T) {
buf := make([]byte, length)
testRead(t, f, offset, length, buf)
testRead(t, of, offset, length, buf)
if !bytes.Equal(b, buf) {
t.Errorf("test %d failed, wrong data returned (offset %v, length %v)", i, offset, length)
}