Enable sparseness only conditionally

We can either preallocate storage for a file or sparsify it. This
detects a pack file as sparse if it contains an all zero block or
consists of only one block. As the file sparsification is just an
approximation, hide it behind a `--sparse` parameter.
This commit is contained in:
Michael Eischer 2022-08-07 17:26:46 +02:00
parent 3047bf611c
commit 5b6a77058a
9 changed files with 102 additions and 68 deletions

View file

@ -1,6 +1,7 @@
package main
import (
"runtime"
"strings"
"time"
@ -42,6 +43,7 @@ type RestoreOptions struct {
InsensitiveInclude []string
Target string
snapshotFilterOptions
Sparse bool
Verify bool
}
@ -58,6 +60,9 @@ func init() {
flags.StringVarP(&restoreOptions.Target, "target", "t", "", "directory to extract data to")
initSingleSnapshotFilterOptions(flags, &restoreOptions.snapshotFilterOptions)
if runtime.GOOS != "windows" {
flags.BoolVar(&restoreOptions.Sparse, "sparse", false, "restore files as sparse (not supported on windows)")
}
flags.BoolVar(&restoreOptions.Verify, "verify", false, "verify restored files content")
}
@ -147,7 +152,7 @@ func runRestore(opts RestoreOptions, gopts GlobalOptions, args []string) error {
return err
}
res, err := restorer.NewRestorer(ctx, repo, id)
res, err := restorer.NewRestorer(ctx, repo, id, opts.Sparse)
if err != nil {
Exitf(2, "creating restorer failed: %v\n", err)
}

View file

@ -7,6 +7,7 @@ import (
"golang.org/x/sync/errgroup"
"github.com/restic/chunker"
"github.com/restic/restic/internal/crypto"
"github.com/restic/restic/internal/debug"
"github.com/restic/restic/internal/errors"
@ -27,6 +28,7 @@ const (
type fileInfo struct {
lock sync.Mutex
inProgress bool
sparse bool
size int64
location string // file on local filesystem relative to restorer basedir
blobs interface{} // blobs of the file
@ -51,6 +53,8 @@ type fileRestorer struct {
workerCount int
filesWriter *filesWriter
zeroChunk restic.ID
sparse bool
dst string
files []*fileInfo
@ -61,7 +65,8 @@ func newFileRestorer(dst string,
packLoader repository.BackendLoadFn,
key *crypto.Key,
idx func(restic.BlobHandle) []restic.PackedBlob,
connections uint) *fileRestorer {
connections uint,
sparse bool) *fileRestorer {
// as packs are streamed the concurrency is limited by IO
workerCount := int(connections)
@ -71,6 +76,8 @@ func newFileRestorer(dst string,
idx: idx,
packLoader: packLoader,
filesWriter: newFilesWriter(workerCount),
zeroChunk: restic.Hash(make([]byte, chunker.MinSize)),
sparse: sparse,
workerCount: workerCount,
dst: dst,
Error: restorerAbortOnAllErrors,
@ -133,7 +140,16 @@ func (r *fileRestorer) restoreFiles(ctx context.Context) error {
packOrder = append(packOrder, packID)
}
pack.files[file] = struct{}{}
if blob.ID.Equal(r.zeroChunk) {
file.sparse = r.sparse
}
})
if len(fileBlobs) == 1 {
// no need to preallocate files with a single block, thus we can always consider them to be sparse
// in addition, a short chunk will never match r.zeroChunk which would prevent sparseness for short files
file.sparse = r.sparse
}
if err != nil {
// repository index is messed up, can't do anything
return err
@ -253,7 +269,7 @@ func (r *fileRestorer) downloadPack(ctx context.Context, pack *packInfo) error {
file.inProgress = true
createSize = file.size
}
return r.filesWriter.writeToFile(r.targetPath(file.location), blobData, offset, createSize)
return r.filesWriter.writeToFile(r.targetPath(file.location), blobData, offset, createSize, file.sparse)
}
err := sanitizeError(file, writeToFile())
if err != nil {

View file

@ -147,10 +147,10 @@ func newTestRepo(content []TestFile) *TestRepo {
return repo
}
func restoreAndVerify(t *testing.T, tempdir string, content []TestFile, files map[string]bool) {
func restoreAndVerify(t *testing.T, tempdir string, content []TestFile, files map[string]bool, sparse bool) {
repo := newTestRepo(content)
r := newFileRestorer(tempdir, repo.loader, repo.key, repo.Lookup, 2)
r := newFileRestorer(tempdir, repo.loader, repo.key, repo.Lookup, 2, sparse)
if files == nil {
r.files = repo.files
@ -188,30 +188,32 @@ func TestFileRestorerBasic(t *testing.T) {
tempdir, cleanup := rtest.TempDir(t)
defer cleanup()
restoreAndVerify(t, tempdir, []TestFile{
{
name: "file1",
blobs: []TestBlob{
{"data1-1", "pack1-1"},
{"data1-2", "pack1-2"},
for _, sparse := range []bool{false, true} {
restoreAndVerify(t, tempdir, []TestFile{
{
name: "file1",
blobs: []TestBlob{
{"data1-1", "pack1-1"},
{"data1-2", "pack1-2"},
},
},
},
{
name: "file2",
blobs: []TestBlob{
{"data2-1", "pack2-1"},
{"data2-2", "pack2-2"},
{
name: "file2",
blobs: []TestBlob{
{"data2-1", "pack2-1"},
{"data2-2", "pack2-2"},
},
},
},
{
name: "file3",
blobs: []TestBlob{
// same blob multiple times
{"data3-1", "pack3-1"},
{"data3-1", "pack3-1"},
{
name: "file3",
blobs: []TestBlob{
// same blob multiple times
{"data3-1", "pack3-1"},
{"data3-1", "pack3-1"},
},
},
},
}, nil)
}, nil, sparse)
}
}
func TestFileRestorerPackSkip(t *testing.T) {
@ -221,28 +223,30 @@ func TestFileRestorerPackSkip(t *testing.T) {
files := make(map[string]bool)
files["file2"] = true
restoreAndVerify(t, tempdir, []TestFile{
{
name: "file1",
blobs: []TestBlob{
{"data1-1", "pack1"},
{"data1-2", "pack1"},
{"data1-3", "pack1"},
{"data1-4", "pack1"},
{"data1-5", "pack1"},
{"data1-6", "pack1"},
for _, sparse := range []bool{false, true} {
restoreAndVerify(t, tempdir, []TestFile{
{
name: "file1",
blobs: []TestBlob{
{"data1-1", "pack1"},
{"data1-2", "pack1"},
{"data1-3", "pack1"},
{"data1-4", "pack1"},
{"data1-5", "pack1"},
{"data1-6", "pack1"},
},
},
},
{
name: "file2",
blobs: []TestBlob{
// file is contained in pack1 but need pack parts to be skipped
{"data1-2", "pack1"},
{"data1-4", "pack1"},
{"data1-6", "pack1"},
{
name: "file2",
blobs: []TestBlob{
// file is contained in pack1 but need pack parts to be skipped
{"data1-2", "pack1"},
{"data1-4", "pack1"},
{"data1-6", "pack1"},
},
},
},
}, files)
}, files, sparse)
}
}
func TestErrorRestoreFiles(t *testing.T) {
@ -264,7 +268,7 @@ func TestErrorRestoreFiles(t *testing.T) {
return loadError
}
r := newFileRestorer(tempdir, repo.loader, repo.key, repo.Lookup, 2)
r := newFileRestorer(tempdir, repo.loader, repo.key, repo.Lookup, 2, false)
r.files = repo.files
err := r.restoreFiles(context.TODO())
@ -304,7 +308,7 @@ func testPartialDownloadError(t *testing.T, part int) {
return loader(ctx, h, length, offset, fn)
}
r := newFileRestorer(tempdir, repo.loader, repo.key, repo.Lookup, 2)
r := newFileRestorer(tempdir, repo.loader, repo.key, repo.Lookup, 2, false)
r.files = repo.files
r.Error = func(s string, e error) error {
// ignore errors as in the `restore` command

View file

@ -24,8 +24,9 @@ type filesWriterBucket struct {
type partialFile struct {
*os.File
size int64 // File size, tracked for sparse writes (not on Windows).
users int // Reference count.
size int64 // File size, tracked for sparse writes (not on Windows).
users int // Reference count.
sparse bool
}
func newFilesWriter(count int) *filesWriter {
@ -38,7 +39,7 @@ func newFilesWriter(count int) *filesWriter {
}
}
func (w *filesWriter) writeToFile(path string, blob []byte, offset int64, createSize int64) error {
func (w *filesWriter) writeToFile(path string, blob []byte, offset int64, createSize int64, sparse bool) error {
bucket := &w.buckets[uint(xxhash.Sum64String(path))%uint(len(w.buckets))]
acquireWriter := func() (*partialFile, error) {
@ -62,7 +63,7 @@ func (w *filesWriter) writeToFile(path string, blob []byte, offset int64, create
return nil, err
}
wr := &partialFile{File: f, users: 1}
wr := &partialFile{File: f, users: 1, sparse: sparse}
if createSize < 0 {
info, err := f.Stat()
if err != nil {
@ -72,7 +73,7 @@ func (w *filesWriter) writeToFile(path string, blob []byte, offset int64, create
}
bucket.files[path] = wr
if createSize >= 0 {
if createSize >= 0 && !sparse {
err := preallocateFile(wr.File, createSize)
if err != nil {
// Just log the preallocate error but don't let it cause the restore process to fail.

View file

@ -16,16 +16,16 @@ func TestFilesWriterBasic(t *testing.T) {
f1 := dir + "/f1"
f2 := dir + "/f2"
rtest.OK(t, w.writeToFile(f1, []byte{1}, 0, 2))
rtest.OK(t, w.writeToFile(f1, []byte{1}, 0, 2, false))
rtest.Equals(t, 0, len(w.buckets[0].files))
rtest.OK(t, w.writeToFile(f2, []byte{2}, 0, 2))
rtest.OK(t, w.writeToFile(f2, []byte{2}, 0, 2, false))
rtest.Equals(t, 0, len(w.buckets[0].files))
rtest.OK(t, w.writeToFile(f1, []byte{1}, 1, -1))
rtest.OK(t, w.writeToFile(f1, []byte{1}, 1, -1, false))
rtest.Equals(t, 0, len(w.buckets[0].files))
rtest.OK(t, w.writeToFile(f2, []byte{2}, 1, -1))
rtest.OK(t, w.writeToFile(f2, []byte{2}, 1, -1, false))
rtest.Equals(t, 0, len(w.buckets[0].files))
buf, err := ioutil.ReadFile(f1)

View file

@ -16,8 +16,9 @@ import (
// Restorer is used to restore a snapshot to a directory.
type Restorer struct {
repo restic.Repository
sn *restic.Snapshot
repo restic.Repository
sn *restic.Snapshot
sparse bool
Error func(location string, err error) error
SelectFilter func(item string, dstpath string, node *restic.Node) (selectedForRestore bool, childMayBeSelected bool)
@ -26,9 +27,10 @@ type Restorer struct {
var restorerAbortOnAllErrors = func(location string, err error) error { return err }
// NewRestorer creates a restorer preloaded with the content from the snapshot id.
func NewRestorer(ctx context.Context, repo restic.Repository, id restic.ID) (*Restorer, error) {
func NewRestorer(ctx context.Context, repo restic.Repository, id restic.ID, sparse bool) (*Restorer, error) {
r := &Restorer{
repo: repo,
sparse: sparse,
Error: restorerAbortOnAllErrors,
SelectFilter: func(string, string, *restic.Node) (bool, bool) { return true, true },
}
@ -219,7 +221,7 @@ func (res *Restorer) RestoreTo(ctx context.Context, dst string) error {
}
idx := NewHardlinkIndex()
filerestorer := newFileRestorer(dst, res.repo.Backend().Load, res.repo.Key(), res.repo.Index().Lookup, res.repo.Connections())
filerestorer := newFileRestorer(dst, res.repo.Backend().Load, res.repo.Key(), res.repo.Index().Lookup, res.repo.Connections(), res.sparse)
filerestorer.Error = res.Error
debug.Log("first pass for %q", dst)

View file

@ -324,7 +324,7 @@ func TestRestorer(t *testing.T) {
_, id := saveSnapshot(t, repo, test.Snapshot)
t.Logf("snapshot saved as %v", id.Str())
res, err := NewRestorer(context.TODO(), repo, id)
res, err := NewRestorer(context.TODO(), repo, id, false)
if err != nil {
t.Fatal(err)
}
@ -447,7 +447,7 @@ func TestRestorerRelative(t *testing.T) {
_, id := saveSnapshot(t, repo, test.Snapshot)
t.Logf("snapshot saved as %v", id.Str())
res, err := NewRestorer(context.TODO(), repo, id)
res, err := NewRestorer(context.TODO(), repo, id, false)
if err != nil {
t.Fatal(err)
}
@ -682,7 +682,7 @@ func TestRestorerTraverseTree(t *testing.T) {
defer cleanup()
sn, id := saveSnapshot(t, repo, test.Snapshot)
res, err := NewRestorer(context.TODO(), repo, id)
res, err := NewRestorer(context.TODO(), repo, id, false)
if err != nil {
t.Fatal(err)
}
@ -764,7 +764,7 @@ func TestRestorerConsistentTimestampsAndPermissions(t *testing.T) {
},
})
res, err := NewRestorer(context.TODO(), repo, id)
res, err := NewRestorer(context.TODO(), repo, id, false)
rtest.OK(t, err)
res.SelectFilter = func(item string, dstpath string, node *restic.Node) (selectedForRestore bool, childMayBeSelected bool) {
@ -824,7 +824,7 @@ func TestVerifyCancel(t *testing.T) {
_, id := saveSnapshot(t, repo, snapshot)
res, err := NewRestorer(context.TODO(), repo, id)
res, err := NewRestorer(context.TODO(), repo, id, false)
rtest.OK(t, err)
tempdir, cleanup := rtest.TempDir(t)

View file

@ -36,7 +36,7 @@ func TestRestorerRestoreEmptyHardlinkedFileds(t *testing.T) {
},
})
res, err := NewRestorer(context.TODO(), repo, id)
res, err := NewRestorer(context.TODO(), repo, id, false)
rtest.OK(t, err)
res.SelectFilter = func(item string, dstpath string, node *restic.Node) (selectedForRestore bool, childMayBeSelected bool) {
@ -85,8 +85,9 @@ func TestRestorerSparseFiles(t *testing.T) {
arch := archiver.New(repo, target, archiver.Options{})
_, id, err := arch.Snapshot(context.Background(), []string{"/zeros"},
archiver.SnapshotOptions{})
rtest.OK(t, err)
res, err := NewRestorer(repo, id)
res, err := NewRestorer(context.TODO(), repo, id, true)
rtest.OK(t, err)
tempdir, cleanup := rtest.TempDir(t)
@ -102,6 +103,7 @@ func TestRestorerSparseFiles(t *testing.T) {
content, err := ioutil.ReadFile(filename)
rtest.OK(t, err)
rtest.Equals(t, len(zeros[:]), len(content))
rtest.Equals(t, zeros[:], content)
fi, err := os.Stat(filename)

View file

@ -8,6 +8,10 @@ import "bytes"
// WriteAt writes p to f.File at offset. It tries to do a sparse write
// and updates f.size.
func (f *partialFile) WriteAt(p []byte, offset int64) (n int, err error) {
if !f.sparse {
return f.File.WriteAt(p, offset)
}
n = len(p)
end := offset + int64(n)