Enable sparseness only conditionally
We can either preallocate storage for a file or sparsify it. This detects a file to be restored as sparse if it contains an all-zero block or consists of only one block. As this sparseness detection is just an approximation, hide it behind a `--sparse` parameter.
This commit is contained in:
parent
3047bf611c
commit
5b6a77058a
9 changed files with 102 additions and 68 deletions
|
@ -1,6 +1,7 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"runtime"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
|
@ -42,6 +43,7 @@ type RestoreOptions struct {
|
|||
InsensitiveInclude []string
|
||||
Target string
|
||||
snapshotFilterOptions
|
||||
Sparse bool
|
||||
Verify bool
|
||||
}
|
||||
|
||||
|
@ -58,6 +60,9 @@ func init() {
|
|||
flags.StringVarP(&restoreOptions.Target, "target", "t", "", "directory to extract data to")
|
||||
|
||||
initSingleSnapshotFilterOptions(flags, &restoreOptions.snapshotFilterOptions)
|
||||
if runtime.GOOS != "windows" {
|
||||
flags.BoolVar(&restoreOptions.Sparse, "sparse", false, "restore files as sparse (not supported on windows)")
|
||||
}
|
||||
flags.BoolVar(&restoreOptions.Verify, "verify", false, "verify restored files content")
|
||||
}
|
||||
|
||||
|
@ -147,7 +152,7 @@ func runRestore(opts RestoreOptions, gopts GlobalOptions, args []string) error {
|
|||
return err
|
||||
}
|
||||
|
||||
res, err := restorer.NewRestorer(ctx, repo, id)
|
||||
res, err := restorer.NewRestorer(ctx, repo, id, opts.Sparse)
|
||||
if err != nil {
|
||||
Exitf(2, "creating restorer failed: %v\n", err)
|
||||
}
|
||||
|
|
|
@ -7,6 +7,7 @@ import (
|
|||
|
||||
"golang.org/x/sync/errgroup"
|
||||
|
||||
"github.com/restic/chunker"
|
||||
"github.com/restic/restic/internal/crypto"
|
||||
"github.com/restic/restic/internal/debug"
|
||||
"github.com/restic/restic/internal/errors"
|
||||
|
@ -27,6 +28,7 @@ const (
|
|||
type fileInfo struct {
|
||||
lock sync.Mutex
|
||||
inProgress bool
|
||||
sparse bool
|
||||
size int64
|
||||
location string // file on local filesystem relative to restorer basedir
|
||||
blobs interface{} // blobs of the file
|
||||
|
@ -51,6 +53,8 @@ type fileRestorer struct {
|
|||
|
||||
workerCount int
|
||||
filesWriter *filesWriter
|
||||
zeroChunk restic.ID
|
||||
sparse bool
|
||||
|
||||
dst string
|
||||
files []*fileInfo
|
||||
|
@ -61,7 +65,8 @@ func newFileRestorer(dst string,
|
|||
packLoader repository.BackendLoadFn,
|
||||
key *crypto.Key,
|
||||
idx func(restic.BlobHandle) []restic.PackedBlob,
|
||||
connections uint) *fileRestorer {
|
||||
connections uint,
|
||||
sparse bool) *fileRestorer {
|
||||
|
||||
// as packs are streamed the concurrency is limited by IO
|
||||
workerCount := int(connections)
|
||||
|
@ -71,6 +76,8 @@ func newFileRestorer(dst string,
|
|||
idx: idx,
|
||||
packLoader: packLoader,
|
||||
filesWriter: newFilesWriter(workerCount),
|
||||
zeroChunk: restic.Hash(make([]byte, chunker.MinSize)),
|
||||
sparse: sparse,
|
||||
workerCount: workerCount,
|
||||
dst: dst,
|
||||
Error: restorerAbortOnAllErrors,
|
||||
|
@ -133,7 +140,16 @@ func (r *fileRestorer) restoreFiles(ctx context.Context) error {
|
|||
packOrder = append(packOrder, packID)
|
||||
}
|
||||
pack.files[file] = struct{}{}
|
||||
if blob.ID.Equal(r.zeroChunk) {
|
||||
file.sparse = r.sparse
|
||||
}
|
||||
})
|
||||
if len(fileBlobs) == 1 {
|
||||
// no need to preallocate files with a single block, thus we can always consider them to be sparse
|
||||
// in addition, a short chunk will never match r.zeroChunk which would prevent sparseness for short files
|
||||
file.sparse = r.sparse
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
// repository index is messed up, can't do anything
|
||||
return err
|
||||
|
@ -253,7 +269,7 @@ func (r *fileRestorer) downloadPack(ctx context.Context, pack *packInfo) error {
|
|||
file.inProgress = true
|
||||
createSize = file.size
|
||||
}
|
||||
return r.filesWriter.writeToFile(r.targetPath(file.location), blobData, offset, createSize)
|
||||
return r.filesWriter.writeToFile(r.targetPath(file.location), blobData, offset, createSize, file.sparse)
|
||||
}
|
||||
err := sanitizeError(file, writeToFile())
|
||||
if err != nil {
|
||||
|
|
|
@ -147,10 +147,10 @@ func newTestRepo(content []TestFile) *TestRepo {
|
|||
return repo
|
||||
}
|
||||
|
||||
func restoreAndVerify(t *testing.T, tempdir string, content []TestFile, files map[string]bool) {
|
||||
func restoreAndVerify(t *testing.T, tempdir string, content []TestFile, files map[string]bool, sparse bool) {
|
||||
repo := newTestRepo(content)
|
||||
|
||||
r := newFileRestorer(tempdir, repo.loader, repo.key, repo.Lookup, 2)
|
||||
r := newFileRestorer(tempdir, repo.loader, repo.key, repo.Lookup, 2, sparse)
|
||||
|
||||
if files == nil {
|
||||
r.files = repo.files
|
||||
|
@ -188,30 +188,32 @@ func TestFileRestorerBasic(t *testing.T) {
|
|||
tempdir, cleanup := rtest.TempDir(t)
|
||||
defer cleanup()
|
||||
|
||||
restoreAndVerify(t, tempdir, []TestFile{
|
||||
{
|
||||
name: "file1",
|
||||
blobs: []TestBlob{
|
||||
{"data1-1", "pack1-1"},
|
||||
{"data1-2", "pack1-2"},
|
||||
for _, sparse := range []bool{false, true} {
|
||||
restoreAndVerify(t, tempdir, []TestFile{
|
||||
{
|
||||
name: "file1",
|
||||
blobs: []TestBlob{
|
||||
{"data1-1", "pack1-1"},
|
||||
{"data1-2", "pack1-2"},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "file2",
|
||||
blobs: []TestBlob{
|
||||
{"data2-1", "pack2-1"},
|
||||
{"data2-2", "pack2-2"},
|
||||
{
|
||||
name: "file2",
|
||||
blobs: []TestBlob{
|
||||
{"data2-1", "pack2-1"},
|
||||
{"data2-2", "pack2-2"},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "file3",
|
||||
blobs: []TestBlob{
|
||||
// same blob multiple times
|
||||
{"data3-1", "pack3-1"},
|
||||
{"data3-1", "pack3-1"},
|
||||
{
|
||||
name: "file3",
|
||||
blobs: []TestBlob{
|
||||
// same blob multiple times
|
||||
{"data3-1", "pack3-1"},
|
||||
{"data3-1", "pack3-1"},
|
||||
},
|
||||
},
|
||||
},
|
||||
}, nil)
|
||||
}, nil, sparse)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFileRestorerPackSkip(t *testing.T) {
|
||||
|
@ -221,28 +223,30 @@ func TestFileRestorerPackSkip(t *testing.T) {
|
|||
files := make(map[string]bool)
|
||||
files["file2"] = true
|
||||
|
||||
restoreAndVerify(t, tempdir, []TestFile{
|
||||
{
|
||||
name: "file1",
|
||||
blobs: []TestBlob{
|
||||
{"data1-1", "pack1"},
|
||||
{"data1-2", "pack1"},
|
||||
{"data1-3", "pack1"},
|
||||
{"data1-4", "pack1"},
|
||||
{"data1-5", "pack1"},
|
||||
{"data1-6", "pack1"},
|
||||
for _, sparse := range []bool{false, true} {
|
||||
restoreAndVerify(t, tempdir, []TestFile{
|
||||
{
|
||||
name: "file1",
|
||||
blobs: []TestBlob{
|
||||
{"data1-1", "pack1"},
|
||||
{"data1-2", "pack1"},
|
||||
{"data1-3", "pack1"},
|
||||
{"data1-4", "pack1"},
|
||||
{"data1-5", "pack1"},
|
||||
{"data1-6", "pack1"},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "file2",
|
||||
blobs: []TestBlob{
|
||||
// file is contained in pack1 but need pack parts to be skipped
|
||||
{"data1-2", "pack1"},
|
||||
{"data1-4", "pack1"},
|
||||
{"data1-6", "pack1"},
|
||||
{
|
||||
name: "file2",
|
||||
blobs: []TestBlob{
|
||||
// file is contained in pack1 but need pack parts to be skipped
|
||||
{"data1-2", "pack1"},
|
||||
{"data1-4", "pack1"},
|
||||
{"data1-6", "pack1"},
|
||||
},
|
||||
},
|
||||
},
|
||||
}, files)
|
||||
}, files, sparse)
|
||||
}
|
||||
}
|
||||
|
||||
func TestErrorRestoreFiles(t *testing.T) {
|
||||
|
@ -264,7 +268,7 @@ func TestErrorRestoreFiles(t *testing.T) {
|
|||
return loadError
|
||||
}
|
||||
|
||||
r := newFileRestorer(tempdir, repo.loader, repo.key, repo.Lookup, 2)
|
||||
r := newFileRestorer(tempdir, repo.loader, repo.key, repo.Lookup, 2, false)
|
||||
r.files = repo.files
|
||||
|
||||
err := r.restoreFiles(context.TODO())
|
||||
|
@ -304,7 +308,7 @@ func testPartialDownloadError(t *testing.T, part int) {
|
|||
return loader(ctx, h, length, offset, fn)
|
||||
}
|
||||
|
||||
r := newFileRestorer(tempdir, repo.loader, repo.key, repo.Lookup, 2)
|
||||
r := newFileRestorer(tempdir, repo.loader, repo.key, repo.Lookup, 2, false)
|
||||
r.files = repo.files
|
||||
r.Error = func(s string, e error) error {
|
||||
// ignore errors as in the `restore` command
|
||||
|
|
|
@ -24,8 +24,9 @@ type filesWriterBucket struct {
|
|||
|
||||
type partialFile struct {
|
||||
*os.File
|
||||
size int64 // File size, tracked for sparse writes (not on Windows).
|
||||
users int // Reference count.
|
||||
size int64 // File size, tracked for sparse writes (not on Windows).
|
||||
users int // Reference count.
|
||||
sparse bool
|
||||
}
|
||||
|
||||
func newFilesWriter(count int) *filesWriter {
|
||||
|
@ -38,7 +39,7 @@ func newFilesWriter(count int) *filesWriter {
|
|||
}
|
||||
}
|
||||
|
||||
func (w *filesWriter) writeToFile(path string, blob []byte, offset int64, createSize int64) error {
|
||||
func (w *filesWriter) writeToFile(path string, blob []byte, offset int64, createSize int64, sparse bool) error {
|
||||
bucket := &w.buckets[uint(xxhash.Sum64String(path))%uint(len(w.buckets))]
|
||||
|
||||
acquireWriter := func() (*partialFile, error) {
|
||||
|
@ -62,7 +63,7 @@ func (w *filesWriter) writeToFile(path string, blob []byte, offset int64, create
|
|||
return nil, err
|
||||
}
|
||||
|
||||
wr := &partialFile{File: f, users: 1}
|
||||
wr := &partialFile{File: f, users: 1, sparse: sparse}
|
||||
if createSize < 0 {
|
||||
info, err := f.Stat()
|
||||
if err != nil {
|
||||
|
@ -72,7 +73,7 @@ func (w *filesWriter) writeToFile(path string, blob []byte, offset int64, create
|
|||
}
|
||||
bucket.files[path] = wr
|
||||
|
||||
if createSize >= 0 {
|
||||
if createSize >= 0 && !sparse {
|
||||
err := preallocateFile(wr.File, createSize)
|
||||
if err != nil {
|
||||
// Just log the preallocate error but don't let it cause the restore process to fail.
|
||||
|
|
|
@ -16,16 +16,16 @@ func TestFilesWriterBasic(t *testing.T) {
|
|||
f1 := dir + "/f1"
|
||||
f2 := dir + "/f2"
|
||||
|
||||
rtest.OK(t, w.writeToFile(f1, []byte{1}, 0, 2))
|
||||
rtest.OK(t, w.writeToFile(f1, []byte{1}, 0, 2, false))
|
||||
rtest.Equals(t, 0, len(w.buckets[0].files))
|
||||
|
||||
rtest.OK(t, w.writeToFile(f2, []byte{2}, 0, 2))
|
||||
rtest.OK(t, w.writeToFile(f2, []byte{2}, 0, 2, false))
|
||||
rtest.Equals(t, 0, len(w.buckets[0].files))
|
||||
|
||||
rtest.OK(t, w.writeToFile(f1, []byte{1}, 1, -1))
|
||||
rtest.OK(t, w.writeToFile(f1, []byte{1}, 1, -1, false))
|
||||
rtest.Equals(t, 0, len(w.buckets[0].files))
|
||||
|
||||
rtest.OK(t, w.writeToFile(f2, []byte{2}, 1, -1))
|
||||
rtest.OK(t, w.writeToFile(f2, []byte{2}, 1, -1, false))
|
||||
rtest.Equals(t, 0, len(w.buckets[0].files))
|
||||
|
||||
buf, err := ioutil.ReadFile(f1)
|
||||
|
|
|
@ -16,8 +16,9 @@ import (
|
|||
|
||||
// Restorer is used to restore a snapshot to a directory.
|
||||
type Restorer struct {
|
||||
repo restic.Repository
|
||||
sn *restic.Snapshot
|
||||
repo restic.Repository
|
||||
sn *restic.Snapshot
|
||||
sparse bool
|
||||
|
||||
Error func(location string, err error) error
|
||||
SelectFilter func(item string, dstpath string, node *restic.Node) (selectedForRestore bool, childMayBeSelected bool)
|
||||
|
@ -26,9 +27,10 @@ type Restorer struct {
|
|||
var restorerAbortOnAllErrors = func(location string, err error) error { return err }
|
||||
|
||||
// NewRestorer creates a restorer preloaded with the content from the snapshot id.
|
||||
func NewRestorer(ctx context.Context, repo restic.Repository, id restic.ID) (*Restorer, error) {
|
||||
func NewRestorer(ctx context.Context, repo restic.Repository, id restic.ID, sparse bool) (*Restorer, error) {
|
||||
r := &Restorer{
|
||||
repo: repo,
|
||||
sparse: sparse,
|
||||
Error: restorerAbortOnAllErrors,
|
||||
SelectFilter: func(string, string, *restic.Node) (bool, bool) { return true, true },
|
||||
}
|
||||
|
@ -219,7 +221,7 @@ func (res *Restorer) RestoreTo(ctx context.Context, dst string) error {
|
|||
}
|
||||
|
||||
idx := NewHardlinkIndex()
|
||||
filerestorer := newFileRestorer(dst, res.repo.Backend().Load, res.repo.Key(), res.repo.Index().Lookup, res.repo.Connections())
|
||||
filerestorer := newFileRestorer(dst, res.repo.Backend().Load, res.repo.Key(), res.repo.Index().Lookup, res.repo.Connections(), res.sparse)
|
||||
filerestorer.Error = res.Error
|
||||
|
||||
debug.Log("first pass for %q", dst)
|
||||
|
|
|
@ -324,7 +324,7 @@ func TestRestorer(t *testing.T) {
|
|||
_, id := saveSnapshot(t, repo, test.Snapshot)
|
||||
t.Logf("snapshot saved as %v", id.Str())
|
||||
|
||||
res, err := NewRestorer(context.TODO(), repo, id)
|
||||
res, err := NewRestorer(context.TODO(), repo, id, false)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
@ -447,7 +447,7 @@ func TestRestorerRelative(t *testing.T) {
|
|||
_, id := saveSnapshot(t, repo, test.Snapshot)
|
||||
t.Logf("snapshot saved as %v", id.Str())
|
||||
|
||||
res, err := NewRestorer(context.TODO(), repo, id)
|
||||
res, err := NewRestorer(context.TODO(), repo, id, false)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
@ -682,7 +682,7 @@ func TestRestorerTraverseTree(t *testing.T) {
|
|||
defer cleanup()
|
||||
sn, id := saveSnapshot(t, repo, test.Snapshot)
|
||||
|
||||
res, err := NewRestorer(context.TODO(), repo, id)
|
||||
res, err := NewRestorer(context.TODO(), repo, id, false)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
@ -764,7 +764,7 @@ func TestRestorerConsistentTimestampsAndPermissions(t *testing.T) {
|
|||
},
|
||||
})
|
||||
|
||||
res, err := NewRestorer(context.TODO(), repo, id)
|
||||
res, err := NewRestorer(context.TODO(), repo, id, false)
|
||||
rtest.OK(t, err)
|
||||
|
||||
res.SelectFilter = func(item string, dstpath string, node *restic.Node) (selectedForRestore bool, childMayBeSelected bool) {
|
||||
|
@ -824,7 +824,7 @@ func TestVerifyCancel(t *testing.T) {
|
|||
|
||||
_, id := saveSnapshot(t, repo, snapshot)
|
||||
|
||||
res, err := NewRestorer(context.TODO(), repo, id)
|
||||
res, err := NewRestorer(context.TODO(), repo, id, false)
|
||||
rtest.OK(t, err)
|
||||
|
||||
tempdir, cleanup := rtest.TempDir(t)
|
||||
|
|
|
@ -36,7 +36,7 @@ func TestRestorerRestoreEmptyHardlinkedFileds(t *testing.T) {
|
|||
},
|
||||
})
|
||||
|
||||
res, err := NewRestorer(context.TODO(), repo, id)
|
||||
res, err := NewRestorer(context.TODO(), repo, id, false)
|
||||
rtest.OK(t, err)
|
||||
|
||||
res.SelectFilter = func(item string, dstpath string, node *restic.Node) (selectedForRestore bool, childMayBeSelected bool) {
|
||||
|
@ -85,8 +85,9 @@ func TestRestorerSparseFiles(t *testing.T) {
|
|||
arch := archiver.New(repo, target, archiver.Options{})
|
||||
_, id, err := arch.Snapshot(context.Background(), []string{"/zeros"},
|
||||
archiver.SnapshotOptions{})
|
||||
rtest.OK(t, err)
|
||||
|
||||
res, err := NewRestorer(repo, id)
|
||||
res, err := NewRestorer(context.TODO(), repo, id, true)
|
||||
rtest.OK(t, err)
|
||||
|
||||
tempdir, cleanup := rtest.TempDir(t)
|
||||
|
@ -102,6 +103,7 @@ func TestRestorerSparseFiles(t *testing.T) {
|
|||
content, err := ioutil.ReadFile(filename)
|
||||
rtest.OK(t, err)
|
||||
|
||||
rtest.Equals(t, len(zeros[:]), len(content))
|
||||
rtest.Equals(t, zeros[:], content)
|
||||
|
||||
fi, err := os.Stat(filename)
|
||||
|
|
|
@ -8,6 +8,10 @@ import "bytes"
|
|||
// WriteAt writes p to f.File at offset. It tries to do a sparse write
|
||||
// and updates f.size.
|
||||
func (f *partialFile) WriteAt(p []byte, offset int64) (n int, err error) {
|
||||
if !f.sparse {
|
||||
return f.File.WriteAt(p, offset)
|
||||
}
|
||||
|
||||
n = len(p)
|
||||
end := offset + int64(n)
|
||||
|
||||
|
|
Loading…
Reference in a new issue