forked from TrueCloudLab/restic
Write sparse files in restorer
This writes files by using (*os.File).Truncate, which resolves to the
truncate system call on Unix.

Compared to the naive loop,

	for _, b := range p {
		if b != 0 {
			return false
		}
	}

the optimized allZero is about 10× faster:

	name        old time/op    new time/op    delta
	AllZero-8     1.09ms ± 1%    0.09ms ± 1%   -92.10%  (p=0.000 n=10+10)

	name        old speed      new speed       delta
	AllZero-8   3.84GB/s ± 1%  48.59GB/s ± 1%  +1166.51%  (p=0.000 n=10+10)
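For context, here is a minimal sketch (not part of this commit; Unix-only, file name and sizes are made up) of why extending a file with Truncate produces a hole instead of allocated blocks:

	// Sketch only: grow a file with Truncate instead of writing zero bytes.
	// On filesystems that support holes, the extended range stays unallocated.
	package main

	import (
		"fmt"
		"log"
		"os"
		"syscall"
	)

	func main() {
		f, err := os.CreateTemp("", "sparse-demo") // hypothetical temp file
		if err != nil {
			log.Fatal(err)
		}
		defer os.Remove(f.Name())
		defer f.Close()

		// Extend the file to 1 MiB without writing any data.
		if err := f.Truncate(1 << 20); err != nil {
			log.Fatal(err)
		}

		fi, err := f.Stat()
		if err != nil {
			log.Fatal(err)
		}
		st := fi.Sys().(*syscall.Stat_t)
		// st.Blocks counts allocated 512-byte blocks; over a hole it stays
		// near zero even though fi.Size() reports 1 MiB.
		fmt.Printf("size=%d bytes, allocated=%d bytes\n", fi.Size(), st.Blocks*512)
	}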
This commit is contained in:
parent b48766d7b8
commit 5d4568d393

4 changed files with 171 additions and 17 deletions
@@ -19,15 +19,19 @@ type filesWriter struct {
 type filesWriterBucket struct {
 	lock  sync.Mutex
-	files map[string]*os.File
-	users map[string]int
+	files map[string]*partialFile
 }
 
+type partialFile struct {
+	*os.File
+	size  int64 // File size, tracked for sparse writes (not on Windows).
+	users int   // Reference count.
+}
+
 func newFilesWriter(count int) *filesWriter {
 	buckets := make([]filesWriterBucket, count)
 	for b := 0; b < count; b++ {
-		buckets[b].files = make(map[string]*os.File)
-		buckets[b].users = make(map[string]int)
+		buckets[b].files = make(map[string]*partialFile)
 	}
 	return &filesWriter{
 		buckets: buckets,
@@ -37,12 +41,12 @@ func newFilesWriter(count int) *filesWriter {
 func (w *filesWriter) writeToFile(path string, blob []byte, offset int64, createSize int64) error {
 	bucket := &w.buckets[uint(xxhash.Sum64String(path))%uint(len(w.buckets))]
 
-	acquireWriter := func() (*os.File, error) {
+	acquireWriter := func() (*partialFile, error) {
 		bucket.lock.Lock()
 		defer bucket.lock.Unlock()
 
 		if wr, ok := bucket.files[path]; ok {
-			bucket.users[path]++
+			bucket.files[path].users++
 			return wr, nil
 		}
 
@@ -53,16 +57,23 @@ func (w *filesWriter) writeToFile(path string, blob []byte, offset int64, create
 			flags = os.O_WRONLY
 		}
 
-		wr, err := os.OpenFile(path, flags, 0600)
+		f, err := os.OpenFile(path, flags, 0600)
 		if err != nil {
 			return nil, err
 		}
 
+		wr := &partialFile{File: f, users: 1}
+		if createSize < 0 {
+			info, err := f.Stat()
+			if err != nil {
+				return nil, err
+			}
+			wr.size = info.Size()
+		}
 		bucket.files[path] = wr
-		bucket.users[path] = 1
 
 		if createSize >= 0 {
-			err := preallocateFile(wr, createSize)
+			err := preallocateFile(wr.File, createSize)
 			if err != nil {
 				// Just log the preallocate error but don't let it cause the restore process to fail.
 				// Preallocate might return an error if the filesystem (implementation) does not
@@ -76,16 +87,15 @@ func (w *filesWriter) writeToFile(path string, blob []byte, offset int64, create
 		return wr, nil
 	}
 
-	releaseWriter := func(wr *os.File) error {
+	releaseWriter := func(wr *partialFile) error {
 		bucket.lock.Lock()
 		defer bucket.lock.Unlock()
 
-		if bucket.users[path] == 1 {
+		if bucket.files[path].users == 1 {
 			delete(bucket.files, path)
-			delete(bucket.users, path)
 			return wr.Close()
 		}
-		bucket.users[path]--
+		bucket.files[path].users--
 		return nil
 	}
 
@@ -18,19 +18,15 @@ func TestFilesWriterBasic(t *testing.T) {
 
 	rtest.OK(t, w.writeToFile(f1, []byte{1}, 0, 2))
 	rtest.Equals(t, 0, len(w.buckets[0].files))
-	rtest.Equals(t, 0, len(w.buckets[0].users))
 
 	rtest.OK(t, w.writeToFile(f2, []byte{2}, 0, 2))
 	rtest.Equals(t, 0, len(w.buckets[0].files))
-	rtest.Equals(t, 0, len(w.buckets[0].users))
 
 	rtest.OK(t, w.writeToFile(f1, []byte{1}, 1, -1))
 	rtest.Equals(t, 0, len(w.buckets[0].files))
-	rtest.Equals(t, 0, len(w.buckets[0].users))
 
 	rtest.OK(t, w.writeToFile(f2, []byte{2}, 1, -1))
 	rtest.Equals(t, 0, len(w.buckets[0].files))
-	rtest.Equals(t, 0, len(w.buckets[0].users))
 
 	buf, err := ioutil.ReadFile(f1)
 	rtest.OK(t, err)
@@ -4,12 +4,18 @@
 package restorer
 
 import (
+	"bytes"
 	"context"
 	"io/ioutil"
+	"math"
+	"math/rand"
 	"os"
 	"path/filepath"
+	"syscall"
 	"testing"
 
+	"github.com/restic/restic/internal/archiver"
 	"github.com/restic/restic/internal/fs"
+	"github.com/restic/restic/internal/repository"
 	"github.com/restic/restic/internal/restic"
 	rtest "github.com/restic/restic/internal/test"
@@ -60,3 +66,85 @@ func TestRestorerRestoreEmptyHardlinkedFileds(t *testing.T) {
 		rtest.Equals(t, s1.Ino, s2.Ino)
 	}
 }
+
+func TestRestorerSparseFiles(t *testing.T) {
+	repo, cleanup := repository.TestRepository(t)
+	defer cleanup()
+
+	var zeros [1<<20 + 13]byte
+
+	target := &fs.Reader{
+		Mode:       0600,
+		Name:       "/zeros",
+		ReadCloser: ioutil.NopCloser(bytes.NewReader(zeros[:])),
+	}
+	sc := archiver.NewScanner(target)
+	err := sc.Scan(context.TODO(), []string{"/zeros"})
+	rtest.OK(t, err)
+
+	arch := archiver.New(repo, target, archiver.Options{})
+	_, id, err := arch.Snapshot(context.Background(), []string{"/zeros"},
+		archiver.SnapshotOptions{})
+
+	res, err := NewRestorer(repo, id)
+	rtest.OK(t, err)
+
+	tempdir, cleanup := rtest.TempDir(t)
+	defer cleanup()
+
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	err = res.RestoreTo(ctx, tempdir)
+	rtest.OK(t, err)
+
+	filename := filepath.Join(tempdir, "zeros")
+	content, err := ioutil.ReadFile(filename)
+	rtest.OK(t, err)
+
+	rtest.Equals(t, zeros[:], content)
+
+	fi, err := os.Stat(filename)
+	rtest.OK(t, err)
+	st := fi.Sys().(*syscall.Stat_t)
+	if st == nil {
+		return
+	}
+
+	// st.Blocks is the size in 512-byte blocks.
+	denseBlocks := math.Ceil(float64(len(zeros)) / 512)
+	sparsity := 1 - float64(st.Blocks)/denseBlocks
+
+	// This should report 100% sparse. We don't assert that,
+	// as the behavior of sparse writes depends on the underlying
+	// file system as well as the OS.
+	t.Logf("wrote %d zeros as %d blocks, %.1f%% sparse",
+		len(zeros), st.Blocks, 100*sparsity)
+}
+
+func BenchmarkZeroPrefixLen(b *testing.B) {
+	var (
+		buf        [4<<20 + 37]byte
+		r          = rand.New(rand.NewSource(0x618732))
+		sumSkipped int64
+	)
+
+	b.ReportAllocs()
+	b.SetBytes(int64(len(buf)))
+	b.ResetTimer()
+
+	for i := 0; i < b.N; i++ {
+		j := r.Intn(len(buf))
+		buf[j] = 0xff
+
+		skipped := zeroPrefixLen(buf[:])
+		sumSkipped += int64(skipped)
+
+		buf[j] = 0
+	}
+
+	// The closer this is to .5, the better. If it's far off, give the
+	// benchmark more time to run with -benchtime.
+	b.Logf("average number of zeros skipped: %.3f",
+		float64(sumSkipped)/(float64(b.N*len(buf))))
+}
60  internal/restorer/sparsewrite.go  Normal file
@@ -0,0 +1,60 @@
+//go:build !windows
+// +build !windows
+
+package restorer
+
+import "bytes"
+
+// WriteAt writes p to f.File at offset. It tries to do a sparse write
+// and updates f.size.
+func (f *partialFile) WriteAt(p []byte, offset int64) (n int, err error) {
+	n = len(p)
+	end := offset + int64(n)
+
+	// Skip the longest all-zero prefix of p.
+	// If it's long enough, we can punch a hole in the file.
+	skipped := zeroPrefixLen(p)
+	p = p[skipped:]
+	offset += int64(skipped)
+
+	switch {
+	case len(p) == 0 && end > f.size:
+		// We need to do a Truncate, as WriteAt with length-0 input
+		// doesn't actually extend the file.
+		err = f.Truncate(end)
+		if err != nil {
+			return 0, err
+		}
+
+	case len(p) == 0:
+		// All zeros, file already big enough. A previous WriteAt or
+		// Truncate will have produced the zeros in f.File.
+
+	default:
+		n, err = f.File.WriteAt(p, offset)
+	}
+
+	end = offset + int64(n)
+	if end > f.size {
+		f.size = end
+	}
+	return n, err
+}
+
+// zeroPrefixLen returns the length of the longest all-zero prefix of p.
+func zeroPrefixLen(p []byte) (n int) {
+	// First skip 1kB-sized blocks, for speed.
+	var zeros [1024]byte
+
+	for len(p) >= len(zeros) && bytes.Equal(p[:len(zeros)], zeros[:]) {
+		p = p[len(zeros):]
+		n += len(zeros)
+	}
+
+	for len(p) > 0 && p[0] == 0 {
+		p = p[1:]
+		n++
+	}
+
+	return n
+}
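As a quick illustration of the helper above, here is a hypothetical test (not part of this commit): zeroPrefixLen measures only the leading run of zeros, which is exactly how many bytes WriteAt can skip and leave to a hole.

	package restorer

	import "testing"

	// Hypothetical example, not in this commit: zeroPrefixLen reports only the
	// leading run of zeros; WriteAt advances the offset by exactly that much.
	func TestZeroPrefixLenExample(t *testing.T) {
		p := make([]byte, 4096)
		p[2048] = 0xff // first non-zero byte

		if got := zeroPrefixLen(p); got != 2048 {
			t.Fatalf("zeroPrefixLen = %d, want 2048", got)
		}
		// An all-zero slice is skipped entirely.
		if got := zeroPrefixLen(p[2049:]); got != len(p)-2049 {
			t.Fatalf("zeroPrefixLen = %d, want %d", got, len(p)-2049)
		}
	}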