Write sparse files in restorer

This writes files by using (*os.File).Truncate, which resolves to the
truncate system call on Unix.

Compared to the naive loop,

	for _, b := range p {
		if b != 0 {
			return false
		}
	}

the optimized allZero is about 10× faster:

name       old time/op    new time/op     delta
AllZero-8    1.09ms ± 1%     0.09ms ± 1%    -92.10%  (p=0.000 n=10+10)

name       old speed      new speed       delta
AllZero-8  3.84GB/s ± 1%  48.59GB/s ± 1%  +1166.51%  (p=0.000 n=10+10)
This commit is contained in:
greatroar 2020-02-26 21:48:05 +01:00 committed by Michael Eischer
parent b48766d7b8
commit 5d4568d393
4 changed files with 171 additions and 17 deletions

View file

@ -19,15 +19,19 @@ type filesWriter struct {
type filesWriterBucket struct {
lock sync.Mutex
files map[string]*os.File
users map[string]int
files map[string]*partialFile
}
type partialFile struct {
*os.File
size int64 // File size, tracked for sparse writes (not on Windows).
users int // Reference count.
}
func newFilesWriter(count int) *filesWriter {
buckets := make([]filesWriterBucket, count)
for b := 0; b < count; b++ {
buckets[b].files = make(map[string]*os.File)
buckets[b].users = make(map[string]int)
buckets[b].files = make(map[string]*partialFile)
}
return &filesWriter{
buckets: buckets,
@ -37,12 +41,12 @@ func newFilesWriter(count int) *filesWriter {
func (w *filesWriter) writeToFile(path string, blob []byte, offset int64, createSize int64) error {
bucket := &w.buckets[uint(xxhash.Sum64String(path))%uint(len(w.buckets))]
acquireWriter := func() (*os.File, error) {
acquireWriter := func() (*partialFile, error) {
bucket.lock.Lock()
defer bucket.lock.Unlock()
if wr, ok := bucket.files[path]; ok {
bucket.users[path]++
bucket.files[path].users++
return wr, nil
}
@ -53,16 +57,23 @@ func (w *filesWriter) writeToFile(path string, blob []byte, offset int64, create
flags = os.O_WRONLY
}
wr, err := os.OpenFile(path, flags, 0600)
f, err := os.OpenFile(path, flags, 0600)
if err != nil {
return nil, err
}
wr := &partialFile{File: f, users: 1}
if createSize < 0 {
info, err := f.Stat()
if err != nil {
return nil, err
}
wr.size = info.Size()
}
bucket.files[path] = wr
bucket.users[path] = 1
if createSize >= 0 {
err := preallocateFile(wr, createSize)
err := preallocateFile(wr.File, createSize)
if err != nil {
// Just log the preallocate error but don't let it cause the restore process to fail.
// Preallocate might return an error if the filesystem (implementation) does not
@ -76,16 +87,15 @@ func (w *filesWriter) writeToFile(path string, blob []byte, offset int64, create
return wr, nil
}
releaseWriter := func(wr *os.File) error {
releaseWriter := func(wr *partialFile) error {
bucket.lock.Lock()
defer bucket.lock.Unlock()
if bucket.users[path] == 1 {
if bucket.files[path].users == 1 {
delete(bucket.files, path)
delete(bucket.users, path)
return wr.Close()
}
bucket.users[path]--
bucket.files[path].users--
return nil
}

View file

@ -18,19 +18,15 @@ func TestFilesWriterBasic(t *testing.T) {
rtest.OK(t, w.writeToFile(f1, []byte{1}, 0, 2))
rtest.Equals(t, 0, len(w.buckets[0].files))
rtest.Equals(t, 0, len(w.buckets[0].users))
rtest.OK(t, w.writeToFile(f2, []byte{2}, 0, 2))
rtest.Equals(t, 0, len(w.buckets[0].files))
rtest.Equals(t, 0, len(w.buckets[0].users))
rtest.OK(t, w.writeToFile(f1, []byte{1}, 1, -1))
rtest.Equals(t, 0, len(w.buckets[0].files))
rtest.Equals(t, 0, len(w.buckets[0].users))
rtest.OK(t, w.writeToFile(f2, []byte{2}, 1, -1))
rtest.Equals(t, 0, len(w.buckets[0].files))
rtest.Equals(t, 0, len(w.buckets[0].users))
buf, err := ioutil.ReadFile(f1)
rtest.OK(t, err)

View file

@ -4,12 +4,18 @@
package restorer
import (
"bytes"
"context"
"io/ioutil"
"math"
"math/rand"
"os"
"path/filepath"
"syscall"
"testing"
"github.com/restic/restic/internal/archiver"
"github.com/restic/restic/internal/fs"
"github.com/restic/restic/internal/repository"
"github.com/restic/restic/internal/restic"
rtest "github.com/restic/restic/internal/test"
@ -60,3 +66,85 @@ func TestRestorerRestoreEmptyHardlinkedFileds(t *testing.T) {
rtest.Equals(t, s1.Ino, s2.Ino)
}
}
func TestRestorerSparseFiles(t *testing.T) {
repo, cleanup := repository.TestRepository(t)
defer cleanup()
var zeros [1<<20 + 13]byte
target := &fs.Reader{
Mode: 0600,
Name: "/zeros",
ReadCloser: ioutil.NopCloser(bytes.NewReader(zeros[:])),
}
sc := archiver.NewScanner(target)
err := sc.Scan(context.TODO(), []string{"/zeros"})
rtest.OK(t, err)
arch := archiver.New(repo, target, archiver.Options{})
_, id, err := arch.Snapshot(context.Background(), []string{"/zeros"},
archiver.SnapshotOptions{})
res, err := NewRestorer(repo, id)
rtest.OK(t, err)
tempdir, cleanup := rtest.TempDir(t)
defer cleanup()
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
err = res.RestoreTo(ctx, tempdir)
rtest.OK(t, err)
filename := filepath.Join(tempdir, "zeros")
content, err := ioutil.ReadFile(filename)
rtest.OK(t, err)
rtest.Equals(t, zeros[:], content)
fi, err := os.Stat(filename)
rtest.OK(t, err)
st := fi.Sys().(*syscall.Stat_t)
if st == nil {
return
}
// st.Blocks is the size in 512-byte blocks.
denseBlocks := math.Ceil(float64(len(zeros)) / 512)
sparsity := 1 - float64(st.Blocks)/denseBlocks
// This should report 100% sparse. We don't assert that,
// as the behavior of sparse writes depends on the underlying
// file system as well as the OS.
t.Logf("wrote %d zeros as %d blocks, %.1f%% sparse",
len(zeros), st.Blocks, 100*sparsity)
}
func BenchmarkZeroPrefixLen(b *testing.B) {
var (
buf [4<<20 + 37]byte
r = rand.New(rand.NewSource(0x618732))
sumSkipped int64
)
b.ReportAllocs()
b.SetBytes(int64(len(buf)))
b.ResetTimer()
for i := 0; i < b.N; i++ {
j := r.Intn(len(buf))
buf[j] = 0xff
skipped := zeroPrefixLen(buf[:])
sumSkipped += int64(skipped)
buf[j] = 0
}
// The closer this is to .5, the better. If it's far off, give the
// benchmark more time to run with -benchtime.
b.Logf("average number of zeros skipped: %.3f",
float64(sumSkipped)/(float64(b.N*len(buf))))
}

View file

@ -0,0 +1,60 @@
//go:build !windows
// +build !windows
package restorer
import "bytes"
// WriteAt writes p to f.File at offset. It tries to do a sparse write
// and updates f.size.
func (f *partialFile) WriteAt(p []byte, offset int64) (n int, err error) {
n = len(p)
end := offset + int64(n)
// Skip the longest all-zero prefix of p.
// If it's long enough, we can punch a hole in the file.
skipped := zeroPrefixLen(p)
p = p[skipped:]
offset += int64(skipped)
switch {
case len(p) == 0 && end > f.size:
// We need to do a Truncate, as WriteAt with length-0 input
// doesn't actually extend the file.
err = f.Truncate(end)
if err != nil {
return 0, err
}
case len(p) == 0:
// All zeros, file already big enough. A previous WriteAt or
// Truncate will have produced the zeros in f.File.
default:
n, err = f.File.WriteAt(p, offset)
}
end = offset + int64(n)
if end > f.size {
f.size = end
}
return n, err
}
// zeroPrefixLen returns the length of the longest all-zero prefix of p.
func zeroPrefixLen(p []byte) (n int) {
// First skip 1kB-sized blocks, for speed.
var zeros [1024]byte
for len(p) >= len(zeros) && bytes.Equal(p[:len(zeros)], zeros[:]) {
p = p[len(zeros):]
n += len(zeros)
}
for len(p) > 0 && p[0] == 0 {
p = p[1:]
n++
}
return n
}