repository: special case SaveBlob for all zero chunks

Sparse files contain large regions containing only zero bytes. Checking
that a blob only contains zeros is possible with over 100GB/s for modern
x86 CPUs. Calculating sha256 hashes is only possible with 500MB/s (or
2GB/s using hardware acceleration). Thus we can speed up the hash
calculation for all zero blobs (which always have length
chunker.MinSize) by checking for zero bytes and then using the
precomputed hash.

The all zeros check is only performed for blobs with the minimal chunk
size, and thus should add no overhead most of the time. For chunks which
are not all zero but have the minimal chunks size, the overhead will be
below 2% based on the above performance numbers.

This allows reading sparse sections of files as fast as the kernel can
return data to us. On my system using BTRFS this resulted in about
4GB/s.
This commit is contained in:
Michael Eischer 2022-09-04 10:49:16 +02:00
parent 34fe1362da
commit c147422ba5
2 changed files with 20 additions and 3 deletions

View file

@ -813,7 +813,14 @@ func (r *Repository) SaveBlob(ctx context.Context, t restic.BlobType, buf []byte
// compute plaintext hash if not already set
if id.IsNull() {
newID = restic.Hash(buf)
// Special case the hash calculation for all zero chunks. This is especially
// useful for sparse files containing large all zero regions. For these we can
// process chunks as fast as we can read the from disk.
if len(buf) == chunker.MinSize && restic.ZeroPrefixLen(buf) == chunker.MinSize {
newID = ZeroChunk()
} else {
newID = restic.Hash(buf)
}
} else {
newID = id
}
@ -967,3 +974,14 @@ func streamPackPart(ctx context.Context, beLoad BackendLoadFn, key *crypto.Key,
})
return errors.Wrap(err, "StreamPack")
}
var zeroChunkOnce sync.Once
var zeroChunkID restic.ID
// ZeroChunk computes and returns (cached) the ID of an all-zero chunk with size chunker.MinSize
func ZeroChunk() restic.ID {
zeroChunkOnce.Do(func() {
zeroChunkID = restic.Hash(make([]byte, chunker.MinSize))
})
return zeroChunkID
}

View file

@ -7,7 +7,6 @@ import (
"golang.org/x/sync/errgroup"
"github.com/restic/chunker"
"github.com/restic/restic/internal/crypto"
"github.com/restic/restic/internal/debug"
"github.com/restic/restic/internal/errors"
@ -76,7 +75,7 @@ func newFileRestorer(dst string,
idx: idx,
packLoader: packLoader,
filesWriter: newFilesWriter(workerCount),
zeroChunk: restic.Hash(make([]byte, chunker.MinSize)),
zeroChunk: repository.ZeroChunk(),
sparse: sparse,
workerCount: workerCount,
dst: dst,