forked from TrueCloudLab/restic
repository: special case SaveBlob for all zero chunks
Sparse files contain large regions consisting only of zero bytes. Checking that a blob contains only zeros is possible at over 100GB/s on modern x86 CPUs, whereas calculating SHA-256 hashes is only possible at about 500MB/s (or 2GB/s using hardware acceleration). Thus we can speed up the hash calculation for all-zero blobs (which always have length chunker.MinSize) by checking for zero bytes and then using the precomputed hash. The all-zeros check is only performed for blobs with the minimal chunk size, and thus should add no overhead most of the time. For chunks which are not all zero but have the minimal chunk size, the overhead will be below 2% based on the above performance numbers. This allows reading sparse sections of files as fast as the kernel can return data to us. On my system using BTRFS this resulted in about 4GB/s.
This commit is contained in:
parent
34fe1362da
commit
c147422ba5
2 changed files with 20 additions and 3 deletions
|
@ -813,7 +813,14 @@ func (r *Repository) SaveBlob(ctx context.Context, t restic.BlobType, buf []byte
|
|||
|
||||
// compute plaintext hash if not already set
|
||||
if id.IsNull() {
|
||||
newID = restic.Hash(buf)
|
||||
// Special case the hash calculation for all zero chunks. This is especially
|
||||
// useful for sparse files containing large all zero regions. For these we can
|
||||
// process chunks as fast as we can read them from disk.
|
||||
if len(buf) == chunker.MinSize && restic.ZeroPrefixLen(buf) == chunker.MinSize {
|
||||
newID = ZeroChunk()
|
||||
} else {
|
||||
newID = restic.Hash(buf)
|
||||
}
|
||||
} else {
|
||||
newID = id
|
||||
}
|
||||
|
@ -967,3 +974,14 @@ func streamPackPart(ctx context.Context, beLoad BackendLoadFn, key *crypto.Key,
|
|||
})
|
||||
return errors.Wrap(err, "StreamPack")
|
||||
}
|
||||
|
||||
var zeroChunkOnce sync.Once
|
||||
var zeroChunkID restic.ID
|
||||
|
||||
// ZeroChunk returns the ID of an all-zero chunk of size chunker.MinSize. The ID is computed on first use and cached.
|
||||
func ZeroChunk() restic.ID {
|
||||
zeroChunkOnce.Do(func() {
|
||||
zeroChunkID = restic.Hash(make([]byte, chunker.MinSize))
|
||||
})
|
||||
return zeroChunkID
|
||||
}
|
||||
|
|
|
@ -7,7 +7,6 @@ import (
|
|||
|
||||
"golang.org/x/sync/errgroup"
|
||||
|
||||
"github.com/restic/chunker"
|
||||
"github.com/restic/restic/internal/crypto"
|
||||
"github.com/restic/restic/internal/debug"
|
||||
"github.com/restic/restic/internal/errors"
|
||||
|
@ -76,7 +75,7 @@ func newFileRestorer(dst string,
|
|||
idx: idx,
|
||||
packLoader: packLoader,
|
||||
filesWriter: newFilesWriter(workerCount),
|
||||
zeroChunk: restic.Hash(make([]byte, chunker.MinSize)),
|
||||
zeroChunk: repository.ZeroChunk(),
|
||||
sparse: sparse,
|
||||
workerCount: workerCount,
|
||||
dst: dst,
|
||||
|
|
Loading…
Reference in a new issue