forked from TrueCloudLab/restic
Optimize pack readHeader() implementation
Load the pack header length and 15 header entries with a single backend request. This eliminates the separate header Load() request for most pack files and significantly improves index.New() performance.
Signed-off-by: Igor Fedorenko <igor@ifedorenko.com>
This commit is contained in:
parent
b63de7c798
commit
953f3d55ee
3 changed files with 88 additions and 26 deletions
7
changelog/0.8.2/pull-1574
Normal file
7
changelog/0.8.2/pull-1574
Normal file
|
@ -0,0 +1,7 @@
|
|||
Enhancement: Reduce number of remote requests reading pack header
|
||||
|
||||
This change eliminates extra remote repository calls for most pack
|
||||
files and improves repository reindex and purge time.
|
||||
|
||||
https://github.com/restic/restic/issues/1567
|
||||
https://github.com/restic/restic/pull/1574
|
|
@ -170,29 +170,14 @@ func (p *Packer) String() string {
|
|||
return fmt.Sprintf("<Packer %d blobs, %d bytes>", len(p.blobs), p.bytes)
|
||||
}
|
||||
|
||||
// readHeaderLength returns the header length read from the end of the file
|
||||
// encoded in little endian.
|
||||
//
// The length field is the uint32 stored in the last binary.Size(uint32(0))
// bytes before offset size in rd. An error is returned when the ReadAt call
// fails (wrapped with the message "ReadAt") or when it yields fewer bytes
// than requested.
func readHeaderLength(rd io.ReaderAt, size int64) (uint32, error) {
|
||||
// offset of the length field: it occupies the trailing bytes of the data
off := size - int64(binary.Size(uint32(0)))
|
||||

||||
buf := make([]byte, binary.Size(uint32(0)))
|
||||
n, err := rd.ReadAt(buf, off)
|
||||
if err != nil {
|
||||
return 0, errors.Wrap(err, "ReadAt")
|
||||
}
|
||||

||||
// guard against a short read that was not reported as an error
if n != len(buf) {
|
||||
return 0, errors.New("not enough bytes read")
|
||||
}
|
||||

||||
return binary.LittleEndian.Uint32(buf), nil
|
||||
}
|
||||
|
||||
const maxHeaderSize = 16 * 1024 * 1024
|
||||
|
||||
// we require at least one entry in the header, and one blob for a pack file
|
||||
var minFileSize = entrySize + crypto.Extension
|
||||
|
||||
// number of header entries to download as part of header-length request
|
||||
var eagerEntries = uint(15)
|
||||
|
||||
// readHeader reads the header at the end of rd. size is the length of the
|
||||
// whole data accessible in rd.
|
||||
func readHeader(rd io.ReaderAt, size int64) ([]byte, error) {
|
||||
|
@ -207,11 +192,25 @@ func readHeader(rd io.ReaderAt, size int64) ([]byte, error) {
|
|||
return nil, errors.Wrap(err, "readHeader")
|
||||
}
|
||||
|
||||
hl, err := readHeaderLength(rd, size)
|
||||
// assuming extra request is significantly slower than extra bytes download,
|
||||
// eagerly download eagerEntries header entries as part of header-length request.
|
||||
// only make second request if actual number of entries is greater than eagerEntries
|
||||
|
||||
eagerHl := uint32((eagerEntries * entrySize) + crypto.Extension)
|
||||
if int64(eagerHl) > size {
|
||||
eagerHl = uint32(size) - uint32(binary.Size(uint32(0)))
|
||||
}
|
||||
eagerBuf := make([]byte, eagerHl+uint32(binary.Size(uint32(0))))
|
||||
|
||||
n, err := rd.ReadAt(eagerBuf, size-int64(len(eagerBuf)))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if n != len(eagerBuf) {
|
||||
return nil, errors.New("not enough bytes read")
|
||||
}
|
||||
|
||||
hl := binary.LittleEndian.Uint32(eagerBuf[eagerHl:])
|
||||
debug.Log("header length: %v", size)
|
||||
|
||||
if hl == 0 {
|
||||
|
@ -239,14 +238,24 @@ func readHeader(rd io.ReaderAt, size int64) ([]byte, error) {
|
|||
return nil, errors.Wrap(err, "readHeader")
|
||||
}
|
||||
|
||||
buf := make([]byte, int(hl))
|
||||
n, err := rd.ReadAt(buf, size-int64(hl)-int64(binary.Size(hl)))
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "ReadAt")
|
||||
}
|
||||
eagerBuf = eagerBuf[:eagerHl]
|
||||
|
||||
if n != len(buf) {
|
||||
return nil, errors.New("not enough bytes read")
|
||||
var buf []byte
|
||||
if hl <= eagerHl {
|
||||
// already have all header bytes. yay.
|
||||
buf = eagerBuf[eagerHl-hl:]
|
||||
} else {
|
||||
// need more header bytes
|
||||
buf = make([]byte, hl)
|
||||
missingHl := hl - eagerHl
|
||||
n, err := rd.ReadAt(buf[:missingHl], size-int64(hl)-int64(binary.Size(hl)))
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "ReadAt")
|
||||
}
|
||||
if uint32(n) != missingHl {
|
||||
return nil, errors.New("not enough bytes read")
|
||||
}
|
||||
copy(buf[hl-eagerHl:], eagerBuf)
|
||||
}
|
||||
|
||||
return buf, nil
|
||||
|
|
46
internal/pack/pack_internal_test.go
Normal file
46
internal/pack/pack_internal_test.go
Normal file
|
@ -0,0 +1,46 @@
|
|||
package pack
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"io"
|
||||
"testing"
|
||||
|
||||
"github.com/restic/restic/internal/crypto"
|
||||
rtest "github.com/restic/restic/internal/test"
|
||||
)
|
||||
|
||||
// countingReaderAt wraps an io.ReaderAt and records how many times its
// ReadAt method has been invoked.
type countingReaderAt struct {
|
||||
// delegate is the reader that ReadAt calls are forwarded to.
delegate io.ReaderAt
|
||||
// invocationCount is incremented once per ReadAt call.
invocationCount int
|
||||
}
|
||||
|
||||
// ReadAt increments the invocation counter and then forwards the call to the
// delegate, returning the delegate's result unchanged.
func (rd *countingReaderAt) ReadAt(p []byte, off int64) (n int, err error) {
|
||||
rd.invocationCount++
|
||||
return rd.delegate.ReadAt(p, off)
|
||||
}
|
||||
|
||||
// TestReadHeaderEagerLoad checks that readHeader returns the expected header
// bytes and that it issues the expected number of ReadAt calls: one call for
// headers with up to eagerEntries entries, two calls for eagerEntries+1.
func TestReadHeaderEagerLoad(t *testing.T) {
|
||||

||||
// testReadHeader builds an in-memory pack (blob data, a header sized for
// entryCount entries plus crypto.Extension, and the little-endian uint32
// header length), reads the header back through a countingReaderAt and
// compares both the header bytes and the number of ReadAt invocations.
testReadHeader := func(entryCount uint, expectedReadInvocationCount int) {
|
||||
expectedHeader := rtest.Random(0, int(entryCount*entrySize)+crypto.Extension)
|
||||

||||
buf := &bytes.Buffer{}
|
||||
buf.Write(rtest.Random(0, 100)) // pack blobs data
|
||||
buf.Write(expectedHeader) // pack header
|
||||
binary.Write(buf, binary.LittleEndian, uint32(len(expectedHeader))) // pack header length
|
||||

||||
rd := &countingReaderAt{delegate: bytes.NewReader(buf.Bytes())}
|
||||

||||
header, err := readHeader(rd, int64(buf.Len()))
|
||||
rtest.OK(t, err)
|
||||

||||
rtest.Equals(t, expectedHeader, header)
|
||||
rtest.Equals(t, expectedReadInvocationCount, rd.invocationCount)
|
||||
}
|
||||

||||
// entry counts at and below eagerEntries must be served by a single read;
// one entry more must trigger exactly one additional read
testReadHeader(1, 1)
|
||||
testReadHeader(eagerEntries-1, 1)
|
||||
testReadHeader(eagerEntries, 1)
|
||||
testReadHeader(eagerEntries+1, 2)
|
||||
}
|
Loading…
Reference in a new issue