Optimize pack readHeader() implementation

Load pack header length and 15 header entries with single backend
request. This eliminates separate header Load() request for most pack
files and significantly improves index.New() performance.

Signed-off-by: Igor Fedorenko <igor@ifedorenko.com>
This commit is contained in:
Igor Fedorenko 2018-01-23 18:51:22 -05:00
parent b63de7c798
commit 953f3d55ee
3 changed files with 88 additions and 26 deletions

View file

@ -0,0 +1,7 @@
Enhancement: Reduce number of remote requests reading pack header
This change eliminates extra remote repository calls for most pack
files and improves repository reindex and purge time.
https://github.com/restic/restic/issues/1567
https://github.com/restic/restic/pull/1574

View file

@ -170,29 +170,14 @@ func (p *Packer) String() string {
return fmt.Sprintf("<Packer %d blobs, %d bytes>", len(p.blobs), p.bytes)
}
// readHeaderLength returns the header length read from the end of the file
// encoded in little endian.
func readHeaderLength(rd io.ReaderAt, size int64) (uint32, error) {
off := size - int64(binary.Size(uint32(0)))
buf := make([]byte, binary.Size(uint32(0)))
n, err := rd.ReadAt(buf, off)
if err != nil {
return 0, errors.Wrap(err, "ReadAt")
}
if n != len(buf) {
return 0, errors.New("not enough bytes read")
}
return binary.LittleEndian.Uint32(buf), nil
}
const maxHeaderSize = 16 * 1024 * 1024
// we require at least one entry in the header, and one blob for a pack file
var minFileSize = entrySize + crypto.Extension
// number of header enries to download as part of header-length request
var eagerEntries = uint(15)
// readHeader reads the header at the end of rd. size is the length of the
// whole data accessible in rd.
func readHeader(rd io.ReaderAt, size int64) ([]byte, error) {
@ -207,11 +192,25 @@ func readHeader(rd io.ReaderAt, size int64) ([]byte, error) {
return nil, errors.Wrap(err, "readHeader")
}
hl, err := readHeaderLength(rd, size)
// assuming extra request is significantly slower than extra bytes download,
// eagerly download eagerEntries header entries as part of header-length request.
// only make second request if actual number of entries is greater than eagerEntries
eagerHl := uint32((eagerEntries * entrySize) + crypto.Extension)
if int64(eagerHl) > size {
eagerHl = uint32(size) - uint32(binary.Size(uint32(0)))
}
eagerBuf := make([]byte, eagerHl+uint32(binary.Size(uint32(0))))
n, err := rd.ReadAt(eagerBuf, size-int64(len(eagerBuf)))
if err != nil {
return nil, err
}
if n != len(eagerBuf) {
return nil, errors.New("not enough bytes read")
}
hl := binary.LittleEndian.Uint32(eagerBuf[eagerHl:])
debug.Log("header length: %v", size)
if hl == 0 {
@ -239,14 +238,24 @@ func readHeader(rd io.ReaderAt, size int64) ([]byte, error) {
return nil, errors.Wrap(err, "readHeader")
}
buf := make([]byte, int(hl))
n, err := rd.ReadAt(buf, size-int64(hl)-int64(binary.Size(hl)))
if err != nil {
return nil, errors.Wrap(err, "ReadAt")
}
eagerBuf = eagerBuf[:eagerHl]
if n != len(buf) {
return nil, errors.New("not enough bytes read")
var buf []byte
if hl <= eagerHl {
// already have all header bytes. yay.
buf = eagerBuf[eagerHl-hl:]
} else {
// need more header bytes
buf = make([]byte, hl)
missingHl := hl - eagerHl
n, err := rd.ReadAt(buf[:missingHl], size-int64(hl)-int64(binary.Size(hl)))
if err != nil {
return nil, errors.Wrap(err, "ReadAt")
}
if uint32(n) != missingHl {
return nil, errors.New("not enough bytes read")
}
copy(buf[hl-eagerHl:], eagerBuf)
}
return buf, nil

View file

@ -0,0 +1,46 @@
package pack
import (
"bytes"
"encoding/binary"
"io"
"testing"
"github.com/restic/restic/internal/crypto"
rtest "github.com/restic/restic/internal/test"
)
type countingReaderAt struct {
delegate io.ReaderAt
invocationCount int
}
func (rd *countingReaderAt) ReadAt(p []byte, off int64) (n int, err error) {
rd.invocationCount++
return rd.delegate.ReadAt(p, off)
}
func TestReadHeaderEagerLoad(t *testing.T) {
testReadHeader := func(entryCount uint, expectedReadInvocationCount int) {
expectedHeader := rtest.Random(0, int(entryCount*entrySize)+crypto.Extension)
buf := &bytes.Buffer{}
buf.Write(rtest.Random(0, 100)) // pack blobs data
buf.Write(expectedHeader) // pack header
binary.Write(buf, binary.LittleEndian, uint32(len(expectedHeader))) // pack header length
rd := &countingReaderAt{delegate: bytes.NewReader(buf.Bytes())}
header, err := readHeader(rd, int64(buf.Len()))
rtest.OK(t, err)
rtest.Equals(t, expectedHeader, header)
rtest.Equals(t, expectedReadInvocationCount, rd.invocationCount)
}
testReadHeader(1, 1)
testReadHeader(eagerEntries-1, 1)
testReadHeader(eagerEntries, 1)
testReadHeader(eagerEntries+1, 2)
}