From 953f3d55ee62bee42d4fdb8b774d980255bfd29b Mon Sep 17 00:00:00 2001 From: Igor Fedorenko Date: Tue, 23 Jan 2018 18:51:22 -0500 Subject: [PATCH] Optimize pack readHeader() implementation Load pack header length and 15 header entries with single backend request. This eliminates separate header Load() request for most pack files and significantly improves index.New() performance. Signed-off-by: Igor Fedorenko --- changelog/0.8.2/pull-1574 | 7 ++++ internal/pack/pack.go | 61 +++++++++++++++++------------ internal/pack/pack_internal_test.go | 46 ++++++++++++++++++++++ 3 files changed, 88 insertions(+), 26 deletions(-) create mode 100644 changelog/0.8.2/pull-1574 create mode 100644 internal/pack/pack_internal_test.go diff --git a/changelog/0.8.2/pull-1574 b/changelog/0.8.2/pull-1574 new file mode 100644 index 000000000..1f89aaaa7 --- /dev/null +++ b/changelog/0.8.2/pull-1574 @@ -0,0 +1,7 @@ +Enhancement: Reduce number of remote requests reading pack header + +This change eliminates extra remote repository calls for most pack +files and improves repository reindex and purge time. + +https://github.com/restic/restic/issues/1567 +https://github.com/restic/restic/pull/1574 diff --git a/internal/pack/pack.go b/internal/pack/pack.go index 577b4c763..6583b224b 100644 --- a/internal/pack/pack.go +++ b/internal/pack/pack.go @@ -170,29 +170,14 @@ func (p *Packer) String() string { return fmt.Sprintf("", len(p.blobs), p.bytes) } -// readHeaderLength returns the header length read from the end of the file -// encoded in little endian. -func readHeaderLength(rd io.ReaderAt, size int64) (uint32, error) { - off := size - int64(binary.Size(uint32(0))) - - buf := make([]byte, binary.Size(uint32(0))) - n, err := rd.ReadAt(buf, off) - if err != nil { - return 0, errors.Wrap(err, "ReadAt") - } - - if n != len(buf) { - return 0, errors.New("not enough bytes read") - } - - return binary.LittleEndian.Uint32(buf), nil -} - const maxHeaderSize = 16 * 1024 * 1024 // we require at least one entry in the header, and one blob for a pack file var minFileSize = entrySize + crypto.Extension +// number of header enries to download as part of header-length request +var eagerEntries = uint(15) + // readHeader reads the header at the end of rd. size is the length of the // whole data accessible in rd. func readHeader(rd io.ReaderAt, size int64) ([]byte, error) { @@ -207,11 +192,25 @@ func readHeader(rd io.ReaderAt, size int64) ([]byte, error) { return nil, errors.Wrap(err, "readHeader") } - hl, err := readHeaderLength(rd, size) + // assuming extra request is significantly slower than extra bytes download, + // eagerly download eagerEntries header entries as part of header-length request. + // only make second request if actual number of entries is greater than eagerEntries + + eagerHl := uint32((eagerEntries * entrySize) + crypto.Extension) + if int64(eagerHl) > size { + eagerHl = uint32(size) - uint32(binary.Size(uint32(0))) + } + eagerBuf := make([]byte, eagerHl+uint32(binary.Size(uint32(0)))) + + n, err := rd.ReadAt(eagerBuf, size-int64(len(eagerBuf))) if err != nil { return nil, err } + if n != len(eagerBuf) { + return nil, errors.New("not enough bytes read") + } + hl := binary.LittleEndian.Uint32(eagerBuf[eagerHl:]) debug.Log("header length: %v", size) if hl == 0 { @@ -239,14 +238,24 @@ func readHeader(rd io.ReaderAt, size int64) ([]byte, error) { return nil, errors.Wrap(err, "readHeader") } - buf := make([]byte, int(hl)) - n, err := rd.ReadAt(buf, size-int64(hl)-int64(binary.Size(hl))) - if err != nil { - return nil, errors.Wrap(err, "ReadAt") - } + eagerBuf = eagerBuf[:eagerHl] - if n != len(buf) { - return nil, errors.New("not enough bytes read") + var buf []byte + if hl <= eagerHl { + // already have all header bytes. yay. + buf = eagerBuf[eagerHl-hl:] + } else { + // need more header bytes + buf = make([]byte, hl) + missingHl := hl - eagerHl + n, err := rd.ReadAt(buf[:missingHl], size-int64(hl)-int64(binary.Size(hl))) + if err != nil { + return nil, errors.Wrap(err, "ReadAt") + } + if uint32(n) != missingHl { + return nil, errors.New("not enough bytes read") + } + copy(buf[hl-eagerHl:], eagerBuf) } return buf, nil diff --git a/internal/pack/pack_internal_test.go b/internal/pack/pack_internal_test.go new file mode 100644 index 000000000..b83cd8f55 --- /dev/null +++ b/internal/pack/pack_internal_test.go @@ -0,0 +1,46 @@ +package pack + +import ( + "bytes" + "encoding/binary" + "io" + "testing" + + "github.com/restic/restic/internal/crypto" + rtest "github.com/restic/restic/internal/test" +) + +type countingReaderAt struct { + delegate io.ReaderAt + invocationCount int +} + +func (rd *countingReaderAt) ReadAt(p []byte, off int64) (n int, err error) { + rd.invocationCount++ + return rd.delegate.ReadAt(p, off) +} + +func TestReadHeaderEagerLoad(t *testing.T) { + + testReadHeader := func(entryCount uint, expectedReadInvocationCount int) { + expectedHeader := rtest.Random(0, int(entryCount*entrySize)+crypto.Extension) + + buf := &bytes.Buffer{} + buf.Write(rtest.Random(0, 100)) // pack blobs data + buf.Write(expectedHeader) // pack header + binary.Write(buf, binary.LittleEndian, uint32(len(expectedHeader))) // pack header length + + rd := &countingReaderAt{delegate: bytes.NewReader(buf.Bytes())} + + header, err := readHeader(rd, int64(buf.Len())) + rtest.OK(t, err) + + rtest.Equals(t, expectedHeader, header) + rtest.Equals(t, expectedReadInvocationCount, rd.invocationCount) + } + + testReadHeader(1, 1) + testReadHeader(eagerEntries-1, 1) + testReadHeader(eagerEntries, 1) + testReadHeader(eagerEntries+1, 2) +}