From 4fd5f0b8a90496d158b5a6fe58061e30923f8820 Mon Sep 17 00:00:00 2001 From: Toby Burress Date: Wed, 21 Feb 2018 23:22:18 +0000 Subject: [PATCH 1/3] Refactor the eager-header reads for readability. This pulls the header reads into a function that works in terms of the number of records requested. This preserves the existing logic of initially reading 15 records and then falling back if that fails. In the event of a header with more than 15 records, it will read all records, including the already-seen final 15 records. --- internal/pack/pack.go | 138 +++++++++++++++++++++--------------------- 1 file changed, 68 insertions(+), 70 deletions(-) diff --git a/internal/pack/pack.go b/internal/pack/pack.go index e700d284a..9ff935079 100644 --- a/internal/pack/pack.go +++ b/internal/pack/pack.go @@ -170,26 +170,74 @@ func (p *Packer) String() string { return fmt.Sprintf("", len(p.blobs), p.bytes) } -const maxHeaderSize = 16 * 1024 * 1024 +var ( + // size of the header-length field at the end of the file + headerLengthSize = binary.Size(uint32(0)) + // we require at least one entry in the header, and one blob for a pack file + minFileSize = entrySize + crypto.Extension + uint(headerLengthSize) +) -// size of the header-length field at the end of the file -var headerLengthSize = binary.Size(uint32(0)) +const ( + maxHeaderSize = 16 * 1024 * 1024 + // number of header enries to download as part of header-length request + eagerEntries = 15 +) -// we require at least one entry in the header, and one blob for a pack file -var minFileSize = entrySize + crypto.Extension + uint(headerLengthSize) +// readRecords reads count records from the underlying ReaderAt, returning the +// raw header, the total number of records in the header, and any error. If +// the header contains fewer than count entries, the return value is truncated. +func readRecords(rd io.ReaderAt, size int64, count int) ([]byte, int, error) { + var bufsize int + bufsize += count * int(entrySize) + bufsize += crypto.Extension + bufsize += headerLengthSize -// number of header enries to download as part of header-length request -var eagerEntries = uint(15) + if bufsize > int(size) { + bufsize = int(size) + } + + b := make([]byte, bufsize) + off := size - int64(bufsize) + if _, err := rd.ReadAt(b, off); err != nil { + return nil, 0, err + } + + header := b[len(b)-headerLengthSize:] + b = b[:len(b)-headerLengthSize] + hl := binary.LittleEndian.Uint32(header) + debug.Log("header length: %v", hl) + + var err error + switch { + case hl == 0: + err = InvalidFileError{Message: "header length is zero"} + case hl < crypto.Extension: + err = InvalidFileError{Message: "header length is too small"} + case (hl-crypto.Extension)%uint32(entrySize) != 0: + err = InvalidFileError{Message: "header length is invalid"} + case int64(hl) > size-int64(headerLengthSize): + err = InvalidFileError{Message: "header is larger than file"} + case int64(hl) > maxHeaderSize: + err = InvalidFileError{Message: "header is larger than maxHeaderSize"} + } + if err != nil { + return nil, 0, errors.Wrap(err, "readHeader") + } + + c := (int(hl) - crypto.Extension) / int(entrySize) + if c < count { + recordSize := c * int(entrySize) + start := len(b) - (recordSize + crypto.Extension) + b = b[start:] + } + + return b, c, nil +} // readHeader reads the header at the end of rd. size is the length of the // whole data accessible in rd. func readHeader(rd io.ReaderAt, size int64) ([]byte, error) { debug.Log("size: %v", size) - if size == 0 { - err := InvalidFileError{Message: "file is empty"} - return nil, errors.Wrap(err, "readHeader") - } - if size < int64(minFileSize) { err := InvalidFileError{Message: "file is too small"} return nil, errors.Wrap(err, "readHeader") @@ -199,69 +247,19 @@ func readHeader(rd io.ReaderAt, size int64) ([]byte, error) { // eagerly download eagerEntries header entries as part of header-length request. // only make second request if actual number of entries is greater than eagerEntries - eagerHl := uint32((eagerEntries * entrySize) + crypto.Extension) - if int64(eagerHl)+int64(headerLengthSize) > size { - eagerHl = uint32(size) - uint32(headerLengthSize) - } - eagerBuf := make([]byte, eagerHl+uint32(headerLengthSize)) - - n, err := rd.ReadAt(eagerBuf, size-int64(len(eagerBuf))) + b, c, err := readRecords(rd, size, eagerEntries) if err != nil { return nil, err } - if n != len(eagerBuf) { - return nil, errors.New("not enough bytes read") + if c <= eagerEntries { + // eager read sufficed, return what we got + return b, nil } - - hl := binary.LittleEndian.Uint32(eagerBuf[eagerHl:]) - debug.Log("header length: %v", size) - - if hl == 0 { - err := InvalidFileError{Message: "header length is zero"} - return nil, errors.Wrap(err, "readHeader") + b, _, err = readRecords(rd, size, c) + if err != nil { + return nil, err } - - if hl < crypto.Extension { - err := InvalidFileError{Message: "header length is too small"} - return nil, errors.Wrap(err, "readHeader") - } - - if (hl-crypto.Extension)%uint32(entrySize) != 0 { - err := InvalidFileError{Message: "header length is invalid"} - return nil, errors.Wrap(err, "readHeader") - } - - if int64(hl) > size-int64(headerLengthSize) { - err := InvalidFileError{Message: "header is larger than file"} - return nil, errors.Wrap(err, "readHeader") - } - - if int64(hl) > maxHeaderSize { - err := InvalidFileError{Message: "header is larger than maxHeaderSize"} - return nil, errors.Wrap(err, "readHeader") - } - - eagerBuf = eagerBuf[:eagerHl] - - var buf []byte - if hl <= eagerHl { - // already have all header bytes. yay. - buf = eagerBuf[eagerHl-hl:] - } else { - // need more header bytes - buf = make([]byte, hl) - missingHl := hl - eagerHl - n, err := rd.ReadAt(buf[:missingHl], size-int64(hl)-int64(headerLengthSize)) - if err != nil { - return nil, errors.Wrap(err, "ReadAt") - } - if uint32(n) != missingHl { - return nil, errors.New("not enough bytes read") - } - copy(buf[hl-eagerHl:], eagerBuf) - } - - return buf, nil + return b, nil } // InvalidFileError is return when a file is found that is not a pack file. From cdb48a8970d1e766c184635497d2188273d7649d Mon Sep 17 00:00:00 2001 From: Toby Burress Date: Thu, 22 Feb 2018 01:14:04 +0000 Subject: [PATCH 2/3] Add tests for the eager-header refactor. --- internal/pack/pack_internal_test.go | 54 +++++++++++++++++++++++++++-- 1 file changed, 52 insertions(+), 2 deletions(-) diff --git a/internal/pack/pack_internal_test.go b/internal/pack/pack_internal_test.go index bd790609f..6694b7333 100644 --- a/internal/pack/pack_internal_test.go +++ b/internal/pack/pack_internal_test.go @@ -22,8 +22,8 @@ func (rd *countingReaderAt) ReadAt(p []byte, off int64) (n int, err error) { func TestReadHeaderEagerLoad(t *testing.T) { - testReadHeader := func(dataSize int, entryCount uint, expectedReadInvocationCount int) { - expectedHeader := rtest.Random(0, int(entryCount*entrySize)+crypto.Extension) + testReadHeader := func(dataSize, entryCount, expectedReadInvocationCount int) { + expectedHeader := rtest.Random(0, entryCount*int(entrySize)+crypto.Extension) buf := &bytes.Buffer{} buf.Write(rtest.Random(0, dataSize)) // pack blobs data @@ -58,3 +58,53 @@ func TestReadHeaderEagerLoad(t *testing.T) { testReadHeader(dataSize+3, 1, 1) testReadHeader(dataSize+4, 1, 1) } + +func TestReadRecords(t *testing.T) { + testReadRecords := func(dataSize, entryCount, totalRecords int) { + totalHeader := rtest.Random(0, totalRecords*int(entrySize)+crypto.Extension) + off := len(totalHeader) - (entryCount*int(entrySize) + crypto.Extension) + if off < 0 { + off = 0 + } + expectedHeader := totalHeader[off:] + + buf := &bytes.Buffer{} + buf.Write(rtest.Random(0, dataSize)) // pack blobs data + buf.Write(totalHeader) // pack header + binary.Write(buf, binary.LittleEndian, uint32(len(totalHeader))) // pack header length + + rd := bytes.NewReader(buf.Bytes()) + + header, count, err := readRecords(rd, int64(rd.Len()), entryCount) + rtest.OK(t, err) + rtest.Equals(t, expectedHeader, header) + rtest.Equals(t, totalRecords, count) + } + + // basic + testReadRecords(100, 1, 1) + testReadRecords(100, 0, 1) + testReadRecords(100, 1, 0) + + // header entries ~ eager entries + testReadRecords(100, eagerEntries, eagerEntries-1) + testReadRecords(100, eagerEntries, eagerEntries) + testReadRecords(100, eagerEntries, eagerEntries+1) + + // file size == eager header load size + eagerLoadSize := int((eagerEntries * entrySize) + crypto.Extension) + headerSize := int(1*entrySize) + crypto.Extension + dataSize := eagerLoadSize - headerSize - binary.Size(uint32(0)) + testReadRecords(dataSize-1, 1, 1) + testReadRecords(dataSize, 1, 1) + testReadRecords(dataSize+1, 1, 1) + testReadRecords(dataSize+2, 1, 1) + testReadRecords(dataSize+3, 1, 1) + testReadRecords(dataSize+4, 1, 1) + + for i := 0; i < 2; i++ { + for j := 0; j < 2; j++ { + testReadRecords(dataSize, i, j) + } + } +} From 08a5281bd44d9a31cca448cc8b89a0591c380eae Mon Sep 17 00:00:00 2001 From: Toby Burress Date: Thu, 22 Feb 2018 17:37:10 +0000 Subject: [PATCH 3/3] Incorporate PR review comments. --- internal/pack/pack.go | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/internal/pack/pack.go b/internal/pack/pack.go index 9ff935079..5bf046304 100644 --- a/internal/pack/pack.go +++ b/internal/pack/pack.go @@ -183,12 +183,13 @@ const ( eagerEntries = 15 ) -// readRecords reads count records from the underlying ReaderAt, returning the -// raw header, the total number of records in the header, and any error. If -// the header contains fewer than count entries, the return value is truncated. -func readRecords(rd io.ReaderAt, size int64, count int) ([]byte, int, error) { +// readRecords reads up to max records from the underlying ReaderAt, returning +// the raw header, the total number of records in the header, and any error. +// If the header contains fewer than max entries, the header is truncated to +// the appropriate size. +func readRecords(rd io.ReaderAt, size int64, max int) ([]byte, int, error) { var bufsize int - bufsize += count * int(entrySize) + bufsize += max * int(entrySize) bufsize += crypto.Extension bufsize += headerLengthSize @@ -202,36 +203,34 @@ func readRecords(rd io.ReaderAt, size int64, count int) ([]byte, int, error) { return nil, 0, err } - header := b[len(b)-headerLengthSize:] + hlen := binary.LittleEndian.Uint32(b[len(b)-headerLengthSize:]) b = b[:len(b)-headerLengthSize] - hl := binary.LittleEndian.Uint32(header) - debug.Log("header length: %v", hl) + debug.Log("header length: %v", hlen) var err error switch { - case hl == 0: + case hlen == 0: err = InvalidFileError{Message: "header length is zero"} - case hl < crypto.Extension: + case hlen < crypto.Extension: err = InvalidFileError{Message: "header length is too small"} - case (hl-crypto.Extension)%uint32(entrySize) != 0: + case (hlen-crypto.Extension)%uint32(entrySize) != 0: err = InvalidFileError{Message: "header length is invalid"} - case int64(hl) > size-int64(headerLengthSize): + case int64(hlen) > size-int64(headerLengthSize): err = InvalidFileError{Message: "header is larger than file"} - case int64(hl) > maxHeaderSize: + case int64(hlen) > maxHeaderSize: err = InvalidFileError{Message: "header is larger than maxHeaderSize"} } if err != nil { return nil, 0, errors.Wrap(err, "readHeader") } - c := (int(hl) - crypto.Extension) / int(entrySize) - if c < count { - recordSize := c * int(entrySize) - start := len(b) - (recordSize + crypto.Extension) - b = b[start:] + total := (int(hlen) - crypto.Extension) / int(entrySize) + if total < max { + // truncate to the beginning of the pack header + b = b[len(b)-int(hlen):] } - return b, c, nil + return b, total, nil } // readHeader reads the header at the end of rd. size is the length of the