package transport

import (
	"compress/flate"
	"compress/gzip"
	"context"
	"errors"
	"fmt"
	"io"
	"net/http"
	"regexp"
	"strconv"
	"strings"
	"unicode"

	"github.com/klauspost/compress/zstd"
)

var (
	contentRangeRegexp = regexp.MustCompile(`bytes ([0-9]+)-([0-9]+)/([0-9]+|\*)`)

	// ErrWrongCodeForByteRange is returned if the client sends a request
	// with a Range header but the server returns a 2xx or 3xx code other
	// than 206 Partial Content.
	ErrWrongCodeForByteRange = errors.New("expected HTTP 206 from byte range request")
)
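
// For reference, resuming a download at offset 1024 of a 4096-byte blob sends
// and (on success) receives headers of the following form; contentRangeRegexp
// captures the start offset, end offset, and total size (or "*" if unknown):
//
//	Range: bytes=1024-
//	Content-Range: bytes 1024-4095/4096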

// ReadSeekCloser combines io.ReadSeeker with io.Closer.
//
// Deprecated: use [io.ReadSeekCloser].
type ReadSeekCloser = io.ReadSeekCloser

// NewHTTPReadSeeker handles reading from an HTTP endpoint using a GET
// request. When seeking and starting a read from a non-zero offset,
// a "Range" header will be added which sets the offset.
//
// TODO(dmcgowan): Move this into a separate utility package
func NewHTTPReadSeeker(ctx context.Context, client *http.Client, url string, errorHandler func(*http.Response) error) *HTTPReadSeeker {
	return &HTTPReadSeeker{
		ctx:          ctx,
		client:       client,
		url:          url,
		errorHandler: errorHandler,
	}
}
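
// A minimal usage sketch (illustrative only; the URL is a hypothetical
// placeholder and error handling is abbreviated):
//
//	rs := NewHTTPReadSeeker(ctx, http.DefaultClient, "https://registry.example.com/v2/foo/blobs/<digest>", nil)
//	defer rs.Close()
//
//	// Probe the size, then rewind; the reconnect is deferred until the next
//	// Read, so this pair of seeks does not reopen the connection.
//	size, err := rs.Seek(0, io.SeekEnd)
//	if err != nil {
//		return err
//	}
//	fmt.Printf("blob is %d bytes\n", size)
//	if _, err := rs.Seek(0, io.SeekStart); err != nil {
//		return err
//	}
//	if _, err := io.Copy(io.Discard, rs); err != nil {
//		return err
//	}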

// HTTPReadSeeker implements an [io.ReadSeekCloser].
type HTTPReadSeeker struct {
	ctx    context.Context
	client *http.Client
	url    string

	// errorHandler creates an error from an unsuccessful HTTP response.
	// This allows the error to be created with the HTTP response body
	// without leaking the body through a returned error.
	errorHandler func(*http.Response) error

	size int64

	// rc is the remote read closer.
	rc io.ReadCloser
	// readerOffset tracks the offset as of the last read.
	readerOffset int64
	// seekOffset allows Seek to override the offset. Seek changes
	// seekOffset instead of changing readerOffset directly so that
	// connection resets can be delayed and possibly avoided if the
	// seek is undone (i.e. seeking to the end and then back to the
	// beginning).
	seekOffset int64
	err        error
}
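
// Read implements [io.Reader]. It reads from the current seek offset,
// opening (or reopening) the underlying HTTP connection when the seek
// offset has moved since the last read.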
func (hrs *HTTPReadSeeker) Read(p []byte) (n int, err error) {
	if hrs.err != nil {
		return 0, hrs.err
	}

	// If we sought to a different position, we need to reset the
	// connection. This logic is here instead of Seek so that if
	// a seek is undone before the next read, the connection doesn't
	// need to be closed and reopened. A common example of this is
	// seeking to the end to determine the length, and then seeking
	// back to the original position.
	if hrs.readerOffset != hrs.seekOffset {
		hrs.reset()
	}

	hrs.readerOffset = hrs.seekOffset

	rd, err := hrs.reader()
	if err != nil {
		return 0, err
	}

	n, err = rd.Read(p)
	hrs.seekOffset += int64(n)
	hrs.readerOffset += int64(n)

	return n, err
}
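
// Seek implements [io.Seeker]. It records the offset for the next Read;
// seeking relative to the end requires the content size, which is only
// known once a response has been received.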
func (hrs *HTTPReadSeeker) Seek(offset int64, whence int) (int64, error) {
	if hrs.err != nil {
		return 0, hrs.err
	}

	lastReaderOffset := hrs.readerOffset

	if whence == io.SeekStart && hrs.rc == nil {
		// If no request has been made yet, and we are seeking to an
		// absolute position, set the read offset as well to avoid an
		// unnecessary request.
		hrs.readerOffset = offset
	}

	_, err := hrs.reader()
	if err != nil {
		hrs.readerOffset = lastReaderOffset
		return 0, err
	}

	newOffset := hrs.seekOffset

	switch whence {
	case io.SeekCurrent:
		newOffset += offset
	case io.SeekEnd:
		if hrs.size < 0 {
			return 0, errors.New("content length not known")
		}
		newOffset = hrs.size + offset
	case io.SeekStart:
		newOffset = offset
	}

	if newOffset < 0 {
		err = errors.New("cannot seek to negative position")
	} else {
		hrs.seekOffset = newOffset
	}

	return hrs.seekOffset, err
}
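
// Close implements [io.Closer]. It releases the underlying connection, if
// any; every subsequent call on the HTTPReadSeeker returns an error.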
func (hrs *HTTPReadSeeker) Close() error {
	if hrs.err != nil {
		return hrs.err
	}

	// close and release reader chain
	if hrs.rc != nil {
		hrs.rc.Close()
	}

	hrs.rc = nil

	hrs.err = errors.New("httpLayer: closed")

	return nil
}
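
// reset closes the current connection, if any, so that the next read
// reopens it at the new offset.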
func (hrs *HTTPReadSeeker) reset() {
	if hrs.err != nil {
		return
	}
	if hrs.rc != nil {
		hrs.rc.Close()
		hrs.rc = nil
	}
}
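
// reader returns the open connection, issuing a GET request (with a Range
// header when resuming from a non-zero offset) if none is open yet.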
func (hrs *HTTPReadSeeker) reader() (io.Reader, error) {
	if hrs.err != nil {
		return nil, hrs.err
	}

	if hrs.rc != nil {
		return hrs.rc, nil
	}

	req, err := http.NewRequestWithContext(hrs.ctx, http.MethodGet, hrs.url, nil)
	if err != nil {
		return nil, err
	}

	if hrs.readerOffset > 0 {
		// If we are at a different offset, issue a range request from there.
		req.Header.Add("Range", fmt.Sprintf("bytes=%d-", hrs.readerOffset))
		// TODO: get context in here
		// context.GetLogger(hrs.context).Infof("Range: %s", req.Header.Get("Range"))
	}
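
	// Advertise the compression algorithms handled below; if the server
	// applies a Content-Encoding, the body is transparently decoded before
	// it reaches the caller.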
	req.Header.Add("Accept-Encoding", "zstd, gzip, deflate")
	resp, err := hrs.client.Do(req)
	if err != nil {
		return nil, err
	}

	// Normally would use client.SuccessStatus, but that would be a cyclic
	// import
	if resp.StatusCode >= 200 && resp.StatusCode <= 399 {
		if hrs.readerOffset > 0 {
			if resp.StatusCode != http.StatusPartialContent {
				return nil, ErrWrongCodeForByteRange
			}

			contentRange := resp.Header.Get("Content-Range")
			if contentRange == "" {
				return nil, errors.New("no Content-Range header found in HTTP 206 response")
			}

			submatches := contentRangeRegexp.FindStringSubmatch(contentRange)
			if len(submatches) < 4 {
				return nil, fmt.Errorf("could not parse Content-Range header: %s", contentRange)
			}

			startByte, err := strconv.ParseUint(submatches[1], 10, 64)
			if err != nil {
				return nil, fmt.Errorf("could not parse start of range in Content-Range header: %s", contentRange)
			}

			if startByte != uint64(hrs.readerOffset) {
				return nil, fmt.Errorf("received Content-Range starting at offset %d instead of requested %d", startByte, hrs.readerOffset)
			}

			endByte, err := strconv.ParseUint(submatches[2], 10, 64)
			if err != nil {
				return nil, fmt.Errorf("could not parse end of range in Content-Range header: %s", contentRange)
			}

			if submatches[3] == "*" {
				hrs.size = -1
			} else {
				size, err := strconv.ParseUint(submatches[3], 10, 64)
				if err != nil {
					return nil, fmt.Errorf("could not parse total size in Content-Range header: %s", contentRange)
				}

				if endByte+1 != size {
					return nil, fmt.Errorf("range in Content-Range stops before the end of the content: %s", contentRange)
				}

				hrs.size = int64(size)
			}
		} else if resp.StatusCode == http.StatusOK {
			hrs.size = resp.ContentLength
		} else {
			hrs.size = -1
		}

		body := resp.Body
		encoding := strings.FieldsFunc(resp.Header.Get("Content-Encoding"), func(r rune) bool {
			return unicode.IsSpace(r) || r == ','
		})
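		// Content-Encoding lists the codings in the order they were applied,
		// so they are undone in reverse order.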
		for i := len(encoding) - 1; i >= 0; i-- {
			algorithm := strings.ToLower(encoding[i])
			switch algorithm {
			case "zstd":
				r, err := zstd.NewReader(body)
				if err != nil {
					return nil, err
				}
				body = r.IOReadCloser()
			case "gzip":
				body, err = gzip.NewReader(body)
				if err != nil {
					return nil, err
				}
			case "deflate":
				body = flate.NewReader(body)
			case "":
				// no content-encoding applied, use raw body
			default:
				return nil, errors.New("unsupported Content-Encoding algorithm: " + algorithm)
			}
		}

		hrs.rc = body
	} else {
		defer resp.Body.Close()
		if hrs.errorHandler != nil {
			return nil, hrs.errorHandler(resp)
		}
		return nil, fmt.Errorf("unexpected status resolving reader: %v", resp.Status)
	}

	return hrs.rc, nil
}