From a58b7625ba24cd6700d36c992fe7edf6981c3895 Mon Sep 17 00:00:00 2001 From: Aaron Lehmann Date: Mon, 25 Jan 2016 17:51:54 -0800 Subject: [PATCH 1/2] Support range requests in the client's httpReadSeeker Remove buffering on the reader, because it's not useful. Also remove artificial io.EOF return. Signed-off-by: Aaron Lehmann --- docs/client/transport/http_reader.go | 97 +++++++++++++++++++++------- 1 file changed, 72 insertions(+), 25 deletions(-) diff --git a/docs/client/transport/http_reader.go b/docs/client/transport/http_reader.go index b27b6c23..22b0b9d6 100644 --- a/docs/client/transport/http_reader.go +++ b/docs/client/transport/http_reader.go @@ -1,12 +1,22 @@ package transport import ( - "bufio" "errors" "fmt" "io" "net/http" "os" + "regexp" + "strconv" +) + +var ( + contentRangeRegexp = regexp.MustCompile(`bytes ([0-9]+)-([0-9]+)/([0-9]+|\\*)`) + + // ErrWrongCodeForByteRange is returned if the client sends a request + // with a Range header but the server returns a 2xx or 3xx code other + // than 206 Partial Content. + ErrWrongCodeForByteRange = errors.New("expected HTTP 206 from byte range request") ) // ReadSeekCloser combines io.ReadSeeker with io.Closer. @@ -40,8 +50,6 @@ type httpReadSeeker struct { // rc is the remote read closer. rc io.ReadCloser - // brd is a buffer for internal buffered io. - brd *bufio.Reader // readerOffset tracks the offset as of the last read. readerOffset int64 // seekOffset allows Seek to override the offset. Seek changes @@ -79,11 +87,6 @@ func (hrs *httpReadSeeker) Read(p []byte) (n int, err error) { hrs.seekOffset += int64(n) hrs.readerOffset += int64(n) - // Simulate io.EOF error if we reach filesize. - if err == nil && hrs.size >= 0 && hrs.readerOffset >= hrs.size { - err = io.EOF - } - return n, err } @@ -92,8 +95,18 @@ func (hrs *httpReadSeeker) Seek(offset int64, whence int) (int64, error) { return 0, hrs.err } + lastReaderOffset := hrs.readerOffset + + if whence == os.SEEK_SET && hrs.rc == nil { + // If no request has been made yet, and we are seeking to an + // absolute position, set the read offset as well to avoid an + // unnecessary request. + hrs.readerOffset = offset + } + _, err := hrs.reader() if err != nil { + hrs.readerOffset = lastReaderOffset return 0, err } @@ -101,14 +114,14 @@ func (hrs *httpReadSeeker) Seek(offset int64, whence int) (int64, error) { switch whence { case os.SEEK_CUR: - newOffset += int64(offset) + newOffset += offset case os.SEEK_END: if hrs.size < 0 { return 0, errors.New("content length not known") } - newOffset = hrs.size + int64(offset) + newOffset = hrs.size + offset case os.SEEK_SET: - newOffset = int64(offset) + newOffset = offset } if newOffset < 0 { @@ -131,7 +144,6 @@ func (hrs *httpReadSeeker) Close() error { } hrs.rc = nil - hrs.brd = nil hrs.err = errors.New("httpLayer: closed") @@ -154,7 +166,7 @@ func (hrs *httpReadSeeker) reader() (io.Reader, error) { } if hrs.rc != nil { - return hrs.brd, nil + return hrs.rc, nil } req, err := http.NewRequest("GET", hrs.url, nil) @@ -163,10 +175,8 @@ func (hrs *httpReadSeeker) reader() (io.Reader, error) { } if hrs.readerOffset > 0 { - // TODO(stevvooe): Get this working correctly. - // If we are at different offset, issue a range request from there. - req.Header.Add("Range", "1-") + req.Header.Add("Range", fmt.Sprintf("bytes=%d-", hrs.readerOffset)) // TODO: get context in here // context.GetLogger(hrs.context).Infof("Range: %s", req.Header.Get("Range")) } @@ -179,12 +189,55 @@ func (hrs *httpReadSeeker) reader() (io.Reader, error) { // Normally would use client.SuccessStatus, but that would be a cyclic // import if resp.StatusCode >= 200 && resp.StatusCode <= 399 { - hrs.rc = resp.Body - if resp.StatusCode == http.StatusOK { + if hrs.readerOffset > 0 { + if resp.StatusCode != http.StatusPartialContent { + return nil, ErrWrongCodeForByteRange + } + + contentRange := resp.Header.Get("Content-Range") + if contentRange == "" { + return nil, errors.New("no Content-Range header found in HTTP 206 response") + } + + submatches := contentRangeRegexp.FindStringSubmatch(contentRange) + if len(submatches) < 4 { + return nil, fmt.Errorf("could not parse Content-Range header: %s", contentRange) + } + + startByte, err := strconv.ParseUint(submatches[1], 10, 64) + if err != nil { + return nil, fmt.Errorf("could not parse start of range in Content-Range header: %s", contentRange) + } + + if startByte != uint64(hrs.readerOffset) { + return nil, fmt.Errorf("received Content-Range starting at offset %d instead of requested %d", startByte, hrs.readerOffset) + } + + endByte, err := strconv.ParseUint(submatches[2], 10, 64) + if err != nil { + return nil, fmt.Errorf("could not parse end of range in Content-Range header: %s", contentRange) + } + + if submatches[3] == "*" { + hrs.size = -1 + } else { + size, err := strconv.ParseUint(submatches[3], 10, 64) + if err != nil { + return nil, fmt.Errorf("could not parse total size in Content-Range header: %s", contentRange) + } + + if endByte+1 != size { + return nil, fmt.Errorf("range in Content-Range stops before the end of the content: %s", contentRange) + } + + hrs.size = int64(size) + } + } else if resp.StatusCode == http.StatusOK { hrs.size = resp.ContentLength } else { hrs.size = -1 } + hrs.rc = resp.Body } else { defer resp.Body.Close() if hrs.errorHandler != nil { @@ -193,11 +246,5 @@ func (hrs *httpReadSeeker) reader() (io.Reader, error) { return nil, fmt.Errorf("unexpected status resolving reader: %v", resp.Status) } - if hrs.brd == nil { - hrs.brd = bufio.NewReader(hrs.rc) - } else { - hrs.brd.Reset(hrs.rc) - } - - return hrs.brd, nil + return hrs.rc, nil } From 8e571dff41a6f544d16dd494d2136c8bb97e66f6 Mon Sep 17 00:00:00 2001 From: Aaron Lehmann Date: Wed, 27 Jan 2016 10:57:58 -0800 Subject: [PATCH 2/2] Add a CheckRedirect function to the HTTP client Use it to preserve Accept and Range headers that were added to the original request. Signed-off-by: Aaron Lehmann --- docs/client/repository.go | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/docs/client/repository.go b/docs/client/repository.go index 1f777add..d1bfc180 100644 --- a/docs/client/repository.go +++ b/docs/client/repository.go @@ -27,6 +27,26 @@ type Registry interface { Repositories(ctx context.Context, repos []string, last string) (n int, err error) } +// checkHTTPRedirect is a callback that can manipulate redirected HTTP +// requests. It is used to preserve Accept and Range headers. +func checkHTTPRedirect(req *http.Request, via []*http.Request) error { + if len(via) >= 10 { + return errors.New("stopped after 10 redirects") + } + + if len(via) > 0 { + for headerName, headerVals := range via[0].Header { + if headerName == "Accept" || headerName == "Range" { + for _, val := range headerVals { + req.Header.Add(headerName, val) + } + } + } + } + + return nil +} + // NewRegistry creates a registry namespace which can be used to get a listing of repositories func NewRegistry(ctx context.Context, baseURL string, transport http.RoundTripper) (Registry, error) { ub, err := v2.NewURLBuilderFromString(baseURL) @@ -35,8 +55,9 @@ func NewRegistry(ctx context.Context, baseURL string, transport http.RoundTrippe } client := &http.Client{ - Transport: transport, - Timeout: 1 * time.Minute, + Transport: transport, + Timeout: 1 * time.Minute, + CheckRedirect: checkHTTPRedirect, } return ®istry{ @@ -105,7 +126,8 @@ func NewRepository(ctx context.Context, name reference.Named, baseURL string, tr } client := &http.Client{ - Transport: transport, + Transport: transport, + CheckRedirect: checkHTTPRedirect, // TODO(dmcgowan): create cookie jar }