fs: improve ChunkedReader

- make Close permanent and return errors afterwards
- use RangeSeek from the wrapped reader if present
- add a limit to chunk growth
- correct RangeSeek interface behavior
- add tests
This commit is contained in:
Fabian Möller 2018-02-18 15:10:15 +01:00 committed by Nick Craig-Wood
parent fe25cb9c54
commit 9fdf273614
3 changed files with 320 additions and 29 deletions

View file

@ -1,43 +1,55 @@
package chunkedreader package chunkedreader
import ( import (
"errors"
"io" "io"
"sync" "sync"
"github.com/ncw/rclone/fs" "github.com/ncw/rclone/fs"
) )
// io related errors returned by ChunkedReader
var (
ErrorFileClosed = errors.New("file already closed")
ErrorInvalidSeek = errors.New("invalid seek position")
)
// ChunkedReader is a reader for a Object with the possibility // ChunkedReader is a reader for a Object with the possibility
// of reading the source in chunks of given size // of reading the source in chunks of given size
// //
// A initialChunkSize of 0 will disable chunked reading. // A initialChunkSize of <= 0 will disable chunked reading.
type ChunkedReader struct { type ChunkedReader struct {
mu sync.Mutex mu sync.Mutex // protects following fields
o fs.Object o fs.Object // source to read from
rc io.ReadCloser rc io.ReadCloser // reader for the current open chunk
offset int64 offset int64 // offset the next Read will start. -1 forces a reopen of o
chunkOffset int64 chunkOffset int64 // beginning of the current or next chunk
chunkSize int64 chunkSize int64 // length of the current or next chunk. -1 will open o from chunkOffset to the end
initialChunkSize int64 initialChunkSize int64 // default chunkSize after the chunk specified by RangeSeek is complete
chunkGrowth bool maxChunkSize int64 // consecutive read chunks will double in size until reached. -1 means no limit
doSeek bool customChunkSize bool // is the current chunkSize set by RangeSeek?
closed bool // has Close been called?
} }
// New returns a ChunkedReader for the Object. // New returns a ChunkedReader for the Object.
// //
// A initialChunkSize of 0 will disable chunked reading. // A initialChunkSize of <= 0 will disable chunked reading.
// If chunkGrowth is true, the chunk size will be doubled after each chunk read. // If maxChunkSize is greater than initialChunkSize, the chunk size will be
// doubled after each chunk read with a maximun of maxChunkSize.
// A Seek or RangeSeek will reset the chunk size to it's initial value // A Seek or RangeSeek will reset the chunk size to it's initial value
func New(o fs.Object, initialChunkSize int64, chunkGrowth bool) *ChunkedReader { func New(o fs.Object, initialChunkSize int64, maxChunkSize int64) *ChunkedReader {
if initialChunkSize < 0 { if initialChunkSize <= 0 {
initialChunkSize = 0 initialChunkSize = -1
}
if maxChunkSize != -1 && maxChunkSize < initialChunkSize {
maxChunkSize = initialChunkSize
} }
return &ChunkedReader{ return &ChunkedReader{
o: o, o: o,
offset: -1, offset: -1,
chunkSize: initialChunkSize, chunkSize: initialChunkSize,
initialChunkSize: initialChunkSize, initialChunkSize: initialChunkSize,
chunkGrowth: chunkGrowth, maxChunkSize: maxChunkSize,
} }
} }
@ -46,21 +58,32 @@ func (cr *ChunkedReader) Read(p []byte) (n int, err error) {
cr.mu.Lock() cr.mu.Lock()
defer cr.mu.Unlock() defer cr.mu.Unlock()
if cr.closed {
return 0, ErrorFileClosed
}
for reqSize := int64(len(p)); reqSize > 0; reqSize = int64(len(p)) { for reqSize := int64(len(p)); reqSize > 0; reqSize = int64(len(p)) {
// the current chunk boundary. valid only when chunkSize > 0
chunkEnd := cr.chunkOffset + cr.chunkSize chunkEnd := cr.chunkOffset + cr.chunkSize
fs.Debugf(cr.o, "ChunkedReader.Read at %d length %d chunkOffset %d chunkSize %d", cr.offset, reqSize, cr.chunkOffset, cr.chunkSize) fs.Debugf(cr.o, "ChunkedReader.Read at %d length %d chunkOffset %d chunkSize %d", cr.offset, reqSize, cr.chunkOffset, cr.chunkSize)
if atChunkEnd := cr.offset == chunkEnd; cr.offset == -1 || atChunkEnd { switch {
if atChunkEnd && cr.chunkSize > 0 { case cr.chunkSize > 0 && cr.offset == chunkEnd: // last chunk read completely
if cr.doSeek { cr.chunkOffset = cr.offset
cr.doSeek = false if cr.customChunkSize { // last chunkSize was set by RangeSeek
cr.chunkSize = cr.initialChunkSize cr.customChunkSize = false
} else if cr.chunkGrowth { cr.chunkSize = cr.initialChunkSize
cr.chunkSize *= 2 } else {
cr.chunkSize *= 2
if cr.chunkSize > cr.maxChunkSize && cr.maxChunkSize != -1 {
cr.chunkSize = cr.maxChunkSize
} }
cr.chunkOffset = cr.offset
} }
// recalculate the chunk boundary. valid only when chunkSize > 0
chunkEnd = cr.chunkOffset + cr.chunkSize
fallthrough
case cr.offset == -1: // first Read or Read after RangeSeek
err = cr.openRange() err = cr.openRange()
if err != nil { if err != nil {
return return
@ -69,7 +92,8 @@ func (cr *ChunkedReader) Read(p []byte) (n int, err error) {
var buf []byte var buf []byte
chunkRest := chunkEnd - cr.offset chunkRest := chunkEnd - cr.offset
if reqSize > chunkRest && cr.chunkSize != 0 { // limit read to chunk boundaries if chunkSize > 0
if reqSize > chunkRest && cr.chunkSize > 0 {
buf, p = p[0:chunkRest], p[chunkRest:] buf, p = p[0:chunkRest], p[chunkRest:]
} else { } else {
buf, p = p, nil buf, p = p, nil
@ -79,6 +103,9 @@ func (cr *ChunkedReader) Read(p []byte) (n int, err error) {
n += rn n += rn
cr.offset += int64(rn) cr.offset += int64(rn)
if err != nil { if err != nil {
if err == io.ErrUnexpectedEOF {
err = io.EOF
}
return return
} }
} }
@ -86,10 +113,17 @@ func (cr *ChunkedReader) Read(p []byte) (n int, err error) {
} }
// Close the file - for details see io.Closer // Close the file - for details see io.Closer
//
// All methods on ChunkedReader will return ErrorFileClosed afterwards
func (cr *ChunkedReader) Close() error { func (cr *ChunkedReader) Close() error {
cr.mu.Lock() cr.mu.Lock()
defer cr.mu.Unlock() defer cr.mu.Unlock()
if cr.closed {
return ErrorFileClosed
}
cr.closed = true
return cr.resetReader(nil, 0) return cr.resetReader(nil, 0)
} }
@ -99,11 +133,18 @@ func (cr *ChunkedReader) Seek(offset int64, whence int) (int64, error) {
} }
// RangeSeek the file - for details see RangeSeeker // RangeSeek the file - for details see RangeSeeker
//
// The specified length will only apply to the next chunk opened.
// RangeSeek will not reopen the source until Read is called.
func (cr *ChunkedReader) RangeSeek(offset int64, whence int, length int64) (int64, error) { func (cr *ChunkedReader) RangeSeek(offset int64, whence int, length int64) (int64, error) {
cr.mu.Lock() cr.mu.Lock()
defer cr.mu.Unlock() defer cr.mu.Unlock()
fs.Debugf(cr.o, "ChunkedReader.RangeSeek from %d to %d", cr.offset, offset) fs.Debugf(cr.o, "ChunkedReader.RangeSeek from %d to %d length %d", cr.offset, offset, length)
if cr.closed {
return 0, ErrorFileClosed
}
size := cr.o.Size() size := cr.o.Size()
switch whence { switch whence {
@ -112,15 +153,21 @@ func (cr *ChunkedReader) RangeSeek(offset int64, whence int, length int64) (int6
case io.SeekEnd: case io.SeekEnd:
cr.offset = size cr.offset = size
} }
// set the new chunk start
cr.chunkOffset = cr.offset + offset cr.chunkOffset = cr.offset + offset
// force reopen on next Read
cr.offset = -1 cr.offset = -1
cr.doSeek = true
if length > 0 { if length > 0 {
cr.customChunkSize = true
cr.chunkSize = length cr.chunkSize = length
} else { } else {
cr.chunkSize = cr.initialChunkSize cr.chunkSize = cr.initialChunkSize
} }
return cr.offset, nil if cr.chunkOffset < 0 || cr.chunkOffset >= size {
cr.chunkOffset = 0
return 0, ErrorInvalidSeek
}
return cr.chunkOffset, nil
} }
// Open forces the connection to be opened // Open forces the connection to be opened
@ -128,15 +175,39 @@ func (cr *ChunkedReader) Open() (*ChunkedReader, error) {
cr.mu.Lock() cr.mu.Lock()
defer cr.mu.Unlock() defer cr.mu.Unlock()
if cr.rc != nil && cr.offset != -1 {
return cr, nil
}
return cr, cr.openRange() return cr, cr.openRange()
} }
// openRange will open the source Object with the given range // openRange will open the source Object with the current chunk range
//
// If the current open reader implenets RangeSeeker, it is tried first.
// When RangeSeek failes, o.Open with a RangeOption is used.
//
// A length <= 0 will request till the end of the file // A length <= 0 will request till the end of the file
func (cr *ChunkedReader) openRange() error { func (cr *ChunkedReader) openRange() error {
offset, length := cr.chunkOffset, cr.chunkSize offset, length := cr.chunkOffset, cr.chunkSize
fs.Debugf(cr.o, "ChunkedReader.openRange at %d length %d", offset, length) fs.Debugf(cr.o, "ChunkedReader.openRange at %d length %d", offset, length)
if cr.closed {
return ErrorFileClosed
}
if rs, ok := cr.rc.(fs.RangeSeeker); ok {
n, err := rs.RangeSeek(offset, io.SeekStart, length)
if err == nil && n == offset {
cr.offset = offset
return nil
}
if err != nil {
fs.Debugf(cr.o, "ChunkedReader.openRange seek failed (%s). Trying Open", err)
} else {
fs.Debugf(cr.o, "ChunkedReader.openRange seeked to wrong offset. Wanted %d, got %d. Trying Open", offset, n)
}
}
var rc io.ReadCloser var rc io.ReadCloser
var err error var err error
if length <= 0 { if length <= 0 {

View file

@ -0,0 +1,111 @@
package chunkedreader
import (
"fmt"
"io"
"math/rand"
"testing"
"github.com/ncw/rclone/fstest/mockobject"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestChunkedReader(t *testing.T) {
content := makeContent(t, 1024)
for _, mode := range mockobject.SeekModes {
t.Run(mode.String(), testRead(content, mode))
}
}
func testRead(content []byte, mode mockobject.SeekMode) func(*testing.T) {
return func(t *testing.T) {
chunkSizes := []int64{-1, 0, 1, 15, 16, 17, 1023, 1024, 1025, 2000}
offsets := []int64{0, 1, 2, 3, 4, 5, 7, 8, 9, 15, 16, 17, 31, 32, 33,
63, 64, 65, 511, 512, 513, 1023, 1024, 1025}
limits := []int64{-1, 0, 1, 31, 32, 33, 1023, 1024, 1025}
cl := int64(len(content))
bl := 32
buf := make([]byte, bl)
o := mockobject.New("test.bin").WithContent(content, mode)
for ics, cs := range chunkSizes {
for icsMax, csMax := range chunkSizes {
// skip tests where chunkSize is much bigger than maxChunkSize
if ics > icsMax+1 {
continue
}
t.Run(fmt.Sprintf("Chunksize_%d_%d", cs, csMax), func(t *testing.T) {
cr := New(o, cs, csMax)
for _, offset := range offsets {
for _, limit := range limits {
what := fmt.Sprintf("offset %d, limit %d", offset, limit)
p, err := cr.RangeSeek(offset, io.SeekStart, limit)
if offset >= cl {
require.Error(t, err, what)
return
}
require.NoError(t, err, what)
require.Equal(t, offset, p, what)
n, err := cr.Read(buf)
end := offset + int64(bl)
if end > cl {
end = cl
}
l := int(end - offset)
if l < bl {
require.Equal(t, io.EOF, err, what)
} else {
require.NoError(t, err, what)
}
require.Equal(t, l, n, what)
require.Equal(t, content[offset:end], buf[:n], what)
}
}
})
}
}
}
}
func TestErrorAfterClose(t *testing.T) {
content := makeContent(t, 1024)
o := mockobject.New("test.bin").WithContent(content, mockobject.SeekModeNone)
// Close
cr := New(o, 0, 0)
require.NoError(t, cr.Close())
require.Error(t, cr.Close())
// Read
cr = New(o, 0, 0)
require.NoError(t, cr.Close())
var buf [1]byte
_, err := cr.Read(buf[:])
require.Error(t, err)
// Seek
cr = New(o, 0, 0)
require.NoError(t, cr.Close())
_, err = cr.Seek(1, io.SeekCurrent)
require.Error(t, err)
// RangeSeek
cr = New(o, 0, 0)
require.NoError(t, cr.Close())
_, err = cr.RangeSeek(1, io.SeekCurrent, 0)
require.Error(t, err)
}
func makeContent(t *testing.T, size int) []byte {
content := make([]byte, size)
r := rand.New(rand.NewSource(42))
_, err := io.ReadFull(r, content)
assert.NoError(t, err)
return content
}

View file

@ -2,7 +2,9 @@
package mockobject package mockobject
import ( import (
"bytes"
"errors" "errors"
"fmt"
"io" "io"
"time" "time"
@ -15,6 +17,11 @@ var errNotImpl = errors.New("not implemented")
// Object is a mock fs.Object useful for testing // Object is a mock fs.Object useful for testing
type Object string type Object string
// New returns mock fs.Object useful for testing
func New(name string) Object {
return Object(name)
}
// String returns a description of the Object // String returns a description of the Object
func (o Object) String() string { func (o Object) String() string {
return string(o) return string(o)
@ -69,3 +76,105 @@ func (o Object) Update(in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption
func (o Object) Remove() error { func (o Object) Remove() error {
return errNotImpl return errNotImpl
} }
// SeekMode specifies the optional Seek interface for the ReadCloser returned by Open
type SeekMode int
const (
// SeekModeNone specifies no seek interface
SeekModeNone SeekMode = iota
// SeekModeRegular specifies the regular io.Seek interface
SeekModeRegular
// SeekModeRange specifies the fs.RangeSeek interface
SeekModeRange
)
// SeekModes contains all valid SeekMode's
var SeekModes = []SeekMode{SeekModeNone, SeekModeRegular, SeekModeRange}
type contentMockObject struct {
Object
content []byte
seekMode SeekMode
}
// WithContent returns a fs.Object with the given content.
func (o Object) WithContent(content []byte, mode SeekMode) fs.Object {
return &contentMockObject{
Object: o,
content: content,
seekMode: mode,
}
}
func (o *contentMockObject) Open(options ...fs.OpenOption) (io.ReadCloser, error) {
var offset, limit int64 = 0, -1
for _, option := range options {
switch x := option.(type) {
case *fs.SeekOption:
offset = x.Offset
case *fs.RangeOption:
offset, limit = x.Decode(o.Size())
default:
if option.Mandatory() {
return nil, fmt.Errorf("Unsupported mandatory option: %v", option)
}
}
}
if limit == -1 || offset+limit > o.Size() {
limit = o.Size() - offset
}
var r *bytes.Reader
if o.seekMode == SeekModeNone {
r = bytes.NewReader(o.content[offset : offset+limit])
} else {
r = bytes.NewReader(o.content)
_, err := r.Seek(offset, io.SeekStart)
if err != nil {
return nil, err
}
}
switch o.seekMode {
case SeekModeNone:
return &readCloser{r}, nil
case SeekModeRegular:
return &readSeekCloser{r}, nil
case SeekModeRange:
return &readRangeSeekCloser{r}, nil
default:
return nil, errors.New(o.seekMode.String())
}
}
func (o *contentMockObject) Size() int64 {
return int64(len(o.content))
}
type readCloser struct{ io.Reader }
func (r *readCloser) Close() error { return nil }
type readSeekCloser struct{ io.ReadSeeker }
func (r *readSeekCloser) Close() error { return nil }
type readRangeSeekCloser struct{ io.ReadSeeker }
func (r *readRangeSeekCloser) RangeSeek(offset int64, whence int, length int64) (int64, error) {
return r.ReadSeeker.Seek(offset, whence)
}
func (r *readRangeSeekCloser) Close() error { return nil }
func (m SeekMode) String() string {
switch m {
case SeekModeNone:
return "SeekModeNone"
case SeekModeRegular:
return "SeekModeRegular"
case SeekModeRange:
return "SeekModeRange"
default:
return fmt.Sprintf("SeekModeInvalid(%d)", m)
}
}