Merge pull request #962 from restic/improve-s3-partial-download

Improve s3 backend
This commit is contained in:
Alexander Neumann 2017-05-14 22:00:16 +02:00
commit 089b04c8a9
3 changed files with 172 additions and 87 deletions

View file

@ -1,8 +1,9 @@
package s3 package s3
import ( import (
"bytes" "fmt"
"io" "io"
"os"
"path" "path"
"restic" "restic"
"strings" "strings"
@ -80,6 +81,50 @@ func (be *s3) Location() string {
return be.bucketname return be.bucketname
} }
// getRemainingSize returns number of bytes remaining. If it is not possible to
// determine the size, panic() is called.
func getRemainingSize(rd io.Reader) (size int64, err error) {
type Sizer interface {
Size() int64
}
type Lenner interface {
Len() int
}
if r, ok := rd.(Lenner); ok {
size = int64(r.Len())
} else if r, ok := rd.(Sizer); ok {
size = r.Size()
} else if f, ok := rd.(*os.File); ok {
fi, err := f.Stat()
if err != nil {
return 0, err
}
pos, err := f.Seek(0, io.SeekCurrent)
if err != nil {
return 0, err
}
size = fi.Size() - pos
} else {
panic(fmt.Sprintf("Save() got passed a reader without a method to determine the data size, type is %T", rd))
}
return size, nil
}
// preventCloser wraps an io.Reader to run a function instead of the original Close() function.
type preventCloser struct {
io.Reader
f func()
}
func (wr preventCloser) Close() error {
wr.f()
return nil
}
// Save stores data in the backend at the handle. // Save stores data in the backend at the handle.
func (be *s3) Save(h restic.Handle, rd io.Reader) (err error) { func (be *s3) Save(h restic.Handle, rd io.Reader) (err error) {
if err := h.Valid(); err != nil { if err := h.Valid(); err != nil {
@ -87,6 +132,10 @@ func (be *s3) Save(h restic.Handle, rd io.Reader) (err error) {
} }
objName := be.Filename(h) objName := be.Filename(h)
size, err := getRemainingSize(rd)
if err != nil {
return err
}
debug.Log("Save %v at %v", h, objName) debug.Log("Save %v at %v", h, objName)
@ -98,14 +147,23 @@ func (be *s3) Save(h restic.Handle, rd io.Reader) (err error) {
} }
<-be.connChan <-be.connChan
defer func() {
be.connChan <- struct{}{}
}()
debug.Log("PutObject(%v, %v)", // wrap the reader so that net/http client cannot close the reader, return
be.bucketname, objName) // the token instead.
n, err := be.client.PutObject(be.bucketname, objName, rd, "binary/octet-stream") rd = preventCloser{
debug.Log("%v -> %v bytes, err %#v", objName, n, err) Reader: rd,
f: func() {
debug.Log("Close()")
},
}
debug.Log("PutObject(%v, %v)", be.bucketname, objName)
coreClient := minio.Core{be.client}
info, err := coreClient.PutObject(be.bucketname, objName, size, rd, nil, nil, nil)
// return token
be.connChan <- struct{}{}
debug.Log("%v -> %v bytes, err %#v", objName, info.Size, err)
return errors.Wrap(err, "client.PutObject") return errors.Wrap(err, "client.PutObject")
} }
@ -139,96 +197,37 @@ func (be *s3) Load(h restic.Handle, length int, offset int64) (io.ReadCloser, er
return nil, errors.Errorf("invalid length %d", length) return nil, errors.Errorf("invalid length %d", length)
} }
var obj *minio.Object
var size int64
objName := be.Filename(h) objName := be.Filename(h)
// get token for connection // get token for connection
<-be.connChan <-be.connChan
obj, err := be.client.GetObject(be.bucketname, objName) byteRange := fmt.Sprintf("bytes=%d-", offset)
if err != nil { if length > 0 {
debug.Log(" err %v", err) byteRange = fmt.Sprintf("bytes=%d-%d", offset, offset+int64(length)-1)
}
headers := minio.NewGetReqHeaders()
headers.Add("Range", byteRange)
debug.Log("Load(%v) send range %v", h, byteRange)
coreClient := minio.Core{be.client}
rd, _, err := coreClient.GetObject(be.bucketname, objName, headers)
if err != nil {
// return token // return token
be.connChan <- struct{}{} be.connChan <- struct{}{}
return nil, err
return nil, errors.Wrap(err, "client.GetObject")
} }
// if we're going to read the whole object, just pass it on. closeRd := wrapReader{
if length == 0 { ReadCloser: rd,
debug.Log("Load %v: pass on object", h)
_, err = obj.Seek(offset, 0)
if err != nil {
_ = obj.Close()
// return token
be.connChan <- struct{}{}
return nil, errors.Wrap(err, "obj.Seek")
}
rd := wrapReader{
ReadCloser: obj,
f: func() { f: func() {
debug.Log("Close()") debug.Log("Close()")
// return token // return token
be.connChan <- struct{}{} be.connChan <- struct{}{}
}, },
} }
return rd, nil
}
defer func() { return closeRd, err
// return token
be.connChan <- struct{}{}
}()
// otherwise use a buffer with ReadAt
be.cacheMutex.RLock()
size, cacheHit := be.cacheObjSize[objName]
be.cacheMutex.RUnlock()
if !cacheHit {
info, err := obj.Stat()
if err != nil {
_ = obj.Close()
return nil, errors.Wrap(err, "obj.Stat")
}
size = info.Size
be.cacheMutex.Lock()
be.cacheObjSize[objName] = size
be.cacheMutex.Unlock()
}
if offset > size {
_ = obj.Close()
return nil, errors.New("offset larger than file size")
}
l := int64(length)
if offset+l > size {
l = size - offset
}
buf := make([]byte, l)
n, err := obj.ReadAt(buf, offset)
debug.Log("Load %v: use buffer with ReadAt: %v, %v", h, n, err)
if err == io.EOF {
debug.Log("Load %v: shorten buffer %v -> %v", h, len(buf), n)
buf = buf[:n]
err = nil
}
if err != nil {
_ = obj.Close()
return nil, errors.Wrap(err, "obj.ReadAt")
}
return backend.Closer{Reader: bytes.NewReader(buf)}, nil
} }
// Stat returns information about a blob. // Stat returns information about a blob.

View file

@ -0,0 +1,71 @@
package s3
import (
"bytes"
"io"
"io/ioutil"
"os"
"restic/test"
"testing"
)
func writeFile(t testing.TB, data []byte, offset int64) *os.File {
tempfile, err := ioutil.TempFile("", "restic-test-")
if err != nil {
t.Fatal(err)
}
if _, err = tempfile.Write(data); err != nil {
t.Fatal(err)
}
if _, err = tempfile.Seek(offset, io.SeekStart); err != nil {
t.Fatal(err)
}
return tempfile
}
func TestGetRemainingSize(t *testing.T) {
length := 18 * 1123
partialRead := 1005
data := test.Random(23, length)
partReader := bytes.NewReader(data)
buf := make([]byte, partialRead)
_, _ = io.ReadFull(partReader, buf)
partFileReader := writeFile(t, data, int64(partialRead))
defer func() {
if err := partFileReader.Close(); err != nil {
t.Fatal(err)
}
if err := os.Remove(partFileReader.Name()); err != nil {
t.Fatal(err)
}
}()
var tests = []struct {
io.Reader
size int64
}{
{bytes.NewReader([]byte("foobar test")), 11},
{partReader, int64(length - partialRead)},
{partFileReader, int64(length - partialRead)},
}
for _, test := range tests {
t.Run("", func(t *testing.T) {
size, err := getRemainingSize(test.Reader)
if err != nil {
t.Fatal(err)
}
if size != test.size {
t.Fatalf("invalid size returned, want %v, got %v", test.size, size)
}
})
}
}

View file

@ -293,7 +293,7 @@ func (s *Suite) TestSave(t *testing.T) {
t.Fatal(err) t.Fatal(err)
} }
if _, err = tmpfile.Seek(0, 0); err != nil { if _, err = tmpfile.Seek(0, io.SeekStart); err != nil {
t.Fatal(err) t.Fatal(err)
} }
@ -306,11 +306,22 @@ func (s *Suite) TestSave(t *testing.T) {
t.Fatal(err) t.Fatal(err)
} }
if err = tmpfile.Close(); err != nil { err = b.Remove(h)
if err != nil {
t.Fatalf("error removing item: %+v", err)
}
// try again directly with the temp file
if _, err = tmpfile.Seek(588, io.SeekStart); err != nil {
t.Fatal(err) t.Fatal(err)
} }
if err = os.Remove(tmpfile.Name()); err != nil { err = b.Save(h, tmpfile)
if err != nil {
t.Fatal(err)
}
if err = tmpfile.Close(); err != nil {
t.Fatal(err) t.Fatal(err)
} }
@ -318,6 +329,10 @@ func (s *Suite) TestSave(t *testing.T) {
if err != nil { if err != nil {
t.Fatalf("error removing item: %+v", err) t.Fatalf("error removing item: %+v", err)
} }
if err = os.Remove(tmpfile.Name()); err != nil {
t.Fatal(err)
}
} }
var filenameTests = []struct { var filenameTests = []struct {