forked from TrueCloudLab/rclone
0914ec316c
Prior to this fix we were uploading 0 length bodies if a retry was needed on a multipart upload chunk. This gave this error `http: ContentLength=268435496 with Body length 0`. Fix by remaking the hash appending reader in the Call loop. This is inefficient in the face of retries, but these are uncommon.
425 lines
11 KiB
Go
425 lines
11 KiB
Go
// Upload large files for b2
|
|
//
|
|
// Docs - https://www.backblaze.com/b2/docs/large_files.html
|
|
|
|
package b2
|
|
|
|
import (
|
|
"bytes"
|
|
"crypto/sha1"
|
|
"encoding/hex"
|
|
"fmt"
|
|
"hash"
|
|
"io"
|
|
"strings"
|
|
"sync"
|
|
|
|
"github.com/ncw/rclone/b2/api"
|
|
"github.com/ncw/rclone/fs"
|
|
"github.com/ncw/rclone/rest"
|
|
"github.com/pkg/errors"
|
|
)
|
|
|
|
type hashAppendingReader struct {
|
|
h hash.Hash
|
|
in io.Reader
|
|
hexSum string
|
|
hexReader io.Reader
|
|
}
|
|
|
|
// Read returns bytes all bytes from the original reader, then the hex sum
|
|
// of what was read so far, then EOF.
|
|
func (har *hashAppendingReader) Read(b []byte) (int, error) {
|
|
if har.hexReader == nil {
|
|
n, err := har.in.Read(b)
|
|
if err == io.EOF {
|
|
har.in = nil // allow GC
|
|
err = nil // allow reading hexSum before EOF
|
|
|
|
har.hexSum = hex.EncodeToString(har.h.Sum(nil))
|
|
har.hexReader = strings.NewReader(har.hexSum)
|
|
}
|
|
return n, err
|
|
}
|
|
return har.hexReader.Read(b)
|
|
}
|
|
|
|
// AdditionalLength returns how many bytes the appended hex sum will take up.
|
|
func (har *hashAppendingReader) AdditionalLength() int {
|
|
return hex.EncodedLen(har.h.Size())
|
|
}
|
|
|
|
// HexSum returns the hash sum as hex. It's only available after the original
|
|
// reader has EOF'd. It's an empty string before that.
|
|
func (har *hashAppendingReader) HexSum() string {
|
|
return har.hexSum
|
|
}
|
|
|
|
// newHashAppendingReader takes a Reader and a Hash and will append the hex sum
|
|
// after the original reader reaches EOF. The increased size depends on the
|
|
// given hash, which may be queried through AdditionalLength()
|
|
func newHashAppendingReader(in io.Reader, h hash.Hash) *hashAppendingReader {
|
|
withHash := io.TeeReader(in, h)
|
|
return &hashAppendingReader{h: h, in: withHash}
|
|
}
|
|
|
|
// largeUpload is used to control the upload of large files which need chunking
|
|
type largeUpload struct {
|
|
f *Fs // parent Fs
|
|
o *Object // object being uploaded
|
|
in io.Reader // read the data from here
|
|
id string // ID of the file being uploaded
|
|
size int64 // total size
|
|
parts int64 // calculated number of parts, if known
|
|
sha1s []string // slice of SHA1s for each part
|
|
uploadMu sync.Mutex // lock for upload variable
|
|
uploads []*api.GetUploadPartURLResponse // result of get upload URL calls
|
|
}
|
|
|
|
// newLargeUpload starts an upload of object o from in with metadata in src
|
|
func (f *Fs) newLargeUpload(o *Object, in io.Reader, src fs.ObjectInfo) (up *largeUpload, err error) {
|
|
remote := o.remote
|
|
size := src.Size()
|
|
parts := int64(0)
|
|
sha1SliceSize := int64(maxParts)
|
|
if size == -1 {
|
|
fs.Debugf(o, "Streaming upload with --b2-chunk-size %s allows uploads of up to %s and will fail only when that limit is reached.", fs.SizeSuffix(chunkSize), fs.SizeSuffix(maxParts*chunkSize))
|
|
} else {
|
|
parts = size / int64(chunkSize)
|
|
if size%int64(chunkSize) != 0 {
|
|
parts++
|
|
}
|
|
if parts > maxParts {
|
|
return nil, errors.Errorf("%q too big (%d bytes) makes too many parts %d > %d - increase --b2-chunk-size", remote, size, parts, maxParts)
|
|
}
|
|
sha1SliceSize = parts
|
|
}
|
|
|
|
modTime := src.ModTime()
|
|
opts := rest.Opts{
|
|
Method: "POST",
|
|
Path: "/b2_start_large_file",
|
|
}
|
|
bucketID, err := f.getBucketID()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
var request = api.StartLargeFileRequest{
|
|
BucketID: bucketID,
|
|
Name: o.fs.root + remote,
|
|
ContentType: fs.MimeType(src),
|
|
Info: map[string]string{
|
|
timeKey: timeString(modTime),
|
|
},
|
|
}
|
|
// Set the SHA1 if known
|
|
if calculatedSha1, err := src.Hash(fs.HashSHA1); err == nil && calculatedSha1 != "" {
|
|
request.Info[sha1Key] = calculatedSha1
|
|
}
|
|
var response api.StartLargeFileResponse
|
|
err = f.pacer.Call(func() (bool, error) {
|
|
resp, err := f.srv.CallJSON(&opts, &request, &response)
|
|
return f.shouldRetry(resp, err)
|
|
})
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
up = &largeUpload{
|
|
f: f,
|
|
o: o,
|
|
in: in,
|
|
id: response.ID,
|
|
size: size,
|
|
parts: parts,
|
|
sha1s: make([]string, sha1SliceSize),
|
|
}
|
|
return up, nil
|
|
}
|
|
|
|
// getUploadURL returns the upload info with the UploadURL and the AuthorizationToken
|
|
//
|
|
// This should be returned with returnUploadURL when finished
|
|
func (up *largeUpload) getUploadURL() (upload *api.GetUploadPartURLResponse, err error) {
|
|
up.uploadMu.Lock()
|
|
defer up.uploadMu.Unlock()
|
|
if len(up.uploads) == 0 {
|
|
opts := rest.Opts{
|
|
Method: "POST",
|
|
Path: "/b2_get_upload_part_url",
|
|
}
|
|
var request = api.GetUploadPartURLRequest{
|
|
ID: up.id,
|
|
}
|
|
err := up.f.pacer.Call(func() (bool, error) {
|
|
resp, err := up.f.srv.CallJSON(&opts, &request, &upload)
|
|
return up.f.shouldRetry(resp, err)
|
|
})
|
|
if err != nil {
|
|
return nil, errors.Wrap(err, "failed to get upload URL")
|
|
}
|
|
} else {
|
|
upload, up.uploads = up.uploads[0], up.uploads[1:]
|
|
}
|
|
return upload, nil
|
|
}
|
|
|
|
// returnUploadURL returns the UploadURL to the cache
|
|
func (up *largeUpload) returnUploadURL(upload *api.GetUploadPartURLResponse) {
|
|
if upload == nil {
|
|
return
|
|
}
|
|
up.uploadMu.Lock()
|
|
up.uploads = append(up.uploads, upload)
|
|
up.uploadMu.Unlock()
|
|
}
|
|
|
|
// clearUploadURL clears the current UploadURL and the AuthorizationToken
|
|
func (up *largeUpload) clearUploadURL() {
|
|
up.uploadMu.Lock()
|
|
up.uploads = nil
|
|
up.uploadMu.Unlock()
|
|
}
|
|
|
|
// Transfer a chunk
|
|
func (up *largeUpload) transferChunk(part int64, body []byte) error {
|
|
err := up.f.pacer.Call(func() (bool, error) {
|
|
fs.Debugf(up.o, "Sending chunk %d length %d", part, len(body))
|
|
|
|
// Get upload URL
|
|
upload, err := up.getUploadURL()
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
|
|
in := newHashAppendingReader(bytes.NewReader(body), sha1.New())
|
|
size := int64(len(body)) + int64(in.AdditionalLength())
|
|
|
|
// Authorization
|
|
//
|
|
// An upload authorization token, from b2_get_upload_part_url.
|
|
//
|
|
// X-Bz-Part-Number
|
|
//
|
|
// A number from 1 to 10000. The parts uploaded for one file
|
|
// must have contiguous numbers, starting with 1.
|
|
//
|
|
// Content-Length
|
|
//
|
|
// The number of bytes in the file being uploaded. Note that
|
|
// this header is required; you cannot leave it out and just
|
|
// use chunked encoding. The minimum size of every part but
|
|
// the last one is 100MB.
|
|
//
|
|
// X-Bz-Content-Sha1
|
|
//
|
|
// The SHA1 checksum of the this part of the file. B2 will
|
|
// check this when the part is uploaded, to make sure that the
|
|
// data arrived correctly. The same SHA1 checksum must be
|
|
// passed to b2_finish_large_file.
|
|
opts := rest.Opts{
|
|
Method: "POST",
|
|
RootURL: upload.UploadURL,
|
|
Body: fs.AccountPart(up.o, in),
|
|
ExtraHeaders: map[string]string{
|
|
"Authorization": upload.AuthorizationToken,
|
|
"X-Bz-Part-Number": fmt.Sprintf("%d", part),
|
|
sha1Header: "hex_digits_at_end",
|
|
},
|
|
ContentLength: &size,
|
|
}
|
|
|
|
var response api.UploadPartResponse
|
|
|
|
resp, err := up.f.srv.CallJSON(&opts, nil, &response)
|
|
retry, err := up.f.shouldRetry(resp, err)
|
|
// On retryable error clear PartUploadURL
|
|
if retry {
|
|
fs.Debugf(up.o, "Clearing part upload URL because of error: %v", err)
|
|
upload = nil
|
|
}
|
|
up.returnUploadURL(upload)
|
|
up.sha1s[part-1] = in.HexSum()
|
|
return retry, err
|
|
})
|
|
if err != nil {
|
|
fs.Debugf(up.o, "Error sending chunk %d: %v", part, err)
|
|
} else {
|
|
fs.Debugf(up.o, "Done sending chunk %d", part)
|
|
}
|
|
return err
|
|
}
|
|
|
|
// finish closes off the large upload
|
|
func (up *largeUpload) finish() error {
|
|
fs.Debugf(up.o, "Finishing large file upload with %d parts", up.parts)
|
|
opts := rest.Opts{
|
|
Method: "POST",
|
|
Path: "/b2_finish_large_file",
|
|
}
|
|
var request = api.FinishLargeFileRequest{
|
|
ID: up.id,
|
|
SHA1s: up.sha1s,
|
|
}
|
|
var response api.FileInfo
|
|
err := up.f.pacer.Call(func() (bool, error) {
|
|
resp, err := up.f.srv.CallJSON(&opts, &request, &response)
|
|
return up.f.shouldRetry(resp, err)
|
|
})
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return up.o.decodeMetaDataFileInfo(&response)
|
|
}
|
|
|
|
// cancel aborts the large upload
|
|
func (up *largeUpload) cancel() error {
|
|
opts := rest.Opts{
|
|
Method: "POST",
|
|
Path: "/b2_cancel_large_file",
|
|
}
|
|
var request = api.CancelLargeFileRequest{
|
|
ID: up.id,
|
|
}
|
|
var response api.CancelLargeFileResponse
|
|
err := up.f.pacer.Call(func() (bool, error) {
|
|
resp, err := up.f.srv.CallJSON(&opts, &request, &response)
|
|
return up.f.shouldRetry(resp, err)
|
|
})
|
|
return err
|
|
}
|
|
|
|
func (up *largeUpload) managedTransferChunk(wg *sync.WaitGroup, errs chan error, part int64, buf []byte) {
|
|
wg.Add(1)
|
|
go func(part int64, buf []byte) {
|
|
defer wg.Done()
|
|
defer up.f.putUploadBlock(buf)
|
|
err := up.transferChunk(part, buf)
|
|
if err != nil {
|
|
select {
|
|
case errs <- err:
|
|
default:
|
|
}
|
|
}
|
|
}(part, buf)
|
|
}
|
|
|
|
func (up *largeUpload) finishOrCancelOnError(err error, errs chan error) error {
|
|
if err == nil {
|
|
select {
|
|
case err = <-errs:
|
|
default:
|
|
}
|
|
}
|
|
if err != nil {
|
|
fs.Debugf(up.o, "Cancelling large file upload due to error: %v", err)
|
|
cancelErr := up.cancel()
|
|
if cancelErr != nil {
|
|
fs.Errorf(up.o, "Failed to cancel large file upload: %v", cancelErr)
|
|
}
|
|
return err
|
|
}
|
|
return up.finish()
|
|
}
|
|
|
|
// Stream uploads the chunks from the input, starting with a required initial
|
|
// chunk. Assumes the file size is unknown and will upload until the input
|
|
// reaches EOF.
|
|
func (up *largeUpload) Stream(initialUploadBlock []byte) (err error) {
|
|
fs.Debugf(up.o, "Starting streaming of large file (id %q)", up.id)
|
|
errs := make(chan error, 1)
|
|
hasMoreParts := true
|
|
var wg sync.WaitGroup
|
|
fs.AccountByPart(up.o) // Cancel whole file accounting before reading
|
|
|
|
// Transfer initial chunk
|
|
up.size = int64(len(initialUploadBlock))
|
|
up.managedTransferChunk(&wg, errs, 1, initialUploadBlock)
|
|
|
|
outer:
|
|
for part := int64(2); hasMoreParts; part++ {
|
|
// Check any errors
|
|
select {
|
|
case err = <-errs:
|
|
break outer
|
|
default:
|
|
}
|
|
|
|
// Get a block of memory
|
|
buf := up.f.getUploadBlock()
|
|
|
|
// Read the chunk
|
|
n, err := io.ReadFull(up.in, buf)
|
|
if err == io.ErrUnexpectedEOF {
|
|
fs.Debugf(up.o, "Read less than a full chunk, making this the last one.")
|
|
buf = buf[:n]
|
|
hasMoreParts = false
|
|
err = nil
|
|
} else if err == io.EOF {
|
|
fs.Debugf(up.o, "Could not read any more bytes, previous chunk was the last.")
|
|
up.f.putUploadBlock(buf)
|
|
hasMoreParts = false
|
|
err = nil
|
|
break outer
|
|
} else if err != nil {
|
|
// other kinds of errors indicate failure
|
|
up.f.putUploadBlock(buf)
|
|
break outer
|
|
}
|
|
|
|
// Keep stats up to date
|
|
up.parts = part
|
|
up.size += int64(n)
|
|
if part > maxParts {
|
|
err = errors.Errorf("%q too big (%d bytes so far) makes too many parts %d > %d - increase --b2-chunk-size", up.o, up.size, up.parts, maxParts)
|
|
break outer
|
|
}
|
|
|
|
// Transfer the chunk
|
|
up.managedTransferChunk(&wg, errs, part, buf)
|
|
}
|
|
wg.Wait()
|
|
up.sha1s = up.sha1s[:up.parts]
|
|
|
|
return up.finishOrCancelOnError(err, errs)
|
|
}
|
|
|
|
// Upload uploads the chunks from the input
|
|
func (up *largeUpload) Upload() error {
|
|
fs.Debugf(up.o, "Starting upload of large file in %d chunks (id %q)", up.parts, up.id)
|
|
remaining := up.size
|
|
errs := make(chan error, 1)
|
|
var wg sync.WaitGroup
|
|
var err error
|
|
fs.AccountByPart(up.o) // Cancel whole file accounting before reading
|
|
outer:
|
|
for part := int64(1); part <= up.parts; part++ {
|
|
// Check any errors
|
|
select {
|
|
case err = <-errs:
|
|
break outer
|
|
default:
|
|
}
|
|
|
|
reqSize := remaining
|
|
if reqSize >= int64(chunkSize) {
|
|
reqSize = int64(chunkSize)
|
|
}
|
|
|
|
// Get a block of memory
|
|
buf := up.f.getUploadBlock()[:reqSize]
|
|
|
|
// Read the chunk
|
|
_, err = io.ReadFull(up.in, buf)
|
|
if err != nil {
|
|
up.f.putUploadBlock(buf)
|
|
break outer
|
|
}
|
|
|
|
// Transfer the chunk
|
|
up.managedTransferChunk(&wg, errs, part, buf)
|
|
remaining -= reqSize
|
|
}
|
|
wg.Wait()
|
|
|
|
return up.finishOrCancelOnError(err, errs)
|
|
}
|