s3: fix retry logic, logging and error reporting for chunk upload
- move retries into the correct place: into the lowest level functions
- fix logging and error reporting
parent b4e3332e02
commit e6fde67491
1 changed file with 86 additions and 80 deletions
130 backend/s3/s3.go
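The diff below moves each retry loop down to the individual AWS SDK call: the low-level function wraps the call in f.pacer.Call and lets f.shouldRetry decide whether the error is worth retrying, instead of the caller retrying a whole higher-level operation. As a rough, self-contained sketch of that shape (Pacer, shouldRetry and fakeCreateMultipartUpload are simplified stand-ins, not rclone's real pacer or s3 backend types):

// Minimal sketch of the retry shape used in this commit: the retry loop
// (pacer.Call) wraps only the lowest-level SDK call, and a shouldRetry
// helper decides whether the error is retryable.
package main

import (
	"errors"
	"fmt"
	"time"
)

// Pacer retries an operation while its callback asks for a retry.
type Pacer struct {
	maxTries int
	sleep    time.Duration
}

// Call invokes fn until it returns retry == false or the tries run out.
func (p *Pacer) Call(fn func() (retry bool, err error)) error {
	var err error
	for try := 0; try < p.maxTries; try++ {
		var retry bool
		retry, err = fn()
		if !retry {
			return err
		}
		time.Sleep(p.sleep)
	}
	return err
}

var errThrottled = errors.New("simulated throttling error")

// shouldRetry reports whether err looks retryable (stand-in for f.shouldRetry).
func shouldRetry(err error) (bool, error) {
	return errors.Is(err, errThrottled), err
}

// fakeCreateMultipartUpload stands in for an SDK call that fails twice
// before succeeding.
func fakeCreateMultipartUpload(calls *int) (string, error) {
	*calls++
	if *calls < 3 {
		return "", errThrottled
	}
	return "upload-id-123", nil
}

func main() {
	pacer := &Pacer{maxTries: 5, sleep: 10 * time.Millisecond}
	var calls int
	var uploadID string
	// The retry lives at the lowest level, around the single SDK call,
	// mirroring how the commit wraps CreateMultipartUploadWithContext.
	err := pacer.Call(func() (bool, error) {
		var err error
		uploadID, err = fakeCreateMultipartUpload(&calls)
		return shouldRetry(err)
	})
	if err != nil {
		fmt.Printf("create multipart upload failed: %v\n", err)
		return
	}
	fmt.Printf("got upload id %q after %d calls\n", uploadID, calls)
}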
@@ -5316,10 +5316,15 @@ func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (in io.Read
 
 var warnStreamUpload sync.Once
 
+// OpenChunkWriter returns the chunk size and a ChunkWriter
+//
+// Pass in the remote and the src object
+// You can also use options to hint at the desired chunk size
 func (f *Fs) OpenChunkWriter(ctx context.Context, remote string, src fs.ObjectInfo, options ...fs.OpenOption) (chunkSizeResult int64, writer fs.ChunkWriter, err error) {
-	// This duplicates part of the logic in Update,
-	//however per my understanding it is required until we migrate the MultiPartUpload to OpenChunkWriter/multi-thread op completely
+	// This duplicates part of the logic in Update, however it is
+	// required until we migrate the MultiPartUpload to
+	// OpenChunkWriter/multi-thread op completely.
 
 	// Temporary Object under construction
 	o := &Object{
 		fs: f,
@@ -5327,7 +5332,7 @@ func (f *Fs) OpenChunkWriter(ctx context.Context, remote string, src fs.ObjectIn
 	}
 	req, _, err := o.buildS3Req(ctx, src, options)
 	if err != nil {
-		return -1, nil, fmt.Errorf("failed to build s3 request: %v", err)
+		return -1, nil, fmt.Errorf("failed to build s3 request: %w", err)
 	}
 
 	//structs.SetFrom(&mReq, req)
@@ -5357,15 +5362,19 @@ func (f *Fs) OpenChunkWriter(ctx context.Context, remote string, src fs.ObjectIn
 		chunkSize = chunksize.Calculator(src, size, uploadParts, chunkSize)
 	}
 
-	mOut, err := f.c.CreateMultipartUploadWithContext(ctx, &mReq)
+	var mOut *s3.CreateMultipartUploadOutput
+	err = f.pacer.Call(func() (bool, error) {
+		mOut, err = f.c.CreateMultipartUploadWithContext(ctx, &mReq)
+		return f.shouldRetry(ctx, err)
+	})
 	if err != nil {
-		return -1, nil, fmt.Errorf("CreateMultipartUpload failed: %w", err)
+		return -1, nil, fmt.Errorf("create multipart upload failed: %w", err)
 	}
 
 	chunkWriter := &s3ChunkWriter{
 		ctx:       ctx,
 		chunkSize: int64(chunkSize),
-		size:      src.Size(),
+		size:      size,
 		f:         f,
 		bucket:    mOut.Bucket,
 		key:       mOut.Key,
@@ -5394,13 +5403,18 @@ type s3ChunkWriter struct
 	md5s []byte
 }
 
-func (w *s3ChunkWriter) WriteChunk(chunkNumber int, reader io.ReadSeeker) (int64, error) {
-	if chunkNumber < 0 {
-		err := fmt.Errorf("invalid chunk number provided: %v", chunkNumber)
-		return -1, err
+// add a part number and etag to the completed parts
+func (w *s3ChunkWriter) addCompletedPart(partNum *int64, eTag *string) {
+	w.completedPartsMu.Lock()
+	defer w.completedPartsMu.Unlock()
+	w.completedParts = append(w.completedParts, &s3.CompletedPart{
+		PartNumber: partNum,
+		ETag:       eTag,
+	})
 }
 
-	addMd5 := func(md5binary *[]byte, chunkNumber int64) {
+// addMd5 adds a binary md5 to the md5 calculated so far
+func (w *s3ChunkWriter) addMd5(md5binary *[]byte, chunkNumber int64) {
 	w.md5sMu.Lock()
 	defer w.md5sMu.Unlock()
 	start := chunkNumber * md5.Size
@@ -5411,6 +5425,13 @@ func (w *s3ChunkWriter) WriteChunk(chunkNumber int, reader io.ReadSeeker) (int64
 	copy(w.md5s[start:end], (*md5binary)[:])
 }
 
+// WriteChunk will write chunk number with reader bytes, where chunk number >= 0
+func (w *s3ChunkWriter) WriteChunk(chunkNumber int, reader io.ReadSeeker) (int64, error) {
+	if chunkNumber < 0 {
+		err := fmt.Errorf("invalid chunk number provided: %v", chunkNumber)
+		return -1, err
+	}
+
 	// create checksum of buffer for integrity checking
 	// currently there is no way to calculate the md5 without reading the chunk a 2nd time (1st read is in uploadMultipart)
 	// possible in AWS SDK v2 with trailers?
@@ -5420,7 +5441,7 @@ func (w *s3ChunkWriter) WriteChunk(chunkNumber int, reader io.ReadSeeker) (int64
 		return -1, err
 	}
 	md5sumBinary := m.Sum([]byte{})
-	addMd5(&md5sumBinary, int64(chunkNumber))
+	w.addMd5(&md5sumBinary, int64(chunkNumber))
 	md5sum := base64.StdEncoding.EncodeToString(md5sumBinary[:])
 
 	// reset the reader after we calculated the md5
@@ -5444,46 +5465,55 @@ func (w *s3ChunkWriter) WriteChunk(chunkNumber int, reader io.ReadSeeker) (int64
 		SSECustomerKey:    w.multiPartUploadInput.SSECustomerKey,
 		SSECustomerKeyMD5: w.multiPartUploadInput.SSECustomerKeyMD5,
 	}
-	uout, err := w.f.c.UploadPartWithContext(w.ctx, uploadPartReq)
+	var uout *s3.UploadPartOutput
+	err = w.f.pacer.Call(func() (bool, error) {
+		uout, err = w.f.c.UploadPartWithContext(w.ctx, uploadPartReq)
 		if err != nil {
-			fs.Errorf(w.f, "Failed to upload part: %v", err)
-			return -1, err
+			if chunkNumber <= 8 {
+				return w.f.shouldRetry(w.ctx, err)
+			}
+			// retry all chunks once have done the first few
+			return true, err
+		}
+		return false, nil
+	})
+	if err != nil {
+		return -1, fmt.Errorf("failed to upload chunk %d with %v bytes: %w", chunkNumber+1, currentChunkSize, err)
 	}
 
-	addCompletedPart := func(partNum *int64, eTag *string) {
-		w.completedPartsMu.Lock()
-		defer w.completedPartsMu.Unlock()
-		w.completedParts = append(w.completedParts, &s3.CompletedPart{
-			PartNumber: partNum,
-			ETag:       uout.ETag,
-		})
-	}
-	addCompletedPart(s3PartNumber, uout.ETag)
+	w.addCompletedPart(s3PartNumber, uout.ETag)
 
 	fs.Debugf(w.f, "multipart upload wrote chunk %d with %v bytes and etag %v", chunkNumber+1, currentChunkSize, *uout.ETag)
 	return currentChunkSize, err
 }
 
+// Abort the multpart upload
 func (w *s3ChunkWriter) Abort() error {
+	err := w.f.pacer.Call(func() (bool, error) {
 		_, err := w.f.c.AbortMultipartUploadWithContext(context.Background(), &s3.AbortMultipartUploadInput{
 			Bucket:       w.bucket,
 			Key:          w.key,
 			UploadId:     w.uploadId,
 			RequestPayer: w.multiPartUploadInput.RequestPayer,
 		})
+		return w.f.shouldRetry(w.ctx, err)
+	})
 	if err != nil {
-		fs.Errorf(w.f, "Failed to abort multipart upload: %v", err)
+		return fmt.Errorf("failed to abort multipart upload %q: %w", *w.uploadId, err)
 	}
-	fs.Debugf(w.f, "multipart upload '%v' aborted", *w.uploadId)
+	fs.Debugf(w.f, "multipart upload %q aborted", *w.uploadId)
 	return err
 }
 
-func (w *s3ChunkWriter) Close() error {
+// Close and finalise the multipart upload
+func (w *s3ChunkWriter) Close() (err error) {
 	// sort the completed parts by part number
 	sort.Slice(w.completedParts, func(i, j int) bool {
 		return *w.completedParts[i].PartNumber < *w.completedParts[j].PartNumber
 	})
-	resp, err := w.f.c.CompleteMultipartUploadWithContext(w.ctx, &s3.CompleteMultipartUploadInput{
+	var resp *s3.CompleteMultipartUploadOutput
+	err = w.f.pacer.Call(func() (bool, error) {
+		resp, err = w.f.c.CompleteMultipartUploadWithContext(w.ctx, &s3.CompleteMultipartUploadInput{
 			Bucket:          w.bucket,
 			Key:             w.key,
 			MultipartUpload: &s3.CompletedMultipartUpload{
@@ -5492,8 +5522,10 @@ func (w *s3ChunkWriter) Close() error {
 			RequestPayer: w.multiPartUploadInput.RequestPayer,
 			UploadId:     w.uploadId,
 		})
+		return w.f.shouldRetry(w.ctx, err)
+	})
 	if err != nil {
-		fs.Errorf(w.f, "Failed to complete multipart upload: %v", err)
+		return fmt.Errorf("failed to complete multipart upload %q: %w", *w.uploadId, err)
 	}
 	if resp != nil {
 		if resp.ETag != nil {
@@ -5503,7 +5535,7 @@ func (w *s3ChunkWriter) Close() error {
 			w.versionID = *resp.VersionId
 		}
 	}
-	fs.Debugf(w.f, "multipart upload '%v' closed", *w.uploadId)
+	fs.Debugf(w.f, "multipart upload %q closed", *w.uploadId)
 	return err
 }
 
@@ -5516,15 +5548,8 @@ func (o *Object) uploadMultipart(ctx context.Context, src fs.ObjectInfo, in io.R
 		concurrency = 1
 	}
 	tokens := pacer.NewTokenDispenser(concurrency)
-	openChunkWriter := f.Features().OpenChunkWriter
 
-	var chunkWriter fs.ChunkWriter
-	var chunkSize int64
-	err = f.pacer.Call(func() (bool, error) {
-		var err error
-		chunkSize, chunkWriter, err = openChunkWriter(ctx, src.Remote(), src)
-		return f.shouldRetry(ctx, err)
-	})
+	chunkSize, chunkWriter, err := f.OpenChunkWriter(ctx, src.Remote(), src)
 	if err != nil {
 		return wantETag, gotETag, nil, fmt.Errorf("multipart upload failed to initialise: %w", err)
 	}
@@ -5536,10 +5561,7 @@ func (o *Object) uploadMultipart(ctx context.Context, src fs.ObjectInfo, in io.R
 			return
 		}
 		fs.Debugf(o, "Cancelling multipart upload")
-		errCancel := f.pacer.Call(func() (bool, error) {
-			err := chunkWriter.Abort()
-			return f.shouldRetry(ctx, err)
-		})
+		errCancel := chunkWriter.Abort()
 		if errCancel != nil {
 			fs.Debugf(o, "Failed to cancel multipart upload: %v", errCancel)
 		}
@@ -5589,21 +5611,8 @@ func (o *Object) uploadMultipart(ctx context.Context, src fs.ObjectInfo, in io.R
 		off += int64(n)
 		g.Go(func() (err error) {
 			defer free()
-			err = f.pacer.Call(func() (bool, error) {
-				_, err := chunkWriter.WriteChunk(int(partNum), bytes.NewReader(buf))
-				if err != nil {
-					if partNum <= int64(concurrency) {
-						return f.shouldRetry(gCtx, err)
-					}
-					// retry all chunks once have done the first batch
-					return true, err
-				}
-				return false, nil
-			})
-			if err != nil {
-				return fmt.Errorf("multipart upload failed to upload part: %w", err)
-			}
-			return nil
+			_, err = chunkWriter.WriteChunk(int(partNum), bytes.NewReader(buf))
+			return err
 		})
 	}
 	err = g.Wait()
|
@ -5611,10 +5620,7 @@ func (o *Object) uploadMultipart(ctx context.Context, src fs.ObjectInfo, in io.R
|
||||||
return wantETag, gotETag, nil, err
|
return wantETag, gotETag, nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
err = f.pacer.Call(func() (bool, error) {
|
err = chunkWriter.Close()
|
||||||
err := chunkWriter.Close()
|
|
||||||
return f.shouldRetry(uploadCtx, err)
|
|
||||||
})
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return wantETag, gotETag, nil, fmt.Errorf("multipart upload failed to finalise: %w", err)
|
return wantETag, gotETag, nil, fmt.Errorf("multipart upload failed to finalise: %w", err)
|
||||||
}
|
}
|
||||||
|
@@ -5945,7 +5951,7 @@ func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, op
 	} else {
 		req, md5sumHex, err = o.buildS3Req(ctx, src, options)
 		if err != nil {
-			return fmt.Errorf("failed to build s3 request: %v", err)
+			return fmt.Errorf("failed to build s3 request: %w", err)
 		}
 
 		if o.fs.opt.UsePresignedRequest {
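A note on the fmt.Errorf changes scattered through this diff: switching the verb from %v to %w keeps the original error in the wrap chain, so callers and the retry logic can still match it with errors.Is or errors.As. A small standalone illustration (the sentinel error below is hypothetical, not taken from the s3 backend):

package main

import (
	"errors"
	"fmt"
)

// errNoSuchUpload is a hypothetical sentinel error used only for illustration.
var errNoSuchUpload = errors.New("NoSuchUpload")

func main() {
	base := errNoSuchUpload

	// %w wraps the error; %v only formats its text.
	wrapped := fmt.Errorf("failed to build s3 request: %w", base)
	flattened := fmt.Errorf("failed to build s3 request: %v", base)

	fmt.Println(errors.Is(wrapped, errNoSuchUpload))   // true: the chain is preserved
	fmt.Println(errors.Is(flattened, errNoSuchUpload)) // false: the original error is lost
}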