s3: fix retry logic, logging and error reporting for chunk upload

- move retries into the correct place: the lowest level functions
- fix logging and error reporting
Nick Craig-Wood 2023-08-15 14:27:21 +01:00
parent b4e3332e02
commit e6fde67491

backend/s3/s3.go

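Throughout the diff below, the recurring shape is rclone's pacer pattern: each SDK call is made inside f.pacer.Call, shouldRetry classifies the error, and results are captured by the closure so they survive retries. A minimal self-contained sketch of that shape (the call helper here is a hypothetical stand-in for rclone's pacer, not its real types):

package main

import (
	"errors"
	"fmt"
	"time"
)

// call re-invokes fn while it reports (true, err), sleeping between attempts.
// Hypothetical stand-in for pacer.Call, which also rate limits and applies
// adaptive backoff.
func call(maxTries int, fn func() (retry bool, err error)) error {
	var err error
	for try := 0; try < maxTries; try++ {
		var retry bool
		retry, err = fn()
		if !retry {
			return err
		}
		time.Sleep(10 * time.Millisecond)
	}
	return err
}

var errTemporary = errors.New("temporary failure")

func main() {
	attempts := 0
	var result string // captured by the closure, like mOut/uout in the diff
	err := call(3, func() (bool, error) {
		attempts++
		if attempts < 3 {
			return true, errTemporary // ask the pacer to retry
		}
		result = "ok"
		return false, nil
	})
	fmt.Println(result, err, attempts) // ok <nil> 3
}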
@@ -5316,10 +5316,15 @@ func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (in io.Read
 var warnStreamUpload sync.Once
 
+// OpenChunkWriter returns the chunk size and a ChunkWriter
+//
+// Pass in the remote and the src object
+// You can also use options to hint at the desired chunk size
 func (f *Fs) OpenChunkWriter(ctx context.Context, remote string, src fs.ObjectInfo, options ...fs.OpenOption) (chunkSizeResult int64, writer fs.ChunkWriter, err error) {
-	// This duplicates part of the logic in Update,
-	//however per my understanding it is required until we migrate the MultiPartUpload to OpenChunkWriter/multi-thread op completely
+	// This duplicates part of the logic in Update, however it is
+	// required until we migrate the MultiPartUpload to
+	// OpenChunkWriter/multi-thread op completely.
 	// Temporary Object under construction
 	o := &Object{
 		fs: f,
@@ -5327,7 +5332,7 @@ func (f *Fs) OpenChunkWriter(ctx context.Context, remote string, src fs.ObjectIn
 	}
 	req, _, err := o.buildS3Req(ctx, src, options)
 	if err != nil {
-		return -1, nil, fmt.Errorf("failed to build s3 request: %v", err)
+		return -1, nil, fmt.Errorf("failed to build s3 request: %w", err)
 	}
 	//structs.SetFrom(&mReq, req)
@@ -5357,15 +5362,19 @@ func (f *Fs) OpenChunkWriter(ctx context.Context, remote string, src fs.ObjectIn
 		chunkSize = chunksize.Calculator(src, size, uploadParts, chunkSize)
 	}
-	mOut, err := f.c.CreateMultipartUploadWithContext(ctx, &mReq)
+	var mOut *s3.CreateMultipartUploadOutput
+	err = f.pacer.Call(func() (bool, error) {
+		mOut, err = f.c.CreateMultipartUploadWithContext(ctx, &mReq)
+		return f.shouldRetry(ctx, err)
+	})
 	if err != nil {
-		return -1, nil, fmt.Errorf("CreateMultipartUpload failed: %w", err)
+		return -1, nil, fmt.Errorf("create multipart upload failed: %w", err)
 	}
 	chunkWriter := &s3ChunkWriter{
 		ctx:       ctx,
 		chunkSize: int64(chunkSize),
-		size:      src.Size(),
+		size:      size,
 		f:         f,
 		bucket:    mOut.Bucket,
 		key:       mOut.Key,
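For context on the chunksize.Calculator line kept in the hunk above: S3 caps a multipart upload at 10,000 parts, so the calculator's job is to grow the chunk size until the object fits in uploadParts chunks. A hedged back-of-the-envelope illustration (assuming simple doubling from the default; the real calculator may round differently):

package main

import "fmt"

func main() {
	// Hypothetical illustration of the invariant the calculator must
	// maintain: chunkSize*uploadParts >= size.
	size := int64(1) << 40      // 1 TiB object
	uploadParts := int64(10000) // S3's maximum number of parts
	chunkSize := int64(5 << 20) // 5 MiB default chunk size
	for chunkSize*uploadParts < size {
		chunkSize *= 2
	}
	fmt.Printf("chunk size: %d MiB\n", chunkSize>>20) // chunk size: 160 MiB
}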
@@ -5394,23 +5403,35 @@ type s3ChunkWriter struct {
 	md5s []byte
 }
 
+// add a part number and etag to the completed parts
+func (w *s3ChunkWriter) addCompletedPart(partNum *int64, eTag *string) {
+	w.completedPartsMu.Lock()
+	defer w.completedPartsMu.Unlock()
+	w.completedParts = append(w.completedParts, &s3.CompletedPart{
+		PartNumber: partNum,
+		ETag:       eTag,
+	})
+}
+
+// addMd5 adds a binary md5 to the md5 calculated so far
+func (w *s3ChunkWriter) addMd5(md5binary *[]byte, chunkNumber int64) {
+	w.md5sMu.Lock()
+	defer w.md5sMu.Unlock()
+	start := chunkNumber * md5.Size
+	end := start + md5.Size
+	if extend := end - int64(len(w.md5s)); extend > 0 {
+		w.md5s = append(w.md5s, make([]byte, extend)...)
+	}
+	copy(w.md5s[start:end], (*md5binary)[:])
+}
+
+// WriteChunk will write chunk number with reader bytes, where chunk number >= 0
 func (w *s3ChunkWriter) WriteChunk(chunkNumber int, reader io.ReadSeeker) (int64, error) {
 	if chunkNumber < 0 {
 		err := fmt.Errorf("invalid chunk number provided: %v", chunkNumber)
 		return -1, err
 	}
-	addMd5 := func(md5binary *[]byte, chunkNumber int64) {
-		w.md5sMu.Lock()
-		defer w.md5sMu.Unlock()
-		start := chunkNumber * md5.Size
-		end := start + md5.Size
-		if extend := end - int64(len(w.md5s)); extend > 0 {
-			w.md5s = append(w.md5s, make([]byte, extend)...)
-		}
-		copy(w.md5s[start:end], (*md5binary)[:])
-	}
+
 	// create checksum of buffer for integrity checking
 	// currently there is no way to calculate the md5 without reading the chunk a 2nd time (1st read is in uploadMultipart)
 	// possible in AWS SDK v2 with trailers?
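The new addMd5 method writes each chunk's 16-byte digest into a fixed slot at chunkNumber*md5.Size, growing the slice on demand, so concurrent chunks can finish out of order and the buffer still ends up as digest(0), digest(1), ... in sequence, ready for the md5-of-md5s that backs the multipart ETag check. The same indexing as a standalone, runnable sketch (mutex omitted):

package main

import (
	"crypto/md5"
	"fmt"
)

// addMd5 stores digest in the slot for chunkNumber, extending the slice as
// needed -- the same indexing as the method above, without the locking.
func addMd5(md5s []byte, digest [md5.Size]byte, chunkNumber int64) []byte {
	start := chunkNumber * md5.Size
	end := start + md5.Size
	if extend := end - int64(len(md5s)); extend > 0 {
		md5s = append(md5s, make([]byte, extend)...)
	}
	copy(md5s[start:end], digest[:])
	return md5s
}

func main() {
	var md5s []byte
	// Chunks complete out of order; slot indexing keeps the layout stable.
	md5s = addMd5(md5s, md5.Sum([]byte("chunk two")), 1)
	md5s = addMd5(md5s, md5.Sum([]byte("chunk one")), 0)
	fmt.Println(len(md5s))            // 32: two 16-byte slots
	fmt.Printf("%x\n", md5.Sum(md5s)) // md5 of the concatenated digests
}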
@@ -5420,7 +5441,7 @@ func (w *s3ChunkWriter) WriteChunk(chunkNumber int, reader io.ReadSeeker) (int64
 		return -1, err
 	}
 	md5sumBinary := m.Sum([]byte{})
-	addMd5(&md5sumBinary, int64(chunkNumber))
+	w.addMd5(&md5sumBinary, int64(chunkNumber))
 	md5sum := base64.StdEncoding.EncodeToString(md5sumBinary[:])
 
 	// reset the reader after we calculated the md5
@@ -5444,56 +5465,67 @@ func (w *s3ChunkWriter) WriteChunk(chunkNumber int, reader io.ReadSeeker) (int64
 		SSECustomerKey:    w.multiPartUploadInput.SSECustomerKey,
 		SSECustomerKeyMD5: w.multiPartUploadInput.SSECustomerKeyMD5,
 	}
-	uout, err := w.f.c.UploadPartWithContext(w.ctx, uploadPartReq)
+	var uout *s3.UploadPartOutput
+	err = w.f.pacer.Call(func() (bool, error) {
+		uout, err = w.f.c.UploadPartWithContext(w.ctx, uploadPartReq)
+		if err != nil {
+			if chunkNumber <= 8 {
+				return w.f.shouldRetry(w.ctx, err)
+			}
+			// retry all chunks once we have done the first few
+			return true, err
+		}
+		return false, nil
+	})
 	if err != nil {
-		fs.Errorf(w.f, "Failed to upload part: %v", err)
-		return -1, err
+		return -1, fmt.Errorf("failed to upload chunk %d with %v bytes: %w", chunkNumber+1, currentChunkSize, err)
 	}
-	addCompletedPart := func(partNum *int64, eTag *string) {
-		w.completedPartsMu.Lock()
-		defer w.completedPartsMu.Unlock()
-		w.completedParts = append(w.completedParts, &s3.CompletedPart{
-			PartNumber: partNum,
-			ETag:       eTag,
-		})
-	}
-	addCompletedPart(s3PartNumber, uout.ETag)
+	w.addCompletedPart(s3PartNumber, uout.ETag)
 
 	fs.Debugf(w.f, "multipart upload wrote chunk %d with %v bytes and etag %v", chunkNumber+1, currentChunkSize, *uout.ETag)
 	return currentChunkSize, err
 }
 
+// Abort the multipart upload
 func (w *s3ChunkWriter) Abort() error {
-	_, err := w.f.c.AbortMultipartUploadWithContext(context.Background(), &s3.AbortMultipartUploadInput{
-		Bucket:       w.bucket,
-		Key:          w.key,
-		UploadId:     w.uploadId,
-		RequestPayer: w.multiPartUploadInput.RequestPayer,
+	err := w.f.pacer.Call(func() (bool, error) {
+		_, err := w.f.c.AbortMultipartUploadWithContext(context.Background(), &s3.AbortMultipartUploadInput{
+			Bucket:       w.bucket,
+			Key:          w.key,
+			UploadId:     w.uploadId,
+			RequestPayer: w.multiPartUploadInput.RequestPayer,
+		})
+		return w.f.shouldRetry(w.ctx, err)
 	})
 	if err != nil {
-		fs.Errorf(w.f, "Failed to abort multipart upload: %v", err)
+		return fmt.Errorf("failed to abort multipart upload %q: %w", *w.uploadId, err)
 	}
-	fs.Debugf(w.f, "multipart upload '%v' aborted", *w.uploadId)
+	fs.Debugf(w.f, "multipart upload %q aborted", *w.uploadId)
 	return err
 }
 
-func (w *s3ChunkWriter) Close() error {
+// Close and finalise the multipart upload
+func (w *s3ChunkWriter) Close() (err error) {
 	// sort the completed parts by part number
 	sort.Slice(w.completedParts, func(i, j int) bool {
 		return *w.completedParts[i].PartNumber < *w.completedParts[j].PartNumber
 	})
-	resp, err := w.f.c.CompleteMultipartUploadWithContext(w.ctx, &s3.CompleteMultipartUploadInput{
-		Bucket: w.bucket,
-		Key:    w.key,
-		MultipartUpload: &s3.CompletedMultipartUpload{
-			Parts: w.completedParts,
-		},
-		RequestPayer: w.multiPartUploadInput.RequestPayer,
-		UploadId:     w.uploadId,
+	var resp *s3.CompleteMultipartUploadOutput
+	err = w.f.pacer.Call(func() (bool, error) {
+		resp, err = w.f.c.CompleteMultipartUploadWithContext(w.ctx, &s3.CompleteMultipartUploadInput{
+			Bucket: w.bucket,
+			Key:    w.key,
+			MultipartUpload: &s3.CompletedMultipartUpload{
+				Parts: w.completedParts,
+			},
+			RequestPayer: w.multiPartUploadInput.RequestPayer,
+			UploadId:     w.uploadId,
+		})
+		return w.f.shouldRetry(w.ctx, err)
 	})
 	if err != nil {
-		fs.Errorf(w.f, "Failed to complete multipart upload: %v", err)
+		return fmt.Errorf("failed to complete multipart upload %q: %w", *w.uploadId, err)
 	}
 	if resp != nil {
 		if resp.ETag != nil {
@@ -5503,7 +5535,7 @@ func (w *s3ChunkWriter) Close() error {
 			w.versionID = *resp.VersionId
 		}
 	}
-	fs.Debugf(w.f, "multipart upload '%v' closed", *w.uploadId)
+	fs.Debugf(w.f, "multipart upload %q closed", *w.uploadId)
 	return err
 }
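With retries now inside OpenChunkWriter, WriteChunk, Abort and Close, the writer is self-contained, and a caller can drive it without any pacer wrapping of its own; that is exactly the simplification the remaining hunks make to uploadMultipart. A hedged sketch of the calling pattern, written as if inside the s3 backend package (uploadSketch and its chunks argument are hypothetical, sequential for brevity where the real code is concurrent):

// uploadSketch drives an fs.ChunkWriter -- every method now retries
// internally, so an error that reaches this level is final.
// Hypothetical helper, not part of the commit.
func uploadSketch(ctx context.Context, f *Fs, remote string, src fs.ObjectInfo, chunks []io.ReadSeeker) error {
	_, writer, err := f.OpenChunkWriter(ctx, remote, src)
	if err != nil {
		return fmt.Errorf("multipart upload failed to initialise: %w", err)
	}
	for n, chunk := range chunks {
		if _, err := writer.WriteChunk(n, chunk); err != nil {
			_ = writer.Abort() // discard the partial multipart upload
			return err
		}
	}
	return writer.Close() // completes the multipart upload
}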
@@ -5516,15 +5548,8 @@ func (o *Object) uploadMultipart(ctx context.Context, src fs.ObjectInfo, in io.R
 		concurrency = 1
 	}
 	tokens := pacer.NewTokenDispenser(concurrency)
-	openChunkWriter := f.Features().OpenChunkWriter
-	var chunkWriter fs.ChunkWriter
-	var chunkSize int64
-	err = f.pacer.Call(func() (bool, error) {
-		var err error
-		chunkSize, chunkWriter, err = openChunkWriter(ctx, src.Remote(), src)
-		return f.shouldRetry(ctx, err)
-	})
+	chunkSize, chunkWriter, err := f.OpenChunkWriter(ctx, src.Remote(), src)
 	if err != nil {
 		return wantETag, gotETag, nil, fmt.Errorf("multipart upload failed to initialise: %w", err)
 	}
@@ -5536,10 +5561,7 @@ func (o *Object) uploadMultipart(ctx context.Context, src fs.ObjectInfo, in io.R
 			return
 		}
 		fs.Debugf(o, "Cancelling multipart upload")
-		errCancel := f.pacer.Call(func() (bool, error) {
-			err := chunkWriter.Abort()
-			return f.shouldRetry(ctx, err)
-		})
+		errCancel := chunkWriter.Abort()
 		if errCancel != nil {
 			fs.Debugf(o, "Failed to cancel multipart upload: %v", errCancel)
 		}
@@ -5589,21 +5611,8 @@ func (o *Object) uploadMultipart(ctx context.Context, src fs.ObjectInfo, in io.R
 		off += int64(n)
 		g.Go(func() (err error) {
 			defer free()
-			err = f.pacer.Call(func() (bool, error) {
-				_, err := chunkWriter.WriteChunk(int(partNum), bytes.NewReader(buf))
-				if err != nil {
-					if partNum <= int64(concurrency) {
-						return f.shouldRetry(gCtx, err)
-					}
-					// retry all chunks once have done the first batch
-					return true, err
-				}
-				return false, nil
-			})
-			if err != nil {
-				return fmt.Errorf("multipart upload failed to upload part: %w", err)
-			}
-			return nil
+			_, err = chunkWriter.WriteChunk(int(partNum), bytes.NewReader(buf))
+			return err
 		})
 	}
 	err = g.Wait()
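The loop above fans WriteChunk calls out through an errgroup while a token dispenser bounds how many chunk buffers are in flight; now that WriteChunk retries internally, the goroutine body shrinks to a single call. A self-contained sketch of that concurrency shape, using a buffered channel in place of pacer.NewTokenDispenser and a plain errgroup.Group instead of errgroup.WithContext:

package main

import (
	"fmt"
	"sync/atomic"

	"golang.org/x/sync/errgroup"
)

func main() {
	const concurrency = 4
	tokens := make(chan struct{}, concurrency) // stand-in for pacer.NewTokenDispenser
	for i := 0; i < concurrency; i++ {
		tokens <- struct{}{}
	}

	var parts int64
	var g errgroup.Group
	for partNum := 0; partNum < 16; partNum++ {
		<-tokens // acquire a token before filling the next chunk buffer
		partNum := partNum
		g.Go(func() error {
			defer func() { tokens <- struct{}{} }() // the free() in the diff
			// The real code calls chunkWriter.WriteChunk(partNum, reader)
			// here and simply returns its error.
			atomic.AddInt64(&parts, 1)
			_ = partNum
			return nil
		})
	}
	if err := g.Wait(); err != nil {
		fmt.Println("multipart upload failed:", err)
		return
	}
	fmt.Println("parts written:", parts) // parts written: 16
}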
@@ -5611,10 +5620,7 @@ func (o *Object) uploadMultipart(ctx context.Context, src fs.ObjectInfo, in io.R
 		return wantETag, gotETag, nil, err
 	}
 
-	err = f.pacer.Call(func() (bool, error) {
-		err := chunkWriter.Close()
-		return f.shouldRetry(uploadCtx, err)
-	})
+	err = chunkWriter.Close()
 	if err != nil {
 		return wantETag, gotETag, nil, fmt.Errorf("multipart upload failed to finalise: %w", err)
 	}
@@ -5945,7 +5951,7 @@ func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, op
 	} else {
 		req, md5sumHex, err = o.buildS3Req(ctx, src, options)
 		if err != nil {
-			return fmt.Errorf("failed to build s3 request: %v", err)
+			return fmt.Errorf("failed to build s3 request: %w", err)
 		}
 		if o.fs.opt.UsePresignedRequest {
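Finally, the repeated %v to %w switch in the error strings is not cosmetic: %w keeps the original error in the chain so callers, including retry logic upstream, can still match it with errors.Is or errors.As, whereas %v reduces it to text. For example (errThrottled is a hypothetical sentinel error):

package main

import (
	"errors"
	"fmt"
)

var errThrottled = errors.New("throttled") // hypothetical sentinel error

func main() {
	wrapped := fmt.Errorf("failed to build s3 request: %w", errThrottled)
	flattened := fmt.Errorf("failed to build s3 request: %v", errThrottled)
	fmt.Println(errors.Is(wrapped, errThrottled))   // true: chain preserved
	fmt.Println(errors.Is(flattened, errThrottled)) // false: only the text survives
}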