S3: Use (custom) pacer, to retry operations when reasonable - fixes #2503

This commit is contained in:
Craig Miskell 2018-09-03 16:41:04 +12:00 committed by Nick Craig-Wood
parent 19cf3bb9e7
commit 2543278c3f
3 changed files with 165 additions and 12 deletions

View file

@ -39,9 +39,11 @@ import (
"github.com/ncw/rclone/fs" "github.com/ncw/rclone/fs"
"github.com/ncw/rclone/fs/config/configmap" "github.com/ncw/rclone/fs/config/configmap"
"github.com/ncw/rclone/fs/config/configstruct" "github.com/ncw/rclone/fs/config/configstruct"
"github.com/ncw/rclone/fs/fserrors"
"github.com/ncw/rclone/fs/fshttp" "github.com/ncw/rclone/fs/fshttp"
"github.com/ncw/rclone/fs/hash" "github.com/ncw/rclone/fs/hash"
"github.com/ncw/rclone/fs/walk" "github.com/ncw/rclone/fs/walk"
"github.com/ncw/rclone/lib/pacer"
"github.com/ncw/rclone/lib/rest" "github.com/ncw/rclone/lib/rest"
"github.com/ncw/swift" "github.com/ncw/swift"
"github.com/pkg/errors" "github.com/pkg/errors"
@ -570,6 +572,7 @@ const (
maxRetries = 10 // number of retries to make of operations maxRetries = 10 // number of retries to make of operations
maxSizeForCopy = 5 * 1024 * 1024 * 1024 // The maximum size of object we can COPY maxSizeForCopy = 5 * 1024 * 1024 * 1024 // The maximum size of object we can COPY
maxFileSize = 5 * 1024 * 1024 * 1024 * 1024 // largest possible upload file size maxFileSize = 5 * 1024 * 1024 * 1024 * 1024 // largest possible upload file size
minSleep = 10 * time.Millisecond // In case of error, start at 10ms sleep.
) )
// Options defines the configuration for this backend // Options defines the configuration for this backend
@ -604,6 +607,7 @@ type Fs struct {
bucketOKMu sync.Mutex // mutex to protect bucket OK bucketOKMu sync.Mutex // mutex to protect bucket OK
bucketOK bool // true if we have created the bucket bucketOK bool // true if we have created the bucket
bucketDeleted bool // true if we have deleted the bucket bucketDeleted bool // true if we have deleted the bucket
pacer *pacer.Pacer // To pace the API calls
} }
// Object describes a s3 object // Object describes a s3 object
@ -649,6 +653,37 @@ func (f *Fs) Features() *fs.Features {
return f.features return f.features
} }
// retryErrorCodes lists HTTP status codes which indicate a transient
// failure, so the operation should be retried.
// See: https://docs.aws.amazon.com/AmazonS3/latest/API/ErrorResponses.html
var retryErrorCodes = []int{
	409, // Conflict - various states that could be resolved on a retry
	503, // Service Unavailable/Slow Down - "Reduce your request rate"
}

// shouldRetry reports whether err warrants retrying the operation,
// returning the error unchanged alongside the verdict.
//
// S3 is pretty resilient, and the built in retry handling is probably
// sufficient as it should notice closed connections and timeouts which
// are the most likely sort of failure modes.
func shouldRetry(err error) (bool, error) {
	// If this is an awserr object, try and extract more useful
	// information to determine if we should retry.
	if awsError, ok := err.(awserr.Error); ok {
		// Simple case: check the original embedded error in case
		// it's generically retriable.
		if fserrors.ShouldRetry(awsError.OrigErr()) {
			return true, err
		}
		// Failing that, a RequestFailure carries an HTTP status
		// code we can check against the retryable list.
		if reqErr, ok := err.(awserr.RequestFailure); ok {
			status := reqErr.StatusCode()
			for _, code := range retryErrorCodes {
				if status == code {
					return true, err
				}
			}
		}
	}
	// Ok, not an awserr - fall back to the generic failure conditions.
	return fserrors.ShouldRetry(err), err
}
// Pattern to match a s3 path // Pattern to match a s3 path
var matcher = regexp.MustCompile(`^/*([^/]*)(.*)$`) var matcher = regexp.MustCompile(`^/*([^/]*)(.*)$`)
@ -774,6 +809,7 @@ func NewFs(name, root string, m configmap.Mapper) (fs.Fs, error) {
c: c, c: c,
bucket: bucket, bucket: bucket,
ses: ses, ses: ses,
pacer: pacer.New().SetMinSleep(minSleep).SetPacer(pacer.S3Pacer),
} }
f.features = (&fs.Features{ f.features = (&fs.Features{
ReadMimeType: true, ReadMimeType: true,
@ -787,7 +823,10 @@ func NewFs(name, root string, m configmap.Mapper) (fs.Fs, error) {
Bucket: &f.bucket, Bucket: &f.bucket,
Key: &directory, Key: &directory,
} }
err = f.pacer.Call(func() (bool, error) {
_, err = f.c.HeadObject(&req) _, err = f.c.HeadObject(&req)
return shouldRetry(err)
})
if err == nil { if err == nil {
f.root = path.Dir(directory) f.root = path.Dir(directory)
if f.root == "." { if f.root == "." {
@ -864,7 +903,12 @@ func (f *Fs) list(dir string, recurse bool, fn listFn) error {
MaxKeys: &maxKeys, MaxKeys: &maxKeys,
Marker: marker, Marker: marker,
} }
resp, err := f.c.ListObjects(&req) var resp *s3.ListObjectsOutput
var err error
err = f.pacer.Call(func() (bool, error) {
resp, err = f.c.ListObjects(&req)
return shouldRetry(err)
})
if err != nil { if err != nil {
if awsErr, ok := err.(awserr.RequestFailure); ok { if awsErr, ok := err.(awserr.RequestFailure); ok {
if awsErr.StatusCode() == http.StatusNotFound { if awsErr.StatusCode() == http.StatusNotFound {
@ -989,7 +1033,11 @@ func (f *Fs) listBuckets(dir string) (entries fs.DirEntries, err error) {
return nil, fs.ErrorListBucketRequired return nil, fs.ErrorListBucketRequired
} }
req := s3.ListBucketsInput{} req := s3.ListBucketsInput{}
resp, err := f.c.ListBuckets(&req) var resp *s3.ListBucketsOutput
err = f.pacer.Call(func() (bool, error) {
resp, err = f.c.ListBuckets(&req)
return shouldRetry(err)
})
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -1074,7 +1122,10 @@ func (f *Fs) dirExists() (bool, error) {
req := s3.HeadBucketInput{ req := s3.HeadBucketInput{
Bucket: &f.bucket, Bucket: &f.bucket,
} }
err := f.pacer.Call(func() (bool, error) {
_, err := f.c.HeadBucket(&req) _, err := f.c.HeadBucket(&req)
return shouldRetry(err)
})
if err == nil { if err == nil {
return true, nil return true, nil
} }
@ -1111,7 +1162,10 @@ func (f *Fs) Mkdir(dir string) error {
LocationConstraint: &f.opt.LocationConstraint, LocationConstraint: &f.opt.LocationConstraint,
} }
} }
err := f.pacer.Call(func() (bool, error) {
_, err := f.c.CreateBucket(&req) _, err := f.c.CreateBucket(&req)
return shouldRetry(err)
})
if err, ok := err.(awserr.Error); ok { if err, ok := err.(awserr.Error); ok {
if err.Code() == "BucketAlreadyOwnedByYou" { if err.Code() == "BucketAlreadyOwnedByYou" {
err = nil err = nil
@ -1136,7 +1190,10 @@ func (f *Fs) Rmdir(dir string) error {
req := s3.DeleteBucketInput{ req := s3.DeleteBucketInput{
Bucket: &f.bucket, Bucket: &f.bucket,
} }
err := f.pacer.Call(func() (bool, error) {
_, err := f.c.DeleteBucket(&req) _, err := f.c.DeleteBucket(&req)
return shouldRetry(err)
})
if err == nil { if err == nil {
f.bucketOK = false f.bucketOK = false
f.bucketDeleted = true f.bucketDeleted = true
@ -1183,7 +1240,10 @@ func (f *Fs) Copy(src fs.Object, remote string) (fs.Object, error) {
CopySource: &source, CopySource: &source,
MetadataDirective: aws.String(s3.MetadataDirectiveCopy), MetadataDirective: aws.String(s3.MetadataDirectiveCopy),
} }
err = f.pacer.Call(func() (bool, error) {
_, err = f.c.CopyObject(&req) _, err = f.c.CopyObject(&req)
return shouldRetry(err)
})
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -1260,7 +1320,12 @@ func (o *Object) readMetaData() (err error) {
Bucket: &o.fs.bucket, Bucket: &o.fs.bucket,
Key: &key, Key: &key,
} }
resp, err := o.fs.c.HeadObject(&req) var resp *s3.HeadObjectOutput
err = o.fs.pacer.Call(func() (bool, error) {
var err error
resp, err = o.fs.c.HeadObject(&req)
return shouldRetry(err)
})
if err != nil { if err != nil {
if awsErr, ok := err.(awserr.RequestFailure); ok { if awsErr, ok := err.(awserr.RequestFailure); ok {
if awsErr.StatusCode() == http.StatusNotFound { if awsErr.StatusCode() == http.StatusNotFound {
@ -1344,7 +1409,10 @@ func (o *Object) SetModTime(modTime time.Time) error {
Metadata: o.meta, Metadata: o.meta,
MetadataDirective: &directive, MetadataDirective: &directive,
} }
_, err = o.fs.c.CopyObject(&req) err = o.fs.pacer.Call(func() (bool, error) {
_, err := o.fs.c.CopyObject(&req)
return shouldRetry(err)
})
return err return err
} }
@ -1371,7 +1439,12 @@ func (o *Object) Open(options ...fs.OpenOption) (in io.ReadCloser, err error) {
} }
} }
} }
resp, err := o.fs.c.GetObject(&req) var resp *s3.GetObjectOutput
err = o.fs.pacer.Call(func() (bool, error) {
var err error
resp, err = o.fs.c.GetObject(&req)
return shouldRetry(err)
})
if err, ok := err.(awserr.RequestFailure); ok { if err, ok := err.(awserr.RequestFailure); ok {
if err.Code() == "InvalidObjectState" { if err.Code() == "InvalidObjectState" {
return nil, errors.Errorf("Object in GLACIER, restore first: %v", key) return nil, errors.Errorf("Object in GLACIER, restore first: %v", key)
@ -1450,7 +1523,10 @@ func (o *Object) Update(in io.Reader, src fs.ObjectInfo, options ...fs.OpenOptio
if o.fs.opt.StorageClass != "" { if o.fs.opt.StorageClass != "" {
req.StorageClass = &o.fs.opt.StorageClass req.StorageClass = &o.fs.opt.StorageClass
} }
err = o.fs.pacer.CallNoRetry(func() (bool, error) {
_, err = uploader.Upload(&req) _, err = uploader.Upload(&req)
return shouldRetry(err)
})
if err != nil { if err != nil {
return err return err
} }
@ -1468,7 +1544,10 @@ func (o *Object) Remove() error {
Bucket: &o.fs.bucket, Bucket: &o.fs.bucket,
Key: &key, Key: &key,
} }
err := o.fs.pacer.Call(func() (bool, error) {
_, err := o.fs.c.DeleteObject(&req) _, err := o.fs.c.DeleteObject(&req)
return shouldRetry(err)
})
return err return err
} }

View file

@ -59,6 +59,12 @@ const (
// //
// See https://developers.google.com/drive/v2/web/handle-errors#exponential-backoff // See https://developers.google.com/drive/v2/web/handle-errors#exponential-backoff
GoogleDrivePacer GoogleDrivePacer
// S3Pacer is a specialised pacer for S3
//
// It is basically the defaultPacer, but allows the sleep time to go to 0
// when things are going well.
S3Pacer
) )
// Paced is a function which is called by the Call and CallNoRetry // Paced is a function which is called by the Call and CallNoRetry
@ -185,6 +191,8 @@ func (p *Pacer) SetPacer(t Type) *Pacer {
p.calculatePace = p.acdPacer p.calculatePace = p.acdPacer
case GoogleDrivePacer: case GoogleDrivePacer:
p.calculatePace = p.drivePacer p.calculatePace = p.drivePacer
case S3Pacer:
p.calculatePace = p.s3Pacer
default: default:
p.calculatePace = p.defaultPacer p.calculatePace = p.defaultPacer
} }
@ -309,6 +317,46 @@ func (p *Pacer) drivePacer(retry bool) {
} }
} }
// s3Pacer implements a pacer compatible with our expectations of S3, where it
// tries to not delay at all between successful calls, but backs off in the
// default fashion in response to any errors.
// The assumption is that errors should be exceedingly rare (S3 seems to have
// largely solved the sort of scalability questions rclone is likely to run
// into), and in the happy case it can handle calls with no delays between
// them.
//
// Basically defaultPacer, but with some handling of sleepTime going to/from
// 0ms. Ignores minSleep entirely.
//
// Call with p.mu held
func (p *Pacer) s3Pacer(retry bool) {
	previous := p.sleepTime
	if retry {
		// Back off: restart from minSleep if we were running flat out,
		// otherwise grow exponentially, capped at maxSleep.
		switch {
		case p.attackConstant == 0:
			p.sleepTime = p.maxSleep
		case p.sleepTime == 0:
			p.sleepTime = p.minSleep
		default:
			p.sleepTime = (p.sleepTime << p.attackConstant) / ((1 << p.attackConstant) - 1)
		}
		if p.sleepTime > p.maxSleep {
			p.sleepTime = p.maxSleep
		}
		if p.sleepTime != previous {
			fs.Debugf("pacer", "Rate limited, increasing sleep to %v", p.sleepTime)
		}
		return
	}
	// Success: decay exponentially, and snap to zero once we drop below
	// minSleep so the happy path runs with no delay at all.
	p.sleepTime = (p.sleepTime<<p.decayConstant - p.sleepTime) >> p.decayConstant
	if p.sleepTime < p.minSleep {
		p.sleepTime = 0
	}
	if p.sleepTime != previous {
		fs.Debugf("pacer", "Reducing sleep to %v", p.sleepTime)
	}
}
// endCall implements the pacing algorithm // endCall implements the pacing algorithm
// //
// This should calculate a new sleepTime. It takes a boolean as to // This should calculate a new sleepTime. It takes a boolean as to

View file

@ -340,6 +340,32 @@ func TestGoogleDrivePacer(t *testing.T) {
} }
} }
// TestS3Pacer checks the sleep-time transitions of the S3 pacer for both
// the retry (back off) and success (decay) cases.
func TestS3Pacer(t *testing.T) {
	p := New().SetMinSleep(10 * time.Millisecond).SetPacer(S3Pacer).SetMaxSleep(time.Second).SetDecayConstant(2)
	cases := []struct {
		in    time.Duration
		retry bool
		want  time.Duration
	}{
		{0, true, 10 * time.Millisecond},                      // Things were going ok, we failed once, back off to minSleep
		{10 * time.Millisecond, true, 20 * time.Millisecond},  // Another fail, double the backoff
		{10 * time.Millisecond, false, 0},                     // Things start going ok when we're at minSleep; should result in no sleep
		{12 * time.Millisecond, false, 0},                     // *near* minSleep and going ok, decay would take below minSleep, should go to 0
		{0, false, 0},                                         // Things have been going ok; not retrying should keep sleep at 0
		{time.Second, true, time.Second},                      // Check maxSleep is enforced
		{(3 * time.Second) / 4, true, time.Second},            // Check attack heading to maxSleep doesn't exceed maxSleep
		{time.Second, false, 750 * time.Millisecond},          // Check decay from maxSleep
		{48 * time.Millisecond, false, 36 * time.Millisecond}, // Check simple decay above minSleep
	}
	for _, tc := range cases {
		p.sleepTime = tc.in
		p.s3Pacer(tc.retry)
		if got := p.sleepTime; got != tc.want {
			t.Errorf("bad sleep for %v with retry %v: want %v got %v", tc.in, tc.retry, tc.want, got)
		}
	}
}
func TestEndCall(t *testing.T) { func TestEndCall(t *testing.T) {
p := New().SetMaxConnections(5) p := New().SetMaxConnections(5)
emptyTokens(p) emptyTokens(p)