retry: limit retries based on elapsed time not count

Depending on how long an operation takes to fail, the total retry
duration can currently vary between 1.5 and 15 minutes. In particular
for temporarily interrupted network connections, the former timeout is
too short. Thus always use a limit of 15 minutes.
This commit is contained in:
Michael Eischer 2024-04-29 21:12:21 +02:00
parent a3633cad9e
commit a60ee9b764
3 changed files with 24 additions and 16 deletions

View file

@ -425,7 +425,7 @@ func OpenRepository(ctx context.Context, opts GlobalOptions) (*repository.Reposi
success := func(msg string, retries int) { success := func(msg string, retries int) {
Warnf("%v operation successful after %d retries\n", msg, retries) Warnf("%v operation successful after %d retries\n", msg, retries)
} }
be = retry.New(be, 10, report, success) be = retry.New(be, 15*time.Minute, report, success)
// wrap backend if a test specified a hook // wrap backend if a test specified a hook
if opts.backendTestHook != nil { if opts.backendTestHook != nil {

View file

@ -18,9 +18,9 @@ import (
// backoff. // backoff.
type Backend struct { type Backend struct {
backend.Backend backend.Backend
MaxTries int MaxElapsedTime time.Duration
Report func(string, error, time.Duration) Report func(string, error, time.Duration)
Success func(string, int) Success func(string, int)
failedLoads sync.Map failedLoads sync.Map
} }
@ -32,12 +32,12 @@ var _ backend.Backend = &Backend{}
// backoff. report is called with a description and the error, if one occurred. // backoff. report is called with a description and the error, if one occurred.
// success is called with the number of retries before a successful operation // success is called with the number of retries before a successful operation
// (it is not called if it succeeded on the first try) // (it is not called if it succeeded on the first try)
func New(be backend.Backend, maxTries int, report func(string, error, time.Duration), success func(string, int)) *Backend { func New(be backend.Backend, maxElapsedTime time.Duration, report func(string, error, time.Duration), success func(string, int)) *Backend {
return &Backend{ return &Backend{
Backend: be, Backend: be,
MaxTries: maxTries, MaxElapsedTime: maxElapsedTime,
Report: report, Report: report,
Success: success, Success: success,
} }
} }
@ -82,9 +82,15 @@ func (be *Backend) retry(ctx context.Context, msg string, f func() error) error
} }
bo := backoff.NewExponentialBackOff() bo := backoff.NewExponentialBackOff()
bo.MaxElapsedTime = be.MaxElapsedTime
if fastRetries { if fastRetries {
// speed up integration tests // speed up integration tests
bo.InitialInterval = 1 * time.Millisecond bo.InitialInterval = 1 * time.Millisecond
maxElapsedTime := 200 * time.Millisecond
if bo.MaxElapsedTime > maxElapsedTime {
bo.MaxElapsedTime = maxElapsedTime
}
} }
err := retryNotifyErrorWithSuccess( err := retryNotifyErrorWithSuccess(
@ -97,7 +103,7 @@ func (be *Backend) retry(ctx context.Context, msg string, f func() error) error
} }
return err return err
}, },
backoff.WithContext(backoff.WithMaxRetries(bo, uint64(be.MaxTries)), ctx), backoff.WithContext(bo, ctx),
func(err error, d time.Duration) { func(err error, d time.Duration) {
if be.Report != nil { if be.Report != nil {
be.Report(msg, err, d) be.Report(msg, err, d)

View file

@ -193,8 +193,9 @@ func TestBackendListRetryErrorBackend(t *testing.T) {
} }
TestFastRetries(t) TestFastRetries(t)
const maxRetries = 2 const maxElapsedTime = 10 * time.Millisecond
retryBackend := New(be, maxRetries, nil, nil) now := time.Now()
retryBackend := New(be, maxElapsedTime, nil, nil)
var listed []string var listed []string
err := retryBackend.List(context.TODO(), backend.PackFile, func(fi backend.FileInfo) error { err := retryBackend.List(context.TODO(), backend.PackFile, func(fi backend.FileInfo) error {
@ -207,8 +208,9 @@ func TestBackendListRetryErrorBackend(t *testing.T) {
t.Fatalf("wrong error returned, want %v, got %v", ErrBackendTest, err) t.Fatalf("wrong error returned, want %v, got %v", ErrBackendTest, err)
} }
if retries != maxRetries+1 { duration := time.Since(now)
t.Fatalf("List was called %d times, wanted %v", retries, maxRetries+1) if duration > 100*time.Millisecond {
t.Fatalf("list retries took %v, expected at most 10ms", duration)
} }
test.Equals(t, names[:2], listed) test.Equals(t, names[:2], listed)
@ -327,7 +329,7 @@ func TestBackendLoadCircuitBreaker(t *testing.T) {
// trip the circuit breaker for file "other" // trip the circuit breaker for file "other"
err := retryBackend.Load(context.TODO(), backend.Handle{Name: "other"}, 0, 0, nilRd) err := retryBackend.Load(context.TODO(), backend.Handle{Name: "other"}, 0, 0, nilRd)
test.Equals(t, otherError, err, "unexpected error") test.Equals(t, otherError, err, "unexpected error")
test.Equals(t, 3, attempt) test.Equals(t, 2, attempt)
attempt = 0 attempt = 0
err = retryBackend.Load(context.TODO(), backend.Handle{Name: "other"}, 0, 0, nilRd) err = retryBackend.Load(context.TODO(), backend.Handle{Name: "other"}, 0, 0, nilRd)
@ -407,7 +409,7 @@ func TestBackendRetryPermanent(t *testing.T) {
return errors.New("something") return errors.New("something")
}) })
test.Assert(t, !be.IsPermanentErrorFn(err), "error unexpectedly considered permanent %v", err) test.Assert(t, !be.IsPermanentErrorFn(err), "error unexpectedly considered permanent %v", err)
test.Equals(t, 3, attempt) test.Equals(t, 2, attempt)
} }