retry: limit retries based on elapsed time not count

Depending on how long an operation takes to fail, the total retry
duration can currently vary between 1.5 and 15 minutes. In particular
for temporarily interrupted network connections, the former timeout is
too short. Thus always use a limit of 15 minutes.
This commit is contained in:
Michael Eischer 2024-04-29 21:12:21 +02:00
parent a3633cad9e
commit a60ee9b764
3 changed files with 24 additions and 16 deletions

View file

@ -425,7 +425,7 @@ func OpenRepository(ctx context.Context, opts GlobalOptions) (*repository.Reposi
success := func(msg string, retries int) {
Warnf("%v operation successful after %d retries\n", msg, retries)
}
be = retry.New(be, 10, report, success)
be = retry.New(be, 15*time.Minute, report, success)
// wrap backend if a test specified a hook
if opts.backendTestHook != nil {

View file

@ -18,9 +18,9 @@ import (
// backoff.
type Backend struct {
backend.Backend
MaxTries int
Report func(string, error, time.Duration)
Success func(string, int)
MaxElapsedTime time.Duration
Report func(string, error, time.Duration)
Success func(string, int)
failedLoads sync.Map
}
@ -32,12 +32,12 @@ var _ backend.Backend = &Backend{}
// backoff. report is called with a description and the error, if one occurred.
// success is called with the number of retries before a successful operation
// (it is not called if it succeeded on the first try)
func New(be backend.Backend, maxTries int, report func(string, error, time.Duration), success func(string, int)) *Backend {
func New(be backend.Backend, maxElapsedTime time.Duration, report func(string, error, time.Duration), success func(string, int)) *Backend {
return &Backend{
Backend: be,
MaxTries: maxTries,
Report: report,
Success: success,
Backend: be,
MaxElapsedTime: maxElapsedTime,
Report: report,
Success: success,
}
}
@ -82,9 +82,15 @@ func (be *Backend) retry(ctx context.Context, msg string, f func() error) error
}
bo := backoff.NewExponentialBackOff()
bo.MaxElapsedTime = be.MaxElapsedTime
if fastRetries {
// speed up integration tests
bo.InitialInterval = 1 * time.Millisecond
maxElapsedTime := 200 * time.Millisecond
if bo.MaxElapsedTime > maxElapsedTime {
bo.MaxElapsedTime = maxElapsedTime
}
}
err := retryNotifyErrorWithSuccess(
@ -97,7 +103,7 @@ func (be *Backend) retry(ctx context.Context, msg string, f func() error) error
}
return err
},
backoff.WithContext(backoff.WithMaxRetries(bo, uint64(be.MaxTries)), ctx),
backoff.WithContext(bo, ctx),
func(err error, d time.Duration) {
if be.Report != nil {
be.Report(msg, err, d)

View file

@ -193,8 +193,9 @@ func TestBackendListRetryErrorBackend(t *testing.T) {
}
TestFastRetries(t)
const maxRetries = 2
retryBackend := New(be, maxRetries, nil, nil)
const maxElapsedTime = 10 * time.Millisecond
now := time.Now()
retryBackend := New(be, maxElapsedTime, nil, nil)
var listed []string
err := retryBackend.List(context.TODO(), backend.PackFile, func(fi backend.FileInfo) error {
@ -207,8 +208,9 @@ func TestBackendListRetryErrorBackend(t *testing.T) {
t.Fatalf("wrong error returned, want %v, got %v", ErrBackendTest, err)
}
if retries != maxRetries+1 {
t.Fatalf("List was called %d times, wanted %v", retries, maxRetries+1)
duration := time.Since(now)
if duration > 100*time.Millisecond {
t.Fatalf("list retries took %v, expected at most 10ms", duration)
}
test.Equals(t, names[:2], listed)
@ -327,7 +329,7 @@ func TestBackendLoadCircuitBreaker(t *testing.T) {
// trip the circuit breaker for file "other"
err := retryBackend.Load(context.TODO(), backend.Handle{Name: "other"}, 0, 0, nilRd)
test.Equals(t, otherError, err, "unexpected error")
test.Equals(t, 3, attempt)
test.Equals(t, 2, attempt)
attempt = 0
err = retryBackend.Load(context.TODO(), backend.Handle{Name: "other"}, 0, 0, nilRd)
@ -407,7 +409,7 @@ func TestBackendRetryPermanent(t *testing.T) {
return errors.New("something")
})
test.Assert(t, !be.IsPermanentErrorFn(err), "error unexpectedly considered permanent %v", err)
test.Equals(t, 3, attempt)
test.Equals(t, 2, attempt)
}