From a60ee9b764fe25e1f923e4faeab7f1e3bfb9f057 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Mon, 29 Apr 2024 21:12:21 +0200 Subject: [PATCH] retry: limit retries based on elapsed time not count Depending on how long an operation takes to fail, the total retry duration can currently vary between 1.5 and 15 minutes. In particular for temporarily interrupted network connections, the former timeout is too short. Thus always use a limit of 15 minutes. --- cmd/restic/global.go | 2 +- internal/backend/retry/backend_retry.go | 24 ++++++++++++-------- internal/backend/retry/backend_retry_test.go | 14 +++++++----- 3 files changed, 24 insertions(+), 16 deletions(-) diff --git a/cmd/restic/global.go b/cmd/restic/global.go index d0facc674..c954a4270 100644 --- a/cmd/restic/global.go +++ b/cmd/restic/global.go @@ -425,7 +425,7 @@ func OpenRepository(ctx context.Context, opts GlobalOptions) (*repository.Reposi success := func(msg string, retries int) { Warnf("%v operation successful after %d retries\n", msg, retries) } - be = retry.New(be, 10, report, success) + be = retry.New(be, 15*time.Minute, report, success) // wrap backend if a test specified a hook if opts.backendTestHook != nil { diff --git a/internal/backend/retry/backend_retry.go b/internal/backend/retry/backend_retry.go index c6815413c..fb2e6cf98 100644 --- a/internal/backend/retry/backend_retry.go +++ b/internal/backend/retry/backend_retry.go @@ -18,9 +18,9 @@ import ( // backoff. type Backend struct { backend.Backend - MaxTries int - Report func(string, error, time.Duration) - Success func(string, int) + MaxElapsedTime time.Duration + Report func(string, error, time.Duration) + Success func(string, int) failedLoads sync.Map } @@ -32,12 +32,12 @@ var _ backend.Backend = &Backend{} // backoff. report is called with a description and the error, if one occurred. // success is called with the number of retries before a successful operation // (it is not called if it succeeded on the first try) -func New(be backend.Backend, maxTries int, report func(string, error, time.Duration), success func(string, int)) *Backend { +func New(be backend.Backend, maxElapsedTime time.Duration, report func(string, error, time.Duration), success func(string, int)) *Backend { return &Backend{ - Backend: be, - MaxTries: maxTries, - Report: report, - Success: success, + Backend: be, + MaxElapsedTime: maxElapsedTime, + Report: report, + Success: success, } } @@ -82,9 +82,15 @@ func (be *Backend) retry(ctx context.Context, msg string, f func() error) error } bo := backoff.NewExponentialBackOff() + bo.MaxElapsedTime = be.MaxElapsedTime + if fastRetries { // speed up integration tests bo.InitialInterval = 1 * time.Millisecond + maxElapsedTime := 200 * time.Millisecond + if bo.MaxElapsedTime > maxElapsedTime { + bo.MaxElapsedTime = maxElapsedTime + } } err := retryNotifyErrorWithSuccess( @@ -97,7 +103,7 @@ func (be *Backend) retry(ctx context.Context, msg string, f func() error) error } return err }, - backoff.WithContext(backoff.WithMaxRetries(bo, uint64(be.MaxTries)), ctx), + backoff.WithContext(bo, ctx), func(err error, d time.Duration) { if be.Report != nil { be.Report(msg, err, d) diff --git a/internal/backend/retry/backend_retry_test.go b/internal/backend/retry/backend_retry_test.go index de86e6cf6..ce0b99637 100644 --- a/internal/backend/retry/backend_retry_test.go +++ b/internal/backend/retry/backend_retry_test.go @@ -193,8 +193,9 @@ func TestBackendListRetryErrorBackend(t *testing.T) { } TestFastRetries(t) - const maxRetries = 2 - retryBackend := New(be, maxRetries, nil, nil) + const maxElapsedTime = 10 * time.Millisecond + now := time.Now() + retryBackend := New(be, maxElapsedTime, nil, nil) var listed []string err := retryBackend.List(context.TODO(), backend.PackFile, func(fi backend.FileInfo) error { @@ -207,8 +208,9 @@ func TestBackendListRetryErrorBackend(t *testing.T) { t.Fatalf("wrong error returned, want %v, got %v", ErrBackendTest, err) } - if retries != maxRetries+1 { - t.Fatalf("List was called %d times, wanted %v", retries, maxRetries+1) + duration := time.Since(now) + if duration > 100*time.Millisecond { + t.Fatalf("list retries took %v, expected at most 10ms", duration) } test.Equals(t, names[:2], listed) @@ -327,7 +329,7 @@ func TestBackendLoadCircuitBreaker(t *testing.T) { // trip the circuit breaker for file "other" err := retryBackend.Load(context.TODO(), backend.Handle{Name: "other"}, 0, 0, nilRd) test.Equals(t, otherError, err, "unexpected error") - test.Equals(t, 3, attempt) + test.Equals(t, 2, attempt) attempt = 0 err = retryBackend.Load(context.TODO(), backend.Handle{Name: "other"}, 0, 0, nilRd) @@ -407,7 +409,7 @@ func TestBackendRetryPermanent(t *testing.T) { return errors.New("something") }) test.Assert(t, !be.IsPermanentErrorFn(err), "error unexpectedly considered permanent %v", err) - test.Equals(t, 3, attempt) + test.Equals(t, 2, attempt) }