retry: ensure that there's always at least one retry

Previously, if an operation failed only after 15 minutes had passed, it
was never retried. This made large backend requests less reliable than
smaller ones.
This commit is contained in:
Michael Eischer 2024-04-29 21:16:24 +02:00
parent a60ee9b764
commit 512cd6ef07
2 changed files with 65 additions and 1 deletions

View file

@ -68,6 +68,30 @@ func retryNotifyErrorWithSuccess(operation backoff.Operation, b backoff.BackOff,
return err
}
// withRetryAtLeastOnce wraps delegate in a retryAtLeastOnce whose try
// counter starts at zero. The delegate is stored as-is, not copied.
func withRetryAtLeastOnce(delegate *backoff.ExponentialBackOff) *retryAtLeastOnce {
	wrapper := new(retryAtLeastOnce)
	wrapper.delegate = delegate
	return wrapper
}
// retryAtLeastOnce decorates an exponential backoff so that the first
// request for a delay after construction or Reset never yields the
// delegate's stop value.
type retryAtLeastOnce struct {
	// delegate computes the actual delays.
	delegate *backoff.ExponentialBackOff
	// numTries counts NextBackOff calls since construction or the last Reset.
	numTries uint64
}

// NextBackOff asks the delegate for the next delay and returns it, with one
// exception: if the very first call would report the delegate's Stop value,
// the delegate's InitialInterval is returned instead so that one retry
// still happens.
func (r *retryAtLeastOnce) NextBackOff() time.Duration {
	next := r.delegate.NextBackOff()
	r.numTries++

	// Only the first call may override the delegate's decision to stop.
	if r.numTries != 1 || next != r.delegate.Stop {
		return next
	}
	return r.delegate.InitialInterval
}

// Reset clears the try counter and resets the delegate, so the next
// NextBackOff call is again treated as the first one.
func (r *retryAtLeastOnce) Reset() {
	r.numTries = 0
	r.delegate.Reset()
}
// fastRetries is a package-level switch that starts out disabled.
// NOTE(review): its consumer is not visible in this hunk — presumably it
// shortens retry intervals for tests; confirm against retry().
var fastRetries bool
func (be *Backend) retry(ctx context.Context, msg string, f func() error) error {
@ -103,7 +127,7 @@ func (be *Backend) retry(ctx context.Context, msg string, f func() error) error
}
return err
},
backoff.WithContext(bo, ctx),
backoff.WithContext(withRetryAtLeastOnce(bo), ctx),
func(err error, d time.Duration) {
if be.Report != nil {
be.Report(msg, err, d)

View file

@ -520,3 +520,43 @@ func TestNotifyWithSuccessFinalError(t *testing.T) {
test.Equals(t, 6, notifyCalled, "notify should have been called 6 times")
test.Equals(t, 0, successCalled, "success should not have been called")
}
// testClock is a manually driven clock for tests: it reports whatever
// instant is currently stored in Time.
type testClock struct {
	// Time is the instant returned by Now; tests move it forward by
	// assigning a new value.
	Time time.Time
}

// Now returns the stored instant without consulting the system clock.
func (tc *testClock) Now() time.Time {
	return tc.Time
}
func TestRetryAtLeastOnce(t *testing.T) {
expBackOff := backoff.NewExponentialBackOff()
expBackOff.InitialInterval = 500 * time.Millisecond
expBackOff.RandomizationFactor = 0
expBackOff.MaxElapsedTime = 5 * time.Second
expBackOff.Multiplier = 2 // guarantee numerical stability
clock := &testClock{Time: time.Now()}
expBackOff.Clock = clock
expBackOff.Reset()
retry := withRetryAtLeastOnce(expBackOff)
// expire backoff
clock.Time = clock.Time.Add(10 * time.Second)
delay := retry.NextBackOff()
test.Equals(t, expBackOff.InitialInterval, delay, "must retry at least once")
delay = retry.NextBackOff()
test.Equals(t, expBackOff.Stop, delay, "must not retry more than once")
// test reset behavior
retry.Reset()
test.Equals(t, uint64(0), retry.numTries, "numTries should be reset to 0")
// Verify that after reset, NextBackOff returns the initial interval again
delay = retry.NextBackOff()
test.Equals(t, expBackOff.InitialInterval, delay, "retries must work after reset")
delay = retry.NextBackOff()
test.Equals(t, expBackOff.InitialInterval*time.Duration(expBackOff.Multiplier), delay, "retries must work after reset")
}