forked from TrueCloudLab/restic
retry: ensure that there's always at least one retry
Previously, if an operation failed after 15 minutes, then it would never be retried. This means that large backend requests are more unreliable than smaller ones.
This commit is contained in:
parent
a60ee9b764
commit
512cd6ef07
2 changed files with 65 additions and 1 deletions
|
@ -68,6 +68,30 @@ func retryNotifyErrorWithSuccess(operation backoff.Operation, b backoff.BackOff,
|
|||
return err
|
||||
}
|
||||
|
||||
func withRetryAtLeastOnce(delegate *backoff.ExponentialBackOff) *retryAtLeastOnce {
|
||||
return &retryAtLeastOnce{delegate: delegate}
|
||||
}
|
||||
|
||||
type retryAtLeastOnce struct {
|
||||
delegate *backoff.ExponentialBackOff
|
||||
numTries uint64
|
||||
}
|
||||
|
||||
func (b *retryAtLeastOnce) NextBackOff() time.Duration {
|
||||
delay := b.delegate.NextBackOff()
|
||||
|
||||
b.numTries++
|
||||
if b.numTries == 1 && b.delegate.Stop == delay {
|
||||
return b.delegate.InitialInterval
|
||||
}
|
||||
return delay
|
||||
}
|
||||
|
||||
func (b *retryAtLeastOnce) Reset() {
|
||||
b.numTries = 0
|
||||
b.delegate.Reset()
|
||||
}
|
||||
|
||||
var fastRetries = false
|
||||
|
||||
func (be *Backend) retry(ctx context.Context, msg string, f func() error) error {
|
||||
|
@ -103,7 +127,7 @@ func (be *Backend) retry(ctx context.Context, msg string, f func() error) error
|
|||
}
|
||||
return err
|
||||
},
|
||||
backoff.WithContext(bo, ctx),
|
||||
backoff.WithContext(withRetryAtLeastOnce(bo), ctx),
|
||||
func(err error, d time.Duration) {
|
||||
if be.Report != nil {
|
||||
be.Report(msg, err, d)
|
||||
|
|
|
@ -520,3 +520,43 @@ func TestNotifyWithSuccessFinalError(t *testing.T) {
|
|||
test.Equals(t, 6, notifyCalled, "notify should have been called 6 times")
|
||||
test.Equals(t, 0, successCalled, "success should not have been called")
|
||||
}
|
||||
|
||||
type testClock struct {
|
||||
Time time.Time
|
||||
}
|
||||
|
||||
func (c *testClock) Now() time.Time {
|
||||
return c.Time
|
||||
}
|
||||
|
||||
func TestRetryAtLeastOnce(t *testing.T) {
|
||||
expBackOff := backoff.NewExponentialBackOff()
|
||||
expBackOff.InitialInterval = 500 * time.Millisecond
|
||||
expBackOff.RandomizationFactor = 0
|
||||
expBackOff.MaxElapsedTime = 5 * time.Second
|
||||
expBackOff.Multiplier = 2 // guarantee numerical stability
|
||||
clock := &testClock{Time: time.Now()}
|
||||
expBackOff.Clock = clock
|
||||
expBackOff.Reset()
|
||||
|
||||
retry := withRetryAtLeastOnce(expBackOff)
|
||||
|
||||
// expire backoff
|
||||
clock.Time = clock.Time.Add(10 * time.Second)
|
||||
delay := retry.NextBackOff()
|
||||
test.Equals(t, expBackOff.InitialInterval, delay, "must retry at least once")
|
||||
|
||||
delay = retry.NextBackOff()
|
||||
test.Equals(t, expBackOff.Stop, delay, "must not retry more than once")
|
||||
|
||||
// test reset behavior
|
||||
retry.Reset()
|
||||
test.Equals(t, uint64(0), retry.numTries, "numTries should be reset to 0")
|
||||
|
||||
// Verify that after reset, NextBackOff returns the initial interval again
|
||||
delay = retry.NextBackOff()
|
||||
test.Equals(t, expBackOff.InitialInterval, delay, "retries must work after reset")
|
||||
|
||||
delay = retry.NextBackOff()
|
||||
test.Equals(t, expBackOff.InitialInterval*time.Duration(expBackOff.Multiplier), delay, "retries must work after reset")
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue