backend/retry: don't trip circuit breaker if context is canceled

When the context used for a load operation is canceled, then the result
is always an error independent of whether the file could be retrieved
from the backend. Do not false positively trip the circuit breaker in
this case.

The old behavior was problematic when trying to lock a repository. When
`Lock.checkForOtherLocks` listed multiple lock files in parallel and one
of them fails to load, then all other loads were canceled. This
cancelation was remembered by the circuit breaker, such that locking
retries would fail.
This commit is contained in:
Michael Eischer 2024-08-22 23:16:12 +02:00
parent a99b824508
commit 8206cd19c8
3 changed files with 37 additions and 2 deletions

View file

@ -0,0 +1,10 @@
Bugfix: Fix rare failures to retry locking a repository
Restic 0.17.0 could in rare cases fail to retry locking a repository if
one of the lock files failed to load. The lock operation failed with error
`unable to create lock in backend: circuit breaker open for file <lock/1234567890>`
The error handling has been fixed to correctly retry locking the repository.
https://github.com/restic/restic/issues/5005
https://github.com/restic/restic/pull/5011

View file

@ -209,9 +209,10 @@ func (be *Backend) Load(ctx context.Context, h backend.Handle, length int, offse
return be.Backend.Load(ctx, h, length, offset, consumer)
})
if feature.Flag.Enabled(feature.BackendErrorRedesign) && err != nil && !be.IsPermanentError(err) {
if feature.Flag.Enabled(feature.BackendErrorRedesign) && err != nil && ctx.Err() == nil && !be.IsPermanentError(err) {
// We've exhausted the retries, the file is likely inaccessible. By excluding permanent
// errors, not found or truncated files are not recorded.
// errors, not found or truncated files are not recorded. Also ignore errors if the context
// was canceled.
be.failedLoads.LoadOrStore(key, time.Now())
}

View file

@ -357,6 +357,30 @@ func TestBackendLoadCircuitBreaker(t *testing.T) {
test.Equals(t, notFound, err, "expected circuit breaker to reset, got %v")
}
func TestBackendLoadCircuitBreakerCancel(t *testing.T) {
cctx, cancel := context.WithCancel(context.Background())
be := mock.NewBackend()
be.OpenReaderFn = func(ctx context.Context, h backend.Handle, length int, offset int64) (io.ReadCloser, error) {
cancel()
return nil, errors.New("something")
}
nilRd := func(rd io.Reader) (err error) {
return nil
}
TestFastRetries(t)
retryBackend := New(be, 2, nil, nil)
// canceling the context should not trip the circuit breaker
err := retryBackend.Load(cctx, backend.Handle{Name: "other"}, 0, 0, nilRd)
test.Equals(t, context.Canceled, err, "unexpected error")
// reset context and check that the cirucit breaker does not return an error
cctx, cancel = context.WithCancel(context.Background())
defer cancel()
err = retryBackend.Load(cctx, backend.Handle{Name: "other"}, 0, 0, nilRd)
test.Equals(t, context.Canceled, err, "unexpected error")
}
func TestBackendStatNotExists(t *testing.T) {
// stat should not retry if the error matches IsNotExist
notFound := errors.New("not found")