Merge pull request #5012 from MichaelEischer/fix-lock-retries

lock: introduce short delay between failed locking retries
This commit is contained in:
Michael Eischer 2024-08-26 18:10:30 +02:00 committed by GitHub
commit 1931beab8e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 29 additions and 1 deletions

View file

@ -5,6 +5,9 @@ one of the lock files failed to load. The lock operation failed with error
`unable to create lock in backend: circuit breaker open for file <lock/1234567890>`
The error handling has been fixed to correctly retry locking the repository.
In addition, restic now waits a few seconds between locking retries to
increase the chances of success.
https://github.com/restic/restic/issues/5005
https://github.com/restic/restic/pull/5011
https://github.com/restic/restic/pull/5012

View file

@ -103,10 +103,14 @@ func NewExclusiveLock(ctx context.Context, repo Unpacked) (*Lock, error) {
// Timing values used while acquiring a lock. They are variables rather than
// constants so that TestSetLockTimeout can shorten them.
var (
	waitBeforeLockCheck = 200 * time.Millisecond

	// initialWaitBetweenLockRetries is the pause before the first locking
	// retry; the pause is doubled for every further retry.
	initialWaitBetweenLockRetries = 5 * time.Second
)
// TestSetLockTimeout replaces the lock wait timings with d so that tests can
// run with shorter delays. It logs the new value through t and then assigns d
// to both waitBeforeLockCheck and initialWaitBetweenLockRetries.
func TestSetLockTimeout(t testing.TB, d time.Duration) {
	t.Logf("setting lock timeout to %v", d)
	// Both delays are always overridden together.
	waitBeforeLockCheck, initialWaitBetweenLockRetries = d, d
}
func newLock(ctx context.Context, repo Unpacked, excl bool) (*Lock, error) {
@ -170,8 +174,17 @@ func (l *Lock) checkForOtherLocks(ctx context.Context) error {
if l.lockID != nil {
checkedIDs.Insert(*l.lockID)
}
delay := initialWaitBetweenLockRetries
// retry locking a few times
for i := 0; i < 3; i++ {
for i := 0; i < 4; i++ {
if i != 0 {
// sleep between retries to give backend some time to settle
if err := cancelableDelay(ctx, delay); err != nil {
return err
}
delay *= 2
}
// Store updates in new IDSet to prevent data races
var m sync.Mutex
newCheckedIDs := NewIDSet(checkedIDs.List()...)
@ -213,6 +226,18 @@ func (l *Lock) checkForOtherLocks(ctx context.Context) error {
return err
}
// cancelableDelay blocks until delay has elapsed or ctx is done, whichever
// happens first. It returns ctx.Err() when the context ended the wait and nil
// when the full delay passed.
func cancelableDelay(ctx context.Context, delay time.Duration) error {
	pause := time.NewTimer(delay)
	// Stopping a timer that has already fired is a no-op, so the timer can
	// be released unconditionally on the way out.
	defer pause.Stop()

	select {
	case <-pause.C:
		return nil
	case <-ctx.Done():
		return ctx.Err()
	}
}
// createLock acquires the lock by creating a file in the repository.
func (l *Lock) createLock(ctx context.Context) (ID, error) {
id, err := SaveJSONUnpacked(ctx, l.repo, LockFile, l)