forked from TrueCloudLab/restic
Merge pull request #5012 from MichaelEischer/fix-lock-retries
lock: introduce short delay between failed locking retries
This commit is contained in:
commit
1931beab8e
2 changed files with 29 additions and 1 deletions
|
@ -5,6 +5,9 @@ one of the lock files failed to load. The lock operation failed with error
|
||||||
`unable to create lock in backend: circuit breaker open for file <lock/1234567890>`
|
`unable to create lock in backend: circuit breaker open for file <lock/1234567890>`
|
||||||
|
|
||||||
The error handling has been fixed to correctly retry locking the repository.
|
The error handling has been fixed to correctly retry locking the repository.
|
||||||
|
In addition, restic now waits a few seconds between locking retries to
|
||||||
|
increase chances of success.
|
||||||
|
|
||||||
https://github.com/restic/restic/issues/5005
|
https://github.com/restic/restic/issues/5005
|
||||||
https://github.com/restic/restic/pull/5011
|
https://github.com/restic/restic/pull/5011
|
||||||
|
https://github.com/restic/restic/pull/5012
|
|
@ -103,10 +103,14 @@ func NewExclusiveLock(ctx context.Context, repo Unpacked) (*Lock, error) {
|
||||||
|
|
||||||
var waitBeforeLockCheck = 200 * time.Millisecond
|
var waitBeforeLockCheck = 200 * time.Millisecond
|
||||||
|
|
||||||
|
// delay increases by factor 2 on each retry
|
||||||
|
var initialWaitBetweenLockRetries = 5 * time.Second
|
||||||
|
|
||||||
// TestSetLockTimeout can be used to reduce the lock wait timeout for tests.
|
// TestSetLockTimeout can be used to reduce the lock wait timeout for tests.
|
||||||
func TestSetLockTimeout(t testing.TB, d time.Duration) {
|
func TestSetLockTimeout(t testing.TB, d time.Duration) {
|
||||||
t.Logf("setting lock timeout to %v", d)
|
t.Logf("setting lock timeout to %v", d)
|
||||||
waitBeforeLockCheck = d
|
waitBeforeLockCheck = d
|
||||||
|
initialWaitBetweenLockRetries = d
|
||||||
}
|
}
|
||||||
|
|
||||||
func newLock(ctx context.Context, repo Unpacked, excl bool) (*Lock, error) {
|
func newLock(ctx context.Context, repo Unpacked, excl bool) (*Lock, error) {
|
||||||
|
@ -170,8 +174,17 @@ func (l *Lock) checkForOtherLocks(ctx context.Context) error {
|
||||||
if l.lockID != nil {
|
if l.lockID != nil {
|
||||||
checkedIDs.Insert(*l.lockID)
|
checkedIDs.Insert(*l.lockID)
|
||||||
}
|
}
|
||||||
|
delay := initialWaitBetweenLockRetries
|
||||||
// retry locking a few times
|
// retry locking a few times
|
||||||
for i := 0; i < 3; i++ {
|
for i := 0; i < 4; i++ {
|
||||||
|
if i != 0 {
|
||||||
|
// sleep between retries to give backend some time to settle
|
||||||
|
if err := cancelableDelay(ctx, delay); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
delay *= 2
|
||||||
|
}
|
||||||
|
|
||||||
// Store updates in new IDSet to prevent data races
|
// Store updates in new IDSet to prevent data races
|
||||||
var m sync.Mutex
|
var m sync.Mutex
|
||||||
newCheckedIDs := NewIDSet(checkedIDs.List()...)
|
newCheckedIDs := NewIDSet(checkedIDs.List()...)
|
||||||
|
@ -213,6 +226,18 @@ func (l *Lock) checkForOtherLocks(ctx context.Context) error {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// cancelableDelay blocks until delay has elapsed or ctx is done, whichever
// happens first. It returns nil once the delay has elapsed and ctx.Err() if
// the context was canceled or timed out before that.
func cancelableDelay(ctx context.Context, delay time.Duration) error {
	// An already finished context must always win. Without this check a zero
	// or negative delay can make both select cases ready at the same time, in
	// which case select picks one of them at random and the cancellation
	// could be missed.
	if err := ctx.Err(); err != nil {
		return err
	}

	timer := time.NewTimer(delay)
	// Stopping a timer that has already fired is harmless, so one deferred
	// Stop covers both exits.
	defer timer.Stop()

	select {
	case <-ctx.Done():
		return ctx.Err()
	case <-timer.C:
		return nil
	}
}
|
||||||
|
|
||||||
// createLock acquires the lock by creating a file in the repository.
|
// createLock acquires the lock by creating a file in the repository.
|
||||||
func (l *Lock) createLock(ctx context.Context) (ID, error) {
|
func (l *Lock) createLock(ctx context.Context) (ID, error) {
|
||||||
id, err := SaveJSONUnpacked(ctx, l.repo, LockFile, l)
|
id, err := SaveJSONUnpacked(ctx, l.repo, LockFile, l)
|
||||||
|
|
Loading…
Reference in a new issue