forked from TrueCloudLab/restic
Merge pull request #5012 from MichaelEischer/fix-lock-retries
lock: introduce short delay between failed locking retries
This commit is contained in:
commit
1931beab8e
2 changed files with 29 additions and 1 deletions
|
@ -5,6 +5,9 @@ one of the lock files failed to load. The lock operation failed with error
|
|||
`unable to create lock in backend: circuit breaker open for file <lock/1234567890>`
|
||||
|
||||
The error handling has been fixed to correctly retry locking the repository.
|
||||
In addition, restic now waits a few seconds between locking retries to increase the chances of success.
|
||||
|
||||
https://github.com/restic/restic/issues/5005
|
||||
https://github.com/restic/restic/pull/5011
|
||||
https://github.com/restic/restic/pull/5012
|
|
@ -103,10 +103,14 @@ func NewExclusiveLock(ctx context.Context, repo Unpacked) (*Lock, error) {
|
|||
|
||||
// Timing knobs used while acquiring a repository lock. They are variables
// rather than constants so that TestSetLockTimeout can override them.
var (
	// waitBeforeLockCheck defaults to 200ms. Its use site is outside this
	// excerpt; tests override it via TestSetLockTimeout.
	waitBeforeLockCheck = 200 * time.Millisecond

	// initialWaitBetweenLockRetries is the delay before the first locking
	// retry; the delay increases by factor 2 on each further retry.
	initialWaitBetweenLockRetries = 5 * time.Second
)

// TestSetLockTimeout is a test hook that overrides both lock timing knobs
// with d: waitBeforeLockCheck and initialWaitBetweenLockRetries. It logs the
// new value through t. The previous values are not restored, so the override
// stays in effect until the function is called again.
func TestSetLockTimeout(t testing.TB, d time.Duration) {
	t.Logf("setting lock timeout to %v", d)

	waitBeforeLockCheck = d
	initialWaitBetweenLockRetries = d
}
|
||||
|
||||
func newLock(ctx context.Context, repo Unpacked, excl bool) (*Lock, error) {
|
||||
|
@ -170,8 +174,17 @@ func (l *Lock) checkForOtherLocks(ctx context.Context) error {
|
|||
if l.lockID != nil {
|
||||
checkedIDs.Insert(*l.lockID)
|
||||
}
|
||||
delay := initialWaitBetweenLockRetries
|
||||
// retry locking a few times
|
||||
for i := 0; i < 3; i++ {
|
||||
for i := 0; i < 4; i++ {
|
||||
if i != 0 {
|
||||
// sleep between retries to give backend some time to settle
|
||||
if err := cancelableDelay(ctx, delay); err != nil {
|
||||
return err
|
||||
}
|
||||
delay *= 2
|
||||
}
|
||||
|
||||
// Store updates in new IDSet to prevent data races
|
||||
var m sync.Mutex
|
||||
newCheckedIDs := NewIDSet(checkedIDs.List()...)
|
||||
|
@ -213,6 +226,18 @@ func (l *Lock) checkForOtherLocks(ctx context.Context) error {
|
|||
return err
|
||||
}
|
||||
|
||||
// cancelableDelay blocks until delay has elapsed or ctx is done, whichever
// happens first. It returns nil when the delay elapsed and ctx.Err() when the
// context ended first.
func cancelableDelay(ctx context.Context, delay time.Duration) error {
	// Report an already-finished context deterministically: with a zero
	// delay both select cases below can be ready at the same time, and
	// select would then pick one of them at random.
	if err := ctx.Err(); err != nil {
		return err
	}

	timer := time.NewTimer(delay)
	// Stopping a timer that already fired is a no-op, so a single deferred
	// Stop covers both exits.
	defer timer.Stop()

	select {
	case <-ctx.Done():
		return ctx.Err()
	case <-timer.C:
		return nil
	}
}
|
||||
|
||||
// createLock acquires the lock by creating a file in the repository.
|
||||
func (l *Lock) createLock(ctx context.Context) (ID, error) {
|
||||
id, err := SaveJSONUnpacked(ctx, l.repo, LockFile, l)
|
||||
|
|
Loading…
Reference in a new issue