repository: wait max 1 minutes for lock removal if context is canceled

The toplevel context in restic only canceled if the user interrupts a restic operation. If the network connection has failed this can require waiting the full retry duration of 15 minutes which is a bad user experience for interactive usage. Thus limit the delay to one minute in this case.
2024-04-29 21:43:28 +02:00 · 2024-04-29 21:43:28 +02:00 · b1266867d2
commit b1266867d2
parent 98709a4372
3 changed files with 51 additions and 20 deletions
--- a/internal/repository/lock.go
+++ b/internal/repository/lock.go
@ -132,7 +132,7 @@ func (l *locker) refreshLocks(ctx context.Context, backend backend.Backend, lock

 		// remove the lock from the repo
 		debug.Log("unlocking repository with lock %v", lock)
-		if err := lock.Unlock(); err != nil {
+		if err := lock.Unlock(ctx); err != nil {
 			debug.Log("error while unlocking: %v", err)
 			logger("error while unlocking: %v", err)
 		}
--- a/internal/restic/lock.go
+++ b/internal/restic/lock.go
@ -17,6 +17,10 @@ import (
 	"github.com/restic/restic/internal/debug"
 )

+// UnlockCancelDelay bounds the duration how long lock cleanup operations will wait
+// if the passed in context was canceled.
+const UnlockCancelDelay time.Duration = 1 * time.Minute
+
 // Lock represents a process locking the repository for an operation.
 //
 // There are two types of locks: exclusive and non-exclusive. There may be many
@ -136,7 +140,7 @@ func newLock(ctx context.Context, repo Unpacked, excl bool) (*Lock, error) {
 	time.Sleep(waitBeforeLockCheck)

 	if err = lock.checkForOtherLocks(ctx); err != nil {
-		_ = lock.Unlock()
+		_ = lock.Unlock(ctx)
 		return nil, err
 	}

@ -220,12 +224,15 @@ func (l *Lock) createLock(ctx context.Context) (ID, error) {
 }

 // Unlock removes the lock from the repository.
-func (l *Lock) Unlock() error {
+func (l *Lock) Unlock(ctx context.Context) error {
 	if l == nil || l.lockID == nil {
 		return nil
 	}

-	return l.repo.RemoveUnpacked(context.TODO(), LockFile, *l.lockID)
+	ctx, cancel := delayedCancelContext(ctx, UnlockCancelDelay)
+	defer cancel()
+
+	return l.repo.RemoveUnpacked(ctx, LockFile, *l.lockID)
 }

 var StaleLockTimeout = 30 * time.Minute
@ -266,6 +273,23 @@ func (l *Lock) Stale() bool {
 	return false
 }

+func delayedCancelContext(parentCtx context.Context, delay time.Duration) (context.Context, context.CancelFunc) {
+	ctx, cancel := context.WithCancel(context.Background())
+
+	go func() {
+		select {
+		case <-parentCtx.Done():
+		case <-ctx.Done():
+			return
+		}
+
+		time.Sleep(delay)
+		cancel()
+	}()
+
+	return ctx, cancel
+}
+
 // Refresh refreshes the lock by creating a new file in the backend with a new
 // timestamp. Afterwards the old lock is removed.
 func (l *Lock) Refresh(ctx context.Context) error {
@ -285,7 +309,10 @@ func (l *Lock) Refresh(ctx context.Context) error {
 	oldLockID := l.lockID
 	l.lockID = &id

-	return l.repo.RemoveUnpacked(context.TODO(), LockFile, *oldLockID)
+	ctx, cancel := delayedCancelContext(ctx, UnlockCancelDelay)
+	defer cancel()
+
+	return l.repo.RemoveUnpacked(ctx, LockFile, *oldLockID)
 }

 // RefreshStaleLock is an extended variant of Refresh that can also refresh stale lock files.
@ -312,15 +339,19 @@ func (l *Lock) RefreshStaleLock(ctx context.Context) error {
 	time.Sleep(waitBeforeLockCheck)

 	exists, err = l.checkExistence(ctx)
+
+	ctx, cancel := delayedCancelContext(ctx, UnlockCancelDelay)
+	defer cancel()
+
 	if err != nil {
 		// cleanup replacement lock
-		_ = l.repo.RemoveUnpacked(context.TODO(), LockFile, id)
+		_ = l.repo.RemoveUnpacked(ctx, LockFile, id)
 		return err
 	}

 	if !exists {
 		// cleanup replacement lock
-		_ = l.repo.RemoveUnpacked(context.TODO(), LockFile, id)
+		_ = l.repo.RemoveUnpacked(ctx, LockFile, id)
 		return ErrRemovedLock
 	}

@ -331,7 +362,7 @@ func (l *Lock) RefreshStaleLock(ctx context.Context) error {
 	oldLockID := l.lockID
 	l.lockID = &id

-	return l.repo.RemoveUnpacked(context.TODO(), LockFile, *oldLockID)
+	return l.repo.RemoveUnpacked(ctx, LockFile, *oldLockID)
 }

 func (l *Lock) checkExistence(ctx context.Context) (bool, error) {
--- a/internal/restic/lock_test.go
+++ b/internal/restic/lock_test.go
@ -22,7 +22,7 @@ func TestLock(t *testing.T) {
 	lock, err := restic.NewLock(context.TODO(), repo)
 	rtest.OK(t, err)

-	rtest.OK(t, lock.Unlock())
+	rtest.OK(t, lock.Unlock(context.TODO()))
 }

 func TestDoubleUnlock(t *testing.T) {
@ -32,9 +32,9 @@ func TestDoubleUnlock(t *testing.T) {
 	lock, err := restic.NewLock(context.TODO(), repo)
 	rtest.OK(t, err)

-	rtest.OK(t, lock.Unlock())
+	rtest.OK(t, lock.Unlock(context.TODO()))

-	err = lock.Unlock()
+	err = lock.Unlock(context.TODO())
 	rtest.Assert(t, err != nil,
 		"double unlock didn't return an error, got %v", err)
 }
@ -49,8 +49,8 @@ func TestMultipleLock(t *testing.T) {
 	lock2, err := restic.NewLock(context.TODO(), repo)
 	rtest.OK(t, err)

-	rtest.OK(t, lock1.Unlock())
-	rtest.OK(t, lock2.Unlock())
+	rtest.OK(t, lock1.Unlock(context.TODO()))
+	rtest.OK(t, lock2.Unlock(context.TODO()))
 }

 type failLockLoadingBackend struct {
@ -75,7 +75,7 @@ func TestMultipleLockFailure(t *testing.T) {
 	_, err = restic.NewLock(context.TODO(), repo)
 	rtest.Assert(t, err != nil, "unreadable lock file did not result in an error")

-	rtest.OK(t, lock1.Unlock())
+	rtest.OK(t, lock1.Unlock(context.TODO()))
 }

 func TestLockExclusive(t *testing.T) {
@ -83,7 +83,7 @@ func TestLockExclusive(t *testing.T) {

 	elock, err := restic.NewExclusiveLock(context.TODO(), repo)
 	rtest.OK(t, err)
-	rtest.OK(t, elock.Unlock())
+	rtest.OK(t, elock.Unlock(context.TODO()))
 }

 func TestLockOnExclusiveLockedRepo(t *testing.T) {
@ -99,8 +99,8 @@ func TestLockOnExclusiveLockedRepo(t *testing.T) {
 	rtest.Assert(t, restic.IsAlreadyLocked(err),
 		"create normal lock with exclusively locked repo didn't return the correct error")

-	rtest.OK(t, lock.Unlock())
-	rtest.OK(t, elock.Unlock())
+	rtest.OK(t, lock.Unlock(context.TODO()))
+	rtest.OK(t, elock.Unlock(context.TODO()))
 }

 func TestExclusiveLockOnLockedRepo(t *testing.T) {
@ -116,8 +116,8 @@ func TestExclusiveLockOnLockedRepo(t *testing.T) {
 	rtest.Assert(t, restic.IsAlreadyLocked(err),
 		"create normal lock with exclusively locked repo didn't return the correct error")

-	rtest.OK(t, lock.Unlock())
-	rtest.OK(t, elock.Unlock())
+	rtest.OK(t, lock.Unlock(context.TODO()))
+	rtest.OK(t, elock.Unlock(context.TODO()))
 }

 func createFakeLock(repo restic.SaverUnpacked, t time.Time, pid int) (restic.ID, error) {
@ -296,7 +296,7 @@ func testLockRefresh(t *testing.T, refresh func(lock *restic.Lock) error) {
 	rtest.OK(t, err)
 	rtest.Assert(t, lock2.Time.After(time0),
 		"expected a later timestamp after lock refresh")
-	rtest.OK(t, lock.Unlock())
+	rtest.OK(t, lock.Unlock(context.TODO()))
 }

 func TestLockRefresh(t *testing.T) {