From debc4a3a99417df9503ef89864fc868f5c7eee02 Mon Sep 17 00:00:00 2001
From: Michael Eischer <michael.eischer@fau.de>
Date: Wed, 30 Dec 2020 17:31:22 +0100
Subject: [PATCH] archiver: fix race condition during worker startup

When the tomb is created with a canceled context, then the workers
started via `t.Go` exist nearly immediately. Once for the first time all
started goroutines have been stopped, it is not allowed to issue further
calls to `t.Go`. This is a problem when the started goroutines exit
immediately, as for example the first goroutine might already have
stopped before starting the second one, which is not allowed as once the
first goroutines has stopped no goroutines were running.

To fix this race condition the startup and main task of the archiver now
also run within a `t.Go` function. This also allows unifying the error
handling as it is no longer necessary to distinguish between errors
returned by the workers or the saveTree processing. The tomb now just
returns the first error encountered, which should also be the most
descriptive one.
---
 internal/archiver/archiver.go | 32 +++++++++++++++-----------------
 1 file changed, 15 insertions(+), 17 deletions(-)

diff --git a/internal/archiver/archiver.go b/internal/archiver/archiver.go
index 4992d439b..2fd170a3a 100644
--- a/internal/archiver/archiver.go
+++ b/internal/archiver/archiver.go
@@ -784,33 +784,31 @@ func (arch *Archiver) Snapshot(ctx context.Context, targets []string, opts Snaps
 
 	var t tomb.Tomb
 	wctx := t.Context(ctx)
-
-	arch.runWorkers(wctx, &t)
-
 	start := time.Now()
 
-	debug.Log("starting snapshot")
-	rootTreeID, stats, err := func() (restic.ID, ItemStats, error) {
+	var rootTreeID restic.ID
+	var stats ItemStats
+	t.Go(func() error {
+		arch.runWorkers(wctx, &t)
+
+		debug.Log("starting snapshot")
 		tree, err := arch.SaveTree(wctx, "/", atree, arch.loadParentTree(wctx, opts.ParentSnapshot))
 		if err != nil {
-			return restic.ID{}, ItemStats{}, err
+			return err
 		}
 
 		if len(tree.Nodes) == 0 {
-			return restic.ID{}, ItemStats{}, errors.New("snapshot is empty")
+			return errors.New("snapshot is empty")
 		}
 
-		return arch.saveTree(wctx, tree)
-	}()
-	debug.Log("saved tree, error: %v", err)
+		rootTreeID, stats, err = arch.saveTree(wctx, tree)
+		// trigger shutdown but don't set an error
+		t.Kill(nil)
+		return err
+	})
 
-	t.Kill(nil)
-	werr := t.Wait()
-	debug.Log("err is %v, werr is %v", err, werr)
-	// Use werr when it might contain a more specific error than "context canceled"
-	if err == nil || (errors.Cause(err) == context.Canceled && werr != nil) {
-		err = werr
-	}
+	err = t.Wait()
+	debug.Log("err is %v", err)
 
 	if err != nil {
 		debug.Log("error while saving tree: %v", err)