Stream errors are not handled by tree pool #336

Closed
opened 2025-02-28 06:56:09 +00:00 by alexvanin · 0 comments
Owner

Tree pool creates stream and returns it to the caller. However, this stream may contain error so pool may retry request. This does not happen now.

Describe the solution you'd like

Pool can read at least one message of the stream, buffer it and check error before sending it to the client. This does not cover a case when error happens during transmission, but I suppose most errors are returned by FrostFS storage immediately.

Additional context

We can use similar test to check behaviour

diff --git a/pool/tree/pool_server_test.go b/pool/tree/pool_server_test.go
index edba93f..eeb523e 100644
--- a/pool/tree/pool_server_test.go
+++ b/pool/tree/pool_server_test.go
@@ -28,6 +28,9 @@ type mockTreeServer struct {
 
 	healthy    bool
 	addCounter int
+
+	getSubTreeError   error
+	getSubTreeCounter int
 }
 
 type mockNetmapSource struct {
@@ -91,7 +94,8 @@ func (m *mockTreeServer) GetNodeByPath(context.Context, *tree.GetNodeByPathReque
 }
 
 func (m *mockTreeServer) GetSubTree(*tree.GetSubTreeRequest, tree.TreeService_GetSubTreeServer) error {
-	panic("implement me")
+	m.getSubTreeCounter++
+	return m.getSubTreeError
 }
 
 func (m *mockTreeServer) TreeList(context.Context, *tree.TreeListRequest) (*tree.TreeListResponse, error) {
@@ -235,3 +239,28 @@ func TestConnectionLeak(t *testing.T) {
 	// not more than 1 extra goroutine is created due to async operations
 	require.LessOrEqual(t, runtime.NumGoroutine()-routinesBefore, 1)
 }
+
+func TestStreamRetry(t *testing.T) {
+	const (
+		numberOfNodes   = 4
+		placementPolicy = "REP 2"
+	)
+
+	// Initialize gRPC servers and create pool with netmap source
+	treePool, servers, _ := preparePoolWithNetmapSource(t, numberOfNodes, placementPolicy)
+	for i := range servers {
+		servers[i].getSubTreeError = errors.New("tree not found")
+		defer servers[i].Stop()
+	}
+
+	cnr := cidtest.ID()
+	ctx := context.Background()
+
+	_, err := treePool.GetSubTree(ctx, GetSubTreeParams{CID: cnr})
+	require.Error(t, err)
+
+	for i := range servers {
+		// check we retried every available node in the pool
+		require.Equal(t, 1, servers[i].getSubTreeCounter)
+	}
+}
## Is your feature request related to a problem? Please describe. Tree pool creates stream and returns it to the caller. However, this stream may contain error so pool may retry request. This does not happen now. ## Describe the solution you'd like Pool can read at least one message of the stream, buffer it and check error before sending it to the client. This does not cover a case when error happens during transmission, but I suppose most errors are returned by FrostFS storage immediately. ## Additional context We can use similar test to check behaviour ```diff diff --git a/pool/tree/pool_server_test.go b/pool/tree/pool_server_test.go index edba93f..eeb523e 100644 --- a/pool/tree/pool_server_test.go +++ b/pool/tree/pool_server_test.go @@ -28,6 +28,9 @@ type mockTreeServer struct { healthy bool addCounter int + + getSubTreeError error + getSubTreeCounter int } type mockNetmapSource struct { @@ -91,7 +94,8 @@ func (m *mockTreeServer) GetNodeByPath(context.Context, *tree.GetNodeByPathReque } func (m *mockTreeServer) GetSubTree(*tree.GetSubTreeRequest, tree.TreeService_GetSubTreeServer) error { - panic("implement me") + m.getSubTreeCounter++ + return m.getSubTreeError } func (m *mockTreeServer) TreeList(context.Context, *tree.TreeListRequest) (*tree.TreeListResponse, error) { @@ -235,3 +239,28 @@ func TestConnectionLeak(t *testing.T) { // not more than 1 extra goroutine is created due to async operations require.LessOrEqual(t, runtime.NumGoroutine()-routinesBefore, 1) } + +func TestStreamRetry(t *testing.T) { + const ( + numberOfNodes = 4 + placementPolicy = "REP 2" + ) + + // Initialize gRPC servers and create pool with netmap source + treePool, servers, _ := preparePoolWithNetmapSource(t, numberOfNodes, placementPolicy) + for i := range servers { + servers[i].getSubTreeError = errors.New("tree not found") + defer servers[i].Stop() + } + + cnr := cidtest.ID() + ctx := context.Background() + + _, err := treePool.GetSubTree(ctx, GetSubTreeParams{CID: cnr}) + require.Error(t, err) + + for i := range servers { + // check we retried every available node in the pool + require.Equal(t, 1, servers[i].getSubTreeCounter) + } +} ```
alexvanin added the
bug
label 2025-02-28 06:56:09 +00:00
dkirillov was assigned by alexvanin 2025-02-28 06:56:11 +00:00
dkirillov referenced this issue from a commit 2025-02-28 09:43:30 +00:00
dkirillov referenced this issue from a commit 2025-02-28 09:45:19 +00:00
dkirillov referenced this issue from a commit 2025-02-28 10:17:08 +00:00
dkirillov referenced this issue from a commit 2025-02-28 11:13:10 +00:00
fyrchik changed title from Stream errors are not handeled by tree pool to Stream errors are not handled by tree pool 2025-03-03 08:55:42 +00:00
Sign in to join this conversation.
No milestone
No project
No assignees
1 participant
Notifications
Due date
The due date is invalid or out of range. Please use the format "yyyy-mm-dd".

No due date set.

Dependencies

No dependencies set.

Reference: TrueCloudLab/frostfs-sdk-go#336
No description provided.