Add metabase and write_cache operations

Signed-off-by: Andrey Berezin <a.berezin@yadro.com>
2023-05-30 16:32:38 +03:00 · 2023-05-30 16:32:38 +03:00 · 2bad0f1db6
commit 2bad0f1db6
parent a26f8e9c80
10 changed files with 152 additions and 14 deletions
--- a/src/frostfs_testlib/storage/controllers/cluster_state_controller.py
+++ b/src/frostfs_testlib/storage/controllers/cluster_state_controller.py
@@ -1,11 +1,11 @@
 import time
+from concurrent.futures import ThreadPoolExecutor

 import frostfs_testlib.resources.optionals as optionals
 from frostfs_testlib.reporter import get_reporter
 from frostfs_testlib.shell import CommandOptions, Shell
 from frostfs_testlib.steps import epoch
 from frostfs_testlib.storage.cluster import Cluster, ClusterNode, StorageNode
-from frostfs_testlib.steps.node_management import wait_for_node_to_be_ready
 from frostfs_testlib.storage.controllers.disk_controller import DiskController
 from frostfs_testlib.testing.test_control import run_optionally, wait_for_success
 from frostfs_testlib.utils.failover_utils import (
@@ -22,7 +22,7 @@ class ClusterStateController:
    def __init__(self, shell: Shell, cluster: Cluster) -> None:
        self.stopped_nodes: list[ClusterNode] = []
        self.detached_disks: dict[str, DiskController] = {}
-        self.stopped_storage_nodes: list[StorageNode] = []
+        self.stopped_storage_nodes: list[ClusterNode] = []  # filled by stop_storage_service
        self.cluster = cluster
        self.shell = shell

@@ -48,6 +48,16 @@ class ClusterStateController:
        for node in nodes:
            wait_for_host_offline(self.shell, node.storage_node)

+    @run_optionally(optionals.OPTIONAL_FAILOVER_ENABLED)
+    @reporter.step_deco("Stop all storage services on cluster")
+    def stop_all_storage_services(self, reversed_order: bool = False):
+        """Stop the storage service on each cluster node, in reverse list order if requested."""
+        targets = self.cluster.cluster_nodes
+        if reversed_order:
+            targets = reversed(targets)
+        for cluster_node in targets:
+            self.stop_storage_service(cluster_node)
+
    @run_optionally(optionals.OPTIONAL_FAILOVER_ENABLED)
    @reporter.step_deco("Start host of node {node}")
    def start_node_host(self, node: ClusterNode):
@@ -92,19 +102,31 @@ class ClusterStateController:
    @reporter.step_deco("Stop storage service on {node}")
    def stop_storage_service(self, node: ClusterNode):
        node.storage_node.stop_service()
-        self.stopped_storage_nodes.append(node.storage_node)
+        self.stopped_storage_nodes.append(node)  # recorded for start_stopped_storage_services

    @run_optionally(optionals.OPTIONAL_FAILOVER_ENABLED)
    @reporter.step_deco("Start storage service on {node}")
    def start_storage_service(self, node: ClusterNode):
        node.storage_node.start_service()
-        self.stopped_storage_nodes.remove(node.storage_node)
+        self.stopped_storage_nodes.remove(node)  # ValueError if node was not recorded as stopped

    @run_optionally(optionals.OPTIONAL_FAILOVER_ENABLED)
    @reporter.step_deco("Start stopped storage services")
    def start_stopped_storage_services(self):
-        for node in self.stopped_storage_nodes:
-            node.start_service()
+        if self.stopped_storage_nodes:
+            # Suppose several services (s01-s04) were stopped: s01 started alone may need s02-s04, which are still down,
+            # and fail; a sequential loop would then never start s02-s04 and the cluster would be left in a bad state.
+            # Threads make sure that a start is at least attempted for every stopped service.
+            # Snapshot first: start_storage_service removes nodes from the list while map() is still iterating it.
+            nodes_to_start = list(self.stopped_storage_nodes)
+            with ThreadPoolExecutor(max_workers=len(nodes_to_start)) as executor:
+                start_result = executor.map(self.start_storage_service, nodes_to_start)
+
+            # An exception raised in a worker thread is held by the executor and re-raised only when its result
+            # is consumed, so drain the iterator here. Not an ideal solution, but okay for now.
+            for _ in start_result:
+                pass
+
        wait_all_storage_nodes_returned(self.shell, self.cluster)
        self.stopped_storage_nodes = []