From f06e44642a743488b5441e78e6982a3c62cb617b Mon Sep 17 00:00:00 2001 From: Andrey Berezin Date: Tue, 19 Mar 2024 18:40:42 +0300 Subject: [PATCH] [#210] Update failover case to not use stopped node Signed-off-by: Andrey Berezin --- .../failovers/test_failover_server.py | 115 +++++++----------- 1 file changed, 43 insertions(+), 72 deletions(-) diff --git a/pytest_tests/testsuites/failovers/test_failover_server.py b/pytest_tests/testsuites/failovers/test_failover_server.py index 42470ed..0d5ee39 100644 --- a/pytest_tests/testsuites/failovers/test_failover_server.py +++ b/pytest_tests/testsuites/failovers/test_failover_server.py @@ -5,7 +5,6 @@ import random import allure import pytest from frostfs_testlib import reporter -from frostfs_testlib.resources.common import MORPH_BLOCK_TIME from frostfs_testlib.resources.wellknown_acl import PUBLIC_ACL from frostfs_testlib.steps.cli.container import StorageContainer, StorageContainerInfo, create_container from frostfs_testlib.steps.cli.object import get_object, get_object_nodes, put_object @@ -16,8 +15,8 @@ from frostfs_testlib.storage.dataclasses.object_size import ObjectSize from frostfs_testlib.storage.dataclasses.storage_object_info import StorageObjectInfo from frostfs_testlib.storage.dataclasses.wallet import WalletInfo from frostfs_testlib.testing.cluster_test_base import ClusterTestBase +from frostfs_testlib.testing.parallel import parallel from frostfs_testlib.testing.test_control import wait_for_success -from frostfs_testlib.utils import datetime_utils from frostfs_testlib.utils.failover_utils import wait_object_replication from frostfs_testlib.utils.file_utils import get_file_hash from pytest import FixtureRequest @@ -169,7 +168,6 @@ class TestFailoverServer(ClusterTestBase): node_to_stop: ClusterNode, cluster_state_controller: ClusterStateController, ): - with reporter.step(f"Remove {node_to_stop} from the list of nodes"): alive_nodes = list(set(self.cluster.cluster_nodes) - {node_to_stop}) @@ -183,7 +181,6 @@ class TestFailoverServer(ClusterTestBase): with reporter.step("Verify that there are no corrupted objects"): corrupted_objects_list = self.get_corrupted_objects_list(storage_nodes, storage_objects) - assert not corrupted_objects_list with reporter.step(f"check {node_to_stop.storage_node} in map"): @@ -216,7 +213,7 @@ class TestFailoverServer(ClusterTestBase): self.tick_epochs(1, storage_nodes[0], wait_block=2) with reporter.step(f"Stop node"): - cluster_state_controller.stop_node_host(node=node_to_stop, mode="hard") + cluster_state_controller.stop_node_host(node_to_stop, "hard") with reporter.step("Verify that there are no corrupted objects"): corrupted_objects_list = self.get_corrupted_objects_list(storage_nodes, storage_objects) @@ -234,7 +231,6 @@ class TestFailoverServer(ClusterTestBase): @allure.title("Not enough nodes in the container with policy - 'REP 3 CBF 1 SELECT 4 FROM *'") def test_not_enough_nodes_in_container_rep_3( self, - container: list[StorageContainer], object_and_nodes: tuple[StorageObjectInfo, list[ClusterNode]], default_wallet: WalletInfo, cluster_state_controller: ClusterStateController, @@ -242,39 +238,23 @@ class TestFailoverServer(ClusterTestBase): up_stop_nodes: None, ): object_info, object_nodes = object_and_nodes - node_not_object = list(set(self.cluster.cluster_nodes) - set(object_nodes))[0] + endpoint_without_object = list(set(self.cluster.cluster_nodes) - set(object_nodes))[ + 0 + ].storage_node.get_rpc_endpoint() + endpoint_with_object = object_nodes[0].storage_node.get_rpc_endpoint() - with reporter.step("Stop all nodes with object, except 1"): - for cluster_node in object_nodes[1:]: - cluster_state_controller.stop_node_host(node=cluster_node, mode="hard") + with reporter.step("Stop all nodes with object except first one"): + parallel(cluster_state_controller.stop_node_host, object_nodes[1:], mode="hard") - with reporter.step("Get object"): - with reporter.step(f"Get operation to {node_not_object} where it does not exist, expect success"): - get_object( - wallet=default_wallet, - cid=object_info.cid, - oid=object_info.oid, - shell=self.shell, - endpoint=node_not_object.storage_node.get_rpc_endpoint(), - ) - with reporter.step(f"Get operation to {object_nodes[0]} with object, expect success"): - get_object( - wallet=default_wallet, - cid=object_info.cid, - oid=object_info.oid, - shell=self.shell, - endpoint=object_nodes[0].storage_node.get_rpc_endpoint(), - ) + with reporter.step(f"Get object from node without object"): + get_object(default_wallet, object_info.cid, object_info.oid, self.shell, endpoint_without_object) + + with reporter.step(f"Get object from node with object"): + get_object(default_wallet, object_info.cid, object_info.oid, self.shell, endpoint_with_object) with reporter.step(f"Put operation to node with object, expect error"): with pytest.raises(RuntimeError): - put_object( - wallet=default_wallet, - path=simple_file, - cid=object_info.cid, - shell=self.shell, - endpoint=node_not_object.storage_node.get_rpc_endpoint(), - ) + put_object(default_wallet, simple_file, object_info.cid, self.shell, endpoint_with_object) @allure.title("Not enough nodes in the container with policy - 'REP 2 CBF 2 SELECT 4 FROM *'") def test_not_enough_nodes_in_container_rep_2( @@ -284,52 +264,43 @@ class TestFailoverServer(ClusterTestBase): simple_file: str, up_stop_nodes: None, ): - with reporter.step("Creating a container with a full network map"): - select = len(self.cluster.cluster_nodes) - placement_rule = f"REP {select - 2} IN X CBF 2 SELECT {select} FROM * AS X" - cid_1 = create_container( + with reporter.step("Create container with full network map"): + node_count = len(self.cluster.cluster_nodes) + placement_rule = f"REP {node_count - 2} IN X CBF 2 SELECT {node_count} FROM * AS X" + cid = create_container( default_wallet, - shell=self.shell, - endpoint=self.cluster.default_rpc_endpoint, + self.shell, + self.cluster.default_rpc_endpoint, rule=placement_rule, basic_acl=PUBLIC_ACL, ) + with reporter.step("Put object"): - oid = put_object( - wallet=default_wallet, - path=simple_file, - cid=cid_1, - shell=self.shell, - endpoint=self.cluster.default_rpc_endpoint, - ) + oid = put_object(default_wallet, simple_file, cid, self.shell, self.cluster.default_rpc_endpoint) + with reporter.step("Search nodes with object"): - object_nodes = get_object_nodes( - cluster=self.cluster, cid=cid_1, oid=oid, alive_node=self.cluster.cluster_nodes[0] - ) - with reporter.step("Turn off random node with object"): - cluster_state_controller.stop_node_host(node=random.choice(object_nodes[1:]), mode="hard") - with reporter.step("Checking PUT operation"): - oid_2 = put_object( - wallet=default_wallet, - path=simple_file, - cid=cid_1, - shell=self.shell, - endpoint=self.cluster.default_rpc_endpoint, - ) - with reporter.step("Checking GET operation"): - get_file = get_object( - wallet=default_wallet, - cid=cid_1, - oid=oid_2, - shell=self.shell, - endpoint=object_nodes[0].storage_node.get_rpc_endpoint(), - ) + object_nodes = get_object_nodes(self.cluster, cid, oid, self.cluster.cluster_nodes[0]) + + with reporter.step("Choose node to stop"): + node_to_stop = random.choice(object_nodes) + alive_node_with_object = random.choice(list(set(object_nodes) - {node_to_stop})) + alive_endpoint_with_object = alive_node_with_object.storage_node.get_rpc_endpoint() + + with reporter.step("Stop random node with object"): + cluster_state_controller.stop_node_host(node_to_stop, "hard") + + with reporter.step("Put object to alive node with object"): + oid_2 = put_object(default_wallet, simple_file, cid, self.shell, alive_endpoint_with_object) + + with reporter.step("Get object from alive node with object"): + get_file = get_object(default_wallet, cid, oid_2, self.shell, alive_endpoint_with_object) os.remove(get_file) - with reporter.step("Checking creating container"): - _ = create_container( + + with reporter.step("Create container on alive node"): + create_container( default_wallet, - shell=self.shell, - endpoint=object_nodes[0].storage_node.get_rpc_endpoint(), + self.shell, + alive_endpoint_with_object, rule=placement_rule, basic_acl=PUBLIC_ACL, )