Update failover case to not use stopped node #210

Merged
abereziny merged 1 commit from abereziny/frostfs-testcases:bugfix-failover-tests into master 2024-03-20 08:07:13 +00:00

View file

@ -5,7 +5,6 @@ import random
import allure import allure
import pytest import pytest
from frostfs_testlib import reporter from frostfs_testlib import reporter
from frostfs_testlib.resources.common import MORPH_BLOCK_TIME
from frostfs_testlib.resources.wellknown_acl import PUBLIC_ACL from frostfs_testlib.resources.wellknown_acl import PUBLIC_ACL
from frostfs_testlib.steps.cli.container import StorageContainer, StorageContainerInfo, create_container from frostfs_testlib.steps.cli.container import StorageContainer, StorageContainerInfo, create_container
from frostfs_testlib.steps.cli.object import get_object, get_object_nodes, put_object from frostfs_testlib.steps.cli.object import get_object, get_object_nodes, put_object
@ -16,8 +15,8 @@ from frostfs_testlib.storage.dataclasses.object_size import ObjectSize
from frostfs_testlib.storage.dataclasses.storage_object_info import StorageObjectInfo from frostfs_testlib.storage.dataclasses.storage_object_info import StorageObjectInfo
from frostfs_testlib.storage.dataclasses.wallet import WalletInfo from frostfs_testlib.storage.dataclasses.wallet import WalletInfo
from frostfs_testlib.testing.cluster_test_base import ClusterTestBase from frostfs_testlib.testing.cluster_test_base import ClusterTestBase
from frostfs_testlib.testing.parallel import parallel
from frostfs_testlib.testing.test_control import wait_for_success from frostfs_testlib.testing.test_control import wait_for_success
from frostfs_testlib.utils import datetime_utils
from frostfs_testlib.utils.failover_utils import wait_object_replication from frostfs_testlib.utils.failover_utils import wait_object_replication
from frostfs_testlib.utils.file_utils import get_file_hash from frostfs_testlib.utils.file_utils import get_file_hash
from pytest import FixtureRequest from pytest import FixtureRequest
@ -169,7 +168,6 @@ class TestFailoverServer(ClusterTestBase):
node_to_stop: ClusterNode, node_to_stop: ClusterNode,
cluster_state_controller: ClusterStateController, cluster_state_controller: ClusterStateController,
): ):
with reporter.step(f"Remove {node_to_stop} from the list of nodes"): with reporter.step(f"Remove {node_to_stop} from the list of nodes"):
alive_nodes = list(set(self.cluster.cluster_nodes) - {node_to_stop}) alive_nodes = list(set(self.cluster.cluster_nodes) - {node_to_stop})
@ -183,7 +181,6 @@ class TestFailoverServer(ClusterTestBase):
with reporter.step("Verify that there are no corrupted objects"): with reporter.step("Verify that there are no corrupted objects"):
corrupted_objects_list = self.get_corrupted_objects_list(storage_nodes, storage_objects) corrupted_objects_list = self.get_corrupted_objects_list(storage_nodes, storage_objects)
assert not corrupted_objects_list assert not corrupted_objects_list
with reporter.step(f"check {node_to_stop.storage_node} in map"): with reporter.step(f"check {node_to_stop.storage_node} in map"):
@ -216,7 +213,7 @@ class TestFailoverServer(ClusterTestBase):
self.tick_epochs(1, storage_nodes[0], wait_block=2) self.tick_epochs(1, storage_nodes[0], wait_block=2)
with reporter.step(f"Stop node"): with reporter.step(f"Stop node"):
cluster_state_controller.stop_node_host(node=node_to_stop, mode="hard") cluster_state_controller.stop_node_host(node_to_stop, "hard")
with reporter.step("Verify that there are no corrupted objects"): with reporter.step("Verify that there are no corrupted objects"):
corrupted_objects_list = self.get_corrupted_objects_list(storage_nodes, storage_objects) corrupted_objects_list = self.get_corrupted_objects_list(storage_nodes, storage_objects)
@ -234,7 +231,6 @@ class TestFailoverServer(ClusterTestBase):
@allure.title("Not enough nodes in the container with policy - 'REP 3 CBF 1 SELECT 4 FROM *'") @allure.title("Not enough nodes in the container with policy - 'REP 3 CBF 1 SELECT 4 FROM *'")
def test_not_enough_nodes_in_container_rep_3( def test_not_enough_nodes_in_container_rep_3(
self, self,
container: list[StorageContainer],
object_and_nodes: tuple[StorageObjectInfo, list[ClusterNode]], object_and_nodes: tuple[StorageObjectInfo, list[ClusterNode]],
default_wallet: WalletInfo, default_wallet: WalletInfo,
cluster_state_controller: ClusterStateController, cluster_state_controller: ClusterStateController,
@ -242,39 +238,23 @@ class TestFailoverServer(ClusterTestBase):
up_stop_nodes: None, up_stop_nodes: None,
): ):
object_info, object_nodes = object_and_nodes object_info, object_nodes = object_and_nodes
node_not_object = list(set(self.cluster.cluster_nodes) - set(object_nodes))[0] endpoint_without_object = list(set(self.cluster.cluster_nodes) - set(object_nodes))[
0
].storage_node.get_rpc_endpoint()
endpoint_with_object = object_nodes[0].storage_node.get_rpc_endpoint()
with reporter.step("Stop all nodes with object, except 1"): with reporter.step("Stop all nodes with object except first one"):
for cluster_node in object_nodes[1:]: parallel(cluster_state_controller.stop_node_host, object_nodes[1:], mode="hard")
cluster_state_controller.stop_node_host(node=cluster_node, mode="hard")
with reporter.step("Get object"): with reporter.step(f"Get object from node without object"):
with reporter.step(f"Get operation to {node_not_object} where it does not exist, expect success"): get_object(default_wallet, object_info.cid, object_info.oid, self.shell, endpoint_without_object)
get_object(
wallet=default_wallet, with reporter.step(f"Get object from node with object"):
cid=object_info.cid, get_object(default_wallet, object_info.cid, object_info.oid, self.shell, endpoint_with_object)
oid=object_info.oid,
shell=self.shell,
endpoint=node_not_object.storage_node.get_rpc_endpoint(),
)
with reporter.step(f"Get operation to {object_nodes[0]} with object, expect success"):
get_object(
wallet=default_wallet,
cid=object_info.cid,
oid=object_info.oid,
shell=self.shell,
endpoint=object_nodes[0].storage_node.get_rpc_endpoint(),
)
with reporter.step(f"Put operation to node with object, expect error"): with reporter.step(f"Put operation to node with object, expect error"):
with pytest.raises(RuntimeError): with pytest.raises(RuntimeError):
put_object( put_object(default_wallet, simple_file, object_info.cid, self.shell, endpoint_with_object)
wallet=default_wallet,
path=simple_file,
cid=object_info.cid,
shell=self.shell,
endpoint=node_not_object.storage_node.get_rpc_endpoint(),
)
@allure.title("Not enough nodes in the container with policy - 'REP 2 CBF 2 SELECT 4 FROM *'") @allure.title("Not enough nodes in the container with policy - 'REP 2 CBF 2 SELECT 4 FROM *'")
def test_not_enough_nodes_in_container_rep_2( def test_not_enough_nodes_in_container_rep_2(
@ -284,52 +264,43 @@ class TestFailoverServer(ClusterTestBase):
simple_file: str, simple_file: str,
up_stop_nodes: None, up_stop_nodes: None,
): ):
with reporter.step("Creating a container with a full network map"): with reporter.step("Create container with full network map"):
select = len(self.cluster.cluster_nodes) node_count = len(self.cluster.cluster_nodes)
placement_rule = f"REP {select - 2} IN X CBF 2 SELECT {select} FROM * AS X" placement_rule = f"REP {node_count - 2} IN X CBF 2 SELECT {node_count} FROM * AS X"
cid_1 = create_container( cid = create_container(
default_wallet, default_wallet,
shell=self.shell, self.shell,
endpoint=self.cluster.default_rpc_endpoint, self.cluster.default_rpc_endpoint,
rule=placement_rule, rule=placement_rule,
basic_acl=PUBLIC_ACL, basic_acl=PUBLIC_ACL,
) )
with reporter.step("Put object"): with reporter.step("Put object"):
oid = put_object( oid = put_object(default_wallet, simple_file, cid, self.shell, self.cluster.default_rpc_endpoint)
wallet=default_wallet,
path=simple_file,
cid=cid_1,
shell=self.shell,
endpoint=self.cluster.default_rpc_endpoint,
)
with reporter.step("Search nodes with object"): with reporter.step("Search nodes with object"):
object_nodes = get_object_nodes( object_nodes = get_object_nodes(self.cluster, cid, oid, self.cluster.cluster_nodes[0])
cluster=self.cluster, cid=cid_1, oid=oid, alive_node=self.cluster.cluster_nodes[0]
) with reporter.step("Choose node to stop"):
with reporter.step("Turn off random node with object"): node_to_stop = random.choice(object_nodes)
cluster_state_controller.stop_node_host(node=random.choice(object_nodes[1:]), mode="hard") alive_node_with_object = random.choice(list(set(object_nodes) - {node_to_stop}))
with reporter.step("Checking PUT operation"): alive_endpoint_with_object = alive_node_with_object.storage_node.get_rpc_endpoint()
oid_2 = put_object(
wallet=default_wallet, with reporter.step("Stop random node with object"):
path=simple_file, cluster_state_controller.stop_node_host(node_to_stop, "hard")
cid=cid_1,
shell=self.shell, with reporter.step("Put object to alive node with object"):
endpoint=self.cluster.default_rpc_endpoint, oid_2 = put_object(default_wallet, simple_file, cid, self.shell, alive_endpoint_with_object)
)
with reporter.step("Checking GET operation"): with reporter.step("Get object from alive node with object"):
get_file = get_object( get_file = get_object(default_wallet, cid, oid_2, self.shell, alive_endpoint_with_object)
wallet=default_wallet,
cid=cid_1,
oid=oid_2,
shell=self.shell,
endpoint=object_nodes[0].storage_node.get_rpc_endpoint(),
)
os.remove(get_file) os.remove(get_file)
with reporter.step("Checking creating container"):
_ = create_container( with reporter.step("Create container on alive node"):
create_container(
default_wallet, default_wallet,
shell=self.shell, self.shell,
endpoint=object_nodes[0].storage_node.get_rpc_endpoint(), alive_endpoint_with_object,
rule=placement_rule, rule=placement_rule,
basic_acl=PUBLIC_ACL, basic_acl=PUBLIC_ACL,
) )