diff --git a/pytest_tests/testsuites/failovers/test_failover_storage.py b/pytest_tests/testsuites/failovers/test_failover_storage.py index 594dd66..0edd011 100644 --- a/pytest_tests/testsuites/failovers/test_failover_storage.py +++ b/pytest_tests/testsuites/failovers/test_failover_storage.py @@ -4,6 +4,7 @@ from time import sleep import allure import pytest +from common import NEOFS_NETMAP, NEOFS_NETMAP_DICT, STORAGE_NODE_CONFIG_PATH from python_keywords.container import create_container from python_keywords.neofs_verbs import get_object, put_object from python_keywords.utility_keywords import generate_file, get_file_hash @@ -50,11 +51,43 @@ def panic_reboot_host(ip: str = None): def return_all_storage_nodes(sbercloud_client: SberCloud): for host in stopped_hosts: - sbercloud_client.start_node(node_ip=host.split(':')[-2]) - stopped_hosts.remove(host) + with allure.step(f'Start storage node {host}'): + sbercloud_client.start_node(node_ip=host.split(':')[-2]) + stopped_hosts.clear() + wait_all_storage_node_returned() -def wait_object_replication(wallet, cid, oid, expected_copies: int) -> [str]: +def is_all_storage_node_returned() -> bool: + with allure.step('Run health check for all storage nodes'): + for node_name in NEOFS_NETMAP_DICT.keys(): + try: + health_check = node_healthcheck(node_name) + except (AssertionError, HostIsNotAvailable, TimeoutError): + ### REMOVE AFTER FIX STORAGE_NODE_CONFIG_PATH ### + try: + for host in NEOFS_NETMAP: + ssh = HostClient(ip=host.split(':')[-2]) + ssh.exec(f'echo -e "wallet: /etc/neofs/storage/wallet.json\npassword: \"\"" > {STORAGE_NODE_CONFIG_PATH}') + except Exception: + pass + ### REMOVE AFTER FIX STORAGE_NODE_CONFIG_PATH ### + return False + if health_check.health_status != 'READY' or health_check.network_status != 'ONLINE': + return False + return True + + +def wait_all_storage_node_returned(): + sleep_interval, attempts = 10, 12 + for __attempt in range(attempts): + if is_all_storage_node_returned(): + return + sleep(sleep_interval) + raise AssertionError('Storage node(s) is broken') + + +def wait_object_replication(wallet, cid, oid, expected_copies: int, excluded_nodes: [str] = None) -> [str]: + excluded_nodes = excluded_nodes or [] sleep_interval, attempts = 10, 12 nodes = [] for __attempt in range(attempts): @@ -84,15 +117,15 @@ def test_lost_storage_node(prepare_wallet_and_deposit, sbercloud_client: SberClo new_nodes = wait_object_replication(wallet, cid, oid, 2) assert not [node for node in nodes if node in new_nodes] - got_file_path = get_object(wallet, cid, oid) + got_file_path = get_object(wallet, cid, oid, endpoint=new_nodes[0]) assert get_file_hash(source_file_path) == get_file_hash(got_file_path) with allure.step(f'Return storage nodes'): return_all_storage_nodes(sbercloud_client) - wait_object_replication(wallet, cid, oid, 2) + new_nodes = wait_object_replication(wallet, cid, oid, 2) - got_file_path = get_object(wallet, cid, oid) + got_file_path = get_object(wallet, cid, oid, endpoint=new_nodes[0]) assert get_file_hash(source_file_path) == get_file_hash(got_file_path) @@ -107,18 +140,19 @@ def test_panic_storage_node(prepare_wallet_and_deposit, free_storage_check, sequ oid = put_object(wallet, source_file_path, cid) nodes = wait_object_replication(wallet, cid, oid, 2) - allure.attach.file('\n'.join(nodes), 'Current nodes with object', allure.attachment_type.TEXT) + new_nodes = [] + allure.attach('\n'.join(nodes), 'Current nodes with object', allure.attachment_type.TEXT) for node in nodes: with allure.step(f'Hard reboot host {node} via magic SysRq option'): panic_reboot_host(ip=node.split(':')[-2]) if sequence: new_nodes = wait_object_replication(wallet, cid, oid, 2) - allure.attach.file('\n'.join(new_nodes), f'Nodes with object after {node} fail', + allure.attach('\n'.join(new_nodes), f'Nodes with object after {node} fail', allure.attachment_type.TEXT) if not sequence: new_nodes = wait_object_replication(wallet, cid, oid, 2) - allure.attach.file('\n'.join(new_nodes), 'Nodes with object after nodes fail', allure.attachment_type.TEXT) + allure.attach('\n'.join(new_nodes), 'Nodes with object after nodes fail', allure.attachment_type.TEXT) - got_file_path = get_object(wallet, cid, oid) + got_file_path = get_object(wallet, cid, oid, endpoint=new_nodes[0]) assert get_file_hash(source_file_path) == get_file_hash(got_file_path) diff --git a/robot/resources/lib/python_keywords/node_management.py b/robot/resources/lib/python_keywords/node_management.py index 2c21a7f..aec3a68 100644 --- a/robot/resources/lib/python_keywords/node_management.py +++ b/robot/resources/lib/python_keywords/node_management.py @@ -122,7 +122,7 @@ def get_locode(): @keyword('Stop Nodes Remote') -def stop_nodes_remote(number: int, nodes: list) -> None: +def stop_nodes_remote(number: int, nodes: list) -> list: """ The function shuts down the given number of randomly selected nodes in docker.