diff --git a/pytest_tests/testsuites/failovers/test_failover_storage.py b/pytest_tests/testsuites/failovers/test_failover_storage.py index 0edd011c..ef5abd79 100644 --- a/pytest_tests/testsuites/failovers/test_failover_storage.py +++ b/pytest_tests/testsuites/failovers/test_failover_storage.py @@ -4,16 +4,16 @@ from time import sleep import allure import pytest -from common import NEOFS_NETMAP, NEOFS_NETMAP_DICT, STORAGE_NODE_CONFIG_PATH +from common import NEOFS_NETMAP_DICT from python_keywords.container import create_container from python_keywords.neofs_verbs import get_object, put_object +from python_keywords.node_management import node_healthcheck from python_keywords.utility_keywords import generate_file, get_file_hash from sbercloud_helper import SberCloud from ssh_helper import HostClient, HostIsNotAvailable from storage_policy import get_nodes_with_object from wellknown_acl import PUBLIC_ACL -SSH_PK_PATH = f'{os.getcwd()}/configuration/id_rsa' logger = logging.getLogger('NeoLogger') stopped_hosts = [] @@ -41,9 +41,7 @@ def return_all_storage_nodes_fixture(sbercloud_client): def panic_reboot_host(ip: str = None): - ssh = HostClient(ip=ip, init_ssh_client=False) - ssh.pk = SSH_PK_PATH - ssh.create_connection(attempts=1) + ssh = HostClient(ip=ip) ssh.exec('echo 1 > /proc/sys/kernel/sysrq') with pytest.raises(HostIsNotAvailable): ssh.exec('echo b > /proc/sysrq-trigger', timeout=1) @@ -53,7 +51,7 @@ def return_all_storage_nodes(sbercloud_client: SberCloud): for host in stopped_hosts: with allure.step(f'Start storage node {host}'): sbercloud_client.start_node(node_ip=host.split(':')[-2]) - stopped_hosts.clear() + stopped_hosts.remove(host) wait_all_storage_node_returned() @@ -63,14 +61,6 @@ def is_all_storage_node_returned() -> bool: try: health_check = node_healthcheck(node_name) except (AssertionError, HostIsNotAvailable, TimeoutError): - ### REMOVE AFTER FIX STORAGE_NODE_CONFIG_PATH ### - try: - for host in NEOFS_NETMAP: - ssh = HostClient(ip=host.split(':')[-2]) - ssh.exec(f'echo -e "wallet: /etc/neofs/storage/wallet.json\npassword: \"\"" > {STORAGE_NODE_CONFIG_PATH}') - except Exception: - pass - ### REMOVE AFTER FIX STORAGE_NODE_CONFIG_PATH ### return False if health_check.health_status != 'READY' or health_check.network_status != 'ONLINE': return False @@ -91,7 +81,7 @@ def wait_object_replication(wallet, cid, oid, expected_copies: int, excluded_nod sleep_interval, attempts = 10, 12 nodes = [] for __attempt in range(attempts): - nodes = get_nodes_with_object(wallet, cid, oid) + nodes = [node for node in get_nodes_with_object(wallet, cid, oid) if node not in excluded_nodes] if len(nodes) == expected_copies: return nodes sleep(sleep_interval) @@ -112,9 +102,10 @@ def test_lost_storage_node(prepare_wallet_and_deposit, sbercloud_client: SberClo new_nodes = [] for node in nodes: + stopped_hosts.append(node) with allure.step(f'Stop storage node {node}'): sbercloud_client.stop_node(node_ip=node.split(':')[-2], hard=hard_reboot) - new_nodes = wait_object_replication(wallet, cid, oid, 2) + new_nodes = wait_object_replication(wallet, cid, oid, 2, excluded_nodes=[node]) assert not [node for node in nodes if node in new_nodes] got_file_path = get_object(wallet, cid, oid, endpoint=new_nodes[0]) @@ -140,18 +131,17 @@ def test_panic_storage_node(prepare_wallet_and_deposit, free_storage_check, sequ oid = put_object(wallet, source_file_path, cid) nodes = wait_object_replication(wallet, cid, oid, 2) - new_nodes = [] allure.attach('\n'.join(nodes), 'Current nodes with object', allure.attachment_type.TEXT) for node in nodes: with allure.step(f'Hard reboot host {node} via magic SysRq option'): panic_reboot_host(ip=node.split(':')[-2]) if sequence: - new_nodes = wait_object_replication(wallet, cid, oid, 2) + new_nodes = wait_object_replication(wallet, cid, oid, 2, excluded_nodes=[node]) allure.attach('\n'.join(new_nodes), f'Nodes with object after {node} fail', allure.attachment_type.TEXT) if not sequence: - new_nodes = wait_object_replication(wallet, cid, oid, 2) + new_nodes = wait_object_replication(wallet, cid, oid, 2, excluded_nodes=nodes) allure.attach('\n'.join(new_nodes), 'Nodes with object after nodes fail', allure.attachment_type.TEXT) got_file_path = get_object(wallet, cid, oid, endpoint=new_nodes[0]) diff --git a/robot/resources/lib/python_keywords/node_management.py b/robot/resources/lib/python_keywords/node_management.py index aec3a68b..bafd1603 100644 --- a/robot/resources/lib/python_keywords/node_management.py +++ b/robot/resources/lib/python_keywords/node_management.py @@ -12,10 +12,11 @@ from dataclasses import dataclass from typing import List import docker -from common import NEOFS_NETMAP_DICT, STORAGE_NODE_BIN_PATH, STORAGE_NODE_CONFIG_PATH, STORAGE_NODE_PRIVATE_CONTROL_ENDPOINT, STORAGE_NODE_PWD, STORAGE_NODE_USER +from common import (NEOFS_NETMAP_DICT, STORAGE_NODE_BIN_PATH, STORAGE_NODE_CONFIG_PATH, + STORAGE_NODE_PRIVATE_CONTROL_ENDPOINT, STORAGE_NODE_PWD, STORAGE_NODE_USER) from robot.api import logger from robot.api.deco import keyword -from ssh_helper import HostClient +from ssh_helper import HostClient, HostIsNotAvailable ROBOT_AUTO_KEYWORDS = False @@ -43,7 +44,10 @@ def create_ssh_client(node_name: str) -> HostClient: node_config = NEOFS_NETMAP_DICT.get(node_name) host = node_config.get('control').split(':')[0] - ssh_client = HostClient(host, STORAGE_NODE_USER, STORAGE_NODE_PWD) + try: + ssh_client = HostClient(host, STORAGE_NODE_USER, STORAGE_NODE_PWD) + except HostIsNotAvailable: + ssh_client = HostClient(host) try: yield ssh_client @@ -52,7 +56,7 @@ def create_ssh_client(node_name: str) -> HostClient: @keyword('Stop Nodes') -def stop_nodes(number: int, nodes: list) -> None: +def stop_nodes(number: int, nodes: list) -> list: """ The function shuts down the given number of randomly selected nodes in docker.