From eb5532c08e71e217efb3874b475b3b84d201a4df Mon Sep 17 00:00:00 2001 From: Vladimir Avdeev Date: Thu, 14 Jul 2022 10:20:39 +0300 Subject: [PATCH] Extend allure logging for failover tests Signed-off-by: Vladimir Avdeev --- pytest_tests/pytest.ini | 1 + .../failovers/test_failover_storage.py | 70 +++++++++++++------ .../lib/python_keywords/node_management.py | 14 ++-- 3 files changed, 58 insertions(+), 27 deletions(-) diff --git a/pytest_tests/pytest.ini b/pytest_tests/pytest.ini index cbb3b04..d6a73ad 100644 --- a/pytest_tests/pytest.ini +++ b/pytest_tests/pytest.ini @@ -10,6 +10,7 @@ markers = sanity: small tests subset staging: test to be excluded from run in verifier/pr-validation/sanity jobs and run test in staging job # functional markers + container: tests for container creation grpc_api: standard gRPC API tests http_gate: HTTP gate contract s3_gate: S3 gate tests diff --git a/pytest_tests/testsuites/failovers/test_failover_storage.py b/pytest_tests/testsuites/failovers/test_failover_storage.py index 121c349..ef5abd7 100644 --- a/pytest_tests/testsuites/failovers/test_failover_storage.py +++ b/pytest_tests/testsuites/failovers/test_failover_storage.py @@ -4,15 +4,16 @@ from time import sleep import allure import pytest +from common import NEOFS_NETMAP_DICT from python_keywords.container import create_container from python_keywords.neofs_verbs import get_object, put_object +from python_keywords.node_management import node_healthcheck from python_keywords.utility_keywords import generate_file, get_file_hash from sbercloud_helper import SberCloud from ssh_helper import HostClient, HostIsNotAvailable from storage_policy import get_nodes_with_object from wellknown_acl import PUBLIC_ACL -SSH_PK_PATH = f'{os.getcwd()}/configuration/id_rsa' logger = logging.getLogger('NeoLogger') stopped_hosts = [] @@ -20,7 +21,7 @@ stopped_hosts = [] @pytest.fixture(scope='session') def free_storage_check(): if os.getenv('FREE_STORAGE', default='False').lower() not in ('true', '1'): - pytest.skip('Test work only on SberCloud infrastructure') + pytest.skip('Test only works on SberCloud infrastructure') yield @@ -31,7 +32,6 @@ def sbercloud_client(): yield SberCloud(f'{os.getcwd()}/configuration/sbercloud.yaml') except Exception: pytest.fail('SberCloud infrastructure not available') - yield None @pytest.fixture(scope='session', autouse=True) @@ -40,11 +40,8 @@ def return_all_storage_nodes_fixture(sbercloud_client): return_all_storage_nodes(sbercloud_client) -@allure.title('Hard reboot host via magic SysRq option') def panic_reboot_host(ip: str = None): - ssh = HostClient(ip=ip, init_ssh_client=False) - ssh.pk = SSH_PK_PATH - ssh.create_connection(attempts=1) + ssh = HostClient(ip=ip) ssh.exec('echo 1 > /proc/sys/kernel/sysrq') with pytest.raises(HostIsNotAvailable): ssh.exec('echo b > /proc/sysrq-trigger', timeout=1) @@ -52,15 +49,39 @@ def panic_reboot_host(ip: str = None): def return_all_storage_nodes(sbercloud_client: SberCloud): for host in stopped_hosts: - sbercloud_client.start_node(node_ip=host.split(':')[-2]) + with allure.step(f'Start storage node {host}'): + sbercloud_client.start_node(node_ip=host.split(':')[-2]) stopped_hosts.remove(host) + wait_all_storage_node_returned() -def wait_object_replication(wallet, cid, oid, expected_copies: int) -> [str]: +def is_all_storage_node_returned() -> bool: + with allure.step('Run health check for all storage nodes'): + for node_name in NEOFS_NETMAP_DICT.keys(): + try: + health_check = node_healthcheck(node_name) + except (AssertionError, HostIsNotAvailable, TimeoutError): + return False + if health_check.health_status != 'READY' or health_check.network_status != 'ONLINE': + return False + return True + + +def wait_all_storage_node_returned(): + sleep_interval, attempts = 10, 12 + for __attempt in range(attempts): + if is_all_storage_node_returned(): + return + sleep(sleep_interval) + raise AssertionError('Storage node(s) is broken') + + +def wait_object_replication(wallet, cid, oid, expected_copies: int, excluded_nodes: [str] = None) -> [str]: + excluded_nodes = excluded_nodes or [] sleep_interval, attempts = 10, 12 nodes = [] for __attempt in range(attempts): - nodes = get_nodes_with_object(wallet, cid, oid) + nodes = [node for node in get_nodes_with_object(wallet, cid, oid) if node not in excluded_nodes] if len(nodes) == expected_copies: return nodes sleep(sleep_interval) @@ -81,20 +102,21 @@ def test_lost_storage_node(prepare_wallet_and_deposit, sbercloud_client: SberClo new_nodes = [] for node in nodes: + stopped_hosts.append(node) with allure.step(f'Stop storage node {node}'): sbercloud_client.stop_node(node_ip=node.split(':')[-2], hard=hard_reboot) - new_nodes = wait_object_replication(wallet, cid, oid, 2) + new_nodes = wait_object_replication(wallet, cid, oid, 2, excluded_nodes=[node]) assert not [node for node in nodes if node in new_nodes] - got_file_path = get_object(wallet, cid, oid) + got_file_path = get_object(wallet, cid, oid, endpoint=new_nodes[0]) assert get_file_hash(source_file_path) == get_file_hash(got_file_path) with allure.step(f'Return storage nodes'): return_all_storage_nodes(sbercloud_client) - wait_object_replication(wallet, cid, oid, 2) + new_nodes = wait_object_replication(wallet, cid, oid, 2) - got_file_path = get_object(wallet, cid, oid) + got_file_path = get_object(wallet, cid, oid, endpoint=new_nodes[0]) assert get_file_hash(source_file_path) == get_file_hash(got_file_path) @@ -108,15 +130,19 @@ def test_panic_storage_node(prepare_wallet_and_deposit, free_storage_check, sequ cid = create_container(wallet, rule=placement_rule, basic_acl=PUBLIC_ACL) oid = put_object(wallet, source_file_path, cid) - with allure.step(f'Return storage nodes'): - nodes = wait_object_replication(wallet, cid, oid, 2) - for node in nodes: + nodes = wait_object_replication(wallet, cid, oid, 2) + allure.attach('\n'.join(nodes), 'Current nodes with object', allure.attachment_type.TEXT) + for node in nodes: + with allure.step(f'Hard reboot host {node} via magic SysRq option'): panic_reboot_host(ip=node.split(':')[-2]) if sequence: - wait_object_replication(wallet, cid, oid, 2) + new_nodes = wait_object_replication(wallet, cid, oid, 2, excluded_nodes=[node]) + allure.attach('\n'.join(new_nodes), f'Nodes with object after {node} fail', + allure.attachment_type.TEXT) - if not sequence: - wait_object_replication(wallet, cid, oid, 2) + if not sequence: + new_nodes = wait_object_replication(wallet, cid, oid, 2, excluded_nodes=nodes) + allure.attach('\n'.join(new_nodes), 'Nodes with object after nodes fail', allure.attachment_type.TEXT) - got_file_path = get_object(wallet, cid, oid) - assert get_file_hash(source_file_path) == get_file_hash(got_file_path) + got_file_path = get_object(wallet, cid, oid, endpoint=new_nodes[0]) + assert get_file_hash(source_file_path) == get_file_hash(got_file_path) diff --git a/robot/resources/lib/python_keywords/node_management.py b/robot/resources/lib/python_keywords/node_management.py index 2c21a7f..bafd160 100644 --- a/robot/resources/lib/python_keywords/node_management.py +++ b/robot/resources/lib/python_keywords/node_management.py @@ -12,10 +12,11 @@ from dataclasses import dataclass from typing import List import docker -from common import NEOFS_NETMAP_DICT, STORAGE_NODE_BIN_PATH, STORAGE_NODE_CONFIG_PATH, STORAGE_NODE_PRIVATE_CONTROL_ENDPOINT, STORAGE_NODE_PWD, STORAGE_NODE_USER +from common import (NEOFS_NETMAP_DICT, STORAGE_NODE_BIN_PATH, STORAGE_NODE_CONFIG_PATH, + STORAGE_NODE_PRIVATE_CONTROL_ENDPOINT, STORAGE_NODE_PWD, STORAGE_NODE_USER) from robot.api import logger from robot.api.deco import keyword -from ssh_helper import HostClient +from ssh_helper import HostClient, HostIsNotAvailable ROBOT_AUTO_KEYWORDS = False @@ -43,7 +44,10 @@ def create_ssh_client(node_name: str) -> HostClient: node_config = NEOFS_NETMAP_DICT.get(node_name) host = node_config.get('control').split(':')[0] - ssh_client = HostClient(host, STORAGE_NODE_USER, STORAGE_NODE_PWD) + try: + ssh_client = HostClient(host, STORAGE_NODE_USER, STORAGE_NODE_PWD) + except HostIsNotAvailable: + ssh_client = HostClient(host) try: yield ssh_client @@ -52,7 +56,7 @@ def create_ssh_client(node_name: str) -> HostClient: @keyword('Stop Nodes') -def stop_nodes(number: int, nodes: list) -> None: +def stop_nodes(number: int, nodes: list) -> list: """ The function shuts down the given number of randomly selected nodes in docker. @@ -122,7 +126,7 @@ def get_locode(): @keyword('Stop Nodes Remote') -def stop_nodes_remote(number: int, nodes: list) -> None: +def stop_nodes_remote(number: int, nodes: list) -> list: """ The function shuts down the given number of randomly selected nodes in docker.