Try to fix failovers tests

2022-07-15 03:10:46 +03:00 · 2022-07-15 03:10:46 +03:00 · 7f6cd8e3b0
commit 7f6cd8e3b0
parent dbf679bfbc
2 changed files with 45 additions and 11 deletions
--- a/pytest_tests/testsuites/failovers/test_failover_storage.py
+++ b/pytest_tests/testsuites/failovers/test_failover_storage.py
@ -4,6 +4,7 @@ from time import sleep

 import allure
 import pytest
+from common import NEOFS_NETMAP, NEOFS_NETMAP_DICT, STORAGE_NODE_CONFIG_PATH
 from python_keywords.container import create_container
 from python_keywords.neofs_verbs import get_object, put_object
 from python_keywords.utility_keywords import generate_file, get_file_hash
@ -50,11 +51,43 @@ def panic_reboot_host(ip: str = None):

 def return_all_storage_nodes(sbercloud_client: SberCloud):
+    # Start every host listed in stopped_hosts (defined outside this hunk) through
+    # the given SberCloud client, empty the list, then wait for the storage nodes
+    # to pass the health check.
    for host in stopped_hosts:
-        sbercloud_client.start_node(node_ip=host.split(':')[-2])
-        stopped_hosts.remove(host)
+        with allure.step(f'Start storage node {host}'):
+            # The second-to-last ':'-separated field of the host string is used as the node IP.
+            sbercloud_client.start_node(node_ip=host.split(':')[-2])
+    # Clear the list once after the loop instead of removing entries while iterating over it.
+    stopped_hosts.clear()
+    wait_all_storage_node_returned()


-def wait_object_replication(wallet, cid, oid, expected_copies: int) -> [str]:
+def is_all_storage_node_returned() -> bool:
+    # Return True only when every node named in NEOFS_NETMAP_DICT answers the health
+    # check with health_status 'READY' and network_status 'ONLINE'. Return False at
+    # the first node whose check raises or reports any other status.
+    with allure.step('Run health check for all storage nodes'):
+        for node_name in NEOFS_NETMAP_DICT.keys():
+            try:
+                health_check = node_healthcheck(node_name)
+            except (AssertionError, HostIsNotAvailable, TimeoutError):
+                ###  REMOVE AFTER FIX STORAGE_NODE_CONFIG_PATH ###
+                # Temporary workaround: for every host in NEOFS_NETMAP, run an echo command
+                # through HostClient that writes wallet/password lines to
+                # STORAGE_NODE_CONFIG_PATH. Best effort: any error raised here is ignored.
+                try:
+                    for host in NEOFS_NETMAP:
+                        ssh = HostClient(ip=host.split(':')[-2])
+                        ssh.exec(f'echo -e "wallet: /etc/neofs/storage/wallet.json\npassword: \"\"" > {STORAGE_NODE_CONFIG_PATH}')
+                except Exception:
+                    pass
+                ###  REMOVE AFTER FIX STORAGE_NODE_CONFIG_PATH ###
+                # The failed health check still counts as "not all nodes returned".
+                return False
+            if health_check.health_status != 'READY' or health_check.network_status != 'ONLINE':
+                return False
+    return True
+
+
+def wait_all_storage_node_returned():
+    # Poll is_all_storage_node_returned() up to 12 times, sleeping 10 seconds after
+    # each unsuccessful attempt. Return as soon as one attempt succeeds; raise
+    # AssertionError if none does.
+    sleep_interval, attempts = 10, 12
+    for __attempt in range(attempts):
+        if is_all_storage_node_returned():
+            return
+        sleep(sleep_interval)
+    raise AssertionError('Storage node(s) is broken')
+
+
+def wait_object_replication(wallet, cid, oid, expected_copies: int, excluded_nodes: [str] = None) -> [str]:
+    excluded_nodes = excluded_nodes or []
    sleep_interval, attempts = 10, 12
    nodes = []
    for __attempt in range(attempts):
@ -84,15 +117,15 @@ def test_lost_storage_node(prepare_wallet_and_deposit, sbercloud_client: SberClo
        new_nodes = wait_object_replication(wallet, cid, oid, 2)

    assert not [node for node in nodes if node in new_nodes]
-    got_file_path = get_object(wallet, cid, oid)
+    got_file_path = get_object(wallet, cid, oid, endpoint=new_nodes[0])
    assert get_file_hash(source_file_path) == get_file_hash(got_file_path)

    with allure.step(f'Return storage nodes'):
        return_all_storage_nodes(sbercloud_client)

-    wait_object_replication(wallet, cid, oid, 2)
+    new_nodes = wait_object_replication(wallet, cid, oid, 2)

-    got_file_path = get_object(wallet, cid, oid)
+    got_file_path = get_object(wallet, cid, oid, endpoint=new_nodes[0])
    assert get_file_hash(source_file_path) == get_file_hash(got_file_path)


@ -107,18 +140,19 @@ def test_panic_storage_node(prepare_wallet_and_deposit, free_storage_check, sequ
    oid = put_object(wallet, source_file_path, cid)

    nodes = wait_object_replication(wallet, cid, oid, 2)
-    allure.attach.file('\n'.join(nodes), 'Current nodes with object', allure.attachment_type.TEXT)
+    new_nodes = []
+    allure.attach('\n'.join(nodes), 'Current nodes with object', allure.attachment_type.TEXT)
    for node in nodes:
        with allure.step(f'Hard reboot host {node} via magic SysRq option'):
            panic_reboot_host(ip=node.split(':')[-2])
            if sequence:
                new_nodes = wait_object_replication(wallet, cid, oid, 2)
-                allure.attach.file('\n'.join(new_nodes), f'Nodes with object after {node} fail',
+                allure.attach('\n'.join(new_nodes), f'Nodes with object after {node} fail',
                                   allure.attachment_type.TEXT)

    if not sequence:
        new_nodes = wait_object_replication(wallet, cid, oid, 2)
-        allure.attach.file('\n'.join(new_nodes), 'Nodes with object after nodes fail', allure.attachment_type.TEXT)
+        allure.attach('\n'.join(new_nodes), 'Nodes with object after nodes fail', allure.attachment_type.TEXT)

-    got_file_path = get_object(wallet, cid, oid)
+    got_file_path = get_object(wallet, cid, oid, endpoint=new_nodes[0])
    assert get_file_hash(source_file_path) == get_file_hash(got_file_path)
--- a/robot/resources/lib/python_keywords/node_management.py
+++ b/robot/resources/lib/python_keywords/node_management.py
@ -122,7 +122,7 @@ def get_locode():


@keyword('Stop Nodes Remote')
-def stop_nodes_remote(number: int, nodes: list) -> None:
+def stop_nodes_remote(number: int, nodes: list) -> list:
    """
        The function shuts down the given number of randomly
        selected nodes in docker.