forked from TrueCloudLab/frostfs-testcases
Try to fix failover tests
This commit is contained in:
parent dbf679bfbc
commit 7f6cd8e3b0
2 changed files with 45 additions and 11 deletions
@@ -4,6 +4,7 @@ from time import sleep

import allure
import pytest
from common import NEOFS_NETMAP, NEOFS_NETMAP_DICT, STORAGE_NODE_CONFIG_PATH
from python_keywords.container import create_container
from python_keywords.neofs_verbs import get_object, put_object
from python_keywords.utility_keywords import generate_file, get_file_hash

@@ -50,11 +51,43 @@ def panic_reboot_host(ip: str = None):


def return_all_storage_nodes(sbercloud_client: SberCloud):
    for host in stopped_hosts:
-        sbercloud_client.start_node(node_ip=host.split(':')[-2])
-        stopped_hosts.remove(host)
+        with allure.step(f'Start storage node {host}'):
+            sbercloud_client.start_node(node_ip=host.split(':')[-2])
+    stopped_hosts.clear()
+    wait_all_storage_node_returned()

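return_all_storage_nodes now restarts every remembered host under an allure step and then blocks until the whole netmap reports healthy. A hypothetical sketch of wiring it into a pytest teardown fixture (the fixture name and shape are assumptions, not part of this commit):

@pytest.fixture
def return_nodes_after_test(sbercloud_client: SberCloud):
    # Hypothetical teardown: whatever a test stopped gets started again and health-checked.
    yield
    return_all_storage_nodes(sbercloud_client)
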
-def wait_object_replication(wallet, cid, oid, expected_copies: int) -> [str]:
+def is_all_storage_node_returned() -> bool:
+    with allure.step('Run health check for all storage nodes'):
+        for node_name in NEOFS_NETMAP_DICT.keys():
+            try:
+                health_check = node_healthcheck(node_name)
+            except (AssertionError, HostIsNotAvailable, TimeoutError):
+                ### REMOVE AFTER FIX STORAGE_NODE_CONFIG_PATH ###
+                try:
+                    for host in NEOFS_NETMAP:
+                        ssh = HostClient(ip=host.split(':')[-2])
+                        ssh.exec(f'echo -e "wallet: /etc/neofs/storage/wallet.json\npassword: \"\"" > {STORAGE_NODE_CONFIG_PATH}')
+                except Exception:
+                    pass
+                ### REMOVE AFTER FIX STORAGE_NODE_CONFIG_PATH ###
+                return False
+            if health_check.health_status != 'READY' or health_check.network_status != 'ONLINE':
+                return False
+    return True
+
+
+def wait_all_storage_node_returned():
+    sleep_interval, attempts = 10, 12
+    for __attempt in range(attempts):
+        if is_all_storage_node_returned():
+            return
+        sleep(sleep_interval)
+    raise AssertionError('Storage node(s) is broken')
+
+
+def wait_object_replication(wallet, cid, oid, expected_copies: int, excluded_nodes: [str] = None) -> [str]:
+    excluded_nodes = excluded_nodes or []
    sleep_interval, attempts = 10, 12
    nodes = []
    for __attempt in range(attempts):

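The hunk ends at the top of the retry loop, so the body of wait_object_replication is not shown here. As a rough illustration only, a polling loop of this shape is commonly written like the sketch below; the helper get_nodes_with_object and the exact loop body are assumptions, not taken from this commit:

def wait_object_replication(wallet, cid, oid, expected_copies: int, excluded_nodes: [str] = None) -> [str]:
    excluded_nodes = excluded_nodes or []
    sleep_interval, attempts = 10, 12
    nodes = []
    for __attempt in range(attempts):
        # get_nodes_with_object is an assumed helper returning the endpoints that currently hold the object
        nodes = [node for node in get_nodes_with_object(wallet, cid, oid) if node not in excluded_nodes]
        if len(nodes) == expected_copies:
            return nodes
        sleep(sleep_interval)
    raise AssertionError(f'Expected {expected_copies} copies of {oid}, got {len(nodes)} after {attempts} attempts')
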
@@ -84,15 +117,15 @@ def test_lost_storage_node(prepare_wallet_and_deposit, sbercloud_client: SberClo
    new_nodes = wait_object_replication(wallet, cid, oid, 2)

    assert not [node for node in nodes if node in new_nodes]
-    got_file_path = get_object(wallet, cid, oid)
+    got_file_path = get_object(wallet, cid, oid, endpoint=new_nodes[0])
    assert get_file_hash(source_file_path) == get_file_hash(got_file_path)

    with allure.step(f'Return storage nodes'):
        return_all_storage_nodes(sbercloud_client)

-    wait_object_replication(wallet, cid, oid, 2)
+    new_nodes = wait_object_replication(wallet, cid, oid, 2)

-    got_file_path = get_object(wallet, cid, oid)
+    got_file_path = get_object(wallet, cid, oid, endpoint=new_nodes[0])
    assert get_file_hash(source_file_path) == get_file_hash(got_file_path)

@@ -107,18 +140,19 @@ def test_panic_storage_node(prepare_wallet_and_deposit, free_storage_check, sequ
    oid = put_object(wallet, source_file_path, cid)

    nodes = wait_object_replication(wallet, cid, oid, 2)
-    allure.attach.file('\n'.join(nodes), 'Current nodes with object', allure.attachment_type.TEXT)
+    new_nodes = []
+    allure.attach('\n'.join(nodes), 'Current nodes with object', allure.attachment_type.TEXT)
    for node in nodes:
        with allure.step(f'Hard reboot host {node} via magic SysRq option'):
            panic_reboot_host(ip=node.split(':')[-2])
            if sequence:
                new_nodes = wait_object_replication(wallet, cid, oid, 2)
-                allure.attach.file('\n'.join(new_nodes), f'Nodes with object after {node} fail',
+                allure.attach('\n'.join(new_nodes), f'Nodes with object after {node} fail',
                              allure.attachment_type.TEXT)

    if not sequence:
        new_nodes = wait_object_replication(wallet, cid, oid, 2)
-        allure.attach.file('\n'.join(new_nodes), 'Nodes with object after nodes fail', allure.attachment_type.TEXT)
+        allure.attach('\n'.join(new_nodes), 'Nodes with object after nodes fail', allure.attachment_type.TEXT)

-    got_file_path = get_object(wallet, cid, oid)
+    got_file_path = get_object(wallet, cid, oid, endpoint=new_nodes[0])
    assert get_file_hash(source_file_path) == get_file_hash(got_file_path)

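The hunk header references panic_reboot_host, whose implementation sits outside this diff. A minimal sketch of a SysRq-based hard reboot over SSH, assuming the same HostClient helper used above (the actual implementation in the repository may differ):

def panic_reboot_host(ip: str = None):
    ssh = HostClient(ip=ip)
    # Enable the magic SysRq interface, then request an immediate reboot ('b').
    ssh.exec('sudo sh -c "echo 1 > /proc/sys/kernel/sysrq"')
    # 'b' reboots without syncing or unmounting, so the SSH session is expected to drop here.
    ssh.exec('sudo sh -c "echo b > /proc/sysrq-trigger"')
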
@@ -122,7 +122,7 @@ def get_locode():


@keyword('Stop Nodes Remote')
-def stop_nodes_remote(number: int, nodes: list) -> None:
+def stop_nodes_remote(number: int, nodes: list) -> list:
    """
    The function shuts down the given number of randomly
    selected nodes in docker.
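Returning the stopped nodes lets callers keep track of what to exclude or restart later. A hypothetical usage sketch (the surrounding test code is an assumption, not part of this commit):

# Stop one random node, then wait for the object to be replicated onto the remaining ones.
stopped_nodes = stop_nodes_remote(1, list(NEOFS_NETMAP))
new_nodes = wait_object_replication(wallet, cid, oid, 2, excluded_nodes=stopped_nodes)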