Try to fix failover tests

This commit is contained in:
Vladimir Avdeev 2022-07-14 21:44:43 +03:00 committed by Vladimir Domnich
parent 276dc523e9
commit d7a93ee8bd
2 changed files with 17 additions and 23 deletions

View file

@ -4,16 +4,16 @@ from time import sleep
import allure
import pytest
from common import NEOFS_NETMAP, NEOFS_NETMAP_DICT, STORAGE_NODE_CONFIG_PATH
from common import NEOFS_NETMAP_DICT
from python_keywords.container import create_container
from python_keywords.neofs_verbs import get_object, put_object
from python_keywords.node_management import node_healthcheck
from python_keywords.utility_keywords import generate_file, get_file_hash
from sbercloud_helper import SberCloud
from ssh_helper import HostClient, HostIsNotAvailable
from storage_policy import get_nodes_with_object
from wellknown_acl import PUBLIC_ACL
SSH_PK_PATH = f'{os.getcwd()}/configuration/id_rsa'
logger = logging.getLogger('NeoLogger')
stopped_hosts = []
@ -41,9 +41,7 @@ def return_all_storage_nodes_fixture(sbercloud_client):
def panic_reboot_host(ip: str = None):
ssh = HostClient(ip=ip, init_ssh_client=False)
ssh.pk = SSH_PK_PATH
ssh.create_connection(attempts=1)
ssh = HostClient(ip=ip)
ssh.exec('echo 1 > /proc/sys/kernel/sysrq')
with pytest.raises(HostIsNotAvailable):
ssh.exec('echo b > /proc/sysrq-trigger', timeout=1)
@ -53,7 +51,7 @@ def return_all_storage_nodes(sbercloud_client: SberCloud):
for host in stopped_hosts:
with allure.step(f'Start storage node {host}'):
sbercloud_client.start_node(node_ip=host.split(':')[-2])
stopped_hosts.clear()
stopped_hosts.remove(host)
wait_all_storage_node_returned()
@ -63,14 +61,6 @@ def is_all_storage_node_returned() -> bool:
try:
health_check = node_healthcheck(node_name)
except (AssertionError, HostIsNotAvailable, TimeoutError):
### REMOVE AFTER FIX STORAGE_NODE_CONFIG_PATH ###
try:
for host in NEOFS_NETMAP:
ssh = HostClient(ip=host.split(':')[-2])
ssh.exec(f'echo -e "wallet: /etc/neofs/storage/wallet.json\npassword: \"\"" > {STORAGE_NODE_CONFIG_PATH}')
except Exception:
pass
### REMOVE AFTER FIX STORAGE_NODE_CONFIG_PATH ###
return False
if health_check.health_status != 'READY' or health_check.network_status != 'ONLINE':
return False
@ -91,7 +81,7 @@ def wait_object_replication(wallet, cid, oid, expected_copies: int, excluded_nod
sleep_interval, attempts = 10, 12
nodes = []
for __attempt in range(attempts):
nodes = get_nodes_with_object(wallet, cid, oid)
nodes = [node for node in get_nodes_with_object(wallet, cid, oid) if node not in excluded_nodes]
if len(nodes) == expected_copies:
return nodes
sleep(sleep_interval)
@ -112,9 +102,10 @@ def test_lost_storage_node(prepare_wallet_and_deposit, sbercloud_client: SberClo
new_nodes = []
for node in nodes:
stopped_hosts.append(node)
with allure.step(f'Stop storage node {node}'):
sbercloud_client.stop_node(node_ip=node.split(':')[-2], hard=hard_reboot)
new_nodes = wait_object_replication(wallet, cid, oid, 2)
new_nodes = wait_object_replication(wallet, cid, oid, 2, excluded_nodes=[node])
assert not [node for node in nodes if node in new_nodes]
got_file_path = get_object(wallet, cid, oid, endpoint=new_nodes[0])
@ -140,18 +131,17 @@ def test_panic_storage_node(prepare_wallet_and_deposit, free_storage_check, sequ
oid = put_object(wallet, source_file_path, cid)
nodes = wait_object_replication(wallet, cid, oid, 2)
new_nodes = []
allure.attach('\n'.join(nodes), 'Current nodes with object', allure.attachment_type.TEXT)
for node in nodes:
with allure.step(f'Hard reboot host {node} via magic SysRq option'):
panic_reboot_host(ip=node.split(':')[-2])
if sequence:
new_nodes = wait_object_replication(wallet, cid, oid, 2)
new_nodes = wait_object_replication(wallet, cid, oid, 2, excluded_nodes=[node])
allure.attach('\n'.join(new_nodes), f'Nodes with object after {node} fail',
allure.attachment_type.TEXT)
if not sequence:
new_nodes = wait_object_replication(wallet, cid, oid, 2)
new_nodes = wait_object_replication(wallet, cid, oid, 2, excluded_nodes=nodes)
allure.attach('\n'.join(new_nodes), 'Nodes with object after nodes fail', allure.attachment_type.TEXT)
got_file_path = get_object(wallet, cid, oid, endpoint=new_nodes[0])

View file

@ -12,10 +12,11 @@ from dataclasses import dataclass
from typing import List
import docker
from common import NEOFS_NETMAP_DICT, STORAGE_NODE_BIN_PATH, STORAGE_NODE_CONFIG_PATH, STORAGE_NODE_PRIVATE_CONTROL_ENDPOINT, STORAGE_NODE_PWD, STORAGE_NODE_USER
from common import (NEOFS_NETMAP_DICT, STORAGE_NODE_BIN_PATH, STORAGE_NODE_CONFIG_PATH,
STORAGE_NODE_PRIVATE_CONTROL_ENDPOINT, STORAGE_NODE_PWD, STORAGE_NODE_USER)
from robot.api import logger
from robot.api.deco import keyword
from ssh_helper import HostClient
from ssh_helper import HostClient, HostIsNotAvailable
ROBOT_AUTO_KEYWORDS = False
@ -43,7 +44,10 @@ def create_ssh_client(node_name: str) -> HostClient:
node_config = NEOFS_NETMAP_DICT.get(node_name)
host = node_config.get('control').split(':')[0]
ssh_client = HostClient(host, STORAGE_NODE_USER, STORAGE_NODE_PWD)
try:
ssh_client = HostClient(host, STORAGE_NODE_USER, STORAGE_NODE_PWD)
except HostIsNotAvailable:
ssh_client = HostClient(host)
try:
yield ssh_client
@ -52,7 +56,7 @@ def create_ssh_client(node_name: str) -> HostClient:
@keyword('Stop Nodes')
def stop_nodes(number: int, nodes: list) -> None:
def stop_nodes(number: int, nodes: list) -> list:
"""
The function shuts down the given number of randomly
selected nodes in docker.