forked from TrueCloudLab/frostfs-testcases
Extend allure logging for failover tests
Signed-off-by: Vladimir Avdeev <v.avdeev@yadro.com>
This commit is contained in:
parent
b6b95b86e8
commit
eb5532c08e
3 changed files with 58 additions and 27 deletions
|
@ -10,6 +10,7 @@ markers =
|
||||||
sanity: small tests subset
|
sanity: small tests subset
|
||||||
staging: test to be excluded from run in verifier/pr-validation/sanity jobs and run test in staging job
|
staging: test to be excluded from run in verifier/pr-validation/sanity jobs and run test in staging job
|
||||||
# functional markers
|
# functional markers
|
||||||
|
container: tests for container creation
|
||||||
grpc_api: standard gRPC API tests
|
grpc_api: standard gRPC API tests
|
||||||
http_gate: HTTP gate contract
|
http_gate: HTTP gate contract
|
||||||
s3_gate: S3 gate tests
|
s3_gate: S3 gate tests
|
||||||
|
|
|
@ -4,15 +4,16 @@ from time import sleep
|
||||||
|
|
||||||
import allure
|
import allure
|
||||||
import pytest
|
import pytest
|
||||||
|
from common import NEOFS_NETMAP_DICT
|
||||||
from python_keywords.container import create_container
|
from python_keywords.container import create_container
|
||||||
from python_keywords.neofs_verbs import get_object, put_object
|
from python_keywords.neofs_verbs import get_object, put_object
|
||||||
|
from python_keywords.node_management import node_healthcheck
|
||||||
from python_keywords.utility_keywords import generate_file, get_file_hash
|
from python_keywords.utility_keywords import generate_file, get_file_hash
|
||||||
from sbercloud_helper import SberCloud
|
from sbercloud_helper import SberCloud
|
||||||
from ssh_helper import HostClient, HostIsNotAvailable
|
from ssh_helper import HostClient, HostIsNotAvailable
|
||||||
from storage_policy import get_nodes_with_object
|
from storage_policy import get_nodes_with_object
|
||||||
from wellknown_acl import PUBLIC_ACL
|
from wellknown_acl import PUBLIC_ACL
|
||||||
|
|
||||||
SSH_PK_PATH = f'{os.getcwd()}/configuration/id_rsa'
|
|
||||||
logger = logging.getLogger('NeoLogger')
|
logger = logging.getLogger('NeoLogger')
|
||||||
stopped_hosts = []
|
stopped_hosts = []
|
||||||
|
|
||||||
|
@ -20,7 +21,7 @@ stopped_hosts = []
|
||||||
@pytest.fixture(scope='session')
|
@pytest.fixture(scope='session')
|
||||||
def free_storage_check():
|
def free_storage_check():
|
||||||
if os.getenv('FREE_STORAGE', default='False').lower() not in ('true', '1'):
|
if os.getenv('FREE_STORAGE', default='False').lower() not in ('true', '1'):
|
||||||
pytest.skip('Test work only on SberCloud infrastructure')
|
pytest.skip('Test only works on SberCloud infrastructure')
|
||||||
yield
|
yield
|
||||||
|
|
||||||
|
|
||||||
|
@ -31,7 +32,6 @@ def sbercloud_client():
|
||||||
yield SberCloud(f'{os.getcwd()}/configuration/sbercloud.yaml')
|
yield SberCloud(f'{os.getcwd()}/configuration/sbercloud.yaml')
|
||||||
except Exception:
|
except Exception:
|
||||||
pytest.fail('SberCloud infrastructure not available')
|
pytest.fail('SberCloud infrastructure not available')
|
||||||
yield None
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope='session', autouse=True)
|
@pytest.fixture(scope='session', autouse=True)
|
||||||
|
@ -40,11 +40,8 @@ def return_all_storage_nodes_fixture(sbercloud_client):
|
||||||
return_all_storage_nodes(sbercloud_client)
|
return_all_storage_nodes(sbercloud_client)
|
||||||
|
|
||||||
|
|
||||||
@allure.title('Hard reboot host via magic SysRq option')
|
|
||||||
def panic_reboot_host(ip: str = None):
|
def panic_reboot_host(ip: str = None):
|
||||||
ssh = HostClient(ip=ip, init_ssh_client=False)
|
ssh = HostClient(ip=ip)
|
||||||
ssh.pk = SSH_PK_PATH
|
|
||||||
ssh.create_connection(attempts=1)
|
|
||||||
ssh.exec('echo 1 > /proc/sys/kernel/sysrq')
|
ssh.exec('echo 1 > /proc/sys/kernel/sysrq')
|
||||||
with pytest.raises(HostIsNotAvailable):
|
with pytest.raises(HostIsNotAvailable):
|
||||||
ssh.exec('echo b > /proc/sysrq-trigger', timeout=1)
|
ssh.exec('echo b > /proc/sysrq-trigger', timeout=1)
|
||||||
|
@ -52,15 +49,39 @@ def panic_reboot_host(ip: str = None):
|
||||||
|
|
||||||
def return_all_storage_nodes(sbercloud_client: SberCloud):
|
def return_all_storage_nodes(sbercloud_client: SberCloud):
|
||||||
for host in stopped_hosts:
|
for host in stopped_hosts:
|
||||||
|
with allure.step(f'Start storage node {host}'):
|
||||||
sbercloud_client.start_node(node_ip=host.split(':')[-2])
|
sbercloud_client.start_node(node_ip=host.split(':')[-2])
|
||||||
stopped_hosts.remove(host)
|
stopped_hosts.remove(host)
|
||||||
|
wait_all_storage_node_returned()
|
||||||
|
|
||||||
|
|
||||||
def wait_object_replication(wallet, cid, oid, expected_copies: int) -> [str]:
|
def is_all_storage_node_returned() -> bool:
|
||||||
|
with allure.step('Run health check for all storage nodes'):
|
||||||
|
for node_name in NEOFS_NETMAP_DICT.keys():
|
||||||
|
try:
|
||||||
|
health_check = node_healthcheck(node_name)
|
||||||
|
except (AssertionError, HostIsNotAvailable, TimeoutError):
|
||||||
|
return False
|
||||||
|
if health_check.health_status != 'READY' or health_check.network_status != 'ONLINE':
|
||||||
|
return False
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
def wait_all_storage_node_returned():
|
||||||
|
sleep_interval, attempts = 10, 12
|
||||||
|
for __attempt in range(attempts):
|
||||||
|
if is_all_storage_node_returned():
|
||||||
|
return
|
||||||
|
sleep(sleep_interval)
|
||||||
|
raise AssertionError('Storage node(s) is broken')
|
||||||
|
|
||||||
|
|
||||||
|
def wait_object_replication(wallet, cid, oid, expected_copies: int, excluded_nodes: [str] = None) -> [str]:
|
||||||
|
excluded_nodes = excluded_nodes or []
|
||||||
sleep_interval, attempts = 10, 12
|
sleep_interval, attempts = 10, 12
|
||||||
nodes = []
|
nodes = []
|
||||||
for __attempt in range(attempts):
|
for __attempt in range(attempts):
|
||||||
nodes = get_nodes_with_object(wallet, cid, oid)
|
nodes = [node for node in get_nodes_with_object(wallet, cid, oid) if node not in excluded_nodes]
|
||||||
if len(nodes) == expected_copies:
|
if len(nodes) == expected_copies:
|
||||||
return nodes
|
return nodes
|
||||||
sleep(sleep_interval)
|
sleep(sleep_interval)
|
||||||
|
@ -81,20 +102,21 @@ def test_lost_storage_node(prepare_wallet_and_deposit, sbercloud_client: SberClo
|
||||||
|
|
||||||
new_nodes = []
|
new_nodes = []
|
||||||
for node in nodes:
|
for node in nodes:
|
||||||
|
stopped_hosts.append(node)
|
||||||
with allure.step(f'Stop storage node {node}'):
|
with allure.step(f'Stop storage node {node}'):
|
||||||
sbercloud_client.stop_node(node_ip=node.split(':')[-2], hard=hard_reboot)
|
sbercloud_client.stop_node(node_ip=node.split(':')[-2], hard=hard_reboot)
|
||||||
new_nodes = wait_object_replication(wallet, cid, oid, 2)
|
new_nodes = wait_object_replication(wallet, cid, oid, 2, excluded_nodes=[node])
|
||||||
|
|
||||||
assert not [node for node in nodes if node in new_nodes]
|
assert not [node for node in nodes if node in new_nodes]
|
||||||
got_file_path = get_object(wallet, cid, oid)
|
got_file_path = get_object(wallet, cid, oid, endpoint=new_nodes[0])
|
||||||
assert get_file_hash(source_file_path) == get_file_hash(got_file_path)
|
assert get_file_hash(source_file_path) == get_file_hash(got_file_path)
|
||||||
|
|
||||||
with allure.step(f'Return storage nodes'):
|
with allure.step(f'Return storage nodes'):
|
||||||
return_all_storage_nodes(sbercloud_client)
|
return_all_storage_nodes(sbercloud_client)
|
||||||
|
|
||||||
wait_object_replication(wallet, cid, oid, 2)
|
new_nodes = wait_object_replication(wallet, cid, oid, 2)
|
||||||
|
|
||||||
got_file_path = get_object(wallet, cid, oid)
|
got_file_path = get_object(wallet, cid, oid, endpoint=new_nodes[0])
|
||||||
assert get_file_hash(source_file_path) == get_file_hash(got_file_path)
|
assert get_file_hash(source_file_path) == get_file_hash(got_file_path)
|
||||||
|
|
||||||
|
|
||||||
|
@ -108,15 +130,19 @@ def test_panic_storage_node(prepare_wallet_and_deposit, free_storage_check, sequ
|
||||||
cid = create_container(wallet, rule=placement_rule, basic_acl=PUBLIC_ACL)
|
cid = create_container(wallet, rule=placement_rule, basic_acl=PUBLIC_ACL)
|
||||||
oid = put_object(wallet, source_file_path, cid)
|
oid = put_object(wallet, source_file_path, cid)
|
||||||
|
|
||||||
with allure.step(f'Return storage nodes'):
|
|
||||||
nodes = wait_object_replication(wallet, cid, oid, 2)
|
nodes = wait_object_replication(wallet, cid, oid, 2)
|
||||||
|
allure.attach('\n'.join(nodes), 'Current nodes with object', allure.attachment_type.TEXT)
|
||||||
for node in nodes:
|
for node in nodes:
|
||||||
|
with allure.step(f'Hard reboot host {node} via magic SysRq option'):
|
||||||
panic_reboot_host(ip=node.split(':')[-2])
|
panic_reboot_host(ip=node.split(':')[-2])
|
||||||
if sequence:
|
if sequence:
|
||||||
wait_object_replication(wallet, cid, oid, 2)
|
new_nodes = wait_object_replication(wallet, cid, oid, 2, excluded_nodes=[node])
|
||||||
|
allure.attach('\n'.join(new_nodes), f'Nodes with object after {node} fail',
|
||||||
|
allure.attachment_type.TEXT)
|
||||||
|
|
||||||
if not sequence:
|
if not sequence:
|
||||||
wait_object_replication(wallet, cid, oid, 2)
|
new_nodes = wait_object_replication(wallet, cid, oid, 2, excluded_nodes=nodes)
|
||||||
|
allure.attach('\n'.join(new_nodes), 'Nodes with object after nodes fail', allure.attachment_type.TEXT)
|
||||||
|
|
||||||
got_file_path = get_object(wallet, cid, oid)
|
got_file_path = get_object(wallet, cid, oid, endpoint=new_nodes[0])
|
||||||
assert get_file_hash(source_file_path) == get_file_hash(got_file_path)
|
assert get_file_hash(source_file_path) == get_file_hash(got_file_path)
|
||||||
|
|
|
@ -12,10 +12,11 @@ from dataclasses import dataclass
|
||||||
from typing import List
|
from typing import List
|
||||||
|
|
||||||
import docker
|
import docker
|
||||||
from common import NEOFS_NETMAP_DICT, STORAGE_NODE_BIN_PATH, STORAGE_NODE_CONFIG_PATH, STORAGE_NODE_PRIVATE_CONTROL_ENDPOINT, STORAGE_NODE_PWD, STORAGE_NODE_USER
|
from common import (NEOFS_NETMAP_DICT, STORAGE_NODE_BIN_PATH, STORAGE_NODE_CONFIG_PATH,
|
||||||
|
STORAGE_NODE_PRIVATE_CONTROL_ENDPOINT, STORAGE_NODE_PWD, STORAGE_NODE_USER)
|
||||||
from robot.api import logger
|
from robot.api import logger
|
||||||
from robot.api.deco import keyword
|
from robot.api.deco import keyword
|
||||||
from ssh_helper import HostClient
|
from ssh_helper import HostClient, HostIsNotAvailable
|
||||||
|
|
||||||
ROBOT_AUTO_KEYWORDS = False
|
ROBOT_AUTO_KEYWORDS = False
|
||||||
|
|
||||||
|
@ -43,7 +44,10 @@ def create_ssh_client(node_name: str) -> HostClient:
|
||||||
|
|
||||||
node_config = NEOFS_NETMAP_DICT.get(node_name)
|
node_config = NEOFS_NETMAP_DICT.get(node_name)
|
||||||
host = node_config.get('control').split(':')[0]
|
host = node_config.get('control').split(':')[0]
|
||||||
|
try:
|
||||||
ssh_client = HostClient(host, STORAGE_NODE_USER, STORAGE_NODE_PWD)
|
ssh_client = HostClient(host, STORAGE_NODE_USER, STORAGE_NODE_PWD)
|
||||||
|
except HostIsNotAvailable:
|
||||||
|
ssh_client = HostClient(host)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
yield ssh_client
|
yield ssh_client
|
||||||
|
@ -52,7 +56,7 @@ def create_ssh_client(node_name: str) -> HostClient:
|
||||||
|
|
||||||
|
|
||||||
@keyword('Stop Nodes')
|
@keyword('Stop Nodes')
|
||||||
def stop_nodes(number: int, nodes: list) -> None:
|
def stop_nodes(number: int, nodes: list) -> list:
|
||||||
"""
|
"""
|
||||||
The function shuts down the given number of randomly
|
The function shuts down the given number of randomly
|
||||||
selected nodes in docker.
|
selected nodes in docker.
|
||||||
|
@ -122,7 +126,7 @@ def get_locode():
|
||||||
|
|
||||||
|
|
||||||
@keyword('Stop Nodes Remote')
|
@keyword('Stop Nodes Remote')
|
||||||
def stop_nodes_remote(number: int, nodes: list) -> None:
|
def stop_nodes_remote(number: int, nodes: list) -> list:
|
||||||
"""
|
"""
|
||||||
The function shuts down the given number of randomly
|
The function shuts down the given number of randomly
|
||||||
selected nodes in docker.
|
selected nodes in docker.
|
||||||
|
|
Loading…
Reference in a new issue