forked from TrueCloudLab/frostfs-testcases
Add test for network failover
Signed-off-by: a.y.volkov <a.y.volkov@yadro.com>
This commit is contained in:
parent
5f53e80f93
commit
642af0a888
9 changed files with 247 additions and 67 deletions
16
pytest_tests/helpers/iptables_helper.py
Normal file
16
pytest_tests/helpers/iptables_helper.py
Normal file
|
@ -0,0 +1,16 @@
|
||||||
|
from ssh_helper import HostClient
|
||||||
|
|
||||||
|
|
||||||
|
class IpTablesHelper:
    """Iptables commands executed on a remote host through an SSH connection.

    All methods are static and operate on an already-constructed ``HostClient``;
    the commands are run via ``client.exec``, which prefixes ``sudo`` for
    non-root logins.
    """

    @staticmethod
    def drop_input_traffic_to_port(client: HostClient, ports: list[str]):
        """Append a DROP rule for incoming TCP traffic on each given port.

        Args:
            client: SSH client connected to the target host.
            ports: TCP port numbers (as strings) to block.
        """
        for port in ports:
            cmd_output = client.exec(cmd=f'iptables -A INPUT -p tcp --dport {port} -j DROP')
            # client.exec(verify=True) already asserts rc == 0; keep an explicit
            # guard with a readable message for consistency with ssh_helper.
            assert cmd_output.rc == 0, f'Non zero rc when blocking port {port}: {cmd_output.rc}'

    @staticmethod
    def restore_input_traffic_to_port(client: HostClient, ports: list[str]):
        """Delete the DROP rule previously appended for each given port.

        Args:
            client: SSH client connected to the target host.
            ports: TCP port numbers (as strings) to unblock.
        """
        for port in ports:
            cmd_output = client.exec(cmd=f'iptables -D INPUT -p tcp --dport {port} -j DROP')
            assert cmd_output.rc == 0, f'Non zero rc when unblocking port {port}: {cmd_output.rc}'
|
|
@ -66,15 +66,19 @@ class HostClient:
|
||||||
TIMEOUT_RESTORE_CONNECTION = 10, 24
|
TIMEOUT_RESTORE_CONNECTION = 10, 24
|
||||||
|
|
||||||
def __init__(self, ip: str, login: str, password: Optional[str] = None,
|
def __init__(self, ip: str, login: str, password: Optional[str] = None,
|
||||||
private_key_path: Optional[str] = None, init_ssh_client=True) -> None:
|
private_key_path: Optional[str] = None, private_key_passphrase: Optional[str] = None,
|
||||||
|
init_ssh_client=True) -> None:
|
||||||
self.ip = ip
|
self.ip = ip
|
||||||
self.login = login
|
self.login = login
|
||||||
self.password = password
|
self.password = password
|
||||||
self.private_key_path = private_key_path
|
self.private_key_path = private_key_path
|
||||||
|
self.private_key_passphrase = private_key_passphrase
|
||||||
if init_ssh_client:
|
if init_ssh_client:
|
||||||
self.create_connection(self.SSH_CONNECTION_ATTEMPTS)
|
self.create_connection(self.SSH_CONNECTION_ATTEMPTS)
|
||||||
|
|
||||||
def exec(self, cmd: str, verify=True, timeout=90) -> SSHCommand:
|
def exec(self, cmd: str, verify=True, timeout=90) -> SSHCommand:
|
||||||
|
if self.login != 'root':
|
||||||
|
cmd = f'sudo {cmd}'
|
||||||
cmd_result = self._inner_exec(cmd, timeout)
|
cmd_result = self._inner_exec(cmd, timeout)
|
||||||
if verify:
|
if verify:
|
||||||
assert cmd_result.rc == 0, f'Non zero rc from command: "{cmd}"'
|
assert cmd_result.rc == 0, f'Non zero rc from command: "{cmd}"'
|
||||||
|
@ -110,6 +114,15 @@ class HostClient:
|
||||||
self.login, self.password = keep_user, keep_password
|
self.login, self.password = keep_user, keep_password
|
||||||
self.create_connection()
|
self.create_connection()
|
||||||
|
|
||||||
|
@contextmanager
|
||||||
|
def create_ssh_connection(self) -> 'SSHClient':
|
||||||
|
if not self.ssh_client:
|
||||||
|
self.create_connection()
|
||||||
|
try:
|
||||||
|
yield self.ssh_client
|
||||||
|
finally:
|
||||||
|
self.drop()
|
||||||
|
|
||||||
@allure.step('Restore connection')
|
@allure.step('Restore connection')
|
||||||
def restore_ssh_connection(self):
|
def restore_ssh_connection(self):
|
||||||
retry_time, retry_count = self.TIMEOUT_RESTORE_CONNECTION
|
retry_time, retry_count = self.TIMEOUT_RESTORE_CONNECTION
|
||||||
|
@ -151,13 +164,13 @@ class HostClient:
|
||||||
try:
|
try:
|
||||||
if self.private_key_path:
|
if self.private_key_path:
|
||||||
logging.info(
|
logging.info(
|
||||||
f"Trying to connect to host {self.ip} using SSH key "
|
f"Trying to connect to host {self.ip} as {self.login} using SSH key "
|
||||||
f"{self.private_key_path} (attempt {attempt})"
|
f"{self.private_key_path} (attempt {attempt})"
|
||||||
)
|
)
|
||||||
self.ssh_client.connect(
|
self.ssh_client.connect(
|
||||||
hostname=self.ip,
|
hostname=self.ip,
|
||||||
username=self.login,
|
username=self.login,
|
||||||
pkey=RSAKey.from_private_key_file(self.private_key_path, self.password),
|
pkey=RSAKey.from_private_key_file(self.private_key_path, self.private_key_passphrase),
|
||||||
timeout=self.CONNECTION_TIMEOUT
|
timeout=self.CONNECTION_TIMEOUT
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
|
@ -175,7 +188,7 @@ class HostClient:
|
||||||
|
|
||||||
except AuthenticationException as auth_err:
|
except AuthenticationException as auth_err:
|
||||||
logging.error(f'Host: {self.ip}. {auth_err}')
|
logging.error(f'Host: {self.ip}. {auth_err}')
|
||||||
|
self.drop()
|
||||||
raise auth_err
|
raise auth_err
|
||||||
|
|
||||||
except (
|
except (
|
||||||
|
@ -186,6 +199,7 @@ class HostClient:
|
||||||
OSError
|
OSError
|
||||||
) as ssh_err:
|
) as ssh_err:
|
||||||
exc_err = ssh_err
|
exc_err = ssh_err
|
||||||
|
self.drop()
|
||||||
logging.error(f'Host: {self.ip}, connection error. {exc_err}')
|
logging.error(f'Host: {self.ip}, connection error. {exc_err}')
|
||||||
|
|
||||||
raise HostIsNotAvailable(self.ip, exc_err)
|
raise HostIsNotAvailable(self.ip, exc_err)
|
||||||
|
@ -197,8 +211,6 @@ class HostClient:
|
||||||
def _inner_exec(self, cmd: str, timeout: int) -> SSHCommand:
|
def _inner_exec(self, cmd: str, timeout: int) -> SSHCommand:
|
||||||
if not self.ssh_client:
|
if not self.ssh_client:
|
||||||
self.create_connection()
|
self.create_connection()
|
||||||
if self.login != "root":
|
|
||||||
cmd = f"sudo {cmd}"
|
|
||||||
for _ in range(self.SSH_CONNECTION_ATTEMPTS):
|
for _ in range(self.SSH_CONNECTION_ATTEMPTS):
|
||||||
try:
|
try:
|
||||||
_, stdout, stderr = self.ssh_client.exec_command(cmd, timeout=timeout)
|
_, stdout, stderr = self.ssh_client.exec_command(cmd, timeout=timeout)
|
||||||
|
|
|
@ -18,4 +18,6 @@ markers =
|
||||||
long: long tests (with long execution time)
|
long: long tests (with long execution time)
|
||||||
node_mgmt: neofs control commands
|
node_mgmt: neofs control commands
|
||||||
acl: tests for basic and extended ACL
|
acl: tests for basic and extended ACL
|
||||||
failover: tests for system recovery after a failure
|
failover: tests for system recovery after a failure
|
||||||
|
failover_panic: tests for system recovery after panic reboot of a node
|
||||||
|
failover_net: tests for network failure
|
||||||
|
|
|
@ -23,6 +23,13 @@ deco.keyword = robot_keyword_adapter
|
||||||
logger = logging.getLogger('NeoLogger')
|
logger = logging.getLogger('NeoLogger')
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope='session')
def free_storage_check():
    """Skip the requesting test unless the FREE_STORAGE env flag is enabled."""
    free_storage_enabled = os.getenv('FREE_STORAGE', default='False').lower() in ('true', '1')
    if not free_storage_enabled:
        pytest.skip('Test only works on SberCloud infrastructure')
    yield
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope='session', autouse=True)
|
@pytest.fixture(scope='session', autouse=True)
|
||||||
@allure.title('Check binary versions')
|
@allure.title('Check binary versions')
|
||||||
def check_binary_versions(request):
|
def check_binary_versions(request):
|
||||||
|
@ -81,6 +88,7 @@ def init_wallet_with_address(prepare_tmp_dir):
|
||||||
def prepare_wallet_and_deposit(init_wallet_with_address):
|
def prepare_wallet_and_deposit(init_wallet_with_address):
|
||||||
wallet, addr, _ = init_wallet_with_address
|
wallet, addr, _ = init_wallet_with_address
|
||||||
logger.info(f'Init wallet: {wallet},\naddr: {addr}')
|
logger.info(f'Init wallet: {wallet},\naddr: {addr}')
|
||||||
|
allure.attach.file(wallet, os.path.basename(wallet), allure.attachment_type.JSON)
|
||||||
|
|
||||||
if not FREE_STORAGE:
|
if not FREE_STORAGE:
|
||||||
deposit = 30
|
deposit = 30
|
||||||
|
|
0
pytest_tests/testsuites/failovers/__init__.py
Normal file
0
pytest_tests/testsuites/failovers/__init__.py
Normal file
49
pytest_tests/testsuites/failovers/failover_utils.py
Normal file
49
pytest_tests/testsuites/failovers/failover_utils.py
Normal file
|
@ -0,0 +1,49 @@
|
||||||
|
import logging
|
||||||
|
from time import sleep
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import allure
|
||||||
|
|
||||||
|
from common import NEOFS_NETMAP_DICT
|
||||||
|
from python_keywords.node_management import node_healthcheck
|
||||||
|
from storage_policy import get_nodes_with_object
|
||||||
|
|
||||||
|
logger = logging.getLogger('NeoLogger')
|
||||||
|
|
||||||
|
|
||||||
|
@allure.step('Wait for object replication')
def wait_object_replication_on_nodes(wallet: str, cid: str, oid: str, expected_copies: int,
                                     excluded_nodes: Optional[list[str]] = None) -> list[str]:
    """Poll the cluster until exactly `expected_copies` nodes store the object.

    Args:
        wallet: path to the wallet on whose behalf the lookup is made.
        cid: container ID.
        oid: object ID.
        expected_copies: number of object copies to wait for.
        excluded_nodes: nodes to skip when searching for copies.

    Returns:
        List of node endpoints that store the object.

    Raises:
        AssertionError: if the expected copy count is not reached in time.
    """
    skip = excluded_nodes or []
    interval, max_attempts = 10, 18
    found = []
    attempt = 0
    while attempt < max_attempts:
        found = get_nodes_with_object(wallet, cid, oid, skip_nodes=skip)
        if len(found) == expected_copies:
            return found
        sleep(interval)
        attempt += 1
    raise AssertionError(f'Expected {expected_copies} copies of object, but found {len(found)}. '
                         f'Waiting time {interval * max_attempts}')
|
||||||
|
|
||||||
|
|
||||||
|
@allure.step('Wait for storage node returned to cluster')
def wait_all_storage_node_returned():
    """Block until every storage node reports healthy, or fail after all retries.

    Raises:
        AssertionError: if some node is still unhealthy after the retry budget.
    """
    interval, max_attempts = 10, 12
    attempt = 0
    while attempt < max_attempts:
        if is_all_storage_node_returned():
            return
        sleep(interval)
        attempt += 1
    raise AssertionError('Storage node(s) is broken')
|
||||||
|
|
||||||
|
|
||||||
|
def is_all_storage_node_returned() -> bool:
    """Return True only when every node in the netmap is READY and ONLINE.

    Any exception from the healthcheck is treated as "node not returned yet"
    (logged as a warning) so that callers can keep polling.
    """
    with allure.step('Run health check for all storage nodes'):
        for name in NEOFS_NETMAP_DICT.keys():
            try:
                status = node_healthcheck(name)
            except Exception as err:
                logger.warning(f'Node healthcheck fails with error {err}')
                return False
            node_is_healthy = (status.health_status == 'READY'
                               and status.network_status == 'ONLINE')
            if not node_is_healthy:
                return False
    return True
|
118
pytest_tests/testsuites/failovers/test_failover_network.py
Normal file
118
pytest_tests/testsuites/failovers/test_failover_network.py
Normal file
|
@ -0,0 +1,118 @@
|
||||||
|
import logging
|
||||||
|
from random import choices
|
||||||
|
from time import sleep
|
||||||
|
|
||||||
|
import allure
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from common import STORAGE_NODE_SSH_PRIVATE_KEY_PATH, STORAGE_NODE_SSH_USER, STORAGE_NODE_SSH_PASSWORD, \
|
||||||
|
NEOFS_NETMAP_DICT
|
||||||
|
from iptables_helper import IpTablesHelper
|
||||||
|
from python_keywords.container import create_container
|
||||||
|
from python_keywords.neofs_verbs import get_object, put_object
|
||||||
|
from python_keywords.utility_keywords import generate_file, get_file_hash
|
||||||
|
from ssh_helper import HostClient
|
||||||
|
from wellknown_acl import PUBLIC_ACL
|
||||||
|
from .failover_utils import wait_all_storage_node_returned, wait_object_replication_on_nodes
|
||||||
|
|
||||||
|
logger = logging.getLogger('NeoLogger')

# Storage node communication ports (plain and, per the constant name, TLS)
# that the network-failover tests block with iptables.
STORAGE_NODE_COMMUNICATION_PORT = '8080'
STORAGE_NODE_COMMUNICATION_PORT_TLS = '8082'
PORTS_TO_BLOCK = [STORAGE_NODE_COMMUNICATION_PORT, STORAGE_NODE_COMMUNICATION_PORT_TLS]
# Hosts whose input traffic is currently blocked; shared with the
# restore_network fixture so teardown can unblock them after each test.
blocked_hosts = []
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(autouse=True)
@allure.step('Install iptables if needed')
def install_iptables_if_needed():
    """Ensure iptables is available on every storage node before each test.

    Connects to each node from NEOFS_NETMAP_DICT over SSH; if the version
    check fails, installs the package with apt-get and re-runs the check.
    """
    check_command = 'iptables --version'
    install_command = 'apt-get --yes install iptables'
    for node_config in NEOFS_NETMAP_DICT.values():
        node_ip = node_config.get('rpc').split(':')[0]
        ssh_client = HostClient(ip=node_ip, login=STORAGE_NODE_SSH_USER,
                                password=STORAGE_NODE_SSH_PASSWORD,
                                private_key_path=STORAGE_NODE_SSH_PRIVATE_KEY_PATH)
        with ssh_client.create_ssh_connection():
            try:
                ssh_client.exec(check_command)
            except AssertionError as err:
                # exec raises AssertionError on non-zero rc, i.e. iptables missing
                logger.info(f'Command {check_command} fails with error {err}')
                ssh_client.exec(install_command)
                ssh_client.exec(check_command)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(autouse=True)
@allure.step('Restore network')
def restore_network():
    """Teardown fixture: unblock traffic for every host left in blocked_hosts.

    If at least one host had been blocked, additionally waits for all storage
    nodes to rejoin the cluster before the next test runs.
    """
    yield

    had_blocked_hosts = bool(blocked_hosts)
    # Iterate over a copy so entries can be removed while unblocking.
    for blocked_host in list(blocked_hosts):
        with allure.step(f'Start storage node {blocked_host}'):
            ssh_client = HostClient(ip=blocked_host, login=STORAGE_NODE_SSH_USER,
                                    password=STORAGE_NODE_SSH_PASSWORD,
                                    private_key_path=STORAGE_NODE_SSH_PRIVATE_KEY_PATH)
            with ssh_client.create_ssh_connection():
                IpTablesHelper.restore_input_traffic_to_port(ssh_client, PORTS_TO_BLOCK)
        blocked_hosts.remove(blocked_host)
    if had_blocked_hosts:
        wait_all_storage_node_returned()
|
||||||
|
|
||||||
|
|
||||||
|
@allure.title('Block Storage node traffic')
@pytest.mark.failover
@pytest.mark.failover_net
def test_block_storage_node_traffic(prepare_wallet_and_deposit, free_storage_check):
    """
    Block storage nodes traffic using iptables and wait for replication for objects.

    Scenario: put an object with REP 2 placement, block the gRPC ports on the
    nodes currently holding it, verify the object is re-replicated to other
    nodes and still readable, then unblock and verify replication again.
    """
    wallet = prepare_wallet_and_deposit
    placement_rule = 'REP 2 IN X CBF 2 SELECT 2 FROM * AS X'
    excluded_nodes = []
    wakeup_node_timeout = 10  # timeout to let nodes detect that traffic has blocked
    nodes_to_block_count = 2

    source_file_path = generate_file()
    cid = create_container(wallet, rule=placement_rule, basic_acl=PUBLIC_ACL)
    oid = put_object(wallet, source_file_path, cid)
    nodes = wait_object_replication_on_nodes(wallet, cid, oid, 2)

    logger.info(f'Nodes are {nodes}')
    # Pairs of (node endpoint, bare IP for SSH).
    random_nodes = [(node, node.split(':')[0]) for node in nodes]
    # NOTE(review): random.choices samples WITH replacement, so the same node
    # can be picked twice, and k=2 is hardcoded instead of using
    # nodes_to_block_count — confirm whether random.sample was intended here.
    if nodes_to_block_count > len(nodes):
        random_nodes = [(node, node.split(':')[0]) for node in choices(nodes, k=2)]

    for random_node, random_node_ip in random_nodes:
        client = HostClient(ip=random_node_ip, login=STORAGE_NODE_SSH_USER,
                            password=STORAGE_NODE_SSH_PASSWORD,
                            private_key_path=STORAGE_NODE_SSH_PRIVATE_KEY_PATH)

        with allure.step(f'Block incoming traffic for node {random_node} on port {PORTS_TO_BLOCK}'):
            with client.create_ssh_connection():
                IpTablesHelper.drop_input_traffic_to_port(client, PORTS_TO_BLOCK)
            # Record for the restore_network fixture in case the test fails.
            blocked_hosts.append(random_node_ip)
            excluded_nodes.append(random_node)
            sleep(wakeup_node_timeout)

    new_nodes = wait_object_replication_on_nodes(wallet, cid, oid, 2, excluded_nodes=excluded_nodes)

    # NOTE(review): random_node here is the loop variable leaked from the loop
    # above, so only the LAST blocked node is checked — presumably every node in
    # excluded_nodes should be absent from new_nodes; verify intent.
    assert random_node not in new_nodes

    got_file_path = get_object(wallet, cid, oid, endpoint=new_nodes[0])
    assert get_file_hash(source_file_path) == get_file_hash(got_file_path)

    for random_node, random_node_ip in random_nodes:
        client = HostClient(ip=random_node_ip, login=STORAGE_NODE_SSH_USER,
                            password=STORAGE_NODE_SSH_PASSWORD,
                            private_key_path=STORAGE_NODE_SSH_PRIVATE_KEY_PATH)

        with allure.step(f'Unblock incoming traffic for node {random_node} on port {PORTS_TO_BLOCK}'):
            with client.create_ssh_connection():
                IpTablesHelper.restore_input_traffic_to_port(client, PORTS_TO_BLOCK)
            blocked_hosts.remove(random_node_ip)
            sleep(wakeup_node_timeout)

    new_nodes = wait_object_replication_on_nodes(wallet, cid, oid, 2)

    got_file_path = get_object(wallet, cid, oid, endpoint=new_nodes[0])
    assert get_file_hash(source_file_path) == get_file_hash(got_file_path)
|
|
@ -1,91 +1,54 @@
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
from time import sleep
|
|
||||||
|
|
||||||
import allure
|
import allure
|
||||||
import pytest
|
import pytest
|
||||||
from common import NEOFS_NETMAP_DICT
|
|
||||||
|
from common import STORAGE_NODE_SSH_PRIVATE_KEY_PATH, STORAGE_NODE_SSH_USER, STORAGE_NODE_SSH_PASSWORD
|
||||||
from python_keywords.container import create_container
|
from python_keywords.container import create_container
|
||||||
from python_keywords.neofs_verbs import get_object, put_object
|
from python_keywords.neofs_verbs import get_object, put_object
|
||||||
from python_keywords.node_management import node_healthcheck
|
|
||||||
from python_keywords.utility_keywords import generate_file, get_file_hash
|
from python_keywords.utility_keywords import generate_file, get_file_hash
|
||||||
from sbercloud_helper import SberCloud
|
from sbercloud_helper import SberCloud
|
||||||
from ssh_helper import HostClient, HostIsNotAvailable
|
from ssh_helper import HostClient, HostIsNotAvailable
|
||||||
from storage_policy import get_nodes_with_object
|
|
||||||
from wellknown_acl import PUBLIC_ACL
|
from wellknown_acl import PUBLIC_ACL
|
||||||
|
from .failover_utils import wait_all_storage_node_returned, wait_object_replication_on_nodes
|
||||||
|
|
||||||
logger = logging.getLogger('NeoLogger')
|
logger = logging.getLogger('NeoLogger')
|
||||||
stopped_hosts = []
|
stopped_hosts = []
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope='session')
|
|
||||||
def free_storage_check():
|
|
||||||
if os.getenv('FREE_STORAGE', default='False').lower() not in ('true', '1'):
|
|
||||||
pytest.skip('Test only works on SberCloud infrastructure')
|
|
||||||
yield
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope='session')
def sbercloud_client():
    """Session-scoped SberCloud API client built from the local YAML config.

    Fails the requesting tests outright when the infrastructure is unreachable.
    """
    with allure.step('Connect to SberCloud'):
        try:
            yield SberCloud(f'{os.getcwd()}/configuration/sbercloud.yaml')
        except Exception as err:
            pytest.fail(f'SberCloud infrastructure not available. Error\n{err}')
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(autouse=True)
@allure.step('Return all storage nodes')
def return_all_storage_nodes_fixture(sbercloud_client):
    """Teardown: restart any storage nodes a test stopped via SberCloud."""
    yield
    return_all_storage_nodes(sbercloud_client)
|
||||||
|
|
||||||
|
|
||||||
def panic_reboot_host(ip: str = None):
    """Hard-reboot the host via the magic SysRq trigger.

    Enables SysRq, then writes 'b' to /proc/sysrq-trigger, which reboots the
    machine immediately. The SSH connection is expected to drop, so the second
    exec must raise HostIsNotAvailable.
    """
    host_client = HostClient(ip=ip, login=STORAGE_NODE_SSH_USER,
                             password=STORAGE_NODE_SSH_PASSWORD,
                             private_key_path=STORAGE_NODE_SSH_PRIVATE_KEY_PATH)
    host_client.exec('sudo sh -c "echo 1 > /proc/sys/kernel/sysrq"')
    with pytest.raises(HostIsNotAvailable):
        host_client.exec('sudo sh -c "echo b > /proc/sysrq-trigger"', timeout=1)
|
||||||
|
|
||||||
|
|
||||||
def return_all_storage_nodes(sbercloud_client: SberCloud):
    """Start every previously stopped storage node, then wait for recovery.

    Iterates over a copy of stopped_hosts so entries can be removed one by one
    as each node is started.
    """
    for stopped_host in list(stopped_hosts):
        with allure.step(f'Start storage node {stopped_host}'):
            sbercloud_client.start_node(node_ip=stopped_host.split(':')[-2])
        stopped_hosts.remove(stopped_host)

    wait_all_storage_node_returned()
|
|
||||||
|
|
||||||
|
|
||||||
def is_all_storage_node_returned() -> bool:
|
|
||||||
with allure.step('Run health check for all storage nodes'):
|
|
||||||
for node_name in NEOFS_NETMAP_DICT.keys():
|
|
||||||
try:
|
|
||||||
health_check = node_healthcheck(node_name)
|
|
||||||
except (AssertionError, HostIsNotAvailable, TimeoutError):
|
|
||||||
return False
|
|
||||||
if health_check.health_status != 'READY' or health_check.network_status != 'ONLINE':
|
|
||||||
return False
|
|
||||||
return True
|
|
||||||
|
|
||||||
|
|
||||||
def wait_all_storage_node_returned():
|
|
||||||
sleep_interval, attempts = 10, 12
|
|
||||||
for __attempt in range(attempts):
|
|
||||||
if is_all_storage_node_returned():
|
|
||||||
return
|
|
||||||
sleep(sleep_interval)
|
|
||||||
raise AssertionError('Storage node(s) is broken')
|
|
||||||
|
|
||||||
|
|
||||||
def wait_object_replication(wallet, cid, oid, expected_copies: int, excluded_nodes: [str] = None) -> [str]:
|
|
||||||
excluded_nodes = excluded_nodes or []
|
|
||||||
sleep_interval, attempts = 10, 12
|
|
||||||
nodes = []
|
|
||||||
for __attempt in range(attempts):
|
|
||||||
nodes = [node for node in get_nodes_with_object(wallet, cid, oid) if node not in excluded_nodes]
|
|
||||||
if len(nodes) == expected_copies:
|
|
||||||
return nodes
|
|
||||||
sleep(sleep_interval)
|
|
||||||
raise AssertionError(f'Expected {expected_copies} copies of object, but found {len(nodes)} ')
|
|
||||||
|
|
||||||
|
|
||||||
@allure.title('Lost and return nodes')
|
@allure.title('Lost and return nodes')
|
||||||
|
@ -98,14 +61,14 @@ def test_lost_storage_node(prepare_wallet_and_deposit, sbercloud_client: SberClo
|
||||||
source_file_path = generate_file()
|
source_file_path = generate_file()
|
||||||
cid = create_container(wallet, rule=placement_rule, basic_acl=PUBLIC_ACL)
|
cid = create_container(wallet, rule=placement_rule, basic_acl=PUBLIC_ACL)
|
||||||
oid = put_object(wallet, source_file_path, cid)
|
oid = put_object(wallet, source_file_path, cid)
|
||||||
nodes = wait_object_replication(wallet, cid, oid, 2)
|
nodes = wait_object_replication_on_nodes(wallet, cid, oid, 2)
|
||||||
|
|
||||||
new_nodes = []
|
new_nodes = []
|
||||||
for node in nodes:
|
for node in nodes:
|
||||||
stopped_hosts.append(node)
|
stopped_hosts.append(node)
|
||||||
with allure.step(f'Stop storage node {node}'):
|
with allure.step(f'Stop storage node {node}'):
|
||||||
sbercloud_client.stop_node(node_ip=node.split(':')[-2], hard=hard_reboot)
|
sbercloud_client.stop_node(node_ip=node.split(':')[-2], hard=hard_reboot)
|
||||||
new_nodes = wait_object_replication(wallet, cid, oid, 2, excluded_nodes=[node])
|
new_nodes = wait_object_replication_on_nodes(wallet, cid, oid, 2, excluded_nodes=[node])
|
||||||
|
|
||||||
assert not [node for node in nodes if node in new_nodes]
|
assert not [node for node in nodes if node in new_nodes]
|
||||||
got_file_path = get_object(wallet, cid, oid, endpoint=new_nodes[0])
|
got_file_path = get_object(wallet, cid, oid, endpoint=new_nodes[0])
|
||||||
|
@ -114,7 +77,7 @@ def test_lost_storage_node(prepare_wallet_and_deposit, sbercloud_client: SberClo
|
||||||
with allure.step(f'Return storage nodes'):
|
with allure.step(f'Return storage nodes'):
|
||||||
return_all_storage_nodes(sbercloud_client)
|
return_all_storage_nodes(sbercloud_client)
|
||||||
|
|
||||||
new_nodes = wait_object_replication(wallet, cid, oid, 2)
|
new_nodes = wait_object_replication_on_nodes(wallet, cid, oid, 2)
|
||||||
|
|
||||||
got_file_path = get_object(wallet, cid, oid, endpoint=new_nodes[0])
|
got_file_path = get_object(wallet, cid, oid, endpoint=new_nodes[0])
|
||||||
assert get_file_hash(source_file_path) == get_file_hash(got_file_path)
|
assert get_file_hash(source_file_path) == get_file_hash(got_file_path)
|
||||||
|
@ -122,6 +85,7 @@ def test_lost_storage_node(prepare_wallet_and_deposit, sbercloud_client: SberClo
|
||||||
|
|
||||||
@allure.title('Panic storage node(s)')
|
@allure.title('Panic storage node(s)')
|
||||||
@pytest.mark.parametrize('sequence', [True, False])
|
@pytest.mark.parametrize('sequence', [True, False])
|
||||||
|
@pytest.mark.failover_panic
|
||||||
@pytest.mark.failover
|
@pytest.mark.failover
|
||||||
def test_panic_storage_node(prepare_wallet_and_deposit, free_storage_check, sequence: bool):
|
def test_panic_storage_node(prepare_wallet_and_deposit, free_storage_check, sequence: bool):
|
||||||
wallet = prepare_wallet_and_deposit
|
wallet = prepare_wallet_and_deposit
|
||||||
|
@ -130,18 +94,22 @@ def test_panic_storage_node(prepare_wallet_and_deposit, free_storage_check, sequ
|
||||||
cid = create_container(wallet, rule=placement_rule, basic_acl=PUBLIC_ACL)
|
cid = create_container(wallet, rule=placement_rule, basic_acl=PUBLIC_ACL)
|
||||||
oid = put_object(wallet, source_file_path, cid)
|
oid = put_object(wallet, source_file_path, cid)
|
||||||
|
|
||||||
nodes = wait_object_replication(wallet, cid, oid, 2)
|
nodes = wait_object_replication_on_nodes(wallet, cid, oid, 2)
|
||||||
allure.attach('\n'.join(nodes), 'Current nodes with object', allure.attachment_type.TEXT)
|
allure.attach('\n'.join(nodes), 'Current nodes with object', allure.attachment_type.TEXT)
|
||||||
for node in nodes:
|
for node in nodes:
|
||||||
with allure.step(f'Hard reboot host {node} via magic SysRq option'):
|
with allure.step(f'Hard reboot host {node} via magic SysRq option'):
|
||||||
panic_reboot_host(ip=node.split(':')[-2])
|
panic_reboot_host(ip=node.split(':')[-2])
|
||||||
if sequence:
|
if sequence:
|
||||||
new_nodes = wait_object_replication(wallet, cid, oid, 2, excluded_nodes=[node])
|
try:
|
||||||
|
new_nodes = wait_object_replication_on_nodes(wallet, cid, oid, 2, excluded_nodes=[node])
|
||||||
|
except AssertionError:
|
||||||
|
new_nodes = wait_object_replication_on_nodes(wallet, cid, oid, 2)
|
||||||
|
|
||||||
allure.attach('\n'.join(new_nodes), f'Nodes with object after {node} fail',
|
allure.attach('\n'.join(new_nodes), f'Nodes with object after {node} fail',
|
||||||
allure.attachment_type.TEXT)
|
allure.attachment_type.TEXT)
|
||||||
|
|
||||||
if not sequence:
|
if not sequence:
|
||||||
new_nodes = wait_object_replication(wallet, cid, oid, 2, excluded_nodes=nodes)
|
new_nodes = wait_object_replication_on_nodes(wallet, cid, oid, 2)
|
||||||
allure.attach('\n'.join(new_nodes), 'Nodes with object after nodes fail', allure.attachment_type.TEXT)
|
allure.attach('\n'.join(new_nodes), 'Nodes with object after nodes fail', allure.attachment_type.TEXT)
|
||||||
|
|
||||||
got_file_path = get_object(wallet, cid, oid, endpoint=new_nodes[0])
|
got_file_path = get_object(wallet, cid, oid, endpoint=new_nodes[0])
|
||||||
|
|
|
@ -5,6 +5,8 @@
|
||||||
that storage policies are kept.
|
that storage policies are kept.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
from robot.api import logger
|
from robot.api import logger
|
||||||
from robot.api.deco import keyword
|
from robot.api.deco import keyword
|
||||||
|
|
||||||
|
@ -86,7 +88,7 @@ def get_complex_object_copies(wallet: str, cid: str, oid: str):
|
||||||
|
|
||||||
|
|
||||||
@keyword('Get Nodes With Object')
|
@keyword('Get Nodes With Object')
|
||||||
def get_nodes_with_object(wallet: str, cid: str, oid: str):
|
def get_nodes_with_object(wallet: str, cid: str, oid: str, skip_nodes: Optional[list[str]] = None) -> list[str]:
|
||||||
"""
|
"""
|
||||||
The function returns list of nodes which store
|
The function returns list of nodes which store
|
||||||
the given object.
|
the given object.
|
||||||
|
@ -95,11 +97,16 @@ def get_nodes_with_object(wallet: str, cid: str, oid: str):
|
||||||
we request the nodes
|
we request the nodes
|
||||||
cid (str): ID of the container which store the object
|
cid (str): ID of the container which store the object
|
||||||
oid (str): object ID
|
oid (str): object ID
|
||||||
|
skip_nodes (list): list of nodes that should be excluded from check
|
||||||
Returns:
|
Returns:
|
||||||
(list): nodes which store the object
|
(list): nodes which store the object
|
||||||
"""
|
"""
|
||||||
|
nodes_to_search = NEOFS_NETMAP
|
||||||
|
if skip_nodes:
|
||||||
|
nodes_to_search = [node for node in NEOFS_NETMAP if node not in skip_nodes]
|
||||||
|
|
||||||
nodes_list = []
|
nodes_list = []
|
||||||
for node in NEOFS_NETMAP:
|
for node in nodes_to_search:
|
||||||
try:
|
try:
|
||||||
res = neofs_verbs.head_object(wallet, cid, oid,
|
res = neofs_verbs.head_object(wallet, cid, oid,
|
||||||
endpoint=node,
|
endpoint=node,
|
||||||
|
|
Loading…
Reference in a new issue