From 93a52b4a664d7245bfba5db548d9b1253daddbab Mon Sep 17 00:00:00 2001
From: Vladimir Avdeev
Date: Mon, 11 Jul 2022 17:11:26 +0300
Subject: [PATCH] Add failover tests for storage nodes

The tests are designed to run on cloud infrastructure (SberCloud VMs).

Signed-off-by: Vladimir Avdeev
---
 pytest_tests/helpers/sbercloud_helper.py      | 113 +++++++++++++++++++
 pytest_tests/testsuites/conftest.py           |   1 +
 .../failovers/test_failover_storage.py        | 108 ++++++++++++++++++
 .../lib/python_keywords/aws_cli_client.py     |   2 +-
 4 files changed, 223 insertions(+), 1 deletion(-)
 create mode 100644 pytest_tests/helpers/sbercloud_helper.py
 create mode 100644 pytest_tests/testsuites/failovers/test_failover_storage.py

diff --git a/pytest_tests/helpers/sbercloud_helper.py b/pytest_tests/helpers/sbercloud_helper.py
new file mode 100644
index 00000000..a5fcf45d
--- /dev/null
+++ b/pytest_tests/helpers/sbercloud_helper.py
@@ -0,0 +1,113 @@
+import json
+from dataclasses import dataclass
+
+import requests
+from yaml import FullLoader
+from yaml import load as yaml_load
+
+
+@dataclass
+class SberCloudCtx:
+    sber_login: str = None
+    sber_password: str = None
+    sber_domain: str = None
+    sber_project_id: str = None
+    sber_iam_url: str = None
+    sber_ecss: list = None
+
+    @staticmethod
+    def from_dict(sbercloud_dict: dict) -> 'SberCloudCtx':
+        return SberCloudCtx(**sbercloud_dict)
+
+    @staticmethod
+    def from_yaml(config: str) -> 'SberCloudCtx':
+        with open(config) as yaml_file:
+            config_from_yaml = yaml_load(yaml_file, Loader=FullLoader)
+        return SberCloudCtx.from_dict(config_from_yaml)
+
+
+class SberCloud:
+    def __init__(self, config: str):
+        self.sbercloud_config = SberCloudCtx.from_yaml(config)
+        self.ecs_url = None
+        self.project_id = None
+        self.token = None
+        self.update_token()
+        self.ecss = self.get_ecss()
+
+    def update_token(self):
+        data = {
+            'auth': {
+                'identity': {
+                    'methods': ['password'],
+                    'password': {
+                        'user': {
+                            'domain': {
+                                'name': self.sbercloud_config.sber_domain
+                            },
+                            'name': self.sbercloud_config.sber_login,
+                            'password': self.sbercloud_config.sber_password
+                        }
+                    }
+                },
+                'scope': {
+                    'project': {
+                        'id': self.sbercloud_config.sber_project_id
+                    }
+                }
+            }
+        }
+        response = requests.post(f'{self.sbercloud_config.sber_iam_url}/v3/auth/tokens', data=json.dumps(data),
+                                 headers={'Content-Type': 'application/json'})
+        self.ecs_url = [catalog['endpoints'][0]['url']
+                        for catalog in response.json()['token']['catalog'] if catalog['type'] == 'ecs'][0]
+        self.project_id = self.ecs_url.split('/')[-1]
+        self.token = response.headers['X-Subject-Token']
+
+    def find_ecs_by_ip(self, ip: str, update: bool = False) -> str:
+        if not self.ecss or update:
+            self.ecss = self.get_ecss()
+        ecss = [ecs for ecs in self.ecss if ip in
+                [ecs_ip['addr'] for ecs_ips in ecs['addresses'].values() for ecs_ip in ecs_ips]]
+        assert len(ecss) == 1, f'Expected exactly one ECS with IP {ip}, found {len(ecss)}'
+        return ecss[0]['id']
+
+    def get_ecss(self) -> list[dict]:
+        response = requests.get(f'{self.ecs_url}/cloudservers/detail',
+                                headers={'X-Auth-Token': self.token}).json()
+        return response['servers']
+
+    def start_node(self, node_id: str = None, node_ip: str = None):
+        data = {
+            'os-start': {
+                'servers': [
+                    {
+                        'id': node_id or self.find_ecs_by_ip(node_ip)
+                    }
+                ]
+            }
+        }
+        response = requests.post(f'{self.ecs_url}/cloudservers/action',
+                                 data=json.dumps(data),
+                                 headers={'Content-Type': 'application/json', 'X-Auth-Token': self.token})
+        assert response.status_code < 300, f'Status: {response.status_code}. Server not started: {response.json()}'
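+
+    # Note: start_node above and stop_node below share the batch ECS actions endpoint
+    # (POST {ecs_url}/cloudservers/action) and submit a list of server ids. For 'os-stop',
+    # type HARD appears to force an immediate power-off, while SOFT requests a graceful
+    # guest OS shutdown.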
+
+    def stop_node(self, node_id: str = None, node_ip: str = None, hard: bool = False):
+        data = {
+            'os-stop': {
+                'type': 'HARD' if hard else 'SOFT',
+                'servers': [
+                    {
+                        'id': node_id or self.find_ecs_by_ip(node_ip)
+                    }
+                ]
+            }
+        }
+        response = requests.post(f'{self.ecs_url}/cloudservers/action',
+                                 data=json.dumps(data),
+                                 headers={'Content-Type': 'application/json', 'X-Auth-Token': self.token})
+        assert response.status_code < 300, f'Status: {response.status_code}. Server not stopped: {response.json()}'

diff --git a/pytest_tests/testsuites/conftest.py b/pytest_tests/testsuites/conftest.py
index 1fc2185b..c3303884 100644
--- a/pytest_tests/testsuites/conftest.py
+++ b/pytest_tests/testsuites/conftest.py
@@ -49,6 +49,7 @@ def init_wallet_with_address():
 @allure.title('Prepare tmp directory')
 def prepare_tmp_dir():
     full_path = f'{os.getcwd()}/{ASSETS_DIR}'
+    shutil.rmtree(full_path, ignore_errors=True)
     os.mkdir(full_path)
     yield
     shutil.rmtree(full_path)

diff --git a/pytest_tests/testsuites/failovers/test_failover_storage.py b/pytest_tests/testsuites/failovers/test_failover_storage.py
new file mode 100644
index 00000000..3fd95b16
--- /dev/null
+++ b/pytest_tests/testsuites/failovers/test_failover_storage.py
@@ -0,0 +1,108 @@
+import logging
+import os
+from time import sleep
+
+import allure
+import pytest
+from common import STORAGE_NODE_PWD, STORAGE_NODE_USER
+from python_keywords.container import create_container
+from python_keywords.neofs_verbs import get_object, put_object
+from python_keywords.utility_keywords import get_file_hash
+from sbercloud_helper import SberCloud
+from ssh_helper import HostClient, HostIsNotAvailable
+from storage_policy import get_nodes_with_object
+from wellknown_acl import PUBLIC_ACL
+
+logger = logging.getLogger('NeoLogger')
+stopped_hosts = []
+
+
+@pytest.fixture(scope='session')
+def sbercloud_client():
+    yield SberCloud(f'{os.getcwd()}/configuration/sbercloud.yaml')
+
+
+@pytest.fixture(scope='session', autouse=True)
+def return_all_storage_nodes_fixture(sbercloud_client):
+    yield
+    return_all_storage_nodes(sbercloud_client)
+
+
+@allure.title('Hard reboot host via magic SysRq option')
+def panic_reboot_host(ip: str):
+    ssh = HostClient(ip=ip, login=STORAGE_NODE_USER, password=STORAGE_NODE_PWD)
+    ssh.create_connection(attempts=1)
+    ssh.exec('echo 1 > /proc/sys/kernel/sysrq')
+    with pytest.raises(HostIsNotAvailable):
+        ssh.exec('echo b > /proc/sysrq-trigger', timeout=1)
+
+
+def return_all_storage_nodes(sbercloud_client: SberCloud):
+    # Iterate over a copy: removing items from a list while iterating over it skips elements
+    for host in list(stopped_hosts):
+        sbercloud_client.start_node(node_ip=host.split(':')[-2])
+        stopped_hosts.remove(host)
+
+
+def wait_object_replication(wallet, cid, oid, expected_copies: int) -> list[str]:
+    sleep_interval, attempts = 10, 12
+    nodes = []
+    for __attempt in range(attempts):
+        nodes = get_nodes_with_object(wallet, cid, oid)
+        if len(nodes) == expected_copies:
+            return nodes
+        sleep(sleep_interval)
+    raise AssertionError(f'Expected {expected_copies} copies of object, but found {len(nodes)}')
+
+
+@allure.title('Lost and return nodes')
+@pytest.mark.parametrize('hard_reboot', [True, False])
+def test_lost_storage_node(prepare_wallet_and_deposit, generate_file, sbercloud_client: SberCloud, hard_reboot: bool):
+    wallet, _ = prepare_wallet_and_deposit
+    placement_rule = 'REP 2 IN X CBF 2 SELECT 2 FROM * AS X'
+    cid = create_container(wallet, rule=placement_rule, basic_acl=PUBLIC_ACL)
+    oid = put_object(wallet, generate_file, cid)
+    nodes = wait_object_replication(wallet, cid, oid, 2)
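+
+    # Stop every node that currently holds a replica; the 'REP 2 ... SELECT 2' policy
+    # should drive the object back to two copies on the surviving nodes.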
+    new_nodes = []
+    for node in nodes:
+        stopped_hosts.append(node)
+        with allure.step(f'Stop storage node {node}'):
+            sbercloud_client.stop_node(node_ip=node.split(':')[-2], hard=hard_reboot)
+            new_nodes = wait_object_replication(wallet, cid, oid, 2)
+
+    assert not [node for node in nodes if node in new_nodes]
+    got_file_path = get_object(wallet, cid, oid)
+    assert get_file_hash(generate_file) == get_file_hash(got_file_path)
+
+    with allure.step('Return storage nodes'):
+        return_all_storage_nodes(sbercloud_client)
+
+    wait_object_replication(wallet, cid, oid, 2)
+
+    got_file_path = get_object(wallet, cid, oid)
+    assert get_file_hash(generate_file) == get_file_hash(got_file_path)
+
+
+@allure.title('Panic storage node(s)')
+@pytest.mark.parametrize('sequence', [True, False])
+def test_panic_storage_node(prepare_wallet_and_deposit, generate_file, sequence: bool):
+    wallet, _ = prepare_wallet_and_deposit
+    placement_rule = 'REP 2 IN X CBF 2 SELECT 2 FROM * AS X'
+    cid = create_container(wallet, rule=placement_rule, basic_acl=PUBLIC_ACL)
+    oid = put_object(wallet, generate_file, cid)
+
+    with allure.step('Panic reboot storage nodes'):
+        nodes = wait_object_replication(wallet, cid, oid, 2)
+        for node in nodes:
+            panic_reboot_host(ip=node.split(':')[-2])
+            if sequence:
+                wait_object_replication(wallet, cid, oid, 2)
+
+    if not sequence:
+        wait_object_replication(wallet, cid, oid, 2)
+
+    got_file_path = get_object(wallet, cid, oid)
+    assert get_file_hash(generate_file) == get_file_hash(got_file_path)

diff --git a/robot/resources/lib/python_keywords/aws_cli_client.py b/robot/resources/lib/python_keywords/aws_cli_client.py
index 104e1d6a..c5139855 100644
--- a/robot/resources/lib/python_keywords/aws_cli_client.py
+++ b/robot/resources/lib/python_keywords/aws_cli_client.py
@@ -89,7 +89,7 @@ class AwsCliClient:
         version = f' --version-id {VersionId}' if VersionId else ''
         cmd = f' aws --no-verify-ssl s3api get-object --bucket {Bucket} ' \
               f'--key {Key} {version} {file_path} --endpoint {S3_GATE}'
-        output = _cmd_run(cmd)
+        output = _cmd_run(cmd, timeout=90)
         return self._to_json(output)

     def delete_objects(self, Bucket: str, Delete: dict) -> dict:
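
Note: the SberCloud helper loads configuration/sbercloud.yaml relative to the working
directory. A minimal sketch of that file's layout, inferred from the SberCloudCtx fields;
every value below is a placeholder, not a real endpoint or credential:

    sber_login: <iam-user-name>
    sber_password: <iam-user-password>
    sber_domain: <account-domain-name>
    sber_project_id: <project-id>
    sber_iam_url: <iam-endpoint-url>   # e.g. https://iam.<region>.hc.sbercloud.ru
    # sber_ecss may be omitted; the helper fetches the ECS list from the API itself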