Add test for adding node to cluster
Signed-off-by: a.y.volkov <a.y.volkov@yadro.com>
This commit is contained in:
parent
b468a06f4e
commit
f97bfed183
9 changed files with 218 additions and 30 deletions
|
@ -21,3 +21,4 @@ markers =
|
|||
failover: tests for system recovery after a failure
|
||||
failover_panic: tests for system recovery after panic reboot of a node
|
||||
failover_net: tests for network failure
|
||||
add_nodes: add nodes to cluster
|
||||
|
|
|
@ -11,7 +11,7 @@ import wallet
|
|||
from cli_helpers import _cmd_run
|
||||
from common import ASSETS_DIR, FREE_STORAGE, MAINNET_WALLET_PATH, NEOFS_NETMAP_DICT
|
||||
from payment_neogo import neofs_deposit, transfer_mainnet_gas
|
||||
from python_keywords.node_management import node_healthcheck
|
||||
from python_keywords.node_management import node_healthcheck, create_ssh_client
|
||||
from sbercloud_helper import SberCloudConfig
|
||||
|
||||
|
||||
|
@ -26,24 +26,37 @@ logger = logging.getLogger('NeoLogger')
|
|||
|
||||
@pytest.fixture(scope='session')
|
||||
def cloud_infrastructure_check():
|
||||
cloud_config = SberCloudConfig.from_env()
|
||||
if not cloud_config.project_id:
|
||||
if not is_cloud_infrastructure():
|
||||
pytest.skip('Test only works on SberCloud infrastructure')
|
||||
yield
|
||||
|
||||
|
||||
def is_cloud_infrastructure():
    """
    Tell whether the tests are running against SberCloud infrastructure.

    The configuration is obtained with SberCloudConfig.from_env(); the run is
    treated as cloud-based only when that configuration carries a non-empty
    project id.

    Returns:
        (bool): True when a project id is configured, False otherwise.
    """
    cloud_config = SberCloudConfig.from_env()
    # Truthiness instead of `is not None`: an empty project id is as unusable
    # as a missing one and must not switch the tests into cloud mode.
    return bool(cloud_config.project_id)
|
||||
|
||||
|
||||
@pytest.fixture(scope='session', autouse=True)
|
||||
@allure.title('Check binary versions')
|
||||
def check_binary_versions(request):
|
||||
environment_dir = request.config.getoption('--alluredir')
|
||||
|
||||
is_cloud = is_cloud_infrastructure()
|
||||
# Collect versions of neo binaries
|
||||
binaries = ['neo-go', 'neofs-cli', 'neofs-authmate']
|
||||
env_out = {}
|
||||
for binary in binaries:
|
||||
out = _cmd_run(f'{binary} --version')
|
||||
version = re.search(r'version[:\s]*(.+)', out, re.IGNORECASE)
|
||||
env_out[binary.upper()] = version.group(1) if version else 'Unknown'
|
||||
env_out = _get_binaries_version_local(binaries)
|
||||
|
||||
if is_cloud:
|
||||
binaries = ['neo-go',
|
||||
'neofs-adm',
|
||||
'neofs-cli',
|
||||
'neofs-http-gw',
|
||||
'neofs-ir',
|
||||
'neofs-lens',
|
||||
'neofs-node',
|
||||
'neofs-s3-authmate',
|
||||
'neofs-s3-gw',
|
||||
'neogo-morph-cn']
|
||||
env_out = _get_binaries_version_remote(binaries)
|
||||
|
||||
# Get version of aws binary
|
||||
out = _cmd_run('aws --version')
|
||||
|
@ -56,6 +69,32 @@ def check_binary_versions(request):
|
|||
out_file.write(f'{env}={env_value}\n')
|
||||
|
||||
|
||||
def _get_binaries_version_local(binaries: list) -> dict:
    """
    Collect versions of binaries by running `<binary> --version` through _cmd_run.

    Args:
        binaries (list): names of the binaries to query.
    Returns:
        (dict): upper-cased binary name mapped to the text captured after the
        word 'version' in the command output, or 'Unknown' when nothing matches.
    """
    versions = {}
    for name in binaries:
        found = re.search(r'version[:\s]*(.+)', _cmd_run(f'{name} --version'), re.IGNORECASE)
        if found:
            versions[name.upper()] = found.group(1)
        else:
            versions[name.upper()] = 'Unknown'
    return versions
|
||||
|
||||
|
||||
def _get_binaries_version_remote(binaries: list) -> dict:
    """
    Collect versions of binaries from every node listed in NEOFS_NETMAP_DICT.

    Each binary is queried with `<binary> --version` over SSH on each node.
    The version seen on the first node becomes the reference; every later
    node must report the same version.

    Args:
        binaries (list): names of the binaries to query on each node.
    Returns:
        (dict): upper-cased binary name mapped to its version string
        ('Unknown' when the command output contains no version).
    Raises:
        AssertionError: if two nodes report different versions of a binary.
    """
    env_out = {}
    # Remember which node supplied each reference version so that the failure
    # message names the real node rather than a hard-coded one.
    reference_nodes = {}

    for node_name in NEOFS_NETMAP_DICT:
        with create_ssh_client(node_name) as ssh_client:
            for binary in binaries:
                out = ssh_client.exec(f'{binary} --version').stdout
                version = re.search(r'version[:\s]*(.+)', out, re.IGNORECASE)
                version = version.group(1) if version else 'Unknown'
                key = binary.upper()
                if key not in env_out:
                    env_out[key] = version
                    reference_nodes[key] = node_name
                else:
                    msg = (f'Expected binary {binary} versions on node {reference_nodes[key]} '
                           f'and {node_name} are the same')
                    assert env_out[key] == version, msg
    return env_out
|
||||
|
||||
|
||||
@pytest.fixture(scope='session', autouse=True)
|
||||
@allure.title('Run health check for all storage nodes')
|
||||
def run_health_check():
|
||||
|
|
|
@ -5,15 +5,15 @@ from time import sleep
|
|||
import allure
|
||||
import pytest
|
||||
|
||||
from common import STORAGE_NODE_SSH_PRIVATE_KEY_PATH, STORAGE_NODE_SSH_USER, STORAGE_NODE_SSH_PASSWORD, \
|
||||
NEOFS_NETMAP_DICT
|
||||
from common import (STORAGE_NODE_SSH_PRIVATE_KEY_PATH, STORAGE_NODE_SSH_USER,
|
||||
STORAGE_NODE_SSH_PASSWORD, NEOFS_NETMAP_DICT)
|
||||
from failover_utils import wait_all_storage_node_returned, wait_object_replication_on_nodes
|
||||
from iptables_helper import IpTablesHelper
|
||||
from python_keywords.container import create_container
|
||||
from python_keywords.neofs_verbs import get_object, put_object
|
||||
from python_keywords.utility_keywords import generate_file, get_file_hash
|
||||
from ssh_helper import HostClient
|
||||
from wellknown_acl import PUBLIC_ACL
|
||||
from .failover_utils import wait_all_storage_node_returned, wait_object_replication_on_nodes
|
||||
|
||||
logger = logging.getLogger('NeoLogger')
|
||||
STORAGE_NODE_COMMUNICATION_PORT = '8080'
|
||||
|
|
|
@ -5,13 +5,14 @@ import pytest
|
|||
|
||||
from common import (STORAGE_NODE_SSH_PRIVATE_KEY_PATH, STORAGE_NODE_SSH_USER,
|
||||
STORAGE_NODE_SSH_PASSWORD)
|
||||
from failover_utils import wait_all_storage_node_returned, wait_object_replication_on_nodes
|
||||
from python_keywords.container import create_container
|
||||
from python_keywords.neofs_verbs import get_object, put_object
|
||||
from python_keywords.utility_keywords import generate_file, get_file_hash
|
||||
from sbercloud_helper import SberCloud, SberCloudConfig
|
||||
from ssh_helper import HostClient, HostIsNotAvailable
|
||||
from ssh_helper import HostClient
|
||||
from wellknown_acl import PUBLIC_ACL
|
||||
from .failover_utils import wait_all_storage_node_returned, wait_object_replication_on_nodes
|
||||
|
||||
|
||||
logger = logging.getLogger('NeoLogger')
|
||||
stopped_hosts = []
|
||||
|
|
|
@ -6,24 +6,22 @@ import allure
|
|||
import pytest
|
||||
from data_formatters import get_wallet_public_key
|
||||
from common import (COMPLEX_OBJ_SIZE, MAINNET_BLOCK_TIME, NEOFS_CONTRACT_CACHE_TIMEOUT,
|
||||
NEOFS_NETMAP_DICT, STORAGE_WALLET_PASS)
|
||||
NEOFS_NETMAP_DICT, STORAGE_RPC_ENDPOINT_1, STORAGE_WALLET_PASS)
|
||||
from epoch import tick_epoch
|
||||
from python_keywords.container import create_container, get_container
|
||||
from python_keywords.neofs_verbs import (delete_object, get_object,
|
||||
head_object, put_object)
|
||||
from python_keywords.node_management import (drop_object, get_netmap_snapshot,
|
||||
get_locode,
|
||||
node_healthcheck,
|
||||
node_set_status, node_shard_list,
|
||||
node_shard_set_mode,
|
||||
start_nodes_remote,
|
||||
stop_nodes_remote)
|
||||
from python_keywords.failover_utils import wait_object_replication_on_nodes
|
||||
from python_keywords.neofs_verbs import delete_object, get_object, head_object, put_object
|
||||
from python_keywords.node_management import (create_ssh_client, drop_object, get_netmap_snapshot,
|
||||
get_locode, node_healthcheck, node_set_status,
|
||||
node_shard_list, node_shard_set_mode,
|
||||
start_nodes_remote, stop_nodes_remote)
|
||||
from storage_policy import get_nodes_with_object, get_simple_object_copies
|
||||
from utility import placement_policy_from_container, robot_time_to_int, wait_for_gc_pass_on_storage_nodes
|
||||
from utility_keywords import generate_file
|
||||
from wellknown_acl import PUBLIC_ACL
|
||||
|
||||
logger = logging.getLogger('NeoLogger')
|
||||
check_nodes = []
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
|
@ -72,6 +70,137 @@ def after_run_set_all_nodes_online():
|
|||
logger.error(f"Node status change fails with error:\n{err}")
|
||||
|
||||
|
||||
def wait_for_service_started(ssh_client, service_name: str, attempts: int = 10, interval: int = 3):
    """
    Poll `systemctl status <service_name>` until the service is running.

    Args:
        ssh_client: object whose exec(command) returns a result with a
            `stdout` attribute containing the command output.
        service_name (str): systemd unit to check.
        attempts (int): maximum number of status checks.
        interval (int): seconds to wait between two consecutive checks.
    Returns:
        (void)
    Raises:
        AssertionError: if 'active (running)' never shows up in the output.
    """
    expected_state = 'active (running)'
    for attempt in range(attempts):
        # Wait only between checks, so a final failure is reported immediately
        # instead of after one more pointless pause.
        if attempt:
            sleep(interval)
        output = ssh_client.exec(f'systemctl status {service_name}')
        if expected_state in output.stdout:
            return
    raise AssertionError(f'Service {service_name} is not in {expected_state} state')
|
||||
|
||||
|
||||
@pytest.fixture
def return_nodes_after_test_run():
    """
    Teardown-only fixture: nothing is prepared before the test; once the test
    body has finished, return_nodes() is called.
    """
    yield
    return_nodes()
|
||||
|
||||
|
||||
def cleanup_node(node_to_cleanup, alive_node):
    """
    Exclude a node from the network map, stop its storage service and wipe its data.

    Args:
        node_to_cleanup: name of the node to exclude and clean up.
        alive_node: name of the node passed on to exclude_node_from_network_map.
    Returns:
        (void)
    """
    exclude_node_from_network_map(node_to_cleanup, alive_node)

    shell_commands = (
        'systemctl stop neofs-storage.service',
        'rm -rf /srv/neofs/*',
    )
    with create_ssh_client(node_to_cleanup) as ssh_client:
        for shell_command in shell_commands:
            ssh_client.exec(shell_command)
        # Pause for MAINNET_BLOCK_TIME before handing control back.
        sleep(robot_time_to_int(MAINNET_BLOCK_TIME))
|
||||
|
||||
|
||||
@allure.step('Return node to cluster')
def return_nodes(alive_node: str = None):
    """
    Start the storage service on every node in check_nodes and bring it back online.

    For each node: start neofs-storage.service over SSH and wait until it is
    running, set the node status to 'online', remove the node from
    check_nodes, tick the epoch (up to 3 attempts) and verify that the node
    is present in the network map.

    Args:
        alive_node (str): node name forwarded to check_node_in_map.
    Returns:
        (void)
    """
    tick_attempts = 3

    # Iterate over a copy: nodes are removed from check_nodes inside the loop.
    for node in list(check_nodes):
        with create_ssh_client(node) as ssh_client:
            ssh_client.exec('systemctl start neofs-storage.service')
            wait_for_service_started(ssh_client, 'neofs-storage.service')
            sleep(robot_time_to_int(MAINNET_BLOCK_TIME))

        with allure.step(f'Move node {node} to online state'):
            node_set_status(node, status='online', retry=True)

        check_nodes.remove(node)
        sleep(robot_time_to_int(MAINNET_BLOCK_TIME))
        for attempt in range(1, tick_attempts + 1):
            try:
                tick_epoch()
                break
            except RuntimeError as err:
                # Still best-effort, but leave a trace instead of failing silently.
                logger.warning(f'Tick epoch attempt {attempt} of {tick_attempts} failed: {err}')
                sleep(3)
        else:
            logger.error(f'Epoch was not ticked after {tick_attempts} attempts')

        check_node_in_map(node, alive_node)
|
||||
|
||||
|
||||
def exclude_node_from_network_map(node_to_exclude, alive_node):
    """
    Switch a node to 'offline', tick the epoch and verify it left the network map.

    Args:
        node_to_exclude: name of the node (key of NEOFS_NETMAP_DICT) to exclude.
        alive_node: name of the node whose netmap snapshot is inspected.
    Returns:
        (void)
    Raises:
        AssertionError: if the node's base58 public key is still in the snapshot.
    """
    netmap_key = get_wallet_public_key(
        NEOFS_NETMAP_DICT[node_to_exclude]['wallet_path'],
        STORAGE_WALLET_PASS,
        format="base58",
    )

    step_title = f'Move node {node_to_exclude} to offline state'
    with allure.step(step_title):
        node_set_status(node_to_exclude, status='offline')

    pause = robot_time_to_int(MAINNET_BLOCK_TIME)
    sleep(pause)
    tick_epoch()

    netmap = get_netmap_snapshot(node_name=alive_node)
    assert netmap_key not in netmap, f'Expected node with key {netmap_key} not in network map'
|
||||
|
||||
|
||||
def include_node_to_network_map(node_to_include, alive_node):
    """
    Switch a node to 'online', tick the epoch and verify it is in the network map.

    Args:
        node_to_include: name of the node to bring back.
        alive_node: name of the node forwarded to check_node_in_map.
    Returns:
        (void)
    """
    step_title = f'Move node {node_to_include} to online state'
    with allure.step(step_title):
        node_set_status(node_to_include, status='online')

    pause = robot_time_to_int(MAINNET_BLOCK_TIME)
    sleep(pause)
    tick_epoch()

    check_node_in_map(node_to_include, alive_node)
|
||||
|
||||
|
||||
@allure.step('Check node {node_name} in network map')
def check_node_in_map(node_name: str, alive_node: str = None):
    """
    Assert that a node's public key is present in the network map snapshot.

    Args:
        node_name (str): node (key of NEOFS_NETMAP_DICT) expected in the map.
        alive_node (str): node whose netmap snapshot is read; when not given,
        node_name itself is used.
    Returns:
        (void)
    Raises:
        AssertionError: if the node's base58 public key is not in the snapshot.
    """
    if not alive_node:
        alive_node = node_name

    netmap_key = get_wallet_public_key(
        NEOFS_NETMAP_DICT[node_name]['wallet_path'],
        STORAGE_WALLET_PASS,
        format="base58",
    )
    logger.info(f'Node {node_name} netmap key: {netmap_key}')

    netmap = get_netmap_snapshot(node_name=alive_node)
    assert netmap_key in netmap, f'Expected node with key {netmap_key} in network map'
|
||||
|
||||
|
||||
@allure.title('Add one node to cluster')
@pytest.mark.add_nodes
@pytest.mark.node_mgmt
def test_add_nodes(prepare_tmp_dir, prepare_wallet_and_deposit, return_nodes_after_test_run):
    """
    Scenario:
        1. Pick a node whose rpc endpoint differs from STORAGE_RPC_ENDPOINT_1,
           exclude it from the network map and clean it up.
        2. Create a container with a 3-replica policy, put an object and wait
           for 3 replicas.
        3. Return the node to the cluster.
        4. Exclude another random node and check the object still reaches
           3 replicas; include that node back and check again.
        5. Create a container with a 4-replica policy, put an object and wait
           for 4 replicas.
    """
    wallet = prepare_wallet_and_deposit
    placement_rule_3 = 'REP 3 IN X CBF 1 SELECT 3 FROM * AS X'
    placement_rule_4 = 'REP 4 IN X CBF 1 SELECT 4 FROM * AS X'
    source_file_path = generate_file()

    additional_node = choice([
        node for node, node_config in NEOFS_NETMAP_DICT.items()
        if node_config.get('rpc') != STORAGE_RPC_ENDPOINT_1
    ])
    alive_node = choice([node for node in NEOFS_NETMAP_DICT if node != additional_node])

    check_node_in_map(additional_node, alive_node)

    with allure.step(f'Exclude node {additional_node} from map and clean it up'):
        # Register the node before touching it: if cleanup fails midway, the
        # return_nodes_after_test_run teardown still brings the node back.
        check_nodes.append(additional_node)
        cleanup_node(additional_node, alive_node)

    cid = create_container(wallet, rule=placement_rule_3, basic_acl=PUBLIC_ACL)
    oid = put_object(wallet, source_file_path, cid, endpoint=NEOFS_NETMAP_DICT[alive_node].get('rpc'))
    wait_object_replication_on_nodes(wallet, cid, oid, 3)

    return_nodes(alive_node)

    with allure.step('Check data could be replicated to new node'):
        random_node = choice([node for node in NEOFS_NETMAP_DICT if node not in (additional_node, alive_node)])
        exclude_node_from_network_map(random_node, alive_node)

        wait_object_replication_on_nodes(wallet, cid, oid, 3, excluded_nodes=[random_node])
        include_node_to_network_map(random_node, alive_node)
        wait_object_replication_on_nodes(wallet, cid, oid, 3)

    with allure.step('Check container could be created with new node'):
        cid = create_container(wallet, rule=placement_rule_4, basic_acl=PUBLIC_ACL)
        oid = put_object(wallet, source_file_path, cid, endpoint=NEOFS_NETMAP_DICT[alive_node].get('rpc'))
        wait_object_replication_on_nodes(wallet, cid, oid, 4)
|
||||
|
||||
@allure.title('Control Operations with storage nodes')
|
||||
@pytest.mark.node_mgmt
|
||||
def test_nodes_management(prepare_tmp_dir):
|
||||
|
|
|
@ -16,6 +16,8 @@ import pexpect
|
|||
from robot.api import logger
|
||||
|
||||
ROBOT_AUTO_KEYWORDS = False
|
||||
COLOR_GREEN = "\033[92m"
|
||||
COLOR_OFF = "\033[0m"
|
||||
|
||||
|
||||
def _cmd_run(cmd: str, timeout: int = 30) -> str:
|
||||
|
@ -26,7 +28,7 @@ def _cmd_run(cmd: str, timeout: int = 30) -> str:
|
|||
compl_proc = None
|
||||
start_time = datetime.now()
|
||||
try:
|
||||
logger.info(f"Executing command: {cmd}")
|
||||
logger.info(f"{COLOR_GREEN}Executing command: {cmd}{COLOR_OFF}")
|
||||
start_time = datetime.utcnow()
|
||||
compl_proc = subprocess.run(cmd, check=True, universal_newlines=True,
|
||||
stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
|
||||
|
@ -35,7 +37,7 @@ def _cmd_run(cmd: str, timeout: int = 30) -> str:
|
|||
output = compl_proc.stdout
|
||||
return_code = compl_proc.returncode
|
||||
end_time = datetime.utcnow()
|
||||
logger.info(f"Output: {output}")
|
||||
logger.info(f"{COLOR_GREEN}Output: {output}{COLOR_OFF}")
|
||||
_attach_allure_log(cmd, output, return_code, start_time, end_time)
|
||||
|
||||
return output
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
#!/usr/bin/python3.9
|
||||
import sys
|
||||
|
||||
import allure
|
||||
from robot.api import logger
|
||||
from robot.api.deco import keyword
|
||||
|
||||
|
@ -28,13 +30,22 @@ def tick_epoch():
|
|||
# If neofs-adm is available, then we tick epoch with it (to be consistent with UAT tests)
|
||||
cmd = f"{NEOFS_ADM_EXEC} morph force-new-epoch -c {NEOFS_ADM_CONFIG_PATH}"
|
||||
logger.info(f"Executing shell command: {cmd}")
|
||||
out = ''
|
||||
err = ''
|
||||
try:
|
||||
out = wrappers.run_sh(cmd)
|
||||
logger.info(f"Command completed with output: {out}")
|
||||
except Exception as exc:
|
||||
logger.error(exc)
|
||||
err = str(exc)
|
||||
raise RuntimeError("Failed to tick epoch") from exc
|
||||
|
||||
finally:
|
||||
if 'allure' in sys.modules:
|
||||
allure.attach((
|
||||
f'COMMAND: {cmd}\n'
|
||||
f'OUTPUT:\n {out}\n'
|
||||
f'ERROR: {err}\n'
|
||||
), 'Tick Epoch', allure.attachment_type.TEXT)
|
||||
return
|
||||
|
||||
# Otherwise we tick epoch using transaction
|
||||
|
|
|
@ -178,7 +178,7 @@ def node_healthcheck(node_name: str) -> HealthStatus:
|
|||
|
||||
|
||||
@keyword('Set status for node')
|
||||
def node_set_status(node_name: str, status: str) -> None:
|
||||
def node_set_status(node_name: str, status: str, retry=False) -> None:
|
||||
"""
|
||||
The function sets particular status for given node.
|
||||
Args:
|
||||
|
@ -188,6 +188,11 @@ def node_set_status(node_name: str, status: str) -> None:
|
|||
(void)
|
||||
"""
|
||||
command = f"control set-status --status {status}"
|
||||
try:
|
||||
run_control_command(node_name, command)
|
||||
except AssertionError as err:
|
||||
if not retry:
|
||||
raise AssertionError(f'Command control set-status failed with error {err}') from err
|
||||
run_control_command(node_name, command)
|
||||
|
||||
|
||||
|
|
Loading…
Reference in a new issue