frostfs-testcases/robot/resources/lib/python_keywords/node_management.py
anikeev-yadro 9650dfb4aa Add comments to timeout between commands
Signed-off-by: anikeev-yadro <a.anikeev@yadro.com>
2022-11-10 17:38:26 +03:00

247 lines
8.6 KiB
Python

import logging
import random
import re
import time
from dataclasses import dataclass
from typing import Optional
import allure
from common import (
MORPH_BLOCK_TIME,
NEOFS_CLI_EXEC,
NEOFS_NETMAP_DICT,
STORAGE_WALLET_CONFIG,
STORAGE_WALLET_PASS,
)
from data_formatters import get_wallet_public_key
from epoch import tick_epoch
from neofs_testlib.cli import NeofsCli
from neofs_testlib.hosting import Hosting
from neofs_testlib.shell import Shell
from utility import parse_time
# Module-level logger used by the functions below; looked up by the name "NeoLogger".
logger = logging.getLogger("NeoLogger")
@dataclass
class HealthStatus:
    """
    Network and health status values extracted from healthcheck command output.
    """

    # Text after the last ":" on the line containing "Network status"; None if no such line.
    network_status: Optional[str] = None
    # Text after the last ":" on the line containing "Health status"; None if no such line.
    health_status: Optional[str] = None

    @staticmethod
    def from_stdout(output: str) -> "HealthStatus":
        """
        Builds a HealthStatus from raw command output.

        The output is scanned line by line. For every line that contains one of the
        markers, the text after the last ":" (stripped of surrounding whitespace) is
        stored; when a marker occurs on several lines, the last occurrence wins.

        Args:
            output: text to parse.

        Returns:
            HealthStatus whose fields are None for markers that were not found.
        """
        found = {"Network status": None, "Health status": None}
        for row in output.split("\n"):
            for marker in found:
                if marker in row:
                    # rpartition yields the whole row as the tail when there is no ":"
                    found[marker] = row.rpartition(":")[2].strip()
        return HealthStatus(found["Network status"], found["Health status"])
@allure.step("Stop storage nodes")
def stop_nodes(hosting: Hosting, number: int, nodes: list[str]) -> list[str]:
    """
    Stops a randomly chosen subset of the given storage nodes.

    Args:
        hosting (Hosting): used to find the host that runs each chosen node
        number (int): how many nodes to pick and stop
        nodes (list): candidate nodes to pick from

    Returns:
        (list): the nodes that were stopped
    """
    selected = random.sample(nodes, number)
    for service_name in selected:
        hosting.get_host_by_service(service_name).stop_service(service_name)
    return selected
@allure.step("Start storage nodes")
def start_nodes(hosting: Hosting, nodes: list[str]) -> None:
    """
    Starts every storage node from the given list, in list order.

    Args:
        hosting (Hosting): used to find the host that runs each node
        nodes (list): the nodes to start
    """
    for service_name in nodes:
        hosting.get_host_by_service(service_name).start_service(service_name)
@allure.step("Get Locode")
def get_locode() -> str:
    """
    Picks a random entry of NEOFS_NETMAP_DICT and returns its "UN-LOCODE" value.

    Returns:
        the chosen locode (it is also written to the log).
    """
    candidates = list(NEOFS_NETMAP_DICT.values())
    locode = random.choice(candidates)["UN-LOCODE"]
    logger.info(f"Random locode chosen: {locode}")
    return locode
@allure.step("Healthcheck for node {node_name}")
def node_healthcheck(hosting: Hosting, node_name: str) -> HealthStatus:
    """
    Runs the "control healthcheck" command for a node and parses its output.

    Args:
        hosting: passed through to the control command runner.
        node_name: node name for which health status should be retrieved.

    Returns:
        health status as HealthStatus object.
    """
    raw_output = _run_control_command_with_retries(hosting, node_name, "control healthcheck")
    return HealthStatus.from_stdout(raw_output)
@allure.step("Set status for node {node_name}")
def node_set_status(hosting: Hosting, node_name: str, status: str, retries: int = 0) -> None:
    """
    Sets the requested status on the given node via the "control set-status" command.

    Args:
        hosting: passed through to the control command runner.
        node_name: node name for which status should be set.
        status: online or offline.
        retries (optional, int): number of additional attempts if the command fails.
    """
    _run_control_command_with_retries(
        hosting,
        node_name,
        f"control set-status --status {status}",
        retries,
    )
@allure.step("Get netmap snapshot")
def get_netmap_snapshot(node_name: str, shell: Shell) -> str:
    """
    Requests a netmap snapshot through the CLI and returns it as text.

    Args:
        node_name: node name from which netmap snapshot should be requested; its
            "rpc" and "wallet_path" entries are read from NEOFS_NETMAP_DICT.
        shell: shell used to execute the CLI.

    Returns:
        stdout of the snapshot command (string representation of netmap).
    """
    entry = NEOFS_NETMAP_DICT[node_name]
    neofs_cli = NeofsCli(shell, NEOFS_CLI_EXEC, config_file=STORAGE_WALLET_CONFIG)
    result = neofs_cli.netmap.snapshot(rpc_endpoint=entry["rpc"], wallet=entry["wallet_path"])
    return result.stdout
@allure.step("Get shard list for node {node_name}")
def node_shard_list(hosting: Hosting, node_name: str) -> list[str]:
    """
    Runs "control shards list" for a node and extracts shard identifiers from the output.

    Args:
        hosting: passed through to the control command runner.
        node_name: node name for which shards should be returned.

    Returns:
        list of the texts captured between "Shard " and ":" in the command output.
    """
    shards_output = _run_control_command_with_retries(hosting, node_name, "control shards list")
    shard_ids = re.findall(r"Shard (.*):", shards_output)
    return shard_ids
@allure.step("Shard set for node {node_name}")
def node_shard_set_mode(hosting: Hosting, node_name: str, shard: str, mode: str) -> str:
    """
    Sets the mode of one shard via the "control shards set-mode" command.

    Args:
        hosting: passed through to the control command runner.
        node_name: node name on which shard mode should be set.
        shard: value passed to the command's --id option.
        mode: value passed to the command's --mode option.

    Returns:
        stdout of the command.
    """
    set_mode_command = f"control shards set-mode --id {shard} --mode {mode}"
    command_output = _run_control_command_with_retries(hosting, node_name, set_mode_command)
    return command_output
@allure.step("Drop object from node {node_name}")
def drop_object(hosting: Hosting, node_name: str, cid: str, oid: str) -> str:
    """
    Drops an object from a node via the "control drop-objects" command.

    Args:
        hosting: passed through to the control command runner.
        node_name: node name from which object should be dropped.
        cid: first part of the "<cid>/<oid>" address passed to the command.
        oid: second part of the "<cid>/<oid>" address passed to the command.

    Returns:
        stdout of the command.
    """
    drop_command = f"control drop-objects -o {cid}/{oid}"
    command_output = _run_control_command_with_retries(hosting, node_name, drop_command)
    return command_output
@allure.step("Delete data of node {node_name}")
def delete_node_data(hosting: Hosting, node_name: str) -> None:
    """
    Stops the node's service, deletes its storage node data and then waits.

    Args:
        hosting: used to find the host that runs the node.
        node_name: name of the node whose data should be deleted.
    """
    node_host = hosting.get_host_by_service(node_name)
    node_host.stop_service(node_name)
    node_host.delete_storage_node_data(node_name)
    # Pause for the duration configured in MORPH_BLOCK_TIME before returning
    pause = parse_time(MORPH_BLOCK_TIME)
    time.sleep(pause)
@allure.step("Exclude node {node_to_exclude} from network map")
def exclude_node_from_network_map(
    hosting: Hosting, node_to_exclude: str, alive_node: str, shell: Shell
) -> None:
    """
    Sets the node offline, ticks the epoch and checks the node left the network map.

    Args:
        hosting: passed through to node_set_status.
        node_to_exclude: name of the node to exclude.
        alive_node: name of the node from which the netmap snapshot is requested.
        shell: shell used for ticking the epoch and requesting the snapshot.

    Raises:
        AssertionError: if the node's public key is still present in the snapshot.
    """
    wallet_path = NEOFS_NETMAP_DICT[node_to_exclude]["wallet_path"]
    netmap_key = get_wallet_public_key(wallet_path, STORAGE_WALLET_PASS)

    node_set_status(hosting, node_to_exclude, status="offline")

    # Pause for the duration configured in MORPH_BLOCK_TIME before ticking the epoch
    pause = parse_time(MORPH_BLOCK_TIME)
    time.sleep(pause)
    tick_epoch(shell=shell)

    netmap = get_netmap_snapshot(node_name=alive_node, shell=shell)
    assert netmap_key not in netmap, f"Expected node with key {netmap_key} not in network map"
@allure.step("Include node {node_to_include} into network map")
def include_node_to_network_map(
    hosting: Hosting, node_to_include: str, alive_node: str, shell: Shell
) -> None:
    """
    Sets the node online, ticks the epoch and checks the node is in the network map.

    Args:
        hosting: passed through to node_set_status.
        node_to_include: name of the node to include.
        alive_node: name of the node from which the netmap snapshot is requested.
        shell: shell used for ticking the epoch and requesting the snapshot.
    """
    node_set_status(hosting, node_to_include, status="online")

    # Per suggestion of @fyrchik we need to wait for 2 blocks after we set status and after tick epoch.
    # First sleep can be omitted after https://github.com/nspcc-dev/neofs-node/issues/1790 complete.
    wait_after_status = parse_time(MORPH_BLOCK_TIME) * 2
    time.sleep(wait_after_status)

    tick_epoch(shell=shell)

    wait_after_tick = parse_time(MORPH_BLOCK_TIME) * 2
    time.sleep(wait_after_tick)

    check_node_in_map(node_to_include, shell, alive_node)
@allure.step("Check node {node_name} in network map")
def check_node_in_map(node_name: str, shell: Shell, alive_node: Optional[str] = None) -> None:
    """
    Checks that the node's public key is present in a netmap snapshot.

    Args:
        node_name: name of the node that is expected to be in the network map.
        shell: shell used for requesting the snapshot.
        alive_node: name of the node to request the snapshot from; when not given
            (or empty), the snapshot is requested from node_name itself.

    Raises:
        AssertionError: if the node's public key is not found in the snapshot.
    """
    snapshot_source = alive_node if alive_node else node_name
    wallet_path = NEOFS_NETMAP_DICT[node_name]["wallet_path"]
    netmap_key = get_wallet_public_key(wallet_path, STORAGE_WALLET_PASS)
    logger.info(f"Node {node_name} netmap key: {netmap_key}")

    netmap = get_netmap_snapshot(node_name=snapshot_source, shell=shell)
    assert netmap_key in netmap, f"Expected node with key {netmap_key} in network map"
def _run_control_command_with_retries(
    hosting: Hosting, node_name: str, command: str, retries: int = 0
) -> str:
    """
    Runs a control command for the node, repeating it when it fails with AssertionError.

    Args:
        hosting: passed through to _run_control_command.
        node_name: name of the node (service) the command is executed for.
        command: control command to run, e.g. "control healthcheck".
        retries: number of additional attempts after the first one; negative
            values are treated as 0 so that the command always runs at least once.

    Returns:
        stdout of the first successful attempt.

    Raises:
        AssertionError: if the last attempt fails; the original error is chained.
    """
    # A negative value would make the range below empty, so the command would never
    # be executed and None would be returned silently.
    retries = max(retries, 0)
    for attempt in range(1 + retries):  # original attempt + specified retries
        try:
            return _run_control_command(hosting, node_name, command)
        except AssertionError as err:
            # Only AssertionError is treated as a retryable command failure;
            # any other exception propagates immediately.
            if attempt < retries:
                logger.warning(f"Command {command} failed with error {err} and will be retried")
                continue
            raise AssertionError(f"Command {command} failed with error {err}") from err
def _run_control_command(hosting: Hosting, service_name: str, command: str) -> str:
    """
    Executes a neofs-cli control command on the host of the given service.

    The wallet path, wallet password and control endpoint are read from the service
    config attributes. The password is written into a config file under /tmp on the
    host, and that file is passed to the CLI via --config.

    Args:
        hosting: used to find the host that runs the service.
        service_name: name of the service the command is executed for.
        command: CLI arguments placed right after the executable path.

    Returns:
        stdout of the executed command.
    """
    host = hosting.get_host_by_service(service_name)

    service_config = host.get_service_config(service_name)
    wallet_path = service_config.attributes["wallet_path"]
    wallet_password = service_config.attributes["wallet_password"]
    control_endpoint = service_config.attributes["control_endpoint"]

    shell = host.get_shell()
    wallet_config_path = f"/tmp/{service_name}-config.yaml"
    wallet_config = f'password: "{wallet_password}"'
    # NOTE: the value is embedded into a single-quoted shell string without escaping,
    # so a password that contains a single quote would break this command.
    shell.exec(f"echo '{wallet_config}' > {wallet_config_path}")

    cli_config = host.get_cli_config("neofs-cli")

    # TODO: implement cli.control
    # cli = NeofsCli(shell, cli_config.exec_path, wallet_config_path)
    result = shell.exec(
        f"{cli_config.exec_path} {command} --endpoint {control_endpoint} "
        f"--wallet {wallet_path} --config {wallet_config_path}"
    )
    return result.stdout