From c17f0f6173bfc04b623dc0385c425573a59e78ef Mon Sep 17 00:00:00 2001 From: Andrey Berezin Date: Fri, 24 Nov 2023 19:46:35 +0300 Subject: [PATCH] [#130] Add service healthcheck and allow to skip version check for some binaries Signed-off-by: Andrey Berezin --- .../healthcheck/basic_healthcheck.py | 23 ++++++++++- src/frostfs_testlib/healthcheck/interfaces.py | 4 ++ src/frostfs_testlib/storage/cluster.py | 20 +++++++++- src/frostfs_testlib/utils/failover_utils.py | 7 ++-- src/frostfs_testlib/utils/version_utils.py | 39 ++++++++++--------- 5 files changed, 69 insertions(+), 24 deletions(-) diff --git a/src/frostfs_testlib/healthcheck/basic_healthcheck.py b/src/frostfs_testlib/healthcheck/basic_healthcheck.py index 6f21534..4cb3a48 100644 --- a/src/frostfs_testlib/healthcheck/basic_healthcheck.py +++ b/src/frostfs_testlib/healthcheck/basic_healthcheck.py @@ -6,8 +6,9 @@ from frostfs_testlib.reporter import get_reporter from frostfs_testlib.resources.cli import FROSTFS_CLI_EXEC from frostfs_testlib.shell import CommandOptions from frostfs_testlib.steps.node_management import storage_node_healthcheck -from frostfs_testlib.storage.cluster import ClusterNode +from frostfs_testlib.storage.cluster import ClusterNode, ServiceClass from frostfs_testlib.testing.test_control import wait_for_success +from frostfs_testlib.utils.failover_utils import check_services_status reporter = get_reporter() @@ -51,6 +52,26 @@ class BasicHealthcheck(Healthcheck): with reporter.step(f"Perform storage healthcheck on {cluster_node}"): self._perform(cluster_node, checks) + @wait_for_success(120, 5) + def services_healthcheck(self, cluster_node: ClusterNode): + svcs_to_check = cluster_node.services + checks = { + check_services_status: { + "service_list": svcs_to_check, + "expected_status": "active", + }, + self._check_services: {"services": svcs_to_check}, + } + + with reporter.step(f"Perform service healthcheck on {cluster_node}"): + self._perform(cluster_node, checks) + + def _check_services(self, cluster_node: ClusterNode, services: list[ServiceClass]): + for svc in services: + result = svc.service_healthcheck() + if result == False: + return f"Service {svc.get_service_systemctl_name()} healthcheck failed on node {cluster_node}." + @reporter.step_deco("Storage healthcheck on {cluster_node}") def _storage_healthcheck(self, cluster_node: ClusterNode) -> str | None: result = storage_node_healthcheck(cluster_node.storage_node) diff --git a/src/frostfs_testlib/healthcheck/interfaces.py b/src/frostfs_testlib/healthcheck/interfaces.py index 83fa021..c665b8a 100644 --- a/src/frostfs_testlib/healthcheck/interfaces.py +++ b/src/frostfs_testlib/healthcheck/interfaces.py @@ -15,3 +15,7 @@ class Healthcheck(ABC): @abstractmethod def storage_healthcheck(self, cluster_node: ClusterNode): """Perform storage service healthcheck on target cluster node""" + + @abstractmethod + def services_healthcheck(self, cluster_node: ClusterNode): + """Perform service status check on target cluster node""" diff --git a/src/frostfs_testlib/storage/cluster.py b/src/frostfs_testlib/storage/cluster.py index b8c32ca..02601ac 100644 --- a/src/frostfs_testlib/storage/cluster.py +++ b/src/frostfs_testlib/storage/cluster.py @@ -116,8 +116,24 @@ class ClusterNode: self.host, ) - def get_list_of_services(self) -> list[str]: - return [config.attributes[ConfigAttributes.SERVICE_NAME] for config in self.host.config.services] + @property + def services(self) -> list[NodeBase]: + svcs: list[NodeBase] = [] + svcs_names_on_node = [svc.name for svc in self.host.config.services] + for entry in self.class_registry._class_mapping.values(): + hosting_svc_name = entry["hosting_service_name"] + pattern = f"{hosting_svc_name}{self.id:02}" + if pattern in svcs_names_on_node: + config = self.host.get_service_config(pattern) + svcs.append( + entry["cls"]( + self.id, + config.name, + self.host, + ) + ) + + return svcs def get_all_interfaces(self) -> dict[str, str]: return self.host.config.interfaces diff --git a/src/frostfs_testlib/utils/failover_utils.py b/src/frostfs_testlib/utils/failover_utils.py index d4892c4..507168e 100644 --- a/src/frostfs_testlib/utils/failover_utils.py +++ b/src/frostfs_testlib/utils/failover_utils.py @@ -11,6 +11,7 @@ from frostfs_testlib.steps.node_management import storage_node_healthcheck from frostfs_testlib.steps.storage_policy import get_nodes_with_object from frostfs_testlib.storage.cluster import Cluster, ClusterNode, NodeBase, StorageNode from frostfs_testlib.storage.dataclasses.frostfs_services import MorphChain +from frostfs_testlib.storage.dataclasses.node_base import ServiceClass from frostfs_testlib.testing.test_control import wait_for_success from frostfs_testlib.utils.datetime_utils import parse_time @@ -96,11 +97,11 @@ def multiple_restart( @reporter.step_deco("Get status of list of services and check expected status") @wait_for_success(60, 5) -def check_services_status(service_list: list[str], expected_status: str, shell: Shell): +def check_services_status(cluster_node: ClusterNode, service_list: list[ServiceClass], expected_status: str): cmd = "" for service in service_list: - cmd += f' sudo systemctl status {service} --lines=0 | grep "Active:";' - result = shell.exec(cmd).stdout.rstrip() + cmd += f' sudo systemctl status {service.get_service_systemctl_name()} --lines=0 | grep "Active:";' + result = cluster_node.host.get_shell().exec(cmd).stdout.rstrip() statuses = list() for line in result.split("\n"): status_substring = line.split() diff --git a/src/frostfs_testlib/utils/version_utils.py b/src/frostfs_testlib/utils/version_utils.py index 68f8578..42bde6d 100644 --- a/src/frostfs_testlib/utils/version_utils.py +++ b/src/frostfs_testlib/utils/version_utils.py @@ -3,12 +3,7 @@ import re from frostfs_testlib.cli import FrostfsAdm, FrostfsCli from frostfs_testlib.hosting import Hosting -from frostfs_testlib.resources.cli import ( - FROSTFS_ADM_EXEC, - FROSTFS_AUTHMATE_EXEC, - FROSTFS_CLI_EXEC, - NEOGO_EXECUTABLE, -) +from frostfs_testlib.resources.cli import FROSTFS_ADM_EXEC, FROSTFS_AUTHMATE_EXEC, FROSTFS_CLI_EXEC, NEOGO_EXECUTABLE from frostfs_testlib.resources.common import DEFAULT_WALLET_CONFIG from frostfs_testlib.shell import Shell @@ -44,36 +39,44 @@ def get_remote_binaries_versions(hosting: Hosting) -> dict[str, str]: binary_path_by_name = {} # Maps binary name to executable path for service_config in host.config.services: exec_path = service_config.attributes.get("exec_path") + requires_check = service_config.attributes.get("requires_version_check", "true") if exec_path: - binary_path_by_name[service_config.name] = exec_path + binary_path_by_name[service_config.name] = { + "exec_path": exec_path, + "check": requires_check.lower() == "true", + } for cli_config in host.config.clis: - binary_path_by_name[cli_config.name] = cli_config.exec_path + requires_check = cli_config.attributes.get("requires_version_check", "true") + binary_path_by_name[cli_config.name] = { + "exec_path": cli_config.exec_path, + "check": requires_check.lower() == "true", + } shell = host.get_shell() versions_at_host = {} - for binary_name, binary_path in binary_path_by_name.items(): + for binary_name, binary in binary_path_by_name.items(): try: + binary_path = binary["exec_path"] result = shell.exec(f"{binary_path} --version") - versions_at_host[binary_name] = _parse_version(result.stdout) + versions_at_host[binary_name] = {"version": _parse_version(result.stdout), "check": binary["check"]} except Exception as exc: logger.error(f"Cannot get version for {binary_path} because of\n{exc}") - versions_at_host[binary_name] = "Unknown" + versions_at_host[binary_name] = {"version": "Unknown", "check": binary["check"]} versions_by_host[host.config.address] = versions_at_host # Consolidate versions across all hosts versions = {} for host, binary_versions in versions_by_host.items(): - for name, version in binary_versions.items(): - captured_version = versions.get(name) + for name, binary in binary_versions.items(): + captured_version = versions.get(name, {}).get("version") + version = binary["version"] if captured_version: - assert ( - captured_version == version - ), f"Binary {name} has inconsistent version on host {host}" + assert captured_version == version, f"Binary {name} has inconsistent version on host {host}" else: - versions[name] = version + versions[name] = {"version": version, "check": binary["check"]} return versions def _parse_version(version_output: str) -> str: version = re.search(r"version[:\s]*v?(.+)", version_output, re.IGNORECASE) - return version.group(1).strip() if version else "Unknown" + return version.group(1).strip() if version else version_output