[#130] Add service healthcheck and allow skipping the version check for some binaries

Signed-off-by: Andrey Berezin <a.berezin@yadro.com>
This commit is contained in:
Andrey Berezin 2023-11-24 19:46:35 +03:00
parent d1ba7eb661
commit c17f0f6173
5 changed files with 69 additions and 24 deletions

View file

@ -6,8 +6,9 @@ from frostfs_testlib.reporter import get_reporter
from frostfs_testlib.resources.cli import FROSTFS_CLI_EXEC from frostfs_testlib.resources.cli import FROSTFS_CLI_EXEC
from frostfs_testlib.shell import CommandOptions from frostfs_testlib.shell import CommandOptions
from frostfs_testlib.steps.node_management import storage_node_healthcheck from frostfs_testlib.steps.node_management import storage_node_healthcheck
from frostfs_testlib.storage.cluster import ClusterNode from frostfs_testlib.storage.cluster import ClusterNode, ServiceClass
from frostfs_testlib.testing.test_control import wait_for_success from frostfs_testlib.testing.test_control import wait_for_success
from frostfs_testlib.utils.failover_utils import check_services_status
reporter = get_reporter() reporter = get_reporter()
@ -51,6 +52,26 @@ class BasicHealthcheck(Healthcheck):
with reporter.step(f"Perform storage healthcheck on {cluster_node}"): with reporter.step(f"Perform storage healthcheck on {cluster_node}"):
self._perform(cluster_node, checks) self._perform(cluster_node, checks)
@wait_for_success(120, 5)
def services_healthcheck(self, cluster_node: ClusterNode):
    """Perform the service-level healthcheck for all services of a cluster node.

    Two checks are handed to ``self._perform``, in this order:
    ``check_services_status`` with an expected status of "active", and
    ``self._check_services``, which calls each service's own healthcheck.
    The method is decorated with ``wait_for_success(120, 5)``.

    Args:
        cluster_node: Cluster node whose services are checked.
    """
    node_services = cluster_node.services
    status_check_kwargs = {
        "service_list": node_services,
        "expected_status": "active",
    }
    # Insertion order is preserved so the checks are listed in the original order.
    checks = {
        check_services_status: status_check_kwargs,
        self._check_services: {"services": node_services},
    }

    with reporter.step(f"Perform service healthcheck on {cluster_node}"):
        self._perform(cluster_node, checks)
def _check_services(self, cluster_node: ClusterNode, services: list[ServiceClass]):
for svc in services:
result = svc.service_healthcheck()
if result == False:
return f"Service {svc.get_service_systemctl_name()} healthcheck failed on node {cluster_node}."
@reporter.step_deco("Storage healthcheck on {cluster_node}") @reporter.step_deco("Storage healthcheck on {cluster_node}")
def _storage_healthcheck(self, cluster_node: ClusterNode) -> str | None: def _storage_healthcheck(self, cluster_node: ClusterNode) -> str | None:
result = storage_node_healthcheck(cluster_node.storage_node) result = storage_node_healthcheck(cluster_node.storage_node)

View file

@ -15,3 +15,7 @@ class Healthcheck(ABC):
@abstractmethod
def storage_healthcheck(self, cluster_node: ClusterNode):
    """Perform storage service healthcheck on target cluster node.

    Args:
        cluster_node: Cluster node to run the healthcheck against.
    """
@abstractmethod
def services_healthcheck(self, cluster_node: ClusterNode):
    """Perform service status check on target cluster node.

    Args:
        cluster_node: Cluster node whose services are checked.
    """

View file

@ -116,8 +116,24 @@ class ClusterNode:
self.host, self.host,
) )
@property
def services(self) -> list[NodeBase]:
    """Build service objects for the registered service classes hosted on this node.

    Supersedes the earlier ``get_list_of_services()``, which returned only the
    service names taken from the host configuration.

    For every entry of the class registry mapping, the expected service name is
    the entry's ``hosting_service_name`` followed by this node's id zero-padded
    to two digits. When the host configuration lists a service with that name,
    the entry's class is instantiated with the node id, the configured service
    name and the host.

    Returns:
        Newly constructed service objects, in registry iteration order.
    """
    hosted_names = {service_config.name for service_config in self.host.config.services}
    node_services: list[NodeBase] = []

    for registry_entry in self.class_registry._class_mapping.values():
        name_prefix = registry_entry["hosting_service_name"]
        expected_name = f"{name_prefix}{self.id:02}"
        if expected_name not in hosted_names:
            continue

        matched_config = self.host.get_service_config(expected_name)
        service_cls = registry_entry["cls"]
        node_services.append(service_cls(self.id, matched_config.name, self.host))

    return node_services
def get_all_interfaces(self) -> dict[str, str]:
    """Return the ``interfaces`` mapping from this node's host configuration."""
    host_config = self.host.config
    return host_config.interfaces

View file

@ -11,6 +11,7 @@ from frostfs_testlib.steps.node_management import storage_node_healthcheck
from frostfs_testlib.steps.storage_policy import get_nodes_with_object from frostfs_testlib.steps.storage_policy import get_nodes_with_object
from frostfs_testlib.storage.cluster import Cluster, ClusterNode, NodeBase, StorageNode from frostfs_testlib.storage.cluster import Cluster, ClusterNode, NodeBase, StorageNode
from frostfs_testlib.storage.dataclasses.frostfs_services import MorphChain from frostfs_testlib.storage.dataclasses.frostfs_services import MorphChain
from frostfs_testlib.storage.dataclasses.node_base import ServiceClass
from frostfs_testlib.testing.test_control import wait_for_success from frostfs_testlib.testing.test_control import wait_for_success
from frostfs_testlib.utils.datetime_utils import parse_time from frostfs_testlib.utils.datetime_utils import parse_time
@ -96,11 +97,11 @@ def multiple_restart(
@reporter.step_deco("Get status of list of services and check expected status") @reporter.step_deco("Get status of list of services and check expected status")
@wait_for_success(60, 5) @wait_for_success(60, 5)
def check_services_status(service_list: list[str], expected_status: str, shell: Shell): def check_services_status(cluster_node: ClusterNode, service_list: list[ServiceClass], expected_status: str):
cmd = "" cmd = ""
for service in service_list: for service in service_list:
cmd += f' sudo systemctl status {service} --lines=0 | grep "Active:";' cmd += f' sudo systemctl status {service.get_service_systemctl_name()} --lines=0 | grep "Active:";'
result = shell.exec(cmd).stdout.rstrip() result = cluster_node.host.get_shell().exec(cmd).stdout.rstrip()
statuses = list() statuses = list()
for line in result.split("\n"): for line in result.split("\n"):
status_substring = line.split() status_substring = line.split()

View file

@ -3,12 +3,7 @@ import re
from frostfs_testlib.cli import FrostfsAdm, FrostfsCli from frostfs_testlib.cli import FrostfsAdm, FrostfsCli
from frostfs_testlib.hosting import Hosting from frostfs_testlib.hosting import Hosting
from frostfs_testlib.resources.cli import ( from frostfs_testlib.resources.cli import FROSTFS_ADM_EXEC, FROSTFS_AUTHMATE_EXEC, FROSTFS_CLI_EXEC, NEOGO_EXECUTABLE
FROSTFS_ADM_EXEC,
FROSTFS_AUTHMATE_EXEC,
FROSTFS_CLI_EXEC,
NEOGO_EXECUTABLE,
)
from frostfs_testlib.resources.common import DEFAULT_WALLET_CONFIG from frostfs_testlib.resources.common import DEFAULT_WALLET_CONFIG
from frostfs_testlib.shell import Shell from frostfs_testlib.shell import Shell
@ -44,36 +39,44 @@ def get_remote_binaries_versions(hosting: Hosting) -> dict[str, str]:
binary_path_by_name = {} # Maps binary name to executable path binary_path_by_name = {} # Maps binary name to executable path
for service_config in host.config.services: for service_config in host.config.services:
exec_path = service_config.attributes.get("exec_path") exec_path = service_config.attributes.get("exec_path")
requires_check = service_config.attributes.get("requires_version_check", "true")
if exec_path: if exec_path:
binary_path_by_name[service_config.name] = exec_path binary_path_by_name[service_config.name] = {
"exec_path": exec_path,
"check": requires_check.lower() == "true",
}
for cli_config in host.config.clis: for cli_config in host.config.clis:
binary_path_by_name[cli_config.name] = cli_config.exec_path requires_check = cli_config.attributes.get("requires_version_check", "true")
binary_path_by_name[cli_config.name] = {
"exec_path": cli_config.exec_path,
"check": requires_check.lower() == "true",
}
shell = host.get_shell() shell = host.get_shell()
versions_at_host = {} versions_at_host = {}
for binary_name, binary_path in binary_path_by_name.items(): for binary_name, binary in binary_path_by_name.items():
try: try:
binary_path = binary["exec_path"]
result = shell.exec(f"{binary_path} --version") result = shell.exec(f"{binary_path} --version")
versions_at_host[binary_name] = _parse_version(result.stdout) versions_at_host[binary_name] = {"version": _parse_version(result.stdout), "check": binary["check"]}
except Exception as exc: except Exception as exc:
logger.error(f"Cannot get version for {binary_path} because of\n{exc}") logger.error(f"Cannot get version for {binary_path} because of\n{exc}")
versions_at_host[binary_name] = "Unknown" versions_at_host[binary_name] = {"version": "Unknown", "check": binary["check"]}
versions_by_host[host.config.address] = versions_at_host versions_by_host[host.config.address] = versions_at_host
# Consolidate versions across all hosts # Consolidate versions across all hosts
versions = {} versions = {}
for host, binary_versions in versions_by_host.items(): for host, binary_versions in versions_by_host.items():
for name, version in binary_versions.items(): for name, binary in binary_versions.items():
captured_version = versions.get(name) captured_version = versions.get(name, {}).get("version")
version = binary["version"]
if captured_version: if captured_version:
assert ( assert captured_version == version, f"Binary {name} has inconsistent version on host {host}"
captured_version == version
), f"Binary {name} has inconsistent version on host {host}"
else: else:
versions[name] = version versions[name] = {"version": version, "check": binary["check"]}
return versions return versions
def _parse_version(version_output: str) -> str: def _parse_version(version_output: str) -> str:
version = re.search(r"version[:\s]*v?(.+)", version_output, re.IGNORECASE) version = re.search(r"version[:\s]*v?(.+)", version_output, re.IGNORECASE)
return version.group(1).strip() if version else "Unknown" return version.group(1).strip() if version else version_output