[#130] Add service healthcheck and allow to skip version check for some binaries
Signed-off-by: Andrey Berezin <a.berezin@yadro.com>
This commit is contained in:
parent
d1ba7eb661
commit
c17f0f6173
5 changed files with 69 additions and 24 deletions
|
@ -6,8 +6,9 @@ from frostfs_testlib.reporter import get_reporter
|
|||
from frostfs_testlib.resources.cli import FROSTFS_CLI_EXEC
|
||||
from frostfs_testlib.shell import CommandOptions
|
||||
from frostfs_testlib.steps.node_management import storage_node_healthcheck
|
||||
from frostfs_testlib.storage.cluster import ClusterNode
|
||||
from frostfs_testlib.storage.cluster import ClusterNode, ServiceClass
|
||||
from frostfs_testlib.testing.test_control import wait_for_success
|
||||
from frostfs_testlib.utils.failover_utils import check_services_status
|
||||
|
||||
reporter = get_reporter()
|
||||
|
||||
|
@ -51,6 +52,26 @@ class BasicHealthcheck(Healthcheck):
|
|||
with reporter.step(f"Perform storage healthcheck on {cluster_node}"):
|
||||
self._perform(cluster_node, checks)
|
||||
|
||||
@wait_for_success(120, 5)
def services_healthcheck(self, cluster_node: ClusterNode):
    """Run the service-level checks for every service found on ``cluster_node``.

    Two checks are handed to ``self._perform``:

    * ``check_services_status`` with the expected status ``"active"``;
    * ``self._check_services``, which calls each service's own healthcheck.

    The method is wrapped in ``wait_for_success(120, 5)``
    (presumably retry-until-success with timeout/interval — confirm in test_control).
    """
    node_services = cluster_node.services

    # Insertion order is preserved: status check first, per-service healthcheck second.
    checks = {}
    checks[check_services_status] = {"service_list": node_services, "expected_status": "active"}
    checks[self._check_services] = {"services": node_services}

    with reporter.step(f"Perform service healthcheck on {cluster_node}"):
        self._perform(cluster_node, checks)
|
||||
|
||||
def _check_services(self, cluster_node: ClusterNode, services: list[ServiceClass]) -> str | None:
    """Run each service's own healthcheck and report the first failure.

    Args:
        cluster_node: Node the services belong to; only used in the failure message.
        services: Services whose ``service_healthcheck()`` is called, in order.

    Returns:
        A failure message for the first service whose healthcheck returned
        ``False``, or ``None`` when no service reported ``False``.
    """
    for svc in services:
        # Only an explicit ``False`` counts as a failure. Any other result
        # (including ``None``) is not treated as an error, so a plain
        # truthiness test would be wrong here.
        if svc.service_healthcheck() is False:
            return f"Service {svc.get_service_systemctl_name()} healthcheck failed on node {cluster_node}."
    return None
|
||||
|
||||
@reporter.step_deco("Storage healthcheck on {cluster_node}")
|
||||
def _storage_healthcheck(self, cluster_node: ClusterNode) -> str | None:
|
||||
result = storage_node_healthcheck(cluster_node.storage_node)
|
||||
|
|
|
@ -15,3 +15,7 @@ class Healthcheck(ABC):
|
|||
@abstractmethod
def storage_healthcheck(self, cluster_node: ClusterNode):
    """Check the health of the storage service on the given cluster node.

    Abstract: concrete healthcheck classes supply the implementation.
    """
|
||||
|
||||
@abstractmethod
def services_healthcheck(self, cluster_node: ClusterNode):
    """Check the status of the services on the given cluster node.

    Abstract: concrete healthcheck classes supply the implementation.
    """
|
||||
|
|
|
@ -116,8 +116,24 @@ class ClusterNode:
|
|||
self.host,
|
||||
)
|
||||
|
||||
def get_list_of_services(self) -> list[str]:
    """Return the service-name attribute of every service in this node's host config."""
    service_names: list[str] = []
    for service_config in self.host.config.services:
        service_names.append(service_config.attributes[ConfigAttributes.SERVICE_NAME])
    return service_names
|
||||
@property
def services(self) -> list[NodeBase]:
    """Build service objects for every registered service class hosted on this node.

    For each entry in the class registry the expected hosting service name is
    ``<hosting_service_name><node id zero-padded to two digits>``; a service
    object is created only when the host config lists a service with that name.

    Returns:
        Newly created service objects, in class-registry order.
    """
    # Built once as a set so the membership test inside the loop is O(1).
    hosted_names = {svc.name for svc in self.host.config.services}

    svcs: list[NodeBase] = []
    for entry in self.class_registry._class_mapping.values():
        hosting_svc_name = entry["hosting_service_name"]
        pattern = f"{hosting_svc_name}{self.id:02}"
        if pattern not in hosted_names:
            continue

        config = self.host.get_service_config(pattern)
        svcs.append(entry["cls"](self.id, config.name, self.host))

    return svcs
|
||||
|
||||
def get_all_interfaces(self) -> dict[str, str]:
    """Return the ``interfaces`` mapping taken from this node's host config."""
    host_config = self.host.config
    return host_config.interfaces
|
||||
|
|
|
@ -11,6 +11,7 @@ from frostfs_testlib.steps.node_management import storage_node_healthcheck
|
|||
from frostfs_testlib.steps.storage_policy import get_nodes_with_object
|
||||
from frostfs_testlib.storage.cluster import Cluster, ClusterNode, NodeBase, StorageNode
|
||||
from frostfs_testlib.storage.dataclasses.frostfs_services import MorphChain
|
||||
from frostfs_testlib.storage.dataclasses.node_base import ServiceClass
|
||||
from frostfs_testlib.testing.test_control import wait_for_success
|
||||
from frostfs_testlib.utils.datetime_utils import parse_time
|
||||
|
||||
|
@ -96,11 +97,11 @@ def multiple_restart(
|
|||
|
||||
@reporter.step_deco("Get status of list of services and check expected status")
|
||||
@wait_for_success(60, 5)
|
||||
def check_services_status(service_list: list[str], expected_status: str, shell: Shell):
|
||||
def check_services_status(cluster_node: ClusterNode, service_list: list[ServiceClass], expected_status: str):
|
||||
cmd = ""
|
||||
for service in service_list:
|
||||
cmd += f' sudo systemctl status {service} --lines=0 | grep "Active:";'
|
||||
result = shell.exec(cmd).stdout.rstrip()
|
||||
cmd += f' sudo systemctl status {service.get_service_systemctl_name()} --lines=0 | grep "Active:";'
|
||||
result = cluster_node.host.get_shell().exec(cmd).stdout.rstrip()
|
||||
statuses = list()
|
||||
for line in result.split("\n"):
|
||||
status_substring = line.split()
|
||||
|
|
|
@ -3,12 +3,7 @@ import re
|
|||
|
||||
from frostfs_testlib.cli import FrostfsAdm, FrostfsCli
|
||||
from frostfs_testlib.hosting import Hosting
|
||||
from frostfs_testlib.resources.cli import (
|
||||
FROSTFS_ADM_EXEC,
|
||||
FROSTFS_AUTHMATE_EXEC,
|
||||
FROSTFS_CLI_EXEC,
|
||||
NEOGO_EXECUTABLE,
|
||||
)
|
||||
from frostfs_testlib.resources.cli import FROSTFS_ADM_EXEC, FROSTFS_AUTHMATE_EXEC, FROSTFS_CLI_EXEC, NEOGO_EXECUTABLE
|
||||
from frostfs_testlib.resources.common import DEFAULT_WALLET_CONFIG
|
||||
from frostfs_testlib.shell import Shell
|
||||
|
||||
|
@ -44,36 +39,44 @@ def get_remote_binaries_versions(hosting: Hosting) -> dict[str, str]:
|
|||
binary_path_by_name = {} # Maps binary name to executable path
|
||||
for service_config in host.config.services:
|
||||
exec_path = service_config.attributes.get("exec_path")
|
||||
requires_check = service_config.attributes.get("requires_version_check", "true")
|
||||
if exec_path:
|
||||
binary_path_by_name[service_config.name] = exec_path
|
||||
binary_path_by_name[service_config.name] = {
|
||||
"exec_path": exec_path,
|
||||
"check": requires_check.lower() == "true",
|
||||
}
|
||||
for cli_config in host.config.clis:
|
||||
binary_path_by_name[cli_config.name] = cli_config.exec_path
|
||||
requires_check = cli_config.attributes.get("requires_version_check", "true")
|
||||
binary_path_by_name[cli_config.name] = {
|
||||
"exec_path": cli_config.exec_path,
|
||||
"check": requires_check.lower() == "true",
|
||||
}
|
||||
|
||||
shell = host.get_shell()
|
||||
versions_at_host = {}
|
||||
for binary_name, binary_path in binary_path_by_name.items():
|
||||
for binary_name, binary in binary_path_by_name.items():
|
||||
try:
|
||||
binary_path = binary["exec_path"]
|
||||
result = shell.exec(f"{binary_path} --version")
|
||||
versions_at_host[binary_name] = _parse_version(result.stdout)
|
||||
versions_at_host[binary_name] = {"version": _parse_version(result.stdout), "check": binary["check"]}
|
||||
except Exception as exc:
|
||||
logger.error(f"Cannot get version for {binary_path} because of\n{exc}")
|
||||
versions_at_host[binary_name] = "Unknown"
|
||||
versions_at_host[binary_name] = {"version": "Unknown", "check": binary["check"]}
|
||||
versions_by_host[host.config.address] = versions_at_host
|
||||
|
||||
# Consolidate versions across all hosts
|
||||
versions = {}
|
||||
for host, binary_versions in versions_by_host.items():
|
||||
for name, version in binary_versions.items():
|
||||
captured_version = versions.get(name)
|
||||
for name, binary in binary_versions.items():
|
||||
captured_version = versions.get(name, {}).get("version")
|
||||
version = binary["version"]
|
||||
if captured_version:
|
||||
assert (
|
||||
captured_version == version
|
||||
), f"Binary {name} has inconsistent version on host {host}"
|
||||
assert captured_version == version, f"Binary {name} has inconsistent version on host {host}"
|
||||
else:
|
||||
versions[name] = version
|
||||
versions[name] = {"version": version, "check": binary["check"]}
|
||||
return versions
|
||||
|
||||
|
||||
def _parse_version(version_output: str) -> str:
    """Extract the version string from a binary's ``--version`` output.

    The text after the first case-insensitive occurrence of ``version``
    (skipping any ``:``/whitespace and one optional leading ``v``) up to the
    end of that line is returned, stripped of surrounding whitespace.

    Args:
        version_output: Raw stdout of ``<binary> --version``.

    Returns:
        The parsed version, or ``version_output`` unchanged when it contains
        no ``version`` marker.
    """
    match = re.search(r"version[:\s]*v?(.+)", version_output, re.IGNORECASE)
    # Fall back to the raw output (not "Unknown") so a binary with a
    # non-standard --version format still yields a comparable value.
    return match.group(1).strip() if match else version_output
|
||||
|
|
Loading…
Reference in a new issue