forked from TrueCloudLab/frostfs-testlib
[#130] Add service healthcheck and allow to skip version check for some binaries
Signed-off-by: Andrey Berezin <a.berezin@yadro.com>
This commit is contained in:
parent
d1ba7eb661
commit
c17f0f6173
5 changed files with 69 additions and 24 deletions
|
@ -6,8 +6,9 @@ from frostfs_testlib.reporter import get_reporter
|
||||||
from frostfs_testlib.resources.cli import FROSTFS_CLI_EXEC
|
from frostfs_testlib.resources.cli import FROSTFS_CLI_EXEC
|
||||||
from frostfs_testlib.shell import CommandOptions
|
from frostfs_testlib.shell import CommandOptions
|
||||||
from frostfs_testlib.steps.node_management import storage_node_healthcheck
|
from frostfs_testlib.steps.node_management import storage_node_healthcheck
|
||||||
from frostfs_testlib.storage.cluster import ClusterNode
|
from frostfs_testlib.storage.cluster import ClusterNode, ServiceClass
|
||||||
from frostfs_testlib.testing.test_control import wait_for_success
|
from frostfs_testlib.testing.test_control import wait_for_success
|
||||||
|
from frostfs_testlib.utils.failover_utils import check_services_status
|
||||||
|
|
||||||
reporter = get_reporter()
|
reporter = get_reporter()
|
||||||
|
|
||||||
|
@ -51,6 +52,26 @@ class BasicHealthcheck(Healthcheck):
|
||||||
with reporter.step(f"Perform storage healthcheck on {cluster_node}"):
|
with reporter.step(f"Perform storage healthcheck on {cluster_node}"):
|
||||||
self._perform(cluster_node, checks)
|
self._perform(cluster_node, checks)
|
||||||
|
|
||||||
|
@wait_for_success(120, 5)
|
||||||
|
def services_healthcheck(self, cluster_node: ClusterNode):
    """Perform the service-level healthcheck for every service found on ``cluster_node``.

    Two checks are registered, each mapped to the keyword arguments it should
    receive, and handed to ``self._perform`` together with the node:

    * ``check_services_status`` with the node's services and the expected
      status ``"active"``;
    * ``self._check_services`` with the same list of services.

    NOTE(review): how ``_perform`` invokes the checks and reacts to their results
    is not visible here — confirm against its implementation.
    """
    node_services = cluster_node.services

    # Registration order is kept the same: status check first, then the
    # per-service healthcheck.
    checks = {}
    checks[check_services_status] = dict(service_list=node_services, expected_status="active")
    checks[self._check_services] = dict(services=node_services)

    with reporter.step(f"Perform service healthcheck on {cluster_node}"):
        self._perform(cluster_node, checks)
|
||||||
|
|
||||||
|
def _check_services(self, cluster_node: ClusterNode, services: list[ServiceClass]) -> str | None:
    """Run each service's own healthcheck and report the first failure.

    Args:
        cluster_node: node the services belong to; only used in the failure message.
        services: service objects to probe via ``service_healthcheck()``.

    Returns:
        A failure description for the first service whose healthcheck returned
        ``False``, or ``None`` when no service reported ``False``.
    """
    for svc in services:
        # Identity check on purpose: only an explicit ``False`` counts as a failure,
        # any other result (including ``None``) is treated as "not failed".
        if svc.service_healthcheck() is False:
            return f"Service {svc.get_service_systemctl_name()} healthcheck failed on node {cluster_node}."

    return None
|
||||||
|
|
||||||
@reporter.step_deco("Storage healthcheck on {cluster_node}")
|
@reporter.step_deco("Storage healthcheck on {cluster_node}")
|
||||||
def _storage_healthcheck(self, cluster_node: ClusterNode) -> str | None:
|
def _storage_healthcheck(self, cluster_node: ClusterNode) -> str | None:
|
||||||
result = storage_node_healthcheck(cluster_node.storage_node)
|
result = storage_node_healthcheck(cluster_node.storage_node)
|
||||||
|
|
|
@ -15,3 +15,7 @@ class Healthcheck(ABC):
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def storage_healthcheck(self, cluster_node: ClusterNode):
|
def storage_healthcheck(self, cluster_node: ClusterNode):
|
||||||
"""Perform storage service healthcheck on target cluster node"""
|
"""Perform storage service healthcheck on target cluster node"""
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def services_healthcheck(self, cluster_node: ClusterNode):
|
||||||
|
"""Perform service status check on target cluster node"""
|
||||||
|
|
|
@ -116,8 +116,24 @@ class ClusterNode:
|
||||||
self.host,
|
self.host,
|
||||||
)
|
)
|
||||||
|
|
||||||
def get_list_of_services(self) -> list[str]:
|
@property
|
||||||
return [config.attributes[ConfigAttributes.SERVICE_NAME] for config in self.host.config.services]
|
def services(self) -> list[NodeBase]:
    """Build a service object for every registered service class hosted on this node.

    A registry entry is considered hosted here when the host config lists a
    service whose name equals the entry's ``hosting_service_name`` followed by
    this node's id formatted with the ``02`` format spec. Each match is
    instantiated as ``cls(node id, config name, host)``.

    Returns:
        The instantiated services, in registry iteration order.
    """
    names_on_host = [service_config.name for service_config in self.host.config.services]
    node_services: list[NodeBase] = []

    for registration in self.class_registry._class_mapping.values():
        base_name = registration["hosting_service_name"]
        expected_name = f"{base_name}{self.id:02}"
        if expected_name not in names_on_host:
            continue

        service_config = self.host.get_service_config(expected_name)
        service_cls = registration["cls"]
        node_services.append(service_cls(self.id, service_config.name, self.host))

    return node_services
|
||||||
|
|
||||||
def get_all_interfaces(self) -> dict[str, str]:
|
def get_all_interfaces(self) -> dict[str, str]:
|
||||||
return self.host.config.interfaces
|
return self.host.config.interfaces
|
||||||
|
|
|
@ -11,6 +11,7 @@ from frostfs_testlib.steps.node_management import storage_node_healthcheck
|
||||||
from frostfs_testlib.steps.storage_policy import get_nodes_with_object
|
from frostfs_testlib.steps.storage_policy import get_nodes_with_object
|
||||||
from frostfs_testlib.storage.cluster import Cluster, ClusterNode, NodeBase, StorageNode
|
from frostfs_testlib.storage.cluster import Cluster, ClusterNode, NodeBase, StorageNode
|
||||||
from frostfs_testlib.storage.dataclasses.frostfs_services import MorphChain
|
from frostfs_testlib.storage.dataclasses.frostfs_services import MorphChain
|
||||||
|
from frostfs_testlib.storage.dataclasses.node_base import ServiceClass
|
||||||
from frostfs_testlib.testing.test_control import wait_for_success
|
from frostfs_testlib.testing.test_control import wait_for_success
|
||||||
from frostfs_testlib.utils.datetime_utils import parse_time
|
from frostfs_testlib.utils.datetime_utils import parse_time
|
||||||
|
|
||||||
|
@ -96,11 +97,11 @@ def multiple_restart(
|
||||||
|
|
||||||
@reporter.step_deco("Get status of list of services and check expected status")
|
@reporter.step_deco("Get status of list of services and check expected status")
|
||||||
@wait_for_success(60, 5)
|
@wait_for_success(60, 5)
|
||||||
def check_services_status(service_list: list[str], expected_status: str, shell: Shell):
|
def check_services_status(cluster_node: ClusterNode, service_list: list[ServiceClass], expected_status: str):
|
||||||
cmd = ""
|
cmd = ""
|
||||||
for service in service_list:
|
for service in service_list:
|
||||||
cmd += f' sudo systemctl status {service} --lines=0 | grep "Active:";'
|
cmd += f' sudo systemctl status {service.get_service_systemctl_name()} --lines=0 | grep "Active:";'
|
||||||
result = shell.exec(cmd).stdout.rstrip()
|
result = cluster_node.host.get_shell().exec(cmd).stdout.rstrip()
|
||||||
statuses = list()
|
statuses = list()
|
||||||
for line in result.split("\n"):
|
for line in result.split("\n"):
|
||||||
status_substring = line.split()
|
status_substring = line.split()
|
||||||
|
|
|
@ -3,12 +3,7 @@ import re
|
||||||
|
|
||||||
from frostfs_testlib.cli import FrostfsAdm, FrostfsCli
|
from frostfs_testlib.cli import FrostfsAdm, FrostfsCli
|
||||||
from frostfs_testlib.hosting import Hosting
|
from frostfs_testlib.hosting import Hosting
|
||||||
from frostfs_testlib.resources.cli import (
|
from frostfs_testlib.resources.cli import FROSTFS_ADM_EXEC, FROSTFS_AUTHMATE_EXEC, FROSTFS_CLI_EXEC, NEOGO_EXECUTABLE
|
||||||
FROSTFS_ADM_EXEC,
|
|
||||||
FROSTFS_AUTHMATE_EXEC,
|
|
||||||
FROSTFS_CLI_EXEC,
|
|
||||||
NEOGO_EXECUTABLE,
|
|
||||||
)
|
|
||||||
from frostfs_testlib.resources.common import DEFAULT_WALLET_CONFIG
|
from frostfs_testlib.resources.common import DEFAULT_WALLET_CONFIG
|
||||||
from frostfs_testlib.shell import Shell
|
from frostfs_testlib.shell import Shell
|
||||||
|
|
||||||
|
@ -44,36 +39,44 @@ def get_remote_binaries_versions(hosting: Hosting) -> dict[str, str]:
|
||||||
binary_path_by_name = {} # Maps binary name to executable path
|
binary_path_by_name = {} # Maps binary name to executable path
|
||||||
for service_config in host.config.services:
|
for service_config in host.config.services:
|
||||||
exec_path = service_config.attributes.get("exec_path")
|
exec_path = service_config.attributes.get("exec_path")
|
||||||
|
requires_check = service_config.attributes.get("requires_version_check", "true")
|
||||||
if exec_path:
|
if exec_path:
|
||||||
binary_path_by_name[service_config.name] = exec_path
|
binary_path_by_name[service_config.name] = {
|
||||||
|
"exec_path": exec_path,
|
||||||
|
"check": requires_check.lower() == "true",
|
||||||
|
}
|
||||||
for cli_config in host.config.clis:
|
for cli_config in host.config.clis:
|
||||||
binary_path_by_name[cli_config.name] = cli_config.exec_path
|
requires_check = cli_config.attributes.get("requires_version_check", "true")
|
||||||
|
binary_path_by_name[cli_config.name] = {
|
||||||
|
"exec_path": cli_config.exec_path,
|
||||||
|
"check": requires_check.lower() == "true",
|
||||||
|
}
|
||||||
|
|
||||||
shell = host.get_shell()
|
shell = host.get_shell()
|
||||||
versions_at_host = {}
|
versions_at_host = {}
|
||||||
for binary_name, binary_path in binary_path_by_name.items():
|
for binary_name, binary in binary_path_by_name.items():
|
||||||
try:
|
try:
|
||||||
|
binary_path = binary["exec_path"]
|
||||||
result = shell.exec(f"{binary_path} --version")
|
result = shell.exec(f"{binary_path} --version")
|
||||||
versions_at_host[binary_name] = _parse_version(result.stdout)
|
versions_at_host[binary_name] = {"version": _parse_version(result.stdout), "check": binary["check"]}
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.error(f"Cannot get version for {binary_path} because of\n{exc}")
|
logger.error(f"Cannot get version for {binary_path} because of\n{exc}")
|
||||||
versions_at_host[binary_name] = "Unknown"
|
versions_at_host[binary_name] = {"version": "Unknown", "check": binary["check"]}
|
||||||
versions_by_host[host.config.address] = versions_at_host
|
versions_by_host[host.config.address] = versions_at_host
|
||||||
|
|
||||||
# Consolidate versions across all hosts
|
# Consolidate versions across all hosts
|
||||||
versions = {}
|
versions = {}
|
||||||
for host, binary_versions in versions_by_host.items():
|
for host, binary_versions in versions_by_host.items():
|
||||||
for name, version in binary_versions.items():
|
for name, binary in binary_versions.items():
|
||||||
captured_version = versions.get(name)
|
captured_version = versions.get(name, {}).get("version")
|
||||||
|
version = binary["version"]
|
||||||
if captured_version:
|
if captured_version:
|
||||||
assert (
|
assert captured_version == version, f"Binary {name} has inconsistent version on host {host}"
|
||||||
captured_version == version
|
|
||||||
), f"Binary {name} has inconsistent version on host {host}"
|
|
||||||
else:
|
else:
|
||||||
versions[name] = version
|
versions[name] = {"version": version, "check": binary["check"]}
|
||||||
return versions
|
return versions
|
||||||
|
|
||||||
|
|
||||||
def _parse_version(version_output: str) -> str:
    """Extract the version string from a binary's ``--version`` output.

    Looks for the first case-insensitive ``version`` marker, skips any following
    colons/whitespace and an optional leading ``v``, and returns the rest of
    that line stripped of surrounding whitespace. When no marker is found the
    raw output is returned unchanged.
    """
    match = re.search(r"version[:\s]*v?(.+)", version_output, re.IGNORECASE)
    if match is None:
        return version_output
    return match.group(1).strip()
|
||||||
|
|
Loading…
Reference in a new issue