forked from TrueCloudLab/frostfs-testlib
[126] small healthcheck and stop start hosts rework
Signed-off-by: Andrey Berezin <a.berezin@yadro.com>
This commit is contained in:
parent
9ab4def44f
commit
253bb3b1d8
4 changed files with 112 additions and 105 deletions
|
@ -1,22 +1,65 @@
|
|||
from typing import Callable
|
||||
|
||||
from frostfs_testlib.cli.frostfs_cli.cli import FrostfsCli
|
||||
from frostfs_testlib.healthcheck.interfaces import Healthcheck
|
||||
from frostfs_testlib.reporter import get_reporter
|
||||
from frostfs_testlib.resources.cli import FROSTFS_CLI_EXEC
|
||||
from frostfs_testlib.shell import CommandOptions
|
||||
from frostfs_testlib.steps.node_management import storage_node_healthcheck
|
||||
from frostfs_testlib.storage.cluster import ClusterNode
|
||||
from frostfs_testlib.testing.test_control import wait_for_success
|
||||
|
||||
reporter = get_reporter()
|
||||
|
||||
|
||||
class BasicHealthcheck(Healthcheck):
|
||||
@reporter.step_deco("Perform healthcheck for {cluster_node}")
|
||||
def perform(self, cluster_node: ClusterNode):
    """Run the storage healthcheck for the node and fail if it reports an issue.

    Args:
        cluster_node: Node handed over to ``self.storage_healthcheck``.

    Raises:
        AssertionError: If ``self.storage_healthcheck`` returns a truthy value;
            that value is used as the error payload.
    """
    issue = self.storage_healthcheck(cluster_node)
    if not issue:
        # Falsy result (e.g. None) means there is nothing to report.
        return
    raise AssertionError(issue)
|
||||
def _perform(self, cluster_node: ClusterNode, checks: dict[Callable, dict]):
    """Run every check against the node and fail if any of them reports an issue.

    Args:
        cluster_node: Node passed as the first positional argument to each check.
        checks: Mapping of check callable -> extra keyword arguments for that callable.
            A check signals a problem by returning a truthy description; a falsy
            return value (e.g. ``None``) means the check passed.

    Raises:
        AssertionError: If at least one check returned an issue. The message lists
            all collected issues, one per line.
    """
    issues: list[str] = []
    for check, kwargs in checks.items():
        issue = check(cluster_node, **kwargs)
        if issue:
            issues.append(issue)

    # Raise explicitly instead of using `assert`: assert statements are removed when
    # Python runs with -O, which would silently turn every failed healthcheck into a pass.
    if issues:
        raise AssertionError("Issues found:\n" + "\n".join(issues))
|
||||
|
||||
@wait_for_success(900, 30)
|
||||
def full_healthcheck(self, cluster_node: ClusterNode):
    """Perform the full healthcheck (storage check + tree check) on the node.

    Args:
        cluster_node: Node to check.

    Raises:
        AssertionError: If any registered check reports an issue (raised by ``_perform``).
    """
    # Register the raw `_storage_healthcheck`, not the public `storage_healthcheck`:
    # the public method is itself wrapped in `wait_for_success` and reports a failure
    # by raising instead of returning it. Using it here would nest one
    # `wait_for_success` wrapper inside another and the storage issue would never be
    # aggregated with the tree issue in a single report.
    checks = {
        self._storage_healthcheck: {},
        self._tree_healthcheck: {},
    }

    with reporter.step(f"Perform full healthcheck for {cluster_node}"):
        self._perform(cluster_node, checks)
|
||||
|
||||
@wait_for_success(900, 30)
|
||||
def startup_healthcheck(self, cluster_node: ClusterNode):
    """Perform the healthcheck required on startup of the node (storage check + tree check).

    Args:
        cluster_node: Node to check.

    Raises:
        AssertionError: If any registered check reports an issue (raised by ``_perform``).
    """
    # Register the raw `_storage_healthcheck`, not the public `storage_healthcheck`:
    # the public method is itself wrapped in `wait_for_success` and reports a failure
    # by raising instead of returning it. Using it here would nest one
    # `wait_for_success` wrapper inside another and the storage issue would never be
    # aggregated with the tree issue in a single report.
    checks = {
        self._storage_healthcheck: {},
        self._tree_healthcheck: {},
    }

    with reporter.step(f"Perform startup healthcheck on {cluster_node}"):
        self._perform(cluster_node, checks)
|
||||
|
||||
@wait_for_success(900, 30)
|
||||
def storage_healthcheck(self, cluster_node: ClusterNode) -> str | None:
    """Perform the storage healthcheck on the node inside a dedicated report step.

    Nothing is returned explicitly: the single registered check is executed through
    ``_perform``, so a reported issue surfaces as an AssertionError.

    Args:
        cluster_node: Node to check.
    """
    storage_only = {self._storage_healthcheck: {}}

    with reporter.step(f"Perform storage healthcheck on {cluster_node}"):
        self._perform(cluster_node, storage_only)
|
||||
|
||||
@reporter.step_deco("Storage healthcheck on {cluster_node}")
|
||||
def _storage_healthcheck(self, cluster_node: ClusterNode) -> str | None:
    """Query the storage node health and describe the problem if it is not healthy.

    Socket information is gathered after the health query and before the result is
    evaluated, so it is collected for healthy and unhealthy nodes alike.

    Args:
        cluster_node: Node whose ``storage_node`` is checked.

    Returns:
        ``None`` when health status is "READY" and network status is "ONLINE",
        otherwise a message containing both reported statuses.
    """
    status = storage_node_healthcheck(cluster_node.storage_node)
    self._gather_socket_info(cluster_node)

    if status.health_status == "READY" and status.network_status == "ONLINE":
        return None
    return f"Node {cluster_node} is not healthy. Health={status.health_status}. Network={status.network_status}"
|
||||
|
||||
@reporter.step_deco("Tree healthcheck on {cluster_node}")
|
||||
def tree_healthcheck(self, cluster_node: ClusterNode) -> str | None:
|
||||
def _tree_healthcheck(self, cluster_node: ClusterNode) -> str | None:
|
||||
host = cluster_node.host
|
||||
service_config = host.get_service_config(cluster_node.storage_node.name)
|
||||
wallet_path = service_config.attributes["wallet_path"]
|
||||
|
@ -34,10 +77,10 @@ class BasicHealthcheck(Healthcheck):
|
|||
)
|
||||
result = remote_cli.tree.healthcheck(rpc_endpoint="127.0.0.1:8080")
|
||||
if result.return_code != 0:
|
||||
return f"Error during tree healthcheck (rc={result.return_code}): {result.stdout}. \n Stderr: {result.stderr}"
|
||||
return (
|
||||
f"Error during tree healthcheck (rc={result.return_code}): {result.stdout}. \n Stderr: {result.stderr}"
|
||||
)
|
||||
|
||||
@reporter.step_deco("Storage healthcheck on {cluster_node}")
|
||||
def storage_healthcheck(self, cluster_node: ClusterNode) -> str | None:
    """Query the storage node health and describe the problem if it is not healthy.

    Args:
        cluster_node: Node whose ``storage_node`` is checked.

    Returns:
        ``None`` when health status is "READY" and network status is "ONLINE",
        otherwise a message containing both reported statuses.
    """
    status = storage_node_healthcheck(cluster_node.storage_node)

    if status.health_status == "READY" and status.network_status == "ONLINE":
        return None
    return f"Node {cluster_node} is not healthy. Health={status.health_status}. Network={status.network_status}"
|
||||
@reporter.step_deco("Gather socket info for {cluster_node}")
|
||||
def _gather_socket_info(self, cluster_node: ClusterNode):
    """Run ``ss -tuln | grep 8080`` in the shell of the node's host.

    The command is executed with ``check=False`` and its result is not used here.

    Args:
        cluster_node: Node whose host shell executes the command.
    """
    shell = cluster_node.host.get_shell()
    options = CommandOptions(check=False)
    shell.exec("ss -tuln | grep 8080", options)
|
||||
|
|
|
@ -5,13 +5,13 @@ from frostfs_testlib.storage.cluster import ClusterNode
|
|||
|
||||
class Healthcheck(ABC):
|
||||
@abstractmethod
|
||||
def perform(self, cluster_node: ClusterNode):
|
||||
"""Perform healthcheck on the target cluster node"""
|
||||
def full_healthcheck(self, cluster_node: ClusterNode):
|
||||
"""Perform full healthcheck on the target cluster node"""
|
||||
|
||||
@abstractmethod
|
||||
def tree_healthcheck(self, cluster_node: ClusterNode):
|
||||
"""Check tree sync status on target cluster node"""
|
||||
def startup_healthcheck(self, cluster_node: ClusterNode):
|
||||
"""Perform healthcheck required on startup of target cluster node"""
|
||||
|
||||
@abstractmethod
|
||||
def storage_healthcheck(self, cluster_node: ClusterNode):
|
||||
"""Perform storage node healthcheck on target cluster node"""
|
||||
"""Perform storage service healthcheck on target cluster node"""
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue