[#102] Updates for failover

Signed-off-by: Andrey Berezin <a.berezin@yadro.com>
port-fill-percent-fix
Andrey Berezin 2023-10-25 15:57:38 +03:00
parent 0c3bb20af5
commit b1a3d740e9
5 changed files with 51 additions and 8 deletions

View File

@ -22,7 +22,7 @@ dependencies = [
"neo-mamba==1.0.0", "neo-mamba==1.0.0",
"paramiko>=2.10.3", "paramiko>=2.10.3",
"pexpect>=4.8.0", "pexpect>=4.8.0",
"requests>=2.28.0", "requests==2.28.1",
"docstring_parser>=0.15", "docstring_parser>=0.15",
"testrail-api>=1.12.0", "testrail-api>=1.12.0",
"pytest==7.1.2", "pytest==7.1.2",

View File

@ -208,6 +208,42 @@ class Cluster:
def morph_chain(self) -> list[MorphChain]: def morph_chain(self) -> list[MorphChain]:
return self.services(MorphChain) return self.services(MorphChain)
def nodes(self, services: list[ServiceClass]) -> list[ClusterNode]:
    """
    Resolve which cluster nodes host the specified services.

    A node is selected when the service it returns for the requested
    service's type compares equal to that service. Each node appears at
    most once in the result, and the order is deterministic: nodes are
    listed in the order they are first matched (services in the given
    order, then ``self.cluster_nodes`` order).

    Args:
        services: list of services to resolve hosting cluster nodes.

    Returns:
        list of unique cluster nodes which host specified services.
    """
    # A dict serves as an insertion-ordered set: it de-duplicates nodes the
    # same way a set does, but keeps the result order reproducible.
    hosting_nodes = {}
    for service in services:
        service_type = type(service)
        for cluster_node in self.cluster_nodes:
            if cluster_node.service(service_type) == service:
                hosting_nodes.setdefault(cluster_node, None)
    return list(hosting_nodes)
def node(self, service: ServiceClass) -> ClusterNode:
    """
    Resolve single cluster node hosting the specified service.

    Nodes are checked in ``self.cluster_nodes`` order and the first node
    whose service of the same type compares equal to ``service`` is returned.

    Args:
        service: service to resolve the hosting cluster node for.

    Returns:
        the first cluster node which hosts the specified service.

    Raises:
        RuntimeError: if no cluster node hosts the specified service.
    """
    service_type = type(service)
    # Stop at the first match; there is no need to query the remaining nodes.
    for cluster_node in self.cluster_nodes:
        if cluster_node.service(service_type) == service:
            return cluster_node
    raise RuntimeError(f"Cannot find service {service} on any node")
def services(self, service_type: type[ServiceClass]) -> list[ServiceClass]: def services(self, service_type: type[ServiceClass]) -> list[ServiceClass]:
""" """
Get all services in a cluster of specified type. Get all services in a cluster of specified type.

View File

@ -41,10 +41,10 @@ class ClusterStateController:
provider = SshConnectionProvider() provider = SshConnectionProvider()
provider.drop(node.host_ip) provider.drop(node.host_ip)
self.stopped_nodes.append(node)
with reporter.step(f"Stop host {node.host.config.address}"): with reporter.step(f"Stop host {node.host.config.address}"):
node.host.stop_host(mode=mode) node.host.stop_host(mode=mode)
wait_for_host_offline(self.shell, node.storage_node) wait_for_host_offline(self.shell, node.storage_node)
self.stopped_nodes.append(node)
@run_optionally(optionals.OPTIONAL_FAILOVER_ENABLED) @run_optionally(optionals.OPTIONAL_FAILOVER_ENABLED)
@reporter.step_deco("Shutdown whole cluster") @reporter.step_deco("Shutdown whole cluster")
@ -136,8 +136,8 @@ class ClusterStateController:
@run_optionally(optionals.OPTIONAL_FAILOVER_ENABLED) @run_optionally(optionals.OPTIONAL_FAILOVER_ENABLED)
@reporter.step_deco("Stop storage service on {node}") @reporter.step_deco("Stop storage service on {node}")
def stop_storage_service(self, node: ClusterNode): def stop_storage_service(self, node: ClusterNode):
node.storage_node.stop_service()
self.stopped_storage_nodes.append(node) self.stopped_storage_nodes.append(node)
node.storage_node.stop_service()
@run_optionally(optionals.OPTIONAL_FAILOVER_ENABLED) @run_optionally(optionals.OPTIONAL_FAILOVER_ENABLED)
@reporter.step_deco("Stop all {service_type} services") @reporter.step_deco("Stop all {service_type} services")

View File

@ -145,6 +145,9 @@ class StorageNode(NodeBase):
def get_shard_config_path(self) -> str: def get_shard_config_path(self) -> str:
return self._get_attribute(ConfigAttributes.SHARD_CONFIG_PATH) return self._get_attribute(ConfigAttributes.SHARD_CONFIG_PATH)
def get_shards_config(self) -> tuple[str, dict]:
    """
    Read the shard configuration of this storage node.

    Returns:
        whatever ``get_config`` returns for the shard config path; per its
        annotation, a tuple of the config file path and the parsed config.
    """
    shard_config_path = self.get_shard_config_path()
    return self.get_config(shard_config_path)
def get_control_endpoint(self) -> str: def get_control_endpoint(self) -> str:
return self._get_attribute(ConfigAttributes.CONTROL_ENDPOINT) return self._get_attribute(ConfigAttributes.CONTROL_ENDPOINT)

View File

@ -1,6 +1,6 @@
from abc import abstractmethod from abc import abstractmethod
from dataclasses import dataclass from dataclasses import dataclass
from typing import Optional, Tuple, TypedDict, TypeVar from typing import Optional, TypedDict, TypeVar
import yaml import yaml
@ -103,8 +103,10 @@ class NodeBase(HumanReadableABC):
ConfigAttributes.WALLET_CONFIG, ConfigAttributes.WALLET_CONFIG,
) )
def get_config(self) -> Tuple[str, dict]: def get_config(self, config_file_path: Optional[str] = None) -> tuple[str, dict]:
config_file_path = self._get_attribute(ConfigAttributes.CONFIG_PATH) if config_file_path is None:
config_file_path = self._get_attribute(ConfigAttributes.CONFIG_PATH)
shell = self.host.get_shell() shell = self.host.get_shell()
result = shell.exec(f"cat {config_file_path}") result = shell.exec(f"cat {config_file_path}")
@ -113,8 +115,10 @@ class NodeBase(HumanReadableABC):
config = yaml.safe_load(config_text) config = yaml.safe_load(config_text)
return config_file_path, config return config_file_path, config
def save_config(self, new_config: dict) -> None: def save_config(self, new_config: dict, config_file_path: Optional[str] = None) -> None:
config_file_path = self._get_attribute(ConfigAttributes.CONFIG_PATH) if config_file_path is None:
config_file_path = self._get_attribute(ConfigAttributes.CONFIG_PATH)
shell = self.host.get_shell() shell = self.host.get_shell()
config_str = yaml.dump(new_config) config_str = yaml.dump(new_config)