[#102] Updates for failover

Signed-off-by: Andrey Berezin <a.berezin@yadro.com>
port-fill-percent-fix
Andrey Berezin 2023-10-25 15:57:38 +03:00
parent 0c3bb20af5
commit b1a3d740e9
5 changed files with 51 additions and 8 deletions

View File

@ -22,7 +22,7 @@ dependencies = [
"neo-mamba==1.0.0",
"paramiko>=2.10.3",
"pexpect>=4.8.0",
"requests>=2.28.0",
"requests==2.28.1",
"docstring_parser>=0.15",
"testrail-api>=1.12.0",
"pytest==7.1.2",

View File

@ -208,6 +208,42 @@ class Cluster:
def morph_chain(self) -> list[MorphChain]:
return self.services(MorphChain)
def nodes(self, services: list[ServiceClass]) -> list[ClusterNode]:
"""
Resolve which cluster nodes hosting the specified services.
Args:
services: list of services to resolve hosting cluster nodes.
Returns:
list of cluster nodes which host specified services.
"""
cluster_nodes = set()
for service in services:
cluster_nodes.update(
[node for node in self.cluster_nodes if node.service(type(service)) == service]
)
return list(cluster_nodes)
def node(self, service: ServiceClass) -> ClusterNode:
"""
Resolve single cluster node hosting the specified service.
Args:
services: list of services to resolve hosting cluster nodes.
Returns:
list of cluster nodes which host specified services.
"""
nodes = [node for node in self.cluster_nodes if node.service(type(service)) == service]
if not len(nodes):
raise RuntimeError(f"Cannot find service {service} on any node")
return nodes[0]
def services(self, service_type: type[ServiceClass]) -> list[ServiceClass]:
"""
Get all services in a cluster of specified type.

View File

@ -41,10 +41,10 @@ class ClusterStateController:
provider = SshConnectionProvider()
provider.drop(node.host_ip)
self.stopped_nodes.append(node)
with reporter.step(f"Stop host {node.host.config.address}"):
node.host.stop_host(mode=mode)
wait_for_host_offline(self.shell, node.storage_node)
self.stopped_nodes.append(node)
@run_optionally(optionals.OPTIONAL_FAILOVER_ENABLED)
@reporter.step_deco("Shutdown whole cluster")
@ -136,8 +136,8 @@ class ClusterStateController:
@run_optionally(optionals.OPTIONAL_FAILOVER_ENABLED)
@reporter.step_deco("Stop storage service on {node}")
def stop_storage_service(self, node: ClusterNode):
node.storage_node.stop_service()
self.stopped_storage_nodes.append(node)
node.storage_node.stop_service()
@run_optionally(optionals.OPTIONAL_FAILOVER_ENABLED)
@reporter.step_deco("Stop all {service_type} services")

View File

@ -145,6 +145,9 @@ class StorageNode(NodeBase):
def get_shard_config_path(self) -> str:
return self._get_attribute(ConfigAttributes.SHARD_CONFIG_PATH)
def get_shards_config(self) -> tuple[str, dict]:
return self.get_config(self.get_shard_config_path())
def get_control_endpoint(self) -> str:
return self._get_attribute(ConfigAttributes.CONTROL_ENDPOINT)

View File

@ -1,6 +1,6 @@
from abc import abstractmethod
from dataclasses import dataclass
from typing import Optional, Tuple, TypedDict, TypeVar
from typing import Optional, TypedDict, TypeVar
import yaml
@ -103,8 +103,10 @@ class NodeBase(HumanReadableABC):
ConfigAttributes.WALLET_CONFIG,
)
def get_config(self) -> Tuple[str, dict]:
config_file_path = self._get_attribute(ConfigAttributes.CONFIG_PATH)
def get_config(self, config_file_path: Optional[str] = None) -> tuple[str, dict]:
if config_file_path is None:
config_file_path = self._get_attribute(ConfigAttributes.CONFIG_PATH)
shell = self.host.get_shell()
result = shell.exec(f"cat {config_file_path}")
@ -113,8 +115,10 @@ class NodeBase(HumanReadableABC):
config = yaml.safe_load(config_text)
return config_file_path, config
def save_config(self, new_config: dict) -> None:
config_file_path = self._get_attribute(ConfigAttributes.CONFIG_PATH)
def save_config(self, new_config: dict, config_file_path: Optional[str] = None) -> None:
if config_file_path is None:
config_file_path = self._get_attribute(ConfigAttributes.CONFIG_PATH)
shell = self.host.get_shell()
config_str = yaml.dump(new_config)