Stop CM before wiping data and start it after the data has been deleted #75

Closed
mmalygina wants to merge 1 commit from mmalygina/frostfs-testlib:support-stop-cm-before-wipe-data1 into master
5 changed files with 64 additions and 3 deletions

View file

@ -5,6 +5,7 @@ from frostfs_testlib.storage.dataclasses.frostfs_services import (
MorphChain,
S3Gate,
StorageNode,
CM
)
from frostfs_testlib.storage.service_registry import ServiceRegistry
@ -16,7 +17,7 @@ __class_registry.register_service(_FrostfsServicesNames.INNER_RING, InnerRing)
__class_registry.register_service(_FrostfsServicesNames.MORPH_CHAIN, MorphChain)
__class_registry.register_service(_FrostfsServicesNames.S3_GATE, S3Gate)
__class_registry.register_service(_FrostfsServicesNames.HTTP_GATE, HTTPGate)
__class_registry.register_service(_FrostfsServicesNames.CM, CM)
def get_service_registry() -> ServiceRegistry:
"""Returns registry with registered classes related to cluster and cluster nodes.

View file

@ -15,6 +15,7 @@ from frostfs_testlib.storage.dataclasses.frostfs_services import (
MorphChain,
S3Gate,
StorageNode,
CM
)
from frostfs_testlib.storage.dataclasses.node_base import NodeBase, ServiceClass
from frostfs_testlib.storage.service_registry import ServiceRegistry
@ -77,6 +78,9 @@ class ClusterNode:
def s3_gate(self) -> S3Gate:
return self.service(S3Gate)
def cm(self) -> CM:
    """Return the service of class ``CM`` obtained via ``self.service``."""
    cm_service = self.service(CM)
    return cm_service
def get_config(self, config_file_path: str) -> dict:
shell = self.host.get_shell()
@ -98,7 +102,7 @@ class ClusterNode:
Args:
service_type: type of the service which should be returned,
for frostfs it can be StorageNode, S3Gate, HttpGate, MorphChain and InnerRing.
for frostfs it can be StorageNode, S3Gate, HTTPGate, MorphChain, InnerRing and CM.
Returns:
service of service_type class.

View file

@ -13,6 +13,7 @@ class ConfigAttributes:
UN_LOCODE = "un_locode"
HTTP_HOSTNAME = "http_hostname"
S3_HOSTNAME = "s3_hostname"
CM_HOSTNAME = "cm_hostname"
class _FrostfsServicesNames:
@ -21,3 +22,4 @@ class _FrostfsServicesNames:
HTTP_GATE = "http-gate"
MORPH_CHAIN = "morph-chain"
INNER_RING = "ir"
CM = "cm"

View file

@ -27,6 +27,7 @@ class ClusterStateController:
self.detached_disks: dict[str, DiskController] = {}
self.stopped_storage_nodes: list[ClusterNode] = []
self.stopped_s3_gates: list[ClusterNode] = []
self.stopped_cms: list[ClusterNode] = []
self.dropped_traffic: list[ClusterNode] = []
self.cluster = cluster
self.shell = shell
@ -74,6 +75,16 @@ class ClusterStateController:
for node in nodes:
self.stop_s3_gate(node)
@run_optionally(optionals.OPTIONAL_FAILOVER_ENABLED)
@reporter.step_deco("Stop all cm services on cluster")
def stop_all_cm_services(self, reversed_order: bool = False):
    """Stop the cm service on every cluster node.

    Args:
        reversed_order: when True, walk ``self.cluster.cluster_nodes`` from
            last to first; otherwise walk them in their stored order.
    """
    cluster_nodes = self.cluster.cluster_nodes
    ordered_nodes = reversed(cluster_nodes) if reversed_order else cluster_nodes
    for cluster_node in ordered_nodes:
        self.stop_cm_service(cluster_node)
@run_optionally(optionals.OPTIONAL_FAILOVER_ENABLED)
@reporter.step_deco("Start host of node {node}")
def start_node_host(self, node: ClusterNode):
@ -134,6 +145,33 @@ class ClusterStateController:
node.storage_node.start_service()
self.stopped_storage_nodes.remove(node)
@run_optionally(optionals.OPTIONAL_FAILOVER_ENABLED)
@reporter.step_deco("Stop cm service on {node}")
def stop_cm_service(self, node: ClusterNode):
    """Stop the cm service of ``node`` and record the node in ``self.stopped_cms``.

    Args:
        node: cluster node whose cm service should be stopped.
    """
    cm_service = node.cm()
    cm_service.stop_service()
    # Remember the node only after the stop call has returned.
    self.stopped_cms.append(node)
@run_optionally(optionals.OPTIONAL_FAILOVER_ENABLED)
@reporter.step_deco("Start cm service on {node}")
def start_cm_service(self, node: ClusterNode):
    """Start the cm service of ``node`` and drop the node from ``self.stopped_cms``.

    Args:
        node: cluster node whose cm service should be started.

    Raises:
        ValueError: from ``list.remove`` when ``node`` is not present in
            ``self.stopped_cms``.
    """
    cm_service = node.cm()
    cm_service.start_service()
    # The node is un-tracked only after the start call has returned.
    self.stopped_cms.remove(node)
@run_optionally(optionals.OPTIONAL_FAILOVER_ENABLED)
@reporter.step_deco("Start stopped cm services")
def start_stopped_cm_services(self):
    """Start every cm service recorded in ``self.stopped_cms``, then reset the list.

    Does nothing when no stopped cm services are recorded.
    """
    if self.stopped_cms:
        # When several services were stopped, bringing them up one by one is fragile:
        # the first one started may need its peers, which are still down, and fail.
        # A failure part-way through could also mean the remaining services are never
        # started, leaving the cluster in a bad state.
        # Running the starts in parallel makes sure each one is at least attempted.
        nodes_to_start = copy.copy(self.stopped_cms)  # snapshot: start_cm_service mutates the list
        parallel(self.start_cm_service, nodes_to_start)

        # NOTE(review): this waits on storage nodes, not on cm services - confirm intended.
        wait_all_storage_nodes_returned(self.shell, self.cluster)
        self.stopped_cms = []
@run_optionally(optionals.OPTIONAL_FAILOVER_ENABLED)
@reporter.step_deco("Start stopped storage services")
def start_stopped_storage_services(self):

View file

@ -61,7 +61,6 @@ class S3Gate(NodeBase):
def label(self) -> str:
return f"{self.name}: {self.get_endpoint()}"
class HTTPGate(NodeBase):
"""
Class represents HTTP gateway in a cluster
@ -153,6 +152,9 @@ class StorageNode(NodeBase):
def get_s3_hostname(self) -> str:
    """Return what ``_get_attribute`` yields for ``ConfigAttributes.S3_HOSTNAME``."""
    attribute_key = ConfigAttributes.S3_HOSTNAME
    return self._get_attribute(attribute_key)
def get_cm_hostname(self) -> str:
    """Return what ``_get_attribute`` yields for ``ConfigAttributes.CM_HOSTNAME``."""
    attribute_key = ConfigAttributes.CM_HOSTNAME
    return self._get_attribute(attribute_key)
def delete_blobovnicza(self):
    """Call ``delete_blobovnicza`` on this node's host, passing ``self.name``."""
    node_host = self.host
    node_host.delete_blobovnicza(self.name)
@ -171,3 +173,17 @@ class StorageNode(NodeBase):
@property
def label(self) -> str:
    """Label of the form ``"<name>: <rpc endpoint>"``."""
    node_name = self.name
    rpc_endpoint = self.get_rpc_endpoint()
    return f"{node_name}: {rpc_endpoint}"
class CM(NodeBase):
    """
    CM service running in a cluster.
    """

    def get_cm_hostname(self) -> str:
        """Return what ``_get_attribute`` yields for ``ConfigAttributes.CM_HOSTNAME``."""
        attribute_key = ConfigAttributes.CM_HOSTNAME
        return self._get_attribute(attribute_key)

    def service_healthcheck(self) -> bool:
        """Service healthcheck.

        NOTE(review): no check is implemented, so this call evaluates to ``None``
        even though the annotation promises ``bool`` - confirm whether a real
        check is required for cm.
        """

    @property
    def label(self) -> str:
        """Label consisting of the service name only."""
        service_name = self.name
        return f"{service_name}"