[] Add tombstone expiration test
Some checks failed
DCO action / DCO (pull_request) Has been cancelled

Signed-off-by: a.berezin <a.berezin@yadro.com>
This commit is contained in:
Andrey Berezin 2024-11-12 13:59:06 +03:00
parent 95b32a036a
commit 8c63d1d1c7
6 changed files with 46 additions and 14 deletions

View file

@ -164,6 +164,9 @@ class DockerHost(Host):
return volume_path return volume_path
def send_signal_to_service(self, service_name: str, signal: str) -> None:
    """Signal delivery is not supported by the docker host implementation.

    Args:
        service_name: Name of the service that would receive the signal.
        signal: Signal name.

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError("Not implemented for docker")
def delete_metabase(self, service_name: str) -> None: def delete_metabase(self, service_name: str) -> None:
raise NotImplementedError("Not implemented for docker") raise NotImplementedError("Not implemented for docker")

View file

@ -117,6 +117,17 @@ class Host(ABC):
service_name: Name of the service to stop. service_name: Name of the service to stop.
""" """
@abstractmethod
def send_signal_to_service(self, service_name: str, signal: str) -> None:
    """Send a signal to the service with the specified name using kill -<signal>.

    The service must be hosted on this host.

    Args:
        service_name: Name of the service to send the signal to.
        signal: Signal name. See `kill -l` for all names.
    """
@abstractmethod @abstractmethod
def mask_service(self, service_name: str) -> None: def mask_service(self, service_name: str) -> None:
"""Prevent the service from start by any activity by masking it. """Prevent the service from start by any activity by masking it.

View file

@ -53,3 +53,4 @@ HOSTING_CONFIG_FILE = os.getenv(
) )
MORE_LOG = os.getenv("MORE_LOG", "1") MORE_LOG = os.getenv("MORE_LOG", "1")
# NOTE(review): presumably the name of the system attribute that stores an
# object's expiration epoch — confirm against the code that reads it.
EXPIRATION_EPOCH_ATTRIBUTE = "__SYSTEM__EXPIRATION_EPOCH"

View file

@ -172,6 +172,16 @@ class ClusterStateController:
if service_type == StorageNode: if service_type == StorageNode:
self.wait_after_storage_startup() self.wait_after_storage_startup()
@run_optionally(optionals.OPTIONAL_FAILOVER_ENABLED)
@reporter.step("Send sighup to all {service_type} services")
def sighup_services_of_type(self, service_type: type[ServiceClass]):
    """Send SIGHUP to every service of the given type in the cluster.

    The signal is delivered to all matching services concurrently through
    ``parallel``. When ``service_type`` is ``StorageNode`` the call then
    blocks in ``wait_after_storage_startup()``.

    Args:
        service_type: Class of the services that should receive SIGHUP.
    """
    services = self.cluster.services(service_type)
    parallel([service.send_signal_to_service for service in services], signal="SIGHUP")

    # self.stopped_services is intentionally left untouched: sending a signal
    # does not start a service, so a service that was recorded as stopped must
    # stay recorded until it is actually started again.
    if service_type == StorageNode:
        self.wait_after_storage_startup()
@wait_for_success(600, 60) @wait_for_success(600, 60)
def wait_s3gate(self, s3gate: S3Gate): def wait_s3gate(self, s3gate: S3Gate):
with reporter.step(f"Wait for {s3gate} reconnection"): with reporter.step(f"Wait for {s3gate} reconnection"):

View file

@ -14,14 +14,19 @@ class ConfigStateManager(StateManager):
self.cluster = self.csc.cluster self.cluster = self.csc.cluster
@reporter.step("Change configuration for {service_type} on all nodes") @reporter.step("Change configuration for {service_type} on all nodes")
def set_on_all_nodes(self, service_type: type[ServiceClass], values: dict[str, Any]): def set_on_all_nodes(self, service_type: type[ServiceClass], values: dict[str, Any], sighup: bool = False):
services = self.cluster.services(service_type) services = self.cluster.services(service_type)
nodes = self.cluster.nodes(services) nodes = self.cluster.nodes(services)
self.services_with_changed_config.update([(node, service_type) for node in nodes]) self.services_with_changed_config.update([(node, service_type) for node in nodes])
if not sighup:
self.csc.stop_services_of_type(service_type) self.csc.stop_services_of_type(service_type)
parallel([node.config(service_type).set for node in nodes], values=values) parallel([node.config(service_type).set for node in nodes], values=values)
if not sighup:
self.csc.start_services_of_type(service_type) self.csc.start_services_of_type(service_type)
else:
self.csc.sighup_services_of_type(service_type)
@reporter.step("Change configuration for {service_type} on {node}") @reporter.step("Change configuration for {service_type} on {node}")
def set_on_node(self, node: ClusterNode, service_type: type[ServiceClass], values: dict[str, Any]): def set_on_node(self, node: ClusterNode, service_type: type[ServiceClass], values: dict[str, Any]):
@ -32,18 +37,20 @@ class ConfigStateManager(StateManager):
self.csc.start_service_of_type(node, service_type) self.csc.start_service_of_type(node, service_type)
@reporter.step("Revert all configuration changes") @reporter.step("Revert all configuration changes")
def revert_all(self): def revert_all(self, sighup: bool = False):
if not self.services_with_changed_config: if not self.services_with_changed_config:
return return
parallel(self._revert_svc, self.services_with_changed_config) parallel(self._revert_svc, self.services_with_changed_config, sighup)
self.services_with_changed_config.clear() self.services_with_changed_config.clear()
if not sighup:
self.csc.start_all_stopped_services() self.csc.start_all_stopped_services()
# TODO: parallel can't have multiple parallel_items :( # TODO: parallel can't have multiple parallel_items :(
@reporter.step("Revert all configuration {node_and_service}") @reporter.step("Revert all configuration {node_and_service}")
def _revert_svc(self, node_and_service: tuple[ClusterNode, ServiceClass]): def _revert_svc(self, node_and_service: tuple[ClusterNode, ServiceClass], sighup: bool = False):
node, service_type = node_and_service node, service_type = node_and_service
if not sighup:
self.csc.stop_service_of_type(node, service_type) self.csc.stop_service_of_type(node, service_type)
node.config(service_type).revert() node.config(service_type).revert()

View file

@ -65,6 +65,10 @@ class NodeBase(HumanReadableABC):
with reporter.step(f"Start {self.name} service on {self.host.config.address}"): with reporter.step(f"Start {self.name} service on {self.host.config.address}"):
self.host.start_service(self.name) self.host.start_service(self.name)
def send_signal_to_service(self, signal: str):
    """Forward ``signal`` for this service to its host inside a reporter step.

    Args:
        signal: Signal name passed on to the host's ``send_signal_to_service``.
    """
    step_title = f"Send -{signal} signal to {self.name} service on {self.host.config.address}"
    with reporter.step(step_title):
        self.host.send_signal_to_service(self.name, signal)
@abstractmethod @abstractmethod
def service_healthcheck(self) -> bool: def service_healthcheck(self) -> bool:
"""Service healthcheck.""" """Service healthcheck."""
@ -185,9 +189,7 @@ class NodeBase(HumanReadableABC):
if attribute_name not in config.attributes: if attribute_name not in config.attributes:
if default_attribute_name is None: if default_attribute_name is None:
raise RuntimeError( raise RuntimeError(f"Service {self.name} has no {attribute_name} in config and fallback attribute isn't set either")
f"Service {self.name} has no {attribute_name} in config and fallback attribute isn't set either"
)
return config.attributes[default_attribute_name] return config.attributes[default_attribute_name]
@ -197,9 +199,7 @@ class NodeBase(HumanReadableABC):
return self.host.get_service_config(self.name) return self.host.get_service_config(self.name)
def get_service_uptime(self, service: str) -> datetime: def get_service_uptime(self, service: str) -> datetime:
result = self.host.get_shell().exec( result = self.host.get_shell().exec(f"systemctl show {service} --property ActiveEnterTimestamp | cut -d '=' -f 2")
f"systemctl show {service} --property ActiveEnterTimestamp | cut -d '=' -f 2"
)
start_time = parser.parse(result.stdout.strip()) start_time = parser.parse(result.stdout.strip())
current_time = datetime.now(tz=timezone.utc) current_time = datetime.now(tz=timezone.utc)
active_time = current_time - start_time active_time = current_time - start_time