[#318] Add tombstone expiration test
Some checks failed
DCO action / DCO (pull_request) Has been cancelled

Signed-off-by: a.berezin <a.berezin@yadro.com>
This commit is contained in:
Andrey Berezin 2024-11-12 13:59:06 +03:00
parent 95b32a036a
commit 6ec3546bde
6 changed files with 46 additions and 14 deletions

View file

@ -164,6 +164,9 @@ class DockerHost(Host):
return volume_path
def send_signal_to_service(self, service_name: str, signal: str) -> None:
"""Unsupported on docker hosts: always raises NotImplementedError.

Args:
service_name: Name of the service (unused by this stub).
signal: Signal name (unused by this stub).

Raises:
NotImplementedError: On every call.
"""
raise NotImplementedError("Not implemented for docker")
def delete_metabase(self, service_name: str) -> None:
"""Unsupported on docker hosts: always raises NotImplementedError.

Args:
service_name: Name of the service (unused by this stub).

Raises:
NotImplementedError: On every call.
"""
raise NotImplementedError("Not implemented for docker")

View file

@ -117,6 +117,17 @@ class Host(ABC):
service_name: Name of the service to stop.
"""
@abstractmethod
def send_signal_to_service(self, service_name: str, signal: str) -> None:
"""Send a signal to the service with the specified name using kill -<signal>.
The service must be hosted on this host.
Args:
service_name: Name of the service to send the signal to.
signal: Signal name. Run `kill -l` to list all available names.
"""
@abstractmethod
def mask_service(self, service_name: str) -> None:
"""Prevent the service from being started by any activity by masking it.

View file

@ -53,3 +53,4 @@ HOSTING_CONFIG_FILE = os.getenv(
)
MORE_LOG = os.getenv("MORE_LOG", "1")
EXPIRATION_EPOCH_ATTRIBUTE = "__SYSTEM__EXPIRATION_EPOCH"

View file

@ -172,6 +172,16 @@ class ClusterStateController:
if service_type == StorageNode:
self.wait_after_storage_startup()
@run_optionally(optionals.OPTIONAL_FAILOVER_ENABLED)
@reporter.step("Send sighup to all {service_type} services")
def sighup_services_of_type(self, service_type: type[ServiceClass]):
"""Send SIGHUP to all cluster services of the given type.

Args:
service_type: Service class used to select the services from the cluster.
"""
services = self.cluster.services(service_type)
# One bound send_signal_to_service per service is handed to parallel, with signal="SIGHUP" as the shared keyword argument.
parallel([service.send_signal_to_service for service in services], signal="SIGHUP")
# NOTE(review): the signalled services are removed from stopped_services although nothing here starts them;
# this looks carried over from the start-services path — confirm the bookkeeping is intended.
self.stopped_services.difference_update(set(services))
# Storage nodes get the same post-startup wait as after a start (presumably to let them settle after reload — confirm).
if service_type == StorageNode:
self.wait_after_storage_startup()
@wait_for_success(600, 60)
def wait_s3gate(self, s3gate: S3Gate):
with reporter.step(f"Wait for {s3gate} reconnection"):

View file

@ -14,14 +14,19 @@ class ConfigStateManager(StateManager):
self.cluster = self.csc.cluster
@reporter.step("Change configuration for {service_type} on all nodes")
def set_on_all_nodes(self, service_type: type[ServiceClass], values: dict[str, Any]):
def set_on_all_nodes(self, service_type: type[ServiceClass], values: dict[str, Any], sighup: bool = False):
services = self.cluster.services(service_type)
nodes = self.cluster.nodes(services)
self.services_with_changed_config.update([(node, service_type) for node in nodes])
if not sighup:
self.csc.stop_services_of_type(service_type)
parallel([node.config(service_type).set for node in nodes], values=values)
if not sighup:
self.csc.start_services_of_type(service_type)
else:
self.csc.sighup_services_of_type(service_type)
@reporter.step("Change configuration for {service_type} on {node}")
def set_on_node(self, node: ClusterNode, service_type: type[ServiceClass], values: dict[str, Any]):
@ -32,18 +37,20 @@ class ConfigStateManager(StateManager):
self.csc.start_service_of_type(node, service_type)
@reporter.step("Revert all configuration changes")
def revert_all(self):
def revert_all(self, sighup: bool = False):
if not self.services_with_changed_config:
return
parallel(self._revert_svc, self.services_with_changed_config)
parallel(self._revert_svc, self.services_with_changed_config, sighup)
self.services_with_changed_config.clear()
if not sighup:
self.csc.start_all_stopped_services()
# TODO: parallel can't have multiple parallel_items :(
@reporter.step("Revert all configuration {node_and_service}")
def _revert_svc(self, node_and_service: tuple[ClusterNode, ServiceClass]):
def _revert_svc(self, node_and_service: tuple[ClusterNode, ServiceClass], sighup: bool = False):
node, service_type = node_and_service
if not sighup:
self.csc.stop_service_of_type(node, service_type)
node.config(service_type).revert()

View file

@ -65,6 +65,10 @@ class NodeBase(HumanReadableABC):
with reporter.step(f"Start {self.name} service on {self.host.config.address}"):
self.host.start_service(self.name)
def send_signal_to_service(self, signal: str):
"""Send a signal to this service through its host, inside a report step.

Args:
signal: Signal name passed through to host.send_signal_to_service.
"""
with reporter.step(f"Send -{signal} signal to {self.name} service on {self.host.config.address}"):
self.host.send_signal_to_service(self.name, signal)
@abstractmethod
def service_healthcheck(self) -> bool:
"""Service healthcheck."""
@ -185,9 +189,7 @@ class NodeBase(HumanReadableABC):
if attribute_name not in config.attributes:
if default_attribute_name is None:
raise RuntimeError(
f"Service {self.name} has no {attribute_name} in config and fallback attribute isn't set either"
)
raise RuntimeError(f"Service {self.name} has no {attribute_name} in config and fallback attribute isn't set either")
return config.attributes[default_attribute_name]
@ -197,9 +199,7 @@ class NodeBase(HumanReadableABC):
return self.host.get_service_config(self.name)
def get_service_uptime(self, service: str) -> datetime:
result = self.host.get_shell().exec(
f"systemctl show {service} --property ActiveEnterTimestamp | cut -d '=' -f 2"
)
result = self.host.get_shell().exec(f"systemctl show {service} --property ActiveEnterTimestamp | cut -d '=' -f 2")
start_time = parser.parse(result.stdout.strip())
current_time = datetime.now(tz=timezone.utc)
active_time = current_time - start_time