Add loader and sceanrio runner interfaces, add support for local scenario

Signed-off-by: Andrey Berezin <a.berezin@yadro.com>
This commit is contained in:
Andrey Berezin 2023-06-26 16:45:34 +03:00 committed by Andrey Berezin
parent 13ea25bff5
commit 182bd6ab36
19 changed files with 786 additions and 384 deletions

View file

@ -23,7 +23,7 @@ class ClusterStateController:
self.stopped_nodes: list[ClusterNode] = []
self.detached_disks: dict[str, DiskController] = {}
self.stopped_storage_nodes: list[ClusterNode] = []
self.stopped_s3_gate: list[ClusterNode] = []
self.stopped_s3_gates: list[ClusterNode] = []
self.cluster = cluster
self.shell = shell
self.suspended_services: dict[str, list[ClusterNode]] = {}
@ -60,6 +60,16 @@ class ClusterStateController:
for node in nodes:
self.stop_storage_service(node)
@run_optionally(optionals.OPTIONAL_FAILOVER_ENABLED)
@reporter.step_deco("Stop all S3 gates on cluster")
def stop_all_s3_gates(self, reversed_order: bool = False):
nodes = (
reversed(self.cluster.cluster_nodes) if reversed_order else self.cluster.cluster_nodes
)
for node in nodes:
self.stop_s3_gate(node)
@run_optionally(optionals.OPTIONAL_FAILOVER_ENABLED)
@reporter.step_deco("Start host of node {node}")
def start_node_host(self, node: ClusterNode):
@ -72,10 +82,18 @@ class ClusterStateController:
@run_optionally(optionals.OPTIONAL_FAILOVER_ENABLED)
@reporter.step_deco("Start stopped hosts")
def start_stopped_hosts(self, reversed_order: bool = False):
if not self.stopped_nodes:
return
nodes = reversed(self.stopped_nodes) if reversed_order else self.stopped_nodes
for node in nodes:
with reporter.step(f"Start host {node.host.config.address}"):
node.host.start_host()
if node in self.stopped_storage_nodes:
self.stopped_storage_nodes.remove(node)
if node in self.stopped_s3_gates:
self.stopped_s3_gates.remove(node)
self.stopped_nodes = []
wait_all_storage_nodes_returned(self.shell, self.cluster)
@ -115,44 +133,51 @@ class ClusterStateController:
@run_optionally(optionals.OPTIONAL_FAILOVER_ENABLED)
@reporter.step_deco("Start stopped storage services")
def start_stopped_storage_services(self):
if self.stopped_storage_nodes:
# In case if we stopped couple services, for example (s01-s04):
# After starting only s01, it may require connections to s02-s04, which is still down, and fail to start.
# Also, if something goes wrong here, we might skip s02-s04 start at all, and cluster will be left in a bad state.
# So in order to make sure that services are at least attempted to be started, using threads here.
with ThreadPoolExecutor(max_workers=len(self.stopped_storage_nodes)) as executor:
start_result = executor.map(self.start_storage_service, self.stopped_storage_nodes)
if not self.stopped_storage_nodes:
return
# Looks tricky, but if exception is raised in any thread, it will be "eaten" by ThreadPoolExecutor,
# But will be thrown here.
# Not ideal solution, but okay for now
for _ in start_result:
pass
# In case if we stopped couple services, for example (s01-s04):
# After starting only s01, it may require connections to s02-s04, which is still down, and fail to start.
# Also, if something goes wrong here, we might skip s02-s04 start at all, and cluster will be left in a bad state.
# So in order to make sure that services are at least attempted to be started, using threads here.
with ThreadPoolExecutor(max_workers=len(self.stopped_storage_nodes)) as executor:
start_result = executor.map(self.start_storage_service, self.stopped_storage_nodes)
# Looks tricky, but if exception is raised in any thread, it will be "eaten" by ThreadPoolExecutor,
# But will be thrown here.
# Not ideal solution, but okay for now
for _ in start_result:
pass
wait_all_storage_nodes_returned(self.shell, self.cluster)
self.stopped_storage_nodes = []
@run_optionally(optionals.OPTIONAL_FAILOVER_ENABLED)
@reporter.step_deco("Stop s3 gate on {node}")
def stop_s3_gate(self, node: ClusterNode):
node.s3_gate.stop_service()
self.stopped_s3_gate.append(node)
self.stopped_s3_gates.append(node)
@run_optionally(optionals.OPTIONAL_FAILOVER_ENABLED)
@reporter.step_deco("Start s3 gate on {node}")
def start_s3_gate(self, node: ClusterNode):
node.s3_gate.start_service()
self.stopped_s3_gate.remove(node)
self.stopped_s3_gates.remove(node)
@run_optionally(optionals.OPTIONAL_FAILOVER_ENABLED)
@reporter.step_deco("Start stopped S3 gates")
def start_stopped_s3_gate(self):
# not sure if we need here to use threads like in start_stopped_storage_services
for s3_gate in self.stopped_s3_gate:
s3_gate.start_service()
self.stopped_s3_gate = []
def start_stopped_s3_gates(self):
if not self.stopped_s3_gates:
return
with ThreadPoolExecutor(max_workers=len(self.stopped_s3_gates)) as executor:
start_result = executor.map(self.start_s3_gate, self.stopped_s3_gates)
# Looks tricky, but if exception is raised in any thread, it will be "eaten" by ThreadPoolExecutor,
# But will be thrown here.
# Not ideal solution, but okay for now
for _ in start_result:
pass
@run_optionally(optionals.OPTIONAL_FAILOVER_ENABLED)
@reporter.step_deco("Suspend {process_name} service in {node}")