forked from TrueCloudLab/frostfs-testlib
Add loader and scenario runner interfaces, add support for local scenario
Signed-off-by: Andrey Berezin <a.berezin@yadro.com>
This commit is contained in:
parent
13ea25bff5
commit
182bd6ab36
19 changed files with 786 additions and 384 deletions
|
@ -1,50 +1,37 @@
|
|||
import copy
|
||||
import time
|
||||
from typing import Optional
|
||||
|
||||
import frostfs_testlib.resources.optionals as optionals
|
||||
from frostfs_testlib.load.k6 import K6
|
||||
from frostfs_testlib.load.interfaces import ScenarioRunner
|
||||
from frostfs_testlib.load.load_config import (
|
||||
EndpointSelectionStrategy,
|
||||
K6ProcessAllocationStrategy,
|
||||
LoadParams,
|
||||
LoadScenario,
|
||||
LoadType,
|
||||
)
|
||||
from frostfs_testlib.load.load_report import LoadReport
|
||||
from frostfs_testlib.load.load_steps import init_s3_client, prepare_k6_instances
|
||||
from frostfs_testlib.load.load_verifiers import LoadVerifier
|
||||
from frostfs_testlib.reporter import get_reporter
|
||||
from frostfs_testlib.resources.load_params import (
|
||||
K6_TEARDOWN_PERIOD,
|
||||
LOAD_NODE_SSH_PASSWORD,
|
||||
LOAD_NODE_SSH_PRIVATE_KEY_PASSPHRASE,
|
||||
LOAD_NODE_SSH_PRIVATE_KEY_PATH,
|
||||
LOAD_NODE_SSH_USER,
|
||||
LOAD_NODES,
|
||||
)
|
||||
from frostfs_testlib.shell.interfaces import SshCredentials
|
||||
from frostfs_testlib.storage.cluster import ClusterNode
|
||||
from frostfs_testlib.storage.dataclasses.frostfs_services import S3Gate, StorageNode
|
||||
from frostfs_testlib.storage.dataclasses.wallet import WalletInfo
|
||||
from frostfs_testlib.testing.test_control import run_optionally
|
||||
from frostfs_testlib.utils import datetime_utils
|
||||
|
||||
reporter = get_reporter()
|
||||
|
||||
|
||||
class BackgroundLoadController:
|
||||
k6_instances: list[K6]
|
||||
k6_dir: str
|
||||
load_params: LoadParams
|
||||
original_load_params: LoadParams
|
||||
load_nodes: list[str]
|
||||
verification_params: LoadParams
|
||||
nodes_under_load: list[ClusterNode]
|
||||
load_counter: int
|
||||
ssh_credentials: SshCredentials
|
||||
loaders_wallet: WalletInfo
|
||||
load_summaries: dict
|
||||
endpoints: list[str]
|
||||
runner: ScenarioRunner
|
||||
started: bool
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
|
@ -52,15 +39,16 @@ class BackgroundLoadController:
|
|||
load_params: LoadParams,
|
||||
loaders_wallet: WalletInfo,
|
||||
nodes_under_load: list[ClusterNode],
|
||||
runner: ScenarioRunner,
|
||||
) -> None:
|
||||
self.k6_dir = k6_dir
|
||||
self.original_load_params = load_params
|
||||
self.load_params = copy.deepcopy(self.original_load_params)
|
||||
self.nodes_under_load = nodes_under_load
|
||||
self.load_counter = 1
|
||||
self.load_nodes = LOAD_NODES
|
||||
self.loaders_wallet = loaders_wallet
|
||||
|
||||
self.runner = runner
|
||||
self.started = False
|
||||
if load_params.endpoint_selection_strategy is None:
|
||||
raise RuntimeError("endpoint_selection_strategy should not be None")
|
||||
|
||||
|
@ -68,13 +56,6 @@ class BackgroundLoadController:
|
|||
load_params.load_type, load_params.endpoint_selection_strategy
|
||||
)
|
||||
|
||||
self.ssh_credentials = SshCredentials(
|
||||
LOAD_NODE_SSH_USER,
|
||||
LOAD_NODE_SSH_PASSWORD,
|
||||
LOAD_NODE_SSH_PRIVATE_KEY_PATH,
|
||||
LOAD_NODE_SSH_PRIVATE_KEY_PASSPHRASE,
|
||||
)
|
||||
|
||||
@run_optionally(optionals.OPTIONAL_BACKGROUND_LOAD_ENABLED, [])
|
||||
def _get_endpoints(
|
||||
self, load_type: LoadType, endpoint_selection_strategy: EndpointSelectionStrategy
|
||||
|
@ -116,69 +97,28 @@ class BackgroundLoadController:
|
|||
return all_endpoints[load_type][endpoint_selection_strategy]
|
||||
|
||||
@run_optionally(optionals.OPTIONAL_BACKGROUND_LOAD_ENABLED)
|
||||
@reporter.step_deco("Prepare background load instances")
|
||||
@reporter.step_deco("Prepare load instances")
|
||||
def prepare(self):
|
||||
if self.load_params.load_type == LoadType.S3:
|
||||
init_s3_client(
|
||||
self.load_nodes,
|
||||
self.load_params,
|
||||
self.k6_dir,
|
||||
self.ssh_credentials,
|
||||
self.nodes_under_load,
|
||||
self.loaders_wallet,
|
||||
)
|
||||
|
||||
self._prepare(self.load_params)
|
||||
|
||||
def _prepare(self, load_params: LoadParams):
|
||||
self.k6_instances = prepare_k6_instances(
|
||||
load_nodes=LOAD_NODES,
|
||||
ssh_credentials=self.ssh_credentials,
|
||||
k6_dir=self.k6_dir,
|
||||
load_params=load_params,
|
||||
endpoints=self.endpoints,
|
||||
loaders_wallet=self.loaders_wallet,
|
||||
)
|
||||
self.runner.prepare(self.load_params, self.nodes_under_load, self.k6_dir)
|
||||
self.runner.init_k6_instances(self.load_params, self.endpoints, self.k6_dir)
|
||||
|
||||
@run_optionally(optionals.OPTIONAL_BACKGROUND_LOAD_ENABLED)
|
||||
@reporter.step_deco("Start background load")
|
||||
def start(self):
|
||||
if self.load_params.preset is None:
|
||||
raise RuntimeError("Preset should not be none at the moment of start")
|
||||
|
||||
with reporter.step(
|
||||
f"Start background load on nodes {self.nodes_under_load}: "
|
||||
f"writers = {self.load_params.writers}, "
|
||||
f"obj_size = {self.load_params.object_size}, "
|
||||
f"load_time = {self.load_params.load_time}, "
|
||||
f"prepare_json = {self.load_params.preset.pregen_json}, "
|
||||
f"endpoints = {self.endpoints}"
|
||||
):
|
||||
for k6_load_instance in self.k6_instances:
|
||||
k6_load_instance.start()
|
||||
|
||||
wait_after_start_time = datetime_utils.parse_time(self.load_params.setup_timeout) + 5
|
||||
with reporter.step(
|
||||
f"Wait for start timeout + couple more seconds ({wait_after_start_time}) before moving on"
|
||||
):
|
||||
time.sleep(wait_after_start_time)
|
||||
with reporter.step(f"Start load on nodes {self.nodes_under_load}"):
|
||||
self.runner.start()
|
||||
self.started = True
|
||||
|
||||
@run_optionally(optionals.OPTIONAL_BACKGROUND_LOAD_ENABLED)
|
||||
@reporter.step_deco("Stop background load")
|
||||
@reporter.step_deco("Stop load")
|
||||
def stop(self):
|
||||
for k6_load_instance in self.k6_instances:
|
||||
k6_load_instance.stop()
|
||||
self.runner.stop()
|
||||
|
||||
@run_optionally(optionals.OPTIONAL_BACKGROUND_LOAD_ENABLED, True)
|
||||
def is_running(self):
|
||||
for k6_load_instance in self.k6_instances:
|
||||
if not k6_load_instance.is_running:
|
||||
return False
|
||||
|
||||
return True
|
||||
def is_running(self) -> bool:
|
||||
return self.runner.is_running
|
||||
|
||||
@run_optionally(optionals.OPTIONAL_BACKGROUND_LOAD_ENABLED)
|
||||
@reporter.step_deco("Reset background load")
|
||||
@reporter.step_deco("Reset load")
|
||||
def _reset_for_consequent_load(self):
|
||||
"""This method is required if we want to run multiple loads during test run.
|
||||
Raise load counter by 1 and append it to load_id
|
||||
|
@ -188,25 +128,25 @@ class BackgroundLoadController:
|
|||
self.load_params.set_id(f"{self.load_params.load_id}_{self.load_counter}")
|
||||
|
||||
@run_optionally(optionals.OPTIONAL_BACKGROUND_LOAD_ENABLED)
|
||||
@reporter.step_deco("Startup background load")
|
||||
@reporter.step_deco("Startup load")
|
||||
def startup(self):
|
||||
self.prepare()
|
||||
self.start()
|
||||
|
||||
@run_optionally(optionals.OPTIONAL_BACKGROUND_LOAD_ENABLED)
|
||||
@reporter.step_deco("Stop and get results of background load")
|
||||
def teardown(self, load_report: LoadReport = None):
|
||||
if not self.k6_instances:
|
||||
@reporter.step_deco("Stop and get results of load")
|
||||
def teardown(self, load_report: Optional[LoadReport] = None):
|
||||
if not self.started:
|
||||
return
|
||||
|
||||
self.stop()
|
||||
self.load_summaries = self.get_results()
|
||||
self.k6_instances = []
|
||||
self.load_summaries = self._get_results()
|
||||
self.started = False
|
||||
if load_report:
|
||||
load_report.add_summaries(self.load_summaries)
|
||||
|
||||
@run_optionally(optionals.OPTIONAL_BACKGROUND_LOAD_ENABLED)
|
||||
@reporter.step_deco("Verify results of background load")
|
||||
@reporter.step_deco("Verify results of load")
|
||||
def verify(self):
|
||||
try:
|
||||
if self.load_params.verify:
|
||||
|
@ -220,9 +160,10 @@ class BackgroundLoadController:
|
|||
working_dir=self.load_params.working_dir,
|
||||
endpoint_selection_strategy=self.load_params.endpoint_selection_strategy,
|
||||
k6_process_allocation_strategy=self.load_params.k6_process_allocation_strategy,
|
||||
setup_timeout="1s",
|
||||
)
|
||||
self._run_verify_scenario()
|
||||
verification_summaries = self.get_results()
|
||||
verification_summaries = self._get_results()
|
||||
self.verify_summaries(self.load_summaries, verification_summaries)
|
||||
finally:
|
||||
self._reset_for_consequent_load()
|
||||
|
@ -239,38 +180,20 @@ class BackgroundLoadController:
|
|||
|
||||
@run_optionally(optionals.OPTIONAL_BACKGROUND_LOAD_ENABLED)
|
||||
def wait_until_finish(self):
|
||||
if self.load_params.load_time is None:
|
||||
raise RuntimeError("LoadTime should not be none")
|
||||
|
||||
for k6_instance in self.k6_instances:
|
||||
k6_instance.wait_until_finished(self.load_params.load_time + int(K6_TEARDOWN_PERIOD))
|
||||
self.runner.wait_until_finish()
|
||||
|
||||
@run_optionally(optionals.OPTIONAL_BACKGROUND_LOAD_ENABLED)
|
||||
@reporter.step_deco("Run verify scenario for background load")
|
||||
@reporter.step_deco("Run verify scenario")
|
||||
def _run_verify_scenario(self):
|
||||
if self.verification_params.verify_time is None:
|
||||
raise RuntimeError("verify_time should not be none")
|
||||
|
||||
self._prepare(self.verification_params)
|
||||
with reporter.step("Run verify background load data"):
|
||||
for k6_verify_instance in self.k6_instances:
|
||||
k6_verify_instance.start()
|
||||
k6_verify_instance.wait_until_finished(self.verification_params.verify_time)
|
||||
self.runner.init_k6_instances(self.verification_params, self.endpoints, self.k6_dir)
|
||||
with reporter.step("Run verify load data"):
|
||||
self.runner.start()
|
||||
self.runner.wait_until_finish()
|
||||
|
||||
@run_optionally(optionals.OPTIONAL_BACKGROUND_LOAD_ENABLED)
|
||||
@reporter.step_deco("K6 run results")
|
||||
def get_results(self) -> dict:
|
||||
results = {}
|
||||
for k6_instance in self.k6_instances:
|
||||
if k6_instance.load_params.k6_process_allocation_strategy is None:
|
||||
raise RuntimeError("k6_process_allocation_strategy should not be none")
|
||||
|
||||
result = k6_instance.get_results()
|
||||
keys_map = {
|
||||
K6ProcessAllocationStrategy.PER_LOAD_NODE: k6_instance.load_node,
|
||||
K6ProcessAllocationStrategy.PER_ENDPOINT: k6_instance.endpoints[0],
|
||||
}
|
||||
key = keys_map[k6_instance.load_params.k6_process_allocation_strategy]
|
||||
results[key] = result
|
||||
|
||||
return results
|
||||
@reporter.step_deco("Get load results")
|
||||
def _get_results(self) -> dict:
|
||||
return self.runner.get_results()
|
||||
|
|
|
@ -23,7 +23,7 @@ class ClusterStateController:
|
|||
self.stopped_nodes: list[ClusterNode] = []
|
||||
self.detached_disks: dict[str, DiskController] = {}
|
||||
self.stopped_storage_nodes: list[ClusterNode] = []
|
||||
self.stopped_s3_gate: list[ClusterNode] = []
|
||||
self.stopped_s3_gates: list[ClusterNode] = []
|
||||
self.cluster = cluster
|
||||
self.shell = shell
|
||||
self.suspended_services: dict[str, list[ClusterNode]] = {}
|
||||
|
@ -60,6 +60,16 @@ class ClusterStateController:
|
|||
for node in nodes:
|
||||
self.stop_storage_service(node)
|
||||
|
||||
@run_optionally(optionals.OPTIONAL_FAILOVER_ENABLED)
|
||||
@reporter.step_deco("Stop all S3 gates on cluster")
|
||||
def stop_all_s3_gates(self, reversed_order: bool = False):
|
||||
nodes = (
|
||||
reversed(self.cluster.cluster_nodes) if reversed_order else self.cluster.cluster_nodes
|
||||
)
|
||||
|
||||
for node in nodes:
|
||||
self.stop_s3_gate(node)
|
||||
|
||||
@run_optionally(optionals.OPTIONAL_FAILOVER_ENABLED)
|
||||
@reporter.step_deco("Start host of node {node}")
|
||||
def start_node_host(self, node: ClusterNode):
|
||||
|
@ -72,10 +82,18 @@ class ClusterStateController:
|
|||
@run_optionally(optionals.OPTIONAL_FAILOVER_ENABLED)
|
||||
@reporter.step_deco("Start stopped hosts")
|
||||
def start_stopped_hosts(self, reversed_order: bool = False):
|
||||
if not self.stopped_nodes:
|
||||
return
|
||||
|
||||
nodes = reversed(self.stopped_nodes) if reversed_order else self.stopped_nodes
|
||||
for node in nodes:
|
||||
with reporter.step(f"Start host {node.host.config.address}"):
|
||||
node.host.start_host()
|
||||
if node in self.stopped_storage_nodes:
|
||||
self.stopped_storage_nodes.remove(node)
|
||||
|
||||
if node in self.stopped_s3_gates:
|
||||
self.stopped_s3_gates.remove(node)
|
||||
self.stopped_nodes = []
|
||||
wait_all_storage_nodes_returned(self.shell, self.cluster)
|
||||
|
||||
|
@ -115,44 +133,51 @@ class ClusterStateController:
|
|||
@run_optionally(optionals.OPTIONAL_FAILOVER_ENABLED)
|
||||
@reporter.step_deco("Start stopped storage services")
|
||||
def start_stopped_storage_services(self):
|
||||
if self.stopped_storage_nodes:
|
||||
# In case if we stopped couple services, for example (s01-s04):
|
||||
# After starting only s01, it may require connections to s02-s04, which is still down, and fail to start.
|
||||
# Also, if something goes wrong here, we might skip s02-s04 start at all, and cluster will be left in a bad state.
|
||||
# So in order to make sure that services are at least attempted to be started, using threads here.
|
||||
with ThreadPoolExecutor(max_workers=len(self.stopped_storage_nodes)) as executor:
|
||||
start_result = executor.map(self.start_storage_service, self.stopped_storage_nodes)
|
||||
if not self.stopped_storage_nodes:
|
||||
return
|
||||
|
||||
# Looks tricky, but if exception is raised in any thread, it will be "eaten" by ThreadPoolExecutor,
|
||||
# But will be thrown here.
|
||||
# Not ideal solution, but okay for now
|
||||
for _ in start_result:
|
||||
pass
|
||||
# In case if we stopped couple services, for example (s01-s04):
|
||||
# After starting only s01, it may require connections to s02-s04, which is still down, and fail to start.
|
||||
# Also, if something goes wrong here, we might skip s02-s04 start at all, and cluster will be left in a bad state.
|
||||
# So in order to make sure that services are at least attempted to be started, using threads here.
|
||||
with ThreadPoolExecutor(max_workers=len(self.stopped_storage_nodes)) as executor:
|
||||
start_result = executor.map(self.start_storage_service, self.stopped_storage_nodes)
|
||||
|
||||
# Looks tricky, but if exception is raised in any thread, it will be "eaten" by ThreadPoolExecutor,
|
||||
# But will be thrown here.
|
||||
# Not ideal solution, but okay for now
|
||||
for _ in start_result:
|
||||
pass
|
||||
|
||||
wait_all_storage_nodes_returned(self.shell, self.cluster)
|
||||
self.stopped_storage_nodes = []
|
||||
|
||||
|
||||
@run_optionally(optionals.OPTIONAL_FAILOVER_ENABLED)
|
||||
@reporter.step_deco("Stop s3 gate on {node}")
|
||||
def stop_s3_gate(self, node: ClusterNode):
|
||||
node.s3_gate.stop_service()
|
||||
self.stopped_s3_gate.append(node)
|
||||
self.stopped_s3_gates.append(node)
|
||||
|
||||
@run_optionally(optionals.OPTIONAL_FAILOVER_ENABLED)
|
||||
@reporter.step_deco("Start s3 gate on {node}")
|
||||
def start_s3_gate(self, node: ClusterNode):
|
||||
node.s3_gate.start_service()
|
||||
self.stopped_s3_gate.remove(node)
|
||||
|
||||
self.stopped_s3_gates.remove(node)
|
||||
|
||||
@run_optionally(optionals.OPTIONAL_FAILOVER_ENABLED)
|
||||
@reporter.step_deco("Start stopped S3 gates")
|
||||
def start_stopped_s3_gate(self):
|
||||
# not sure if we need here to use threads like in start_stopped_storage_services
|
||||
for s3_gate in self.stopped_s3_gate:
|
||||
s3_gate.start_service()
|
||||
self.stopped_s3_gate = []
|
||||
def start_stopped_s3_gates(self):
|
||||
if not self.stopped_s3_gates:
|
||||
return
|
||||
|
||||
with ThreadPoolExecutor(max_workers=len(self.stopped_s3_gates)) as executor:
|
||||
start_result = executor.map(self.start_s3_gate, self.stopped_s3_gates)
|
||||
|
||||
# Looks tricky, but if exception is raised in any thread, it will be "eaten" by ThreadPoolExecutor,
|
||||
# But will be thrown here.
|
||||
# Not ideal solution, but okay for now
|
||||
for _ in start_result:
|
||||
pass
|
||||
|
||||
@run_optionally(optionals.OPTIONAL_FAILOVER_ENABLED)
|
||||
@reporter.step_deco("Suspend {process_name} service in {node}")
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue