Add loader and sceanrio runner interfaces, add support for local scenario

Signed-off-by: Andrey Berezin <a.berezin@yadro.com>
This commit is contained in:
Andrey Berezin 2023-06-26 16:45:34 +03:00 committed by Andrey Berezin
parent 13ea25bff5
commit 182bd6ab36
19 changed files with 786 additions and 384 deletions

View file

@ -1,50 +1,37 @@
import copy
import time
from typing import Optional
import frostfs_testlib.resources.optionals as optionals
from frostfs_testlib.load.k6 import K6
from frostfs_testlib.load.interfaces import ScenarioRunner
from frostfs_testlib.load.load_config import (
EndpointSelectionStrategy,
K6ProcessAllocationStrategy,
LoadParams,
LoadScenario,
LoadType,
)
from frostfs_testlib.load.load_report import LoadReport
from frostfs_testlib.load.load_steps import init_s3_client, prepare_k6_instances
from frostfs_testlib.load.load_verifiers import LoadVerifier
from frostfs_testlib.reporter import get_reporter
from frostfs_testlib.resources.load_params import (
K6_TEARDOWN_PERIOD,
LOAD_NODE_SSH_PASSWORD,
LOAD_NODE_SSH_PRIVATE_KEY_PASSPHRASE,
LOAD_NODE_SSH_PRIVATE_KEY_PATH,
LOAD_NODE_SSH_USER,
LOAD_NODES,
)
from frostfs_testlib.shell.interfaces import SshCredentials
from frostfs_testlib.storage.cluster import ClusterNode
from frostfs_testlib.storage.dataclasses.frostfs_services import S3Gate, StorageNode
from frostfs_testlib.storage.dataclasses.wallet import WalletInfo
from frostfs_testlib.testing.test_control import run_optionally
from frostfs_testlib.utils import datetime_utils
reporter = get_reporter()
class BackgroundLoadController:
k6_instances: list[K6]
k6_dir: str
load_params: LoadParams
original_load_params: LoadParams
load_nodes: list[str]
verification_params: LoadParams
nodes_under_load: list[ClusterNode]
load_counter: int
ssh_credentials: SshCredentials
loaders_wallet: WalletInfo
load_summaries: dict
endpoints: list[str]
runner: ScenarioRunner
started: bool
def __init__(
self,
@ -52,15 +39,16 @@ class BackgroundLoadController:
load_params: LoadParams,
loaders_wallet: WalletInfo,
nodes_under_load: list[ClusterNode],
runner: ScenarioRunner,
) -> None:
self.k6_dir = k6_dir
self.original_load_params = load_params
self.load_params = copy.deepcopy(self.original_load_params)
self.nodes_under_load = nodes_under_load
self.load_counter = 1
self.load_nodes = LOAD_NODES
self.loaders_wallet = loaders_wallet
self.runner = runner
self.started = False
if load_params.endpoint_selection_strategy is None:
raise RuntimeError("endpoint_selection_strategy should not be None")
@ -68,13 +56,6 @@ class BackgroundLoadController:
load_params.load_type, load_params.endpoint_selection_strategy
)
self.ssh_credentials = SshCredentials(
LOAD_NODE_SSH_USER,
LOAD_NODE_SSH_PASSWORD,
LOAD_NODE_SSH_PRIVATE_KEY_PATH,
LOAD_NODE_SSH_PRIVATE_KEY_PASSPHRASE,
)
@run_optionally(optionals.OPTIONAL_BACKGROUND_LOAD_ENABLED, [])
def _get_endpoints(
self, load_type: LoadType, endpoint_selection_strategy: EndpointSelectionStrategy
@ -116,69 +97,28 @@ class BackgroundLoadController:
return all_endpoints[load_type][endpoint_selection_strategy]
@run_optionally(optionals.OPTIONAL_BACKGROUND_LOAD_ENABLED)
@reporter.step_deco("Prepare background load instances")
@reporter.step_deco("Prepare load instances")
def prepare(self):
if self.load_params.load_type == LoadType.S3:
init_s3_client(
self.load_nodes,
self.load_params,
self.k6_dir,
self.ssh_credentials,
self.nodes_under_load,
self.loaders_wallet,
)
self._prepare(self.load_params)
def _prepare(self, load_params: LoadParams):
self.k6_instances = prepare_k6_instances(
load_nodes=LOAD_NODES,
ssh_credentials=self.ssh_credentials,
k6_dir=self.k6_dir,
load_params=load_params,
endpoints=self.endpoints,
loaders_wallet=self.loaders_wallet,
)
self.runner.prepare(self.load_params, self.nodes_under_load, self.k6_dir)
self.runner.init_k6_instances(self.load_params, self.endpoints, self.k6_dir)
@run_optionally(optionals.OPTIONAL_BACKGROUND_LOAD_ENABLED)
@reporter.step_deco("Start background load")
def start(self):
if self.load_params.preset is None:
raise RuntimeError("Preset should not be none at the moment of start")
with reporter.step(
f"Start background load on nodes {self.nodes_under_load}: "
f"writers = {self.load_params.writers}, "
f"obj_size = {self.load_params.object_size}, "
f"load_time = {self.load_params.load_time}, "
f"prepare_json = {self.load_params.preset.pregen_json}, "
f"endpoints = {self.endpoints}"
):
for k6_load_instance in self.k6_instances:
k6_load_instance.start()
wait_after_start_time = datetime_utils.parse_time(self.load_params.setup_timeout) + 5
with reporter.step(
f"Wait for start timeout + couple more seconds ({wait_after_start_time}) before moving on"
):
time.sleep(wait_after_start_time)
with reporter.step(f"Start load on nodes {self.nodes_under_load}"):
self.runner.start()
self.started = True
@run_optionally(optionals.OPTIONAL_BACKGROUND_LOAD_ENABLED)
@reporter.step_deco("Stop background load")
@reporter.step_deco("Stop load")
def stop(self):
for k6_load_instance in self.k6_instances:
k6_load_instance.stop()
self.runner.stop()
@run_optionally(optionals.OPTIONAL_BACKGROUND_LOAD_ENABLED, True)
def is_running(self):
for k6_load_instance in self.k6_instances:
if not k6_load_instance.is_running:
return False
return True
def is_running(self) -> bool:
return self.runner.is_running
@run_optionally(optionals.OPTIONAL_BACKGROUND_LOAD_ENABLED)
@reporter.step_deco("Reset background load")
@reporter.step_deco("Reset load")
def _reset_for_consequent_load(self):
"""This method is required if we want to run multiple loads during test run.
Raise load counter by 1 and append it to load_id
@ -188,25 +128,25 @@ class BackgroundLoadController:
self.load_params.set_id(f"{self.load_params.load_id}_{self.load_counter}")
@run_optionally(optionals.OPTIONAL_BACKGROUND_LOAD_ENABLED)
@reporter.step_deco("Startup background load")
@reporter.step_deco("Startup load")
def startup(self):
self.prepare()
self.start()
@run_optionally(optionals.OPTIONAL_BACKGROUND_LOAD_ENABLED)
@reporter.step_deco("Stop and get results of background load")
def teardown(self, load_report: LoadReport = None):
if not self.k6_instances:
@reporter.step_deco("Stop and get results of load")
def teardown(self, load_report: Optional[LoadReport] = None):
if not self.started:
return
self.stop()
self.load_summaries = self.get_results()
self.k6_instances = []
self.load_summaries = self._get_results()
self.started = False
if load_report:
load_report.add_summaries(self.load_summaries)
@run_optionally(optionals.OPTIONAL_BACKGROUND_LOAD_ENABLED)
@reporter.step_deco("Verify results of background load")
@reporter.step_deco("Verify results of load")
def verify(self):
try:
if self.load_params.verify:
@ -220,9 +160,10 @@ class BackgroundLoadController:
working_dir=self.load_params.working_dir,
endpoint_selection_strategy=self.load_params.endpoint_selection_strategy,
k6_process_allocation_strategy=self.load_params.k6_process_allocation_strategy,
setup_timeout="1s",
)
self._run_verify_scenario()
verification_summaries = self.get_results()
verification_summaries = self._get_results()
self.verify_summaries(self.load_summaries, verification_summaries)
finally:
self._reset_for_consequent_load()
@ -239,38 +180,20 @@ class BackgroundLoadController:
@run_optionally(optionals.OPTIONAL_BACKGROUND_LOAD_ENABLED)
def wait_until_finish(self):
if self.load_params.load_time is None:
raise RuntimeError("LoadTime should not be none")
for k6_instance in self.k6_instances:
k6_instance.wait_until_finished(self.load_params.load_time + int(K6_TEARDOWN_PERIOD))
self.runner.wait_until_finish()
@run_optionally(optionals.OPTIONAL_BACKGROUND_LOAD_ENABLED)
@reporter.step_deco("Run verify scenario for background load")
@reporter.step_deco("Run verify scenario")
def _run_verify_scenario(self):
if self.verification_params.verify_time is None:
raise RuntimeError("verify_time should not be none")
self._prepare(self.verification_params)
with reporter.step("Run verify background load data"):
for k6_verify_instance in self.k6_instances:
k6_verify_instance.start()
k6_verify_instance.wait_until_finished(self.verification_params.verify_time)
self.runner.init_k6_instances(self.verification_params, self.endpoints, self.k6_dir)
with reporter.step("Run verify load data"):
self.runner.start()
self.runner.wait_until_finish()
@run_optionally(optionals.OPTIONAL_BACKGROUND_LOAD_ENABLED)
@reporter.step_deco("K6 run results")
def get_results(self) -> dict:
results = {}
for k6_instance in self.k6_instances:
if k6_instance.load_params.k6_process_allocation_strategy is None:
raise RuntimeError("k6_process_allocation_strategy should not be none")
result = k6_instance.get_results()
keys_map = {
K6ProcessAllocationStrategy.PER_LOAD_NODE: k6_instance.load_node,
K6ProcessAllocationStrategy.PER_ENDPOINT: k6_instance.endpoints[0],
}
key = keys_map[k6_instance.load_params.k6_process_allocation_strategy]
results[key] = result
return results
@reporter.step_deco("Get load results")
def _get_results(self) -> dict:
return self.runner.get_results()

View file

@ -23,7 +23,7 @@ class ClusterStateController:
self.stopped_nodes: list[ClusterNode] = []
self.detached_disks: dict[str, DiskController] = {}
self.stopped_storage_nodes: list[ClusterNode] = []
self.stopped_s3_gate: list[ClusterNode] = []
self.stopped_s3_gates: list[ClusterNode] = []
self.cluster = cluster
self.shell = shell
self.suspended_services: dict[str, list[ClusterNode]] = {}
@ -60,6 +60,16 @@ class ClusterStateController:
for node in nodes:
self.stop_storage_service(node)
@run_optionally(optionals.OPTIONAL_FAILOVER_ENABLED)
@reporter.step_deco("Stop all S3 gates on cluster")
def stop_all_s3_gates(self, reversed_order: bool = False):
nodes = (
reversed(self.cluster.cluster_nodes) if reversed_order else self.cluster.cluster_nodes
)
for node in nodes:
self.stop_s3_gate(node)
@run_optionally(optionals.OPTIONAL_FAILOVER_ENABLED)
@reporter.step_deco("Start host of node {node}")
def start_node_host(self, node: ClusterNode):
@ -72,10 +82,18 @@ class ClusterStateController:
@run_optionally(optionals.OPTIONAL_FAILOVER_ENABLED)
@reporter.step_deco("Start stopped hosts")
def start_stopped_hosts(self, reversed_order: bool = False):
if not self.stopped_nodes:
return
nodes = reversed(self.stopped_nodes) if reversed_order else self.stopped_nodes
for node in nodes:
with reporter.step(f"Start host {node.host.config.address}"):
node.host.start_host()
if node in self.stopped_storage_nodes:
self.stopped_storage_nodes.remove(node)
if node in self.stopped_s3_gates:
self.stopped_s3_gates.remove(node)
self.stopped_nodes = []
wait_all_storage_nodes_returned(self.shell, self.cluster)
@ -115,44 +133,51 @@ class ClusterStateController:
@run_optionally(optionals.OPTIONAL_FAILOVER_ENABLED)
@reporter.step_deco("Start stopped storage services")
def start_stopped_storage_services(self):
if self.stopped_storage_nodes:
# In case if we stopped couple services, for example (s01-s04):
# After starting only s01, it may require connections to s02-s04, which is still down, and fail to start.
# Also, if something goes wrong here, we might skip s02-s04 start at all, and cluster will be left in a bad state.
# So in order to make sure that services are at least attempted to be started, using threads here.
with ThreadPoolExecutor(max_workers=len(self.stopped_storage_nodes)) as executor:
start_result = executor.map(self.start_storage_service, self.stopped_storage_nodes)
if not self.stopped_storage_nodes:
return
# Looks tricky, but if exception is raised in any thread, it will be "eaten" by ThreadPoolExecutor,
# But will be thrown here.
# Not ideal solution, but okay for now
for _ in start_result:
pass
# In case if we stopped couple services, for example (s01-s04):
# After starting only s01, it may require connections to s02-s04, which is still down, and fail to start.
# Also, if something goes wrong here, we might skip s02-s04 start at all, and cluster will be left in a bad state.
# So in order to make sure that services are at least attempted to be started, using threads here.
with ThreadPoolExecutor(max_workers=len(self.stopped_storage_nodes)) as executor:
start_result = executor.map(self.start_storage_service, self.stopped_storage_nodes)
# Looks tricky, but if exception is raised in any thread, it will be "eaten" by ThreadPoolExecutor,
# But will be thrown here.
# Not ideal solution, but okay for now
for _ in start_result:
pass
wait_all_storage_nodes_returned(self.shell, self.cluster)
self.stopped_storage_nodes = []
@run_optionally(optionals.OPTIONAL_FAILOVER_ENABLED)
@reporter.step_deco("Stop s3 gate on {node}")
def stop_s3_gate(self, node: ClusterNode):
node.s3_gate.stop_service()
self.stopped_s3_gate.append(node)
self.stopped_s3_gates.append(node)
@run_optionally(optionals.OPTIONAL_FAILOVER_ENABLED)
@reporter.step_deco("Start s3 gate on {node}")
def start_s3_gate(self, node: ClusterNode):
node.s3_gate.start_service()
self.stopped_s3_gate.remove(node)
self.stopped_s3_gates.remove(node)
@run_optionally(optionals.OPTIONAL_FAILOVER_ENABLED)
@reporter.step_deco("Start stopped S3 gates")
def start_stopped_s3_gate(self):
# not sure if we need here to use threads like in start_stopped_storage_services
for s3_gate in self.stopped_s3_gate:
s3_gate.start_service()
self.stopped_s3_gate = []
def start_stopped_s3_gates(self):
if not self.stopped_s3_gates:
return
with ThreadPoolExecutor(max_workers=len(self.stopped_s3_gates)) as executor:
start_result = executor.map(self.start_s3_gate, self.stopped_s3_gates)
# Looks tricky, but if exception is raised in any thread, it will be "eaten" by ThreadPoolExecutor,
# But will be thrown here.
# Not ideal solution, but okay for now
for _ in start_result:
pass
@run_optionally(optionals.OPTIONAL_FAILOVER_ENABLED)
@reporter.step_deco("Suspend {process_name} service in {node}")