forked from TrueCloudLab/frostfs-testlib
Add loader and scenario runner interfaces, add support for local scenario
Signed-off-by: Andrey Berezin <a.berezin@yadro.com>
This commit is contained in:
parent
13ea25bff5
commit
182bd6ab36
19 changed files with 786 additions and 384 deletions
398
src/frostfs_testlib/load/runners.py
Normal file
398
src/frostfs_testlib/load/runners.py
Normal file
|
@ -0,0 +1,398 @@
|
|||
import copy
|
||||
import itertools
|
||||
import math
|
||||
import re
|
||||
import time
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from dataclasses import fields
|
||||
from typing import Optional
|
||||
|
||||
import yaml
|
||||
|
||||
from frostfs_testlib.cli.frostfs_authmate.authmate import FrostfsAuthmate
|
||||
from frostfs_testlib.load.interfaces import Loader, ScenarioRunner
|
||||
from frostfs_testlib.load.k6 import K6
|
||||
from frostfs_testlib.load.load_config import K6ProcessAllocationStrategy, LoadParams, LoadType
|
||||
from frostfs_testlib.load.loaders import NodeLoader, RemoteLoader
|
||||
from frostfs_testlib.reporter import get_reporter
|
||||
from frostfs_testlib.resources import optionals
|
||||
from frostfs_testlib.resources.cli import FROSTFS_AUTHMATE_EXEC
|
||||
from frostfs_testlib.resources.common import STORAGE_USER_NAME
|
||||
from frostfs_testlib.resources.load_params import (
|
||||
BACKGROUND_LOAD_VUS_COUNT_DIVISOR,
|
||||
LOAD_NODE_SSH_USER,
|
||||
LOAD_NODES,
|
||||
)
|
||||
from frostfs_testlib.shell.interfaces import CommandOptions, InteractiveInput
|
||||
from frostfs_testlib.storage.cluster import ClusterNode
|
||||
from frostfs_testlib.storage.controllers.cluster_state_controller import ClusterStateController
|
||||
from frostfs_testlib.storage.dataclasses.frostfs_services import S3Gate, StorageNode
|
||||
from frostfs_testlib.storage.dataclasses.wallet import WalletInfo
|
||||
from frostfs_testlib.testing.test_control import run_optionally
|
||||
from frostfs_testlib.utils import datetime_utils
|
||||
from frostfs_testlib.utils.file_keeper import FileKeeper
|
||||
|
||||
reporter = get_reporter()
|
||||
|
||||
|
||||
class DefaultRunner(ScenarioRunner):
    """Scenario runner that drives K6 through loaders built from a list of load node IPs."""

    # K6 processes managed by this runner; assigned by init_k6_instances().
    k6_instances: list[K6]
    # Loaders created from the load node IP list.
    loaders: list[Loader]
    # Wallet passed to authmate in prepare() and to every K6 instance.
    loaders_wallet: WalletInfo

    def __init__(
        self,
        loaders_wallet: WalletInfo,
        load_ip_list: Optional[list[str]] = None,
    ) -> None:
        """
        Build remote loaders for the given load node addresses.

        Args:
            loaders_wallet: wallet stored on the runner for later use.
            load_ip_list: IPs of the load nodes; LOAD_NODES is used when None.
        """
        ip_list = LOAD_NODES if load_ip_list is None else load_ip_list
        self.loaders = RemoteLoader.from_ip_list(ip_list)
        self.loaders_wallet = loaders_wallet
|
||||
|
||||
@run_optionally(optionals.OPTIONAL_BACKGROUND_LOAD_ENABLED)
@reporter.step_deco("Prepare load instances")
def prepare(
    self,
    load_params: LoadParams,
    nodes_under_load: list[ClusterNode],
    k6_dir: str,
):
    """
    Issue S3 credentials with frostfs-authmate and run `aws configure` on every loader.

    Does nothing unless load_params.load_type is LoadType.S3.

    Args:
        load_params: load parameters; preset.container_placement_policy is passed to authmate.
        nodes_under_load: cluster nodes whose S3 gate public keys are passed to authmate;
            the storage service of the first node supplies the RPC endpoint used as peer.
        k6_dir: directory whose scenarios/files/rules.json and policy.json are passed to authmate.

    Raises:
        RuntimeError: if the authmate output does not contain one of the expected keys.
    """
    if load_params.load_type != LoadType.S3:
        return

    def extract_credential(key_name: str, authmate_output: str) -> str:
        # After "<key_name>...:" and one whitespace the pattern skips exactly one character
        # (presumably the opening quote of the value - confirm against authmate output)
        # and captures the following run of word characters.
        match = re.search(rf"{key_name}.*:\s.(?P<value>\w*)", authmate_output)
        if match is None:
            # The output is deliberately not included: it may contain the other credential.
            raise RuntimeError(f"Could not find {key_name} in frostfs-authmate output")
        return match.group("value")

    with reporter.step("Init s3 client on loaders"):
        storage_node = nodes_under_load[0].service(StorageNode)
        s3_public_keys = [
            node.service(S3Gate).get_wallet_public_key() for node in nodes_under_load
        ]
        grpc_peer = storage_node.get_rpc_endpoint()

    for loader in self.loaders:
        with reporter.step(f"Init s3 client on {loader.ip}"):
            shell = loader.get_shell()
            frostfs_authmate_exec: FrostfsAuthmate = FrostfsAuthmate(
                shell, FROSTFS_AUTHMATE_EXEC
            )
            issue_secret_output = frostfs_authmate_exec.secret.issue(
                wallet=self.loaders_wallet.path,
                peer=grpc_peer,
                bearer_rules=f"{k6_dir}/scenarios/files/rules.json",
                gate_public_key=s3_public_keys,
                container_placement_policy=load_params.preset.container_placement_policy,
                container_policy=f"{k6_dir}/scenarios/files/policy.json",
                wallet_password=self.loaders_wallet.password,
            ).stdout
            aws_access_key_id = extract_credential("access_key_id", issue_secret_output)
            aws_secret_access_key = extract_credential(
                "secret_access_key", issue_secret_output
            )

            # Answer the prompts of `aws configure`: the two keys first, then two
            # empty answers for whatever prompts follow.
            configure_input = [
                InteractiveInput(
                    prompt_pattern=r"AWS Access Key ID.*", input=aws_access_key_id
                ),
                InteractiveInput(
                    prompt_pattern=r"AWS Secret Access Key.*", input=aws_secret_access_key
                ),
                InteractiveInput(prompt_pattern=r".*", input=""),
                InteractiveInput(prompt_pattern=r".*", input=""),
            ]
            shell.exec("aws configure", CommandOptions(interactive_inputs=configure_input))
|
||||
|
||||
def wait_until_finish(self):
    """Call wait_until_finished() on every K6 instance, in list order."""
    for instance in self.k6_instances:
        instance.wait_until_finished()
|
||||
|
||||
def init_k6_instances(self, load_params: LoadParams, endpoints: list[str], k6_dir: str):
    """
    Create the K6 instances for a load run and store them in self.k6_instances.

    The number of K6 processes and the endpoints each one receives depend on
    load_params.k6_process_allocation_strategy:
      - PER_LOAD_NODE: one process per loader, each given the full endpoint list;
      - PER_ENDPOINT: one process per endpoint, each given a single endpoint.
    Processes are assigned to loaders in round-robin order.

    Args:
        load_params: original load parameters; split between the processes.
        endpoints: endpoints to put under load.
        k6_dir: K6 directory passed to every K6 instance.
    """
    self.k6_instances = []
    loader_cycle = itertools.cycle(self.loaders)

    single_endpoint_lists = [[endpoint] for endpoint in endpoints]
    processes_by_strategy = {
        K6ProcessAllocationStrategy.PER_LOAD_NODE: len(self.loaders),
        K6ProcessAllocationStrategy.PER_ENDPOINT: len(endpoints),
    }
    endpoints_by_strategy = {
        K6ProcessAllocationStrategy.PER_LOAD_NODE: itertools.cycle([endpoints]),
        K6ProcessAllocationStrategy.PER_ENDPOINT: itertools.cycle(single_endpoint_lists),
    }
    strategy = load_params.k6_process_allocation_strategy
    k6_processes_count = processes_by_strategy[strategy]
    endpoints_gen = endpoints_by_strategy[strategy]

    params_per_process = self._get_distributed_load_params_list(
        load_params, k6_processes_count
    )

    for process_params in params_per_process:
        loader = next(loader_cycle)
        shell = loader.get_shell()
        step_title = f"Init K6 instances on {loader.ip} for load id {process_params.load_id}"
        with reporter.step(step_title):
            with reporter.step("Make working directory"):
                working_dir = process_params.working_dir
                shell.exec(f"sudo mkdir -p {working_dir}")
                shell.exec(f"sudo chown {LOAD_NODE_SSH_USER} {working_dir}")

            instance = K6(
                process_params,
                next(endpoints_gen),
                k6_dir,
                shell,
                loader,
                self.loaders_wallet,
            )
            self.k6_instances.append(instance)
            # The preset is run only when the original parameters define one.
            if load_params.preset:
                instance.preset()
|
||||
|
||||
def _get_distributed_load_params_list(
    self, original_load_params: LoadParams, workers_count: int
) -> list[LoadParams]:
    """
    Split load parameters into one deep copy per worker.

    Every copy gets the original load id suffixed with "_<index>". For each dataclass
    field that has metadata, is applicable to the current scenario, is marked as
    distributed and is not None, the original value is divided by
    BACKGROUND_LOAD_VUS_COUNT_DIVISOR (rounded up) and then spread over the copies
    with _get_distribution().

    Args:
        original_load_params: parameters to split; not modified.
        workers_count: number of copies to produce.

    Returns:
        list of per-worker load parameters.
    """
    divisor = int(BACKGROUND_LOAD_VUS_COUNT_DIVISOR)

    per_worker_params: list[LoadParams] = []
    for worker_index in range(workers_count):
        worker_params = copy.deepcopy(original_load_params)
        # The index suffix keeps ids unique when several k6 processes land on one load node
        worker_params.set_id(f"{worker_params.load_id}_{worker_index}")
        per_worker_params.append(worker_params)

    for field in fields(original_load_params):
        metadata = field.metadata
        if not metadata:
            continue
        if original_load_params.scenario not in metadata["applicable_scenarios"]:
            continue
        if not metadata["distributed"]:
            continue

        total = getattr(original_load_params, field.name)
        if total is None:
            continue

        shares = self._get_distribution(math.ceil(total / divisor), workers_count)
        for worker_params, share in zip(per_worker_params, shares):
            setattr(worker_params, field.name, share)

    return per_worker_params
|
||||
|
||||
def _get_distribution(self, clients_count: int, workers_count: int) -> list[int]:
|
||||
"""
|
||||
This function will distribute evenly as possible X clients to Y workers.
|
||||
For example if we have 150 readers (clients) and we want to spread it over 4 load nodes (workers)
|
||||
this will return [38, 38, 37, 37].
|
||||
|
||||
Args:
|
||||
clients_count: amount of things needs to be distributed.
|
||||
workers_count: amount of workers.
|
||||
|
||||
Returns:
|
||||
list of distribution.
|
||||
"""
|
||||
if workers_count < 1:
|
||||
raise Exception("Workers cannot be less then 1")
|
||||
|
||||
# Amount of guaranteed payload on one worker
|
||||
clients_per_worker = clients_count // workers_count
|
||||
# Remainder of clients left to be distributed
|
||||
remainder = clients_count - clients_per_worker * workers_count
|
||||
|
||||
distribution = [
|
||||
clients_per_worker + 1 if i < remainder else clients_per_worker
|
||||
for i in range(workers_count)
|
||||
]
|
||||
return distribution
|
||||
|
||||
def start(self):
    """
    Start all K6 instances in parallel, then sleep for the setup timeout plus 5 seconds.

    The setup timeout is taken from the load parameters of the first K6 instance.

    Raises:
        RuntimeError: if starting any K6 instance raised; the first underlying
            exception is attached as __cause__ so its traceback is not lost.
    """
    load_params = self.k6_instances[0].load_params

    with ThreadPoolExecutor(max_workers=len(self.k6_instances)) as executor:
        futures = [executor.submit(k6.start) for k6 in self.k6_instances]

    # Leaving the executor context waits for all submitted calls, so every
    # future is already done here and exception() does not block.
    raised = (future.exception() for future in futures)
    exceptions = [error for error in raised if error is not None]
    if exceptions:
        raise RuntimeError(
            f"The following exceptions occured during start of k6: {exceptions}"
        ) from exceptions[0]

    wait_after_start_time = datetime_utils.parse_time(load_params.setup_timeout) + 5
    with reporter.step(
        f"Wait for start timeout + couple more seconds ({wait_after_start_time}) before moving on"
    ):
        time.sleep(wait_after_start_time)
|
||||
|
||||
def stop(self):
    """Call stop() on every K6 instance, in list order."""
    for instance in self.k6_instances:
        instance.stop()
|
||||
|
||||
def get_results(self) -> dict:
    """
    Collect results from every K6 instance.

    Results are keyed by the loader IP for the PER_LOAD_NODE allocation strategy
    and by the first endpoint of the instance for PER_ENDPOINT.

    Returns:
        dict mapping the strategy-dependent key to the result of that K6 instance.

    Raises:
        RuntimeError: if an instance has no k6_process_allocation_strategy set.
    """
    collected = {}
    for instance in self.k6_instances:
        strategy = instance.load_params.k6_process_allocation_strategy
        if strategy is None:
            raise RuntimeError("k6_process_allocation_strategy should not be none")

        instance_result = instance.get_results()
        key_by_strategy = {
            K6ProcessAllocationStrategy.PER_LOAD_NODE: instance.loader.ip,
            K6ProcessAllocationStrategy.PER_ENDPOINT: instance.endpoints[0],
        }
        collected[key_by_strategy[strategy]] = instance_result

    return collected
|
||||
|
||||
@property
def is_running(self):
    """True when every K6 instance reports is_running (also True when there are none)."""
    return all(instance.is_running for instance in self.k6_instances)
|
||||
|
||||
|
||||
class LocalRunner(ScenarioRunner):
    """Scenario runner that uses the cluster nodes under load themselves as loaders."""

    # K6 processes managed by this runner; assigned by init_k6_instances().
    k6_instances: list[K6]
    # One NodeLoader per node under load.
    loaders: list[Loader]
    # Used to stop and restart storage services and S3 gates around the load.
    cluster_state_controller: ClusterStateController
    # Receives the files that prepare() is about to overwrite.
    file_keeper: FileKeeper
    # Assigned inside prepare(); not set by __init__.
    wallet: WalletInfo

    def __init__(
        self,
        cluster_state_controller: ClusterStateController,
        file_keeper: FileKeeper,
        nodes_under_load: list[ClusterNode],
    ) -> None:
        """
        Store the collaborators and wrap every node under load into a NodeLoader.

        Args:
            cluster_state_controller: controller kept for service stop/start calls.
            file_keeper: keeper kept for files changed during preparation.
            nodes_under_load: cluster nodes that will run K6 locally.
        """
        self.cluster_state_controller = cluster_state_controller
        self.file_keeper = file_keeper

        node_loaders = []
        for node in nodes_under_load:
            node_loaders.append(NodeLoader(node))
        self.loaders = node_loaders
|
||||
|
||||
@run_optionally(optionals.OPTIONAL_BACKGROUND_LOAD_ENABLED)
@reporter.step_deco("Prepare load instances")
def prepare(
    self,
    load_params: LoadParams,
    nodes_under_load: list[ClusterNode],
    k6_dir: str,
):
    """
    Prepare every node under load, in parallel, for running K6 locally.

    On each node this switches the storage user's login shell to /bin/bash,
    overwrites /etc/security/limits.conf with nofile limits for that user
    (after handing the file to file_keeper), downloads and unpacks K6 into k6_dir
    and writes an empty-password YAML config. self.wallet is assigned as a side effect.

    Args:
        load_params: load parameters; k6_url is the download location of the K6 archive.
        nodes_under_load: cluster nodes to prepare.
        k6_dir: directory on the node that is recreated and filled with K6.
    """
    limits_path = "/etc/security/limits.conf"
    archive_path = f"{k6_dir}/k6.tar.gz"

    @reporter.step_deco("Prepare node {cluster_node}")
    def prepare_node(cluster_node: ClusterNode):
        shell = cluster_node.host.get_shell()

        with reporter.step("Allow storage user to login into system"):
            shell.exec(f"sudo chsh -s /bin/bash {STORAGE_USER_NAME}")

        with reporter.step("Update limits.conf"):
            # Hand the file to the keeper before `tee` replaces its content
            self.file_keeper.add(cluster_node.storage_node, limits_path)
            limits_content = (
                f"{STORAGE_USER_NAME} hard nofile 65536\n"
                f"{STORAGE_USER_NAME} soft nofile 65536\n"
            )
            shell.exec(f"echo '{limits_content}' | sudo tee {limits_path}")

        with reporter.step("Download K6"):
            shell.exec(f"sudo rm -rf {k6_dir};sudo mkdir {k6_dir}")
            shell.exec(f"sudo curl -so {archive_path} {load_params.k6_url}")
            shell.exec(f"sudo tar xf {archive_path} -C {k6_dir}")
            shell.exec(f"sudo chmod -R 777 {k6_dir}")

        with reporter.step("Create empty_passwd"):
            self.wallet = WalletInfo(
                f"{k6_dir}/scenarios/files/wallet.json", "", "/tmp/empty_passwd.yml"
            )
            password_yaml = yaml.dump({"password": ""})
            shell.exec(f'echo "{password_yaml}" | sudo tee {self.wallet.config_path}')
            shell.exec(f"sudo chmod -R 777 {self.wallet.config_path}")

    with ThreadPoolExecutor(max_workers=len(nodes_under_load)) as executor:
        outcomes = executor.map(prepare_node, nodes_under_load)

    # Consuming the results re-raises the first exception raised by a worker
    list(outcomes)
|
||||
|
||||
def wait_until_finish(self):
    """Call wait_until_finished() on every K6 instance, in list order."""
    for instance in self.k6_instances:
        instance.wait_until_finished()
|
||||
|
||||
def init_k6_instances(self, load_params: LoadParams, endpoints: list[str], k6_dir: str):
    """
    Create one K6 instance per loader and store them in self.k6_instances.

    Every instance receives the same load_params and the single endpoint
    "localhost:8080"; the endpoints argument is not used by this method.

    Args:
        load_params: load parameters shared by all instances.
        endpoints: not used by this implementation.
        k6_dir: K6 directory passed to every K6 instance.
    """
    self.k6_instances = []

    working_dir = load_params.working_dir
    ssh_user_home = f"/home/{LOAD_NODE_SSH_USER}"
    # Home directory of the SSH user, with and without the trailing slash
    never_chmod = (ssh_user_home, f"{ssh_user_home}/")

    for loader in self.loaders:
        shell = loader.get_shell()
        with reporter.step(f"Init K6 instances on {loader.ip}"):
            with reporter.step("Make working directory"):
                shell.exec(f"sudo mkdir -p {working_dir}")
                # !! IMPORTANT !!
                # If we chmod /home/<user_name> folder we can no longer ssh to the node
                if working_dir and working_dir not in never_chmod:
                    shell.exec(f"sudo chmod -R 777 {working_dir}")

            instance = K6(
                load_params,
                ["localhost:8080"],
                k6_dir,
                shell,
                loader,
                self.wallet,
            )
            self.k6_instances.append(instance)
            if load_params.preset:
                instance.preset()
|
||||
|
||||
def start(self):
    """
    Stop S3 gates and storage services, start all K6 instances in parallel,
    then sleep for the setup timeout plus 5 seconds.

    The setup timeout is taken from the load parameters of the first K6 instance.

    Raises:
        RuntimeError: if starting any K6 instance raised; the first underlying
            exception is attached as __cause__ so its traceback is not lost.
    """
    load_params = self.k6_instances[0].load_params

    self.cluster_state_controller.stop_all_s3_gates()
    self.cluster_state_controller.stop_all_storage_services()

    with ThreadPoolExecutor(max_workers=len(self.k6_instances)) as executor:
        futures = [executor.submit(k6.start) for k6 in self.k6_instances]

    # Leaving the executor context waits for all submitted calls, so every
    # future is already done here and exception() does not block.
    raised = (future.exception() for future in futures)
    exceptions = [error for error in raised if error is not None]
    if exceptions:
        raise RuntimeError(
            f"The following exceptions occured during start of k6: {exceptions}"
        ) from exceptions[0]

    wait_after_start_time = datetime_utils.parse_time(load_params.setup_timeout) + 5
    with reporter.step(
        f"Wait for start timeout + couple more seconds ({wait_after_start_time}) before moving on"
    ):
        time.sleep(wait_after_start_time)
|
||||
|
||||
def stop(self):
    """
    Stop every K6 instance, then start the stopped storage services and S3 gates again.

    The services are restarted even when stopping a K6 instance raises, so a
    failed stop does not leave the cluster with its services down; the original
    exception still propagates to the caller.
    """
    try:
        for k6_instance in self.k6_instances:
            k6_instance.stop()
    finally:
        self.cluster_state_controller.start_stopped_storage_services()
        self.cluster_state_controller.start_stopped_s3_gates()
|
||||
|
||||
def get_results(self) -> dict:
    """
    Collect results from every K6 instance, keyed by the IP of its loader.

    Returns:
        dict mapping loader IP to the result of the K6 instance on that loader.
    """
    collected = {}
    for instance in self.k6_instances:
        collected[instance.loader.ip] = instance.get_results()
    return collected
|
||||
|
||||
@property
def is_running(self):
    """True when every K6 instance reports is_running (also True when there are none)."""
    return all(instance.is_running for instance in self.k6_instances)
|
Loading…
Add table
Add a link
Reference in a new issue