forked from TrueCloudLab/frostfs-testlib
Move shared code to testlib
Signed-off-by: Andrey Berezin <a.berezin@yadro.com>
This commit is contained in:
parent
d97a02d1d3
commit
997e768e92
69 changed files with 9213 additions and 64 deletions
207
src/frostfs_testlib/controllers/background_load_controller.py
Normal file
207
src/frostfs_testlib/controllers/background_load_controller.py
Normal file
|
@ -0,0 +1,207 @@
|
|||
import frostfs_testlib.resources.optionals as optionals
|
||||
from frostfs_testlib.load.k6 import K6
|
||||
from frostfs_testlib.load.load_config import (
|
||||
EndpointSelectionStrategy,
|
||||
K6ProcessAllocationStrategy,
|
||||
LoadParams,
|
||||
LoadScenario,
|
||||
LoadType,
|
||||
)
|
||||
from frostfs_testlib.load.load_steps import init_s3_client, prepare_k6_instances
|
||||
from frostfs_testlib.reporter import get_reporter
|
||||
from frostfs_testlib.resources.load_params import (
|
||||
K6_TEARDOWN_PERIOD,
|
||||
LOAD_NODE_SSH_PASSWORD,
|
||||
LOAD_NODE_SSH_PRIVATE_KEY_PASSPHRASE,
|
||||
LOAD_NODE_SSH_PRIVATE_KEY_PATH,
|
||||
LOAD_NODE_SSH_USER,
|
||||
LOAD_NODES,
|
||||
)
|
||||
from frostfs_testlib.shell.interfaces import SshCredentials
|
||||
from frostfs_testlib.storage.cluster import ClusterNode
|
||||
from frostfs_testlib.storage.cluster.frostfs_services import S3Gate, StorageNode
|
||||
from frostfs_testlib.storage.dataclasses.wallet import WalletInfo
|
||||
from frostfs_testlib.testing.test_control import run_optionally
|
||||
|
||||
# Reporter object shared by this module; used below for the step decorators
# and step context managers.
reporter = get_reporter()
|
||||
|
||||
|
||||
class BackgroundLoadController:
    """Prepare, run, verify and collect results of k6 background load.

    Most public methods are wrapped in ``run_optionally`` keyed on
    ``optionals.OPTIONAL_BACKGROUND_LOAD_ENABLED``.
    NOTE(review): presumably the wrapper skips the call and returns the
    supplied default when the flag is off -- confirm against
    ``frostfs_testlib.testing.test_control``.
    """

    # k6 instances created by the latest _prepare() call
    k6_instances: list[K6]
    # k6 directory handed over to the load steps
    k6_dir: str
    # parameters of the main load run
    load_params: LoadParams
    # load nodes taken from the LOAD_NODES resource constant
    load_nodes: list[str]
    # parameters of the verification run, derived from load_params
    verification_params: LoadParams
    # cluster nodes whose endpoints receive the load
    nodes_under_load: list[ClusterNode]
    # SSH credentials built from the LOAD_NODE_SSH_* resource constants
    ssh_credentials: SshCredentials
    # wallet passed to the load steps
    loaders_wallet: WalletInfo
    # endpoints selected for load_params.load_type and the selection strategy
    endpoints: list[str]

    def __init__(
        self,
        k6_dir: str,
        load_params: LoadParams,
        loaders_wallet: WalletInfo,
        nodes_under_load: list[ClusterNode],
    ) -> None:
        """Store the configuration and derive endpoints and verification params.

        Raises:
            RuntimeError: if ``load_params.endpoint_selection_strategy`` is None.
        """
        self.k6_dir = k6_dir
        self.load_params = load_params
        self.nodes_under_load = nodes_under_load
        self.load_nodes = LOAD_NODES
        self.loaders_wallet = loaders_wallet

        if load_params.endpoint_selection_strategy is None:
            raise RuntimeError("endpoint_selection_strategy should not be None")

        self.endpoints = self._get_endpoints(
            load_params.load_type, load_params.endpoint_selection_strategy
        )
        # The verification run reuses the identifying fields of the main load
        # and takes its client count from the configured readers.
        self.verification_params = LoadParams(
            clients=load_params.readers,
            scenario=LoadScenario.VERIFY,
            registry_file=load_params.registry_file,
            verify_time=load_params.verify_time,
            load_type=load_params.load_type,
            load_id=load_params.load_id,
            working_dir=load_params.working_dir,
            endpoint_selection_strategy=load_params.endpoint_selection_strategy,
            k6_process_allocation_strategy=load_params.k6_process_allocation_strategy,
        )
        self.ssh_credentials = SshCredentials(
            LOAD_NODE_SSH_USER,
            LOAD_NODE_SSH_PASSWORD,
            LOAD_NODE_SSH_PRIVATE_KEY_PATH,
            LOAD_NODE_SSH_PRIVATE_KEY_PASSPHRASE,
        )

    @run_optionally(optionals.OPTIONAL_BACKGROUND_LOAD_ENABLED, [])
    def _get_endpoints(
        self, load_type: LoadType, endpoint_selection_strategy: EndpointSelectionStrategy
    ) -> list[str]:
        """Return the de-duplicated endpoints for the given load type and strategy.

        Only the requested combination is evaluated, so a gRPC load does not
        query the S3 gate service of the nodes (and vice versa).

        Raises:
            KeyError: if the load type or the strategy is not supported.
        """
        nodes = self.nodes_under_load
        # Each entry is a thunk so that nothing is queried until it is selected.
        endpoint_sources = {
            LoadType.gRPC: {
                EndpointSelectionStrategy.ALL: lambda: (
                    endpoint
                    for node in nodes
                    for endpoint in node.service(StorageNode).get_all_rpc_endpoint()
                ),
                EndpointSelectionStrategy.FIRST: lambda: (
                    node.service(StorageNode).get_rpc_endpoint() for node in nodes
                ),
            },
            # for some reason xk6 appends http protocol on its own
            LoadType.S3: {
                EndpointSelectionStrategy.ALL: lambda: (
                    endpoint.replace("http://", "")
                    for node in nodes
                    for endpoint in node.service(S3Gate).get_all_endpoints()
                ),
                EndpointSelectionStrategy.FIRST: lambda: (
                    node.service(S3Gate).get_endpoint().replace("http://", "")
                    for node in nodes
                ),
            },
        }

        collect = endpoint_sources[load_type][endpoint_selection_strategy]
        # The set removes duplicates; the resulting order is not defined.
        return list(set(collect()))

    @run_optionally(optionals.OPTIONAL_BACKGROUND_LOAD_ENABLED)
    @reporter.step_deco("Prepare background load instances")
    def prepare(self):
        """Initialise the S3 client for S3 load, then create the k6 instances."""
        if self.load_params.load_type == LoadType.S3:
            init_s3_client(
                self.load_nodes,
                self.load_params,
                self.k6_dir,
                self.ssh_credentials,
                self.nodes_under_load,
                self.loaders_wallet,
            )

        self._prepare(self.load_params)

    def _prepare(self, load_params: LoadParams):
        """Create k6 instances for ``load_params`` and store them on the controller."""
        self.k6_instances = prepare_k6_instances(
            # Same node list that prepare() hands to init_s3_client.
            load_nodes=self.load_nodes,
            ssh_credentials=self.ssh_credentials,
            k6_dir=self.k6_dir,
            load_params=load_params,
            endpoints=self.endpoints,
            loaders_wallet=self.loaders_wallet,
        )

    @run_optionally(optionals.OPTIONAL_BACKGROUND_LOAD_ENABLED)
    @reporter.step_deco("Start background load")
    def start(self):
        """Start every prepared k6 instance.

        Raises:
            RuntimeError: if ``load_params.preset`` is None.
        """
        if self.load_params.preset is None:
            raise RuntimeError("Preset should not be none at the moment of start")

        with reporter.step(
            f"Start background load on nodes {self.nodes_under_load}: "
            f"writers = {self.load_params.writers}, "
            f"obj_size = {self.load_params.object_size}, "
            f"load_time = {self.load_params.load_time}, "
            f"prepare_json = {self.load_params.preset.pregen_json}, "
            f"endpoints = {self.endpoints}"
        ):
            for k6_load_instance in self.k6_instances:
                k6_load_instance.start()

    @run_optionally(optionals.OPTIONAL_BACKGROUND_LOAD_ENABLED)
    @reporter.step_deco("Stop background load")
    def stop(self):
        """Stop every k6 instance."""
        for k6_load_instance in self.k6_instances:
            k6_load_instance.stop()

    @run_optionally(optionals.OPTIONAL_BACKGROUND_LOAD_ENABLED, True)
    def is_running(self):
        """Return True when ``is_running`` is truthy for every k6 instance."""
        return all(k6_load_instance.is_running for k6_load_instance in self.k6_instances)

    # Guarded like the other lifecycle methods: when prepare() is skipped,
    # k6_instances is never assigned and this method would otherwise fail.
    @run_optionally(optionals.OPTIONAL_BACKGROUND_LOAD_ENABLED)
    def wait_until_finish(self):
        """Wait for each k6 instance, allowing load_time plus the teardown period.

        Raises:
            RuntimeError: if ``load_params.load_time`` is None.
        """
        if self.load_params.load_time is None:
            raise RuntimeError("LoadTime should not be none")

        for k6_instance in self.k6_instances:
            k6_instance.wait_until_finished(self.load_params.load_time + int(K6_TEARDOWN_PERIOD))

    @run_optionally(optionals.OPTIONAL_BACKGROUND_LOAD_ENABLED)
    def verify(self):
        """Re-create the k6 instances with the verification params and run them.

        Instances are run one after another: each is started and waited for
        before the next one is started.

        Raises:
            RuntimeError: if ``verification_params.verify_time`` is None.
        """
        if self.verification_params.verify_time is None:
            raise RuntimeError("verify_time should not be none")

        self._prepare(self.verification_params)
        with reporter.step("Run verify background load data"):
            for k6_verify_instance in self.k6_instances:
                k6_verify_instance.start()
                k6_verify_instance.wait_until_finished(self.verification_params.verify_time)

    @run_optionally(optionals.OPTIONAL_BACKGROUND_LOAD_ENABLED)
    @reporter.step_deco("K6 run results")
    def get_results(self) -> dict:
        """Collect the results of every k6 instance.

        Returns:
            Mapping of load node (PER_LOAD_NODE strategy) or first endpoint of
            the instance (PER_ENDPOINT strategy) to that instance's results.

        Raises:
            RuntimeError: if an instance has no k6_process_allocation_strategy.
        """
        results = {}
        for k6_instance in self.k6_instances:
            if k6_instance.load_params.k6_process_allocation_strategy is None:
                raise RuntimeError("k6_process_allocation_strategy should not be none")

            result = k6_instance.get_results()
            keys_map = {
                K6ProcessAllocationStrategy.PER_LOAD_NODE: k6_instance.load_node,
                K6ProcessAllocationStrategy.PER_ENDPOINT: k6_instance.endpoints[0],
            }
            key = keys_map[k6_instance.load_params.k6_process_allocation_strategy]
            results[key] = result

        return results
|
130
src/frostfs_testlib/controllers/cluster_state_controller.py
Normal file
130
src/frostfs_testlib/controllers/cluster_state_controller.py
Normal file
|
@ -0,0 +1,130 @@
|
|||
import time
|
||||
|
||||
import allure
|
||||
|
||||
import frostfs_testlib.resources.optionals as optionals
|
||||
from frostfs_testlib.reporter import get_reporter
|
||||
from frostfs_testlib.shell import CommandOptions, Shell
|
||||
from frostfs_testlib.steps import epoch
|
||||
from frostfs_testlib.storage.cluster import Cluster, ClusterNode, StorageNode
|
||||
from frostfs_testlib.storage.controllers.disk_controller import DiskController
|
||||
from frostfs_testlib.testing.test_control import run_optionally, wait_for_success
|
||||
from frostfs_testlib.utils.failover_utils import (
|
||||
wait_all_storage_nodes_returned,
|
||||
wait_for_host_offline,
|
||||
wait_for_host_online,
|
||||
wait_for_node_online,
|
||||
)
|
||||
|
||||
# Reporter object shared by this module; used below for the step decorators.
reporter = get_reporter()
|
||||
|
||||
|
||||
class ClusterStateController:
    """Change cluster state for failover scenarios and remember how to undo it.

    Stopped hosts, stopped storage services and detached disks are tracked so
    that the ``start_stopped_*`` / ``restore_disks`` methods can bring
    everything back. Most methods are wrapped in ``run_optionally`` keyed on
    ``optionals.OPTIONAL_FAILOVER_ENABLED``.
    NOTE(review): presumably the wrapper skips the call when the flag is
    off -- confirm against ``frostfs_testlib.testing.test_control``.
    """

    def __init__(self, shell: Shell, cluster: Cluster) -> None:
        """Remember the shell and cluster and start with empty tracking state."""
        self.stopped_nodes: list[ClusterNode] = []
        self.detached_disks: dict[str, DiskController] = {}
        self.stopped_storage_nodes: list[StorageNode] = []
        self.cluster = cluster
        self.shell = shell

    @run_optionally(optionals.OPTIONAL_FAILOVER_ENABLED)
    @reporter.step_deco("Stop host of node {node}")
    def stop_node_host(self, node: ClusterNode, mode: str):
        """Stop the host of ``node`` in ``mode``, wait for it to go offline, track it."""
        with allure.step(f"Stop host {node.host.config.address}"):
            node.host.stop_host(mode=mode)
            wait_for_host_offline(self.shell, node.storage_node)
            self.stopped_nodes.append(node)

    @run_optionally(optionals.OPTIONAL_FAILOVER_ENABLED)
    @reporter.step_deco("Start host of node {node}")
    def start_node_host(self, node: ClusterNode):
        """Start the host of ``node``, wait for host and node, stop tracking it."""
        with allure.step(f"Start host {node.host.config.address}"):
            node.host.start_host()
            wait_for_host_online(self.shell, node.storage_node)
            wait_for_node_online(node.storage_node)
            # The host may have been stopped by other means; do not fail after
            # a successful start just because it was never tracked here.
            if node in self.stopped_nodes:
                self.stopped_nodes.remove(node)

    @run_optionally(optionals.OPTIONAL_FAILOVER_ENABLED)
    @reporter.step_deco("Start stopped hosts")
    def start_stopped_hosts(self):
        """Start every tracked host, clear the tracking list, wait for storage nodes."""
        for node in self.stopped_nodes:
            node.host.start_host()
        self.stopped_nodes = []
        wait_all_storage_nodes_returned(self.shell, self.cluster)

    @run_optionally(optionals.OPTIONAL_FAILOVER_ENABLED)
    @reporter.step_deco("Detach disk {device} at {mountpoint} on node {node}")
    def detach_disk(self, node: StorageNode, device: str, mountpoint: str):
        """Detach ``device`` on ``node`` and track its controller for restore."""
        disk_controller = self._get_disk_controller(node, device, mountpoint)
        # Registered before detaching, so the disk is tracked even if detach() raises.
        self.detached_disks[disk_controller.id] = disk_controller
        disk_controller.detach()

    @run_optionally(optionals.OPTIONAL_FAILOVER_ENABLED)
    @reporter.step_deco("Attach disk {device} at {mountpoint} on node {node}")
    def attach_disk(self, node: StorageNode, device: str, mountpoint: str):
        """Attach ``device`` on ``node`` and stop tracking it if it was tracked."""
        disk_controller = self._get_disk_controller(node, device, mountpoint)
        disk_controller.attach()
        self.detached_disks.pop(disk_controller.id, None)

    @run_optionally(optionals.OPTIONAL_FAILOVER_ENABLED)
    @reporter.step_deco("Restore detached disks")
    def restore_disks(self):
        """Attach every tracked disk and clear the tracking map."""
        for disk_controller in self.detached_disks.values():
            disk_controller.attach()
        self.detached_disks = {}

    @run_optionally(optionals.OPTIONAL_FAILOVER_ENABLED)
    @reporter.step_deco("Stop storage service on {node}")
    def stop_storage_service(self, node: ClusterNode):
        """Stop the storage service of ``node`` and track it."""
        node.storage_node.stop_service()
        self.stopped_storage_nodes.append(node.storage_node)

    @run_optionally(optionals.OPTIONAL_FAILOVER_ENABLED)
    @reporter.step_deco("Start storage service on {node}")
    def start_storage_service(self, node: ClusterNode):
        """Start the storage service of ``node`` and stop tracking it."""
        storage_node = node.storage_node
        storage_node.start_service()
        # Tolerate services that were not stopped through this controller,
        # in the same way attach_disk() tolerates untracked disks.
        if storage_node in self.stopped_storage_nodes:
            self.stopped_storage_nodes.remove(storage_node)

    @run_optionally(optionals.OPTIONAL_FAILOVER_ENABLED)
    @reporter.step_deco("Start stopped storage services")
    def start_stopped_storage_services(self):
        """Start every tracked storage service and clear the tracking list."""
        for node in self.stopped_storage_nodes:
            node.start_service()
        self.stopped_storage_nodes = []

    @run_optionally(optionals.OPTIONAL_FAILOVER_ENABLED)
    @reporter.step_deco("Hard reboot host {node} via magic SysRq option")
    def panic_reboot_host(self, node: ClusterNode):
        """Reboot the host of ``node`` through /proc/sysrq-trigger and wait for it."""
        shell = node.host.get_shell()
        shell.exec('sudo sh -c "echo 1 > /proc/sys/kernel/sysrq"')

        # The reboot command is not expected to complete normally, hence the
        # short timeout and check=False.
        options = CommandOptions(close_stdin=True, timeout=1, check=False)
        shell.exec('sudo sh -c "echo b > /proc/sysrq-trigger"', options)

        # Let the things to be settled
        # A little wait here to prevent ssh stuck during panic
        time.sleep(10)
        wait_for_host_online(self.shell, node.storage_node)
        wait_for_node_online(node.storage_node)

    @reporter.step_deco("Wait up to {timeout} seconds for nodes on cluster to align epochs")
    def wait_for_epochs_align(self, timeout=60):
        """Retry until all nodes report the same epoch.

        NOTE(review): the positional arguments of ``wait_for_success`` after
        ``timeout`` (5, None, True) are presumably the retry interval and
        failure-handling options -- confirm against its definition.
        """

        @wait_for_success(timeout, 5, None, True)
        def check_epochs():
            epochs_by_node = epoch.get_epochs_from_nodes(self.shell, self.cluster)
            # A failed assertion is what signals "not aligned yet" to the retry wrapper.
            assert (
                len(set(epochs_by_node.values())) == 1
            ), f"unaligned epochs found: {epochs_by_node}"

        check_epochs()

    def _get_disk_controller(
        self, node: StorageNode, device: str, mountpoint: str
    ) -> DiskController:
        """Return the tracked controller for the disk, or a new one if untracked."""
        disk_controller_id = DiskController.get_id(node, device)
        if disk_controller_id in self.detached_disks:
            return self.detached_disks[disk_controller_id]
        return DiskController(node, device, mountpoint)
|
Loading…
Add table
Add a link
Reference in a new issue