Add metabase and write_cache operations

Signed-off-by: Andrey Berezin <a.berezin@yadro.com>
Andrey Berezin 2023-05-30 16:32:38 +03:00
parent a26f8e9c80
commit 2bad0f1db6
10 changed files with 152 additions and 14 deletions


@@ -16,7 +16,7 @@ classifiers = [
 ]
 keywords = ["frostfs", "test"]
 dependencies = [
-    "allure-python-commons>=2.9.45",
+    "allure-python-commons>=2.13.2",
     "docker>=4.4.0",
     "importlib_metadata>=5.0; python_version < '3.10'",
     "neo-mamba==1.0.0",


@@ -129,6 +129,12 @@ class DockerHost(Host):
             timeout=service_attributes.start_timeout,
         )
 
+    def delete_metabase(self, service_name: str) -> None:
+        raise NotImplementedError("Not implemented for docker")
+
+    def delete_write_cache(self, service_name: str) -> None:
+        raise NotImplementedError("Not implemented for docker")
+
     def delete_fstree(self, service_name: str) -> None:
         raise NotImplementedError("Not implemented for docker")


@@ -131,6 +131,26 @@ class Host(ABC):
         """
 
+    @abstractmethod
+    def delete_metabase(self, service_name: str) -> None:
+        """
+        Deletes all metabase*.db files in the node.
+
+        Args:
+            service_name: Name of storage node service.
+        """
+
+    @abstractmethod
+    def delete_write_cache(self, service_name: str) -> None:
+        """
+        Deletes all write_cache data in the node.
+
+        Args:
+            service_name: Name of storage node service.
+        """
+
     @abstractmethod
     def delete_blobovnicza(self, service_name: str) -> None:
         """


@@ -61,7 +61,7 @@ class LoadReport:
         return html
 
     def _calc_unit(self, value: float, skip_units: int = 0) -> Tuple[float, str]:
-        units = ["B", "KB", "MB", "GB", "TB"]
+        units = ["B", "KiB", "MiB", "GiB", "TiB"]
 
         for unit in units[skip_units:]:
             if value < 1024:
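The rename to binary prefixes is consistent with the 1024 divisor visible in the loop. For context, a self-contained reconstruction of such a helper (an assumption; only the lines above appear in the diff):

from typing import Tuple

def calc_unit(value: float, skip_units: int = 0) -> Tuple[float, str]:
    units = ["B", "KiB", "MiB", "GiB", "TiB"]
    for unit in units[skip_units:]:
        # Stop once the value fits within the current unit
        if value < 1024:
            return value, unit
        value = value / 1024
    # Fall back to the largest unit for very large values
    return value, units[-1]

# Example: calc_unit(3 * 1024 * 1024) returns (3.0, "MiB")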


@@ -73,6 +73,21 @@ class ClusterNode:
     def s3_gate(self) -> S3Gate:
         return self.service(S3Gate)
 
+    def get_config(self, config_file_path: str) -> dict:
+        shell = self.host.get_shell()
+
+        result = shell.exec(f"cat {config_file_path}")
+        config_text = result.stdout
+
+        config = yaml.safe_load(config_text)
+        return config
+
+    def save_config(self, new_config: dict, config_file_path: str) -> None:
+        shell = self.host.get_shell()
+
+        config_str = yaml.dump(new_config)
+        shell.exec(f"echo '{config_str}' | sudo tee {config_file_path}")
+
     def service(self, service_type: type[ServiceClass]) -> ServiceClass:
         """
         Get a service cluster node of specified type.
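A usage sketch for the new accessors (the config path and key below are hypothetical, not part of this commit):

node = cluster.cluster_nodes[0]
config = node.get_config("/etc/frostfs/storage/config.yml")  # hypothetical path
config["logger"]["level"] = "debug"  # hypothetical key
node.save_config(config, "/etc/frostfs/storage/config.yml")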


@@ -1,11 +1,11 @@
 import time
+from concurrent.futures import ThreadPoolExecutor
 
 import frostfs_testlib.resources.optionals as optionals
 from frostfs_testlib.reporter import get_reporter
 from frostfs_testlib.shell import CommandOptions, Shell
 from frostfs_testlib.steps import epoch
 from frostfs_testlib.storage.cluster import Cluster, ClusterNode, StorageNode
-from frostfs_testlib.steps.node_management import wait_for_node_to_be_ready
 from frostfs_testlib.storage.controllers.disk_controller import DiskController
 from frostfs_testlib.testing.test_control import run_optionally, wait_for_success
 from frostfs_testlib.utils.failover_utils import (
@@ -22,7 +22,7 @@ class ClusterStateController:
     def __init__(self, shell: Shell, cluster: Cluster) -> None:
         self.stopped_nodes: list[ClusterNode] = []
         self.detached_disks: dict[str, DiskController] = {}
-        self.stopped_storage_nodes: list[StorageNode] = []
+        self.stopped_storage_nodes: list[ClusterNode] = []
         self.cluster = cluster
         self.shell = shell
@@ -48,6 +48,16 @@ class ClusterStateController:
         for node in nodes:
             wait_for_host_offline(self.shell, node.storage_node)
 
+    @run_optionally(optionals.OPTIONAL_FAILOVER_ENABLED)
+    @reporter.step_deco("Stop all storage services on cluster")
+    def stop_all_storage_services(self, reversed_order: bool = False):
+        nodes = (
+            reversed(self.cluster.cluster_nodes) if reversed_order else self.cluster.cluster_nodes
+        )
+
+        for node in nodes:
+            self.stop_storage_service(node)
+
     @run_optionally(optionals.OPTIONAL_FAILOVER_ENABLED)
     @reporter.step_deco("Start host of node {node}")
     def start_node_host(self, node: ClusterNode):
@@ -92,19 +102,31 @@ class ClusterStateController:
     @reporter.step_deco("Stop storage service on {node}")
     def stop_storage_service(self, node: ClusterNode):
         node.storage_node.stop_service()
-        self.stopped_storage_nodes.append(node.storage_node)
+        self.stopped_storage_nodes.append(node)
 
     @run_optionally(optionals.OPTIONAL_FAILOVER_ENABLED)
     @reporter.step_deco("Start storage service on {node}")
     def start_storage_service(self, node: ClusterNode):
         node.storage_node.start_service()
-        self.stopped_storage_nodes.remove(node.storage_node)
+        self.stopped_storage_nodes.remove(node)
 
     @run_optionally(optionals.OPTIONAL_FAILOVER_ENABLED)
     @reporter.step_deco("Start stopped storage services")
     def start_stopped_storage_services(self):
-        for node in self.stopped_storage_nodes:
-            node.start_service()
+        if self.stopped_storage_nodes:
+            # If we stopped several services, for example s01-s04:
+            # after starting only s01, it may require connections to s02-s04, which are still down, and fail to start.
+            # Also, if something goes wrong here, we might skip starting s02-s04 entirely, leaving the cluster in a bad state.
+            # So, to make sure every service is at least attempted to be started, we use threads here.
+            with ThreadPoolExecutor(max_workers=len(self.stopped_storage_nodes)) as executor:
+                start_result = executor.map(self.start_storage_service, self.stopped_storage_nodes)
+
+            # Looks tricky, but if an exception is raised in any thread, it is "eaten" by ThreadPoolExecutor
+            # and re-raised here when the results are iterated.
+            # Not an ideal solution, but okay for now.
+            for _ in start_result:
+                pass
 
         wait_all_storage_nodes_returned(self.shell, self.cluster)
         self.stopped_storage_nodes = []
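The deferred-exception behavior of executor.map that the comment relies on can be shown in a standalone snippet:

from concurrent.futures import ThreadPoolExecutor

def start(name: str) -> None:
    if name == "s02":
        raise RuntimeError(f"{name} failed to start")

with ThreadPoolExecutor(max_workers=4) as executor:
    results = executor.map(start, ["s01", "s02", "s03", "s04"])

# All four calls are scheduled and run to completion, but a worker's
# exception is re-raised only when its result is consumed here.
for _ in results:
    pass  # raises RuntimeError("s02 failed to start")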


@@ -177,6 +177,12 @@ class StorageNode(NodeBase):
     def delete_pilorama(self):
         self.host.delete_pilorama(self.name)
 
+    def delete_metabase(self):
+        self.host.delete_metabase(self.name)
+
+    def delete_write_cache(self):
+        self.host.delete_write_cache(self.name)
+
     @property
     def label(self) -> str:
         return f"{self.name}: {self.get_rpc_endpoint()}"


@@ -1,6 +1,8 @@
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
-from typing import Optional, TypedDict, TypeVar
+from typing import Optional, Tuple, TypedDict, TypeVar
 
+import yaml
+
 from frostfs_testlib.hosting.config import ServiceConfig
 from frostfs_testlib.hosting.interfaces import Host
@@ -84,12 +86,29 @@ class NodeBase(ABC):
             ConfigAttributes.CONFIG_PATH,
         )
 
-    def get_wallet_config_path(self):
+    def get_wallet_config_path(self) -> str:
         return self._get_attribute(
             ConfigAttributes.LOCAL_WALLET_CONFIG,
             ConfigAttributes.WALLET_CONFIG,
         )
 
+    def get_config(self) -> Tuple[str, dict]:
+        config_file_path = self._get_attribute(ConfigAttributes.CONFIG_PATH)
+        shell = self.host.get_shell()
+
+        result = shell.exec(f"cat {config_file_path}")
+        config_text = result.stdout
+
+        config = yaml.safe_load(config_text)
+        return config_file_path, config
+
+    def save_config(self, new_config: dict) -> None:
+        config_file_path = self._get_attribute(ConfigAttributes.CONFIG_PATH)
+        shell = self.host.get_shell()
+
+        config_str = yaml.dump(new_config)
+        shell.exec(f"echo '{config_str}' | sudo tee {config_file_path}")
+
     def get_wallet_public_key(self):
         storage_wallet_path = self.get_wallet_path()
         storage_wallet_pass = self.get_wallet_password()
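Combined with the FileKeeper helper introduced below, these accessors support a modify-and-restore cycle; a minimal sketch (the config key is hypothetical):

config_file_path, config = node.get_config()
file_keeper.add(node, config_file_path)  # back up the original before modifying

config["storage"]["shards_ro"] = True  # hypothetical key
node.save_config(config)

# ... run the test against the modified node ...

file_keeper.restore_files()  # teardown: restore the backed-up configs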


@@ -28,8 +28,8 @@ def ping_host(shell: Shell, host: Host):
 
 @reporter.step_deco("Wait for storage nodes returned to cluster")
 def wait_all_storage_nodes_returned(shell: Shell, cluster: Cluster) -> None:
-    with reporter.step("Run health check for all storage nodes"):
-        for node in cluster.services(StorageNode):
+    for node in cluster.services(StorageNode):
+        with reporter.step(f"Run health check for storage at '{node}'"):
             wait_for_host_online(shell, node)
             wait_for_node_online(node)
@@ -56,7 +56,7 @@ def wait_for_host_offline(shell: Shell, node: StorageNode):
         return 0
 
-@retry(max_attempts=10, sleep_interval=15, expected_result=True)
+@retry(max_attempts=20, sleep_interval=30, expected_result=True)
 @reporter.step_deco("Waiting for node {node} to go online")
 def wait_for_node_online(node: StorageNode):
     try:


@@ -0,0 +1,50 @@
+from concurrent.futures import ThreadPoolExecutor
+
+from frostfs_testlib.reporter import get_reporter
+from frostfs_testlib.storage.dataclasses.node_base import NodeBase
+
+reporter = get_reporter()
+
+
+class FileKeeper:
+    """This class is responsible for making a backup copy of a modified file and restoring it when required (mostly after the test)."""
+
+    files_to_restore: dict[NodeBase, list[str]] = {}
+
+    @reporter.step_deco("Adding {file_to_restore} from node {node} to restore list")
+    def add(self, node: NodeBase, file_to_restore: str):
+        if node in self.files_to_restore and file_to_restore in self.files_to_restore[node]:
+            # Already added
+            return
+
+        if node not in self.files_to_restore:
+            self.files_to_restore[node] = []
+
+        if file_to_restore not in self.files_to_restore[node]:
+            self.files_to_restore[node].append(file_to_restore)
+
+        shell = node.host.get_shell()
+        shell.exec(f"cp {file_to_restore} {file_to_restore}.bak")
+
+    @reporter.step_deco("Restore files")
+    def restore_files(self):
+        nodes = self.files_to_restore.keys()
+        if not nodes:
+            return
+
+        with ThreadPoolExecutor(max_workers=len(nodes)) as executor:
+            results = executor.map(self._restore_files_on_node, nodes)
+
+        self.files_to_restore.clear()
+
+        for _ in results:
+            # Iterate through results to surface any exception raised in the workers
+            pass
+
+    @reporter.step_deco("Restore files on node {node}")
+    def _restore_files_on_node(self, node: NodeBase):
+        shell = node.host.get_shell()
+        for file_to_restore in self.files_to_restore[node]:
+            with reporter.step(f"Restore file {file_to_restore} on node {node}"):
+                shell.exec(f"cp {file_to_restore}.bak {file_to_restore}")
+                shell.exec(f"rm {file_to_restore}.bak")