From 9c792c091e657acfefd99e598fc1420f9fbfc73d Mon Sep 17 00:00:00 2001
From: Andrey Berezin
Date: Thu, 20 Jul 2023 21:05:49 +0300
Subject: [PATCH] Add error_threshold parameter, add error check after load

Signed-off-by: Andrey Berezin
---
 Makefile                                       | 11 ++-
 src/frostfs_testlib/load/load_config.py        |  5 +
 src/frostfs_testlib/load/load_report.py        |  1 +
 src/frostfs_testlib/load/load_verifiers.py     | 94 +++++++++++++------
 .../controllers/background_load_controller.py  | 54 ++++++-----
 5 files changed, 105 insertions(+), 60 deletions(-)

diff --git a/Makefile b/Makefile
index 9dbd86ca..365e2fc4 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,7 @@
 SHELL := /bin/bash
 PYTHON_VERSION := 3.10
-VENV_DIR := venv.frostfs-testlib
+VENV_NAME := frostfs-testlib
+VENV_DIR := venv.${VENV_NAME}
 
 current_dir := $(shell pwd)
 DIRECTORIES := $(sort $(dir $(wildcard ../frostfs-testlib-plugin-*/ ../*-testcases/)))
@@ -15,16 +16,16 @@ precommit:
 
 paths:
 	@echo Append paths for project
-	@echo Virtual environment: ${VENV_DIR}
+	@echo Virtual environment: ${current_dir}/${VENV_DIR}
 	@rm -rf ${VENV_DIR}/lib/python${PYTHON_VERSION}/site-packages/_paths.pth
 	@touch ${VENV_DIR}/lib/python${PYTHON_VERSION}/site-packages/_paths.pth
-	@echo ${current_dir}/src/frostfs_testlib_frostfs_testlib | tee ${VENV_DIR}/lib/python${PYTHON_VERSION}/site-packages/_paths.pth
+	@echo ${current_dir}/src/frostfs_testlib | tee ${VENV_DIR}/lib/python${PYTHON_VERSION}/site-packages/_paths.pth
 
 create: ${VENV_DIR}
 
 ${VENV_DIR}:
-	@echo Create virtual environment ${VENV_DIR}
-	virtualenv --python=python${PYTHON_VERSION} --prompt=frostfs-testlib ${VENV_DIR}
+	@echo Create virtual environment ${current_dir}/${VENV_DIR}
+	virtualenv --python=python${PYTHON_VERSION} --prompt=${VENV_NAME} ${VENV_DIR}
 
 requirements:
 	@echo Installing pip requirements
diff --git a/src/frostfs_testlib/load/load_config.py b/src/frostfs_testlib/load/load_config.py
index 73addf70..9a7e49cc 100644
--- a/src/frostfs_testlib/load/load_config.py
+++ b/src/frostfs_testlib/load/load_config.py
@@ -139,6 +139,11 @@ class LoadParams:
     verify: Optional[bool] = None
     # Just an id for the load, to distinguish it between runs. Filled automatically.
     load_id: Optional[str] = None
+    # Acceptable percentage of load errors:
+    # 100 means 100% errors allowed
+    # 1.5 means 1.5% errors allowed
+    # 0 means no errors allowed
+    error_threshold: Optional[float] = None
     # Working directory
     working_dir: Optional[str] = None
     # Preset for the k6 run
diff --git a/src/frostfs_testlib/load/load_report.py b/src/frostfs_testlib/load/load_report.py
index dcd81b49..fa710693 100644
--- a/src/frostfs_testlib/load/load_report.py
+++ b/src/frostfs_testlib/load/load_report.py
@@ -154,6 +154,7 @@ class LoadReport:
             <tr><th colspan="3" bgcolor="gainsboro">Errors</th></tr>
             {per_node_errors_html}
             {self._row("Total", f"{total_errors} ({total_errors/total_operations*100.0:.2f}%)")}
+            {self._row("Threshold", f"{(self.load_params.error_threshold or 0):.2f}%")}
         </tbody></table><br><hr>

""" diff --git a/src/frostfs_testlib/load/load_verifiers.py b/src/frostfs_testlib/load/load_verifiers.py index 1ff63ae3..f2a3e7eb 100644 --- a/src/frostfs_testlib/load/load_verifiers.py +++ b/src/frostfs_testlib/load/load_verifiers.py @@ -2,7 +2,9 @@ import logging from frostfs_testlib.load.load_config import LoadParams, LoadScenario from frostfs_testlib.load.load_metrics import get_metrics_object +from frostfs_testlib.reporter import get_reporter +reporter = get_reporter() logger = logging.getLogger("NeoLogger") @@ -10,54 +12,88 @@ class LoadVerifier: def __init__(self, load_params: LoadParams) -> None: self.load_params = load_params - def verify_summaries(self, load_summary, verification_summary) -> None: - exceptions = [] + def verify_load_results(self, load_summaries: dict[str, dict]): + write_operations = 0 + write_errors = 0 - if not verification_summary or not load_summary: - logger.info("Can't check load results due to missing summary") + read_operations = 0 + read_errors = 0 - load_metrics = get_metrics_object(self.load_params.scenario, load_summary) + delete_operations = 0 + delete_errors = 0 writers = self.load_params.writers or self.load_params.preallocated_writers or 0 readers = self.load_params.readers or self.load_params.preallocated_readers or 0 deleters = self.load_params.deleters or self.load_params.preallocated_deleters or 0 - objects_count = load_metrics.write_success_iterations - fails_count = load_metrics.write_failed_iterations + for load_summary in load_summaries.values(): + metrics = get_metrics_object(self.load_params.scenario, load_summary) - if writers > 0: - if objects_count < 1: - exceptions.append("Total put objects should be greater than 0") - if fails_count > 0: - exceptions.append(f"There were {fails_count} failed write operations") + if writers: + write_operations += metrics.write_total_iterations + write_errors += metrics.write_failed_iterations - if readers > 0: - read_count = load_metrics.read_success_iterations - read_fails_count = load_metrics.read_failed_iterations - if read_count < 1: - exceptions.append("Total read operations should be greater than 0") - if read_fails_count > 0: - exceptions.append(f"There were {read_fails_count} failed read operations") + if readers: + read_operations += metrics.read_total_iterations + read_errors += metrics.read_failed_iterations + + if deleters: + delete_operations += metrics.delete_total_iterations + delete_errors += metrics.delete_failed_iterations + + exceptions = [] + if writers and not write_operations: + exceptions.append(f"No any write operation was performed") + if readers and not read_operations: + exceptions.append(f"No any read operation was performed") + if deleters and not delete_operations: + exceptions.append(f"No any delete operation was performed") + + if writers and write_errors / write_operations * 100 > self.load_params.error_threshold: + exceptions.append( + f"Write error rate is greater than threshold: {write_errors / write_operations * 100} > {self.load_params.error_threshold}" + ) + if readers and read_errors / read_operations * 100 > self.load_params.error_threshold: + exceptions.append( + f"Read error rate is greater than threshold: {read_errors / read_operations * 100} > {self.load_params.error_threshold}" + ) + if deleters and delete_errors / delete_operations * 100 > self.load_params.error_threshold: + exceptions.append( + f"Delete error rate is greater than threshold: {delete_errors / delete_operations * 100} > {self.load_params.error_threshold}" + ) + + assert not exceptions, 
"\n".join(exceptions) + + def check_verify_results(self, load_summaries, verification_summaries) -> None: + for node_or_endpoint in load_summaries: + with reporter.step(f"Check verify scenario results for {node_or_endpoint}"): + self._check_verify_result( + load_summaries[node_or_endpoint], verification_summaries[node_or_endpoint] + ) + + def _check_verify_result(self, load_summary, verification_summary) -> None: + exceptions = [] + + load_metrics = get_metrics_object(self.load_params.scenario, load_summary) + + writers = self.load_params.writers or self.load_params.preallocated_writers or 0 + deleters = self.load_params.deleters or self.load_params.preallocated_deleters or 0 + + delete_success = 0 if deleters > 0: - delete_count = load_metrics.delete_success_iterations - delete_fails_count = load_metrics.delete_failed_iterations - if delete_count < 1: - exceptions.append("Total delete operations should be greater than 0") - if delete_fails_count > 0: - exceptions.append(f"There were {delete_fails_count} failed delete operations") + delete_success = load_metrics.delete_success_iterations if verification_summary: verify_metrics = get_metrics_object(LoadScenario.VERIFY, verification_summary) verified_objects = verify_metrics.read_success_iterations invalid_objects = verify_metrics.read_failed_iterations + total_left_objects = load_metrics.write_success_iterations - delete_success - if invalid_objects > 0: - exceptions.append(f"There were {invalid_objects} verification fails") # Due to interruptions we may see total verified objects to be less than written on writers count - if abs(objects_count - verified_objects) > writers: + if abs(total_left_objects - verified_objects) > writers: exceptions.append( - f"Verified objects mismatch. Total: {objects_count}, Verified: {verified_objects}. Writers: {writers}." + f"Verified objects mismatch. Total: {total_left_objects}, Verified: {verified_objects}. Writers: {writers}." 
                 )
 
         assert not exceptions, "\n".join(exceptions)
diff --git a/src/frostfs_testlib/storage/controllers/background_load_controller.py b/src/frostfs_testlib/storage/controllers/background_load_controller.py
index 58a7a6f1..91cb1af3 100644
--- a/src/frostfs_testlib/storage/controllers/background_load_controller.py
+++ b/src/frostfs_testlib/storage/controllers/background_load_controller.py
@@ -9,6 +9,7 @@ from frostfs_testlib.load.load_config import (
     LoadScenario,
     LoadType,
 )
+from frostfs_testlib.load.load_metrics import get_metrics_object
 from frostfs_testlib.load.load_report import LoadReport
 from frostfs_testlib.load.load_verifiers import LoadVerifier
 from frostfs_testlib.reporter import get_reporter
@@ -151,54 +152,55 @@ class BackgroundLoadController:
         load_report.add_summaries(self.load_summaries)
 
     @run_optionally(optionals.OPTIONAL_BACKGROUND_LOAD_ENABLED)
-    @reporter.step_deco("Verify results of load")
+    @reporter.step_deco("Run post-load verification")
     def verify(self):
         try:
+            self._verify_load_results()
             if self.load_params.verify:
-                self.verification_params = LoadParams(
-                    verify_clients=self.load_params.verify_clients,
-                    scenario=LoadScenario.VERIFY,
-                    registry_file=self.load_params.registry_file,
-                    verify_time=self.load_params.verify_time,
-                    load_type=self.load_params.load_type,
-                    load_id=self.load_params.load_id,
-                    working_dir=self.load_params.working_dir,
-                    endpoint_selection_strategy=self.load_params.endpoint_selection_strategy,
-                    k6_process_allocation_strategy=self.load_params.k6_process_allocation_strategy,
-                    setup_timeout="1s",
-                )
                 self._run_verify_scenario()
-                verification_summaries = self._get_results()
-                self.verify_summaries(self.load_summaries, verification_summaries)
         finally:
             self._reset_for_consequent_load()
 
     @run_optionally(optionals.OPTIONAL_BACKGROUND_LOAD_ENABLED)
-    @reporter.step_deco("Verify summaries from k6")
-    def verify_summaries(self, load_summaries: dict, verification_summaries: dict):
+    @reporter.step_deco("Verify load results")
+    def _verify_load_results(self):
         verifier = LoadVerifier(self.load_params)
-        for node_or_endpoint in load_summaries:
-            with reporter.step(f"Verify load summaries for {node_or_endpoint}"):
-                verifier.verify_summaries(
-                    load_summaries[node_or_endpoint], verification_summaries[node_or_endpoint]
-                )
+        verifier.verify_load_results(self.load_summaries)
 
     @run_optionally(optionals.OPTIONAL_BACKGROUND_LOAD_ENABLED)
     def wait_until_finish(self):
         self.runner.wait_until_finish()
 
     @run_optionally(optionals.OPTIONAL_BACKGROUND_LOAD_ENABLED)
-    @reporter.step_deco("Run verify scenario")
+    @reporter.step_deco("Verify loaded objects")
     def _run_verify_scenario(self):
+        self.verification_params = LoadParams(
+            verify_clients=self.load_params.verify_clients,
+            scenario=LoadScenario.VERIFY,
+            registry_file=self.load_params.registry_file,
+            verify_time=self.load_params.verify_time,
+            load_type=self.load_params.load_type,
+            load_id=self.load_params.load_id,
+            working_dir=self.load_params.working_dir,
+            endpoint_selection_strategy=self.load_params.endpoint_selection_strategy,
+            k6_process_allocation_strategy=self.load_params.k6_process_allocation_strategy,
+            setup_timeout="1s",
+        )
+
         if self.verification_params.verify_time is None:
             raise RuntimeError("verify_time should not be None")
 
         self.runner.init_k6_instances(self.verification_params, self.endpoints, self.k6_dir)
-        with reporter.step("Run verify load data"):
+        with reporter.step("Run verify scenario"):
            self.runner.start()
            self.runner.wait_until_finish()
 
+        with reporter.step("Check verify results"):
+            verification_summaries = self._get_results()
+            verifier = LoadVerifier(self.load_params)
+            verifier.check_verify_results(self.load_summaries, verification_summaries)
+
     @run_optionally(optionals.OPTIONAL_BACKGROUND_LOAD_ENABLED)
-    @reporter.step_deco("Get load results")
     def _get_results(self) -> dict:
-        return self.runner.get_results()
+        with reporter.step(f"Get {self.load_params.scenario.value} scenario results"):
+            return self.runner.get_results()
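
A note on the semantics introduced above: an operation type fails verification when its
error percentage exceeds error_threshold, while a configured-but-idle operation type
(e.g. writers set, but zero write iterations) is reported separately rather than passing
as a 0% error rate. A minimal self-contained sketch of the threshold rule (toy code for
illustration, not the library's API):

    def exceeds_threshold(errors: int, operations: int, error_threshold: float) -> bool:
        """True if the error rate (in percent) is above the allowed threshold."""
        if not operations:
            return False  # "no operations at all" is a separate check in the verifier
        return errors / operations * 100 > error_threshold

    assert exceeds_threshold(2, 100, 1.5)      # 2.00% > 1.5%: the run fails
    assert not exceeds_threshold(1, 100, 1.5)  # 1.00% <= 1.5%: acceptable
    assert exceeds_threshold(1, 1000, 0)       # 0 means no errors allowed
    assert not exceeds_threshold(0, 1000, 0)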
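
Note also that verify_load_results sums operations and errors across all k6 summaries
before applying the threshold, so the check is global rather than per endpoint: one bad
endpoint can fail a run whose other endpoints are individually under the limit. A toy
illustration (the dict layout and numbers are invented for this example; real summaries
are raw k6 output read through get_metrics_object):

    # "endpoint-a" alone is at 0.5% write errors, "endpoint-b" at 4.0%
    summaries = {
        "endpoint-a": {"write_total": 1000, "write_failed": 5},
        "endpoint-b": {"write_total": 1000, "write_failed": 40},
    }
    ERROR_THRESHOLD = 1.5  # percent, mirroring LoadParams.error_threshold

    write_operations = sum(s["write_total"] for s in summaries.values())
    write_errors = sum(s["write_failed"] for s in summaries.values())

    rate = write_errors / write_operations * 100  # 45 / 2000 * 100 = 2.25
    assert rate > ERROR_THRESHOLD  # aggregate 2.25% > 1.5%: the run would fail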