From 9c792c091e657acfefd99e598fc1420f9fbfc73d Mon Sep 17 00:00:00 2001
From: Andrey Berezin
Date: Thu, 20 Jul 2023 21:05:49 +0300
Subject: [PATCH] Add error_threshold parameter, add error check after load

Signed-off-by: Andrey Berezin
---
 Makefile                                       | 11 ++-
 src/frostfs_testlib/load/load_config.py        |  5 +
 src/frostfs_testlib/load/load_report.py        |  1 +
 src/frostfs_testlib/load/load_verifiers.py     | 94 +++++++++++++------
 .../controllers/background_load_controller.py  | 54 ++++++-----
 5 files changed, 105 insertions(+), 60 deletions(-)

diff --git a/Makefile b/Makefile
index 9dbd86ca..365e2fc4 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,7 @@
 SHELL := /bin/bash
 PYTHON_VERSION := 3.10
-VENV_DIR := venv.frostfs-testlib
+VENV_NAME := frostfs-testlib
+VENV_DIR := venv.${VENV_NAME}
 
 current_dir := $(shell pwd)
 DIRECTORIES := $(sort $(dir $(wildcard ../frostfs-testlib-plugin-*/ ../*-testcases/)))
@@ -15,16 +16,16 @@ precommit:
 
 paths:
 	@echo Append paths for project
-	@echo Virtual environment: ${VENV_DIR}
+	@echo Virtual environment: ${current_dir}/${VENV_DIR}
 	@rm -rf ${VENV_DIR}/lib/python${PYTHON_VERSION}/site-packages/_paths.pth
 	@touch ${VENV_DIR}/lib/python${PYTHON_VERSION}/site-packages/_paths.pth
-	@echo ${current_dir}/src/frostfs_testlib_frostfs_testlib | tee ${VENV_DIR}/lib/python${PYTHON_VERSION}/site-packages/_paths.pth
+	@echo ${current_dir}/src/frostfs_testlib | tee ${VENV_DIR}/lib/python${PYTHON_VERSION}/site-packages/_paths.pth
 
 create: ${VENV_DIR}
 
 ${VENV_DIR}:
-	@echo Create virtual environment ${VENV_DIR}
-	virtualenv --python=python${PYTHON_VERSION} --prompt=frostfs-testlib ${VENV_DIR}
+	@echo Create virtual environment ${current_dir}/${VENV_DIR}
+	virtualenv --python=python${PYTHON_VERSION} --prompt=${VENV_NAME} ${VENV_DIR}
 
 requirements:
 	@echo Installing pip requirements
diff --git a/src/frostfs_testlib/load/load_config.py b/src/frostfs_testlib/load/load_config.py
index 73addf70..9a7e49cc 100644
--- a/src/frostfs_testlib/load/load_config.py
+++ b/src/frostfs_testlib/load/load_config.py
@@ -139,6 +139,11 @@ class LoadParams:
     verify: Optional[bool] = None
     # Just an id for the load, to distinguish it between runs. Filled automatically.
     load_id: Optional[str] = None
+    # Acceptable percentage of load errors:
+    # 100 means 100% errors allowed
+    # 1.5 means 1.5% errors allowed
+    # 0 means no errors allowed
+    error_threshold: Optional[float] = None
     # Working directory
     working_dir: Optional[str] = None
     # Preset for the k6 run
diff --git a/src/frostfs_testlib/load/load_report.py b/src/frostfs_testlib/load/load_report.py
index dcd81b49..fa710693 100644
--- a/src/frostfs_testlib/load/load_report.py
+++ b/src/frostfs_testlib/load/load_report.py
@@ -154,6 +154,7 @@ class LoadReport:
             <tr><th colspan="3" bgcolor="gainsboro">Errors</th></tr>
             {per_node_errors_html}
             {self._row("Total", f"{total_errors} ({total_errors/total_operations*100.0:.2f}%)")}
+            {self._row("Threshold", f"{(self.load_params.error_threshold or 0):.2f}%")}
         </tbody></table><br><hr>

""" diff --git a/src/frostfs_testlib/load/load_verifiers.py b/src/frostfs_testlib/load/load_verifiers.py index 1ff63ae3..f2a3e7eb 100644 --- a/src/frostfs_testlib/load/load_verifiers.py +++ b/src/frostfs_testlib/load/load_verifiers.py @@ -2,7 +2,9 @@ import logging from frostfs_testlib.load.load_config import LoadParams, LoadScenario from frostfs_testlib.load.load_metrics import get_metrics_object +from frostfs_testlib.reporter import get_reporter +reporter = get_reporter() logger = logging.getLogger("NeoLogger") @@ -10,54 +12,88 @@ class LoadVerifier: def __init__(self, load_params: LoadParams) -> None: self.load_params = load_params - def verify_summaries(self, load_summary, verification_summary) -> None: - exceptions = [] + def verify_load_results(self, load_summaries: dict[str, dict]): + write_operations = 0 + write_errors = 0 - if not verification_summary or not load_summary: - logger.info("Can't check load results due to missing summary") + read_operations = 0 + read_errors = 0 - load_metrics = get_metrics_object(self.load_params.scenario, load_summary) + delete_operations = 0 + delete_errors = 0 writers = self.load_params.writers or self.load_params.preallocated_writers or 0 readers = self.load_params.readers or self.load_params.preallocated_readers or 0 deleters = self.load_params.deleters or self.load_params.preallocated_deleters or 0 - objects_count = load_metrics.write_success_iterations - fails_count = load_metrics.write_failed_iterations + for load_summary in load_summaries.values(): + metrics = get_metrics_object(self.load_params.scenario, load_summary) - if writers > 0: - if objects_count < 1: - exceptions.append("Total put objects should be greater than 0") - if fails_count > 0: - exceptions.append(f"There were {fails_count} failed write operations") + if writers: + write_operations += metrics.write_total_iterations + write_errors += metrics.write_failed_iterations - if readers > 0: - read_count = load_metrics.read_success_iterations - read_fails_count = load_metrics.read_failed_iterations - if read_count < 1: - exceptions.append("Total read operations should be greater than 0") - if read_fails_count > 0: - exceptions.append(f"There were {read_fails_count} failed read operations") + if readers: + read_operations += metrics.read_total_iterations + read_errors += metrics.read_failed_iterations + + if deleters: + delete_operations += metrics.delete_total_iterations + delete_errors += metrics.delete_failed_iterations + + exceptions = [] + if writers and not write_operations: + exceptions.append(f"No any write operation was performed") + if readers and not read_operations: + exceptions.append(f"No any read operation was performed") + if deleters and not delete_operations: + exceptions.append(f"No any delete operation was performed") + + if writers and write_errors / write_operations * 100 > self.load_params.error_threshold: + exceptions.append( + f"Write error rate is greater than threshold: {write_errors / write_operations * 100} > {self.load_params.error_threshold}" + ) + if readers and read_errors / read_operations * 100 > self.load_params.error_threshold: + exceptions.append( + f"Read error rate is greater than threshold: {read_errors / read_operations * 100} > {self.load_params.error_threshold}" + ) + if deleters and delete_errors / delete_operations * 100 > self.load_params.error_threshold: + exceptions.append( + f"Delete error rate is greater than threshold: {delete_errors / delete_operations * 100} > {self.load_params.error_threshold}" + ) + + assert not exceptions, 
"\n".join(exceptions) + + def check_verify_results(self, load_summaries, verification_summaries) -> None: + for node_or_endpoint in load_summaries: + with reporter.step(f"Check verify scenario results for {node_or_endpoint}"): + self._check_verify_result( + load_summaries[node_or_endpoint], verification_summaries[node_or_endpoint] + ) + + def _check_verify_result(self, load_summary, verification_summary) -> None: + exceptions = [] + + load_metrics = get_metrics_object(self.load_params.scenario, load_summary) + + writers = self.load_params.writers or self.load_params.preallocated_writers or 0 + deleters = self.load_params.deleters or self.load_params.preallocated_deleters or 0 + + delete_success = 0 if deleters > 0: - delete_count = load_metrics.delete_success_iterations - delete_fails_count = load_metrics.delete_failed_iterations - if delete_count < 1: - exceptions.append("Total delete operations should be greater than 0") - if delete_fails_count > 0: - exceptions.append(f"There were {delete_fails_count} failed delete operations") + delete_success = load_metrics.delete_success_iterations if verification_summary: verify_metrics = get_metrics_object(LoadScenario.VERIFY, verification_summary) verified_objects = verify_metrics.read_success_iterations invalid_objects = verify_metrics.read_failed_iterations + total_left_objects = load_metrics.write_success_iterations - delete_success - if invalid_objects > 0: - exceptions.append(f"There were {invalid_objects} verification fails") # Due to interruptions we may see total verified objects to be less than written on writers count - if abs(objects_count - verified_objects) > writers: + if abs(total_left_objects - verified_objects) > writers: exceptions.append( - f"Verified objects mismatch. Total: {objects_count}, Verified: {verified_objects}. Writers: {writers}." + f"Verified objects mismatch. Total: {total_left_objects}, Verified: {verified_objects}. Writers: {writers}." 
                 )
 
         assert not exceptions, "\n".join(exceptions)
diff --git a/src/frostfs_testlib/storage/controllers/background_load_controller.py b/src/frostfs_testlib/storage/controllers/background_load_controller.py
index 58a7a6f1..91cb1af3 100644
--- a/src/frostfs_testlib/storage/controllers/background_load_controller.py
+++ b/src/frostfs_testlib/storage/controllers/background_load_controller.py
@@ -9,6 +9,7 @@ from frostfs_testlib.load.load_config import (
     LoadScenario,
     LoadType,
 )
+from frostfs_testlib.load.load_metrics import get_metrics_object
 from frostfs_testlib.load.load_report import LoadReport
 from frostfs_testlib.load.load_verifiers import LoadVerifier
 from frostfs_testlib.reporter import get_reporter
@@ -151,54 +152,55 @@ class BackgroundLoadController:
         load_report.add_summaries(self.load_summaries)
 
     @run_optionally(optionals.OPTIONAL_BACKGROUND_LOAD_ENABLED)
-    @reporter.step_deco("Verify results of load")
+    @reporter.step_deco("Run post-load verification")
     def verify(self):
         try:
+            self._verify_load_results()
             if self.load_params.verify:
-                self.verification_params = LoadParams(
-                    verify_clients=self.load_params.verify_clients,
-                    scenario=LoadScenario.VERIFY,
-                    registry_file=self.load_params.registry_file,
-                    verify_time=self.load_params.verify_time,
-                    load_type=self.load_params.load_type,
-                    load_id=self.load_params.load_id,
-                    working_dir=self.load_params.working_dir,
-                    endpoint_selection_strategy=self.load_params.endpoint_selection_strategy,
-                    k6_process_allocation_strategy=self.load_params.k6_process_allocation_strategy,
-                    setup_timeout="1s",
-                )
                 self._run_verify_scenario()
-                verification_summaries = self._get_results()
-                self.verify_summaries(self.load_summaries, verification_summaries)
         finally:
             self._reset_for_consequent_load()
 
     @run_optionally(optionals.OPTIONAL_BACKGROUND_LOAD_ENABLED)
-    @reporter.step_deco("Verify summaries from k6")
-    def verify_summaries(self, load_summaries: dict, verification_summaries: dict):
+    @reporter.step_deco("Verify load results")
+    def _verify_load_results(self):
         verifier = LoadVerifier(self.load_params)
-        for node_or_endpoint in load_summaries:
-            with reporter.step(f"Verify load summaries for {node_or_endpoint}"):
-                verifier.verify_summaries(
-                    load_summaries[node_or_endpoint], verification_summaries[node_or_endpoint]
-                )
+        verifier.verify_load_results(self.load_summaries)
 
     @run_optionally(optionals.OPTIONAL_BACKGROUND_LOAD_ENABLED)
     def wait_until_finish(self):
         self.runner.wait_until_finish()
 
     @run_optionally(optionals.OPTIONAL_BACKGROUND_LOAD_ENABLED)
-    @reporter.step_deco("Run verify scenario")
+    @reporter.step_deco("Verify loaded objects")
     def _run_verify_scenario(self):
+        self.verification_params = LoadParams(
+            verify_clients=self.load_params.verify_clients,
+            scenario=LoadScenario.VERIFY,
+            registry_file=self.load_params.registry_file,
+            verify_time=self.load_params.verify_time,
+            load_type=self.load_params.load_type,
+            load_id=self.load_params.load_id,
+            working_dir=self.load_params.working_dir,
+            endpoint_selection_strategy=self.load_params.endpoint_selection_strategy,
+            k6_process_allocation_strategy=self.load_params.k6_process_allocation_strategy,
+            setup_timeout="1s",
+        )
+
         if self.verification_params.verify_time is None:
             raise RuntimeError("verify_time should not be None")
 
         self.runner.init_k6_instances(self.verification_params, self.endpoints, self.k6_dir)
-        with reporter.step("Run verify load data"):
+        with reporter.step("Run verify scenario"):
            self.runner.start()
            self.runner.wait_until_finish()
 
+        with reporter.step("Check verify results"):
+            verification_summaries = self._get_results()
+            verifier = LoadVerifier(self.load_params)
+            verifier.check_verify_results(self.load_summaries, verification_summaries)
+
     @run_optionally(optionals.OPTIONAL_BACKGROUND_LOAD_ENABLED)
-    @reporter.step_deco("Get load results")
     def _get_results(self) -> dict:
-        return self.runner.get_results()
+        with reporter.step(f"Get {self.load_params.scenario.value} scenario results"):
+            return self.runner.get_results()
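
A note on the semantics introduced above: an operation type fails verification when its
error percentage exceeds error_threshold, while a configured-but-idle operation type
(e.g. writers set, but zero write iterations) is reported separately rather than passing
as a 0% error rate. A minimal self-contained sketch of the threshold rule (toy code for
illustration, not the library's API):

    def exceeds_threshold(errors: int, operations: int, error_threshold: float) -> bool:
        """True if the error rate (in percent) is above the allowed threshold."""
        if not operations:
            return False  # "no operations at all" is a separate check in the verifier
        return errors / operations * 100 > error_threshold

    assert exceeds_threshold(2, 100, 1.5)      # 2.00% > 1.5%: the run fails
    assert not exceeds_threshold(1, 100, 1.5)  # 1.00% <= 1.5%: acceptable
    assert exceeds_threshold(1, 1000, 0)       # 0 means no errors allowed
    assert not exceeds_threshold(0, 1000, 0)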
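
Note also that verify_load_results sums operations and errors across all k6 summaries
before applying the threshold, so the check is global rather than per endpoint: one bad
endpoint can fail a run whose other endpoints are individually under the limit. A toy
illustration (the dict layout and numbers are invented for this example; real summaries
are raw k6 output read through get_metrics_object):

    # "endpoint-a" alone is at 0.5% write errors, "endpoint-b" at 4.0%
    summaries = {
        "endpoint-a": {"write_total": 1000, "write_failed": 5},
        "endpoint-b": {"write_total": 1000, "write_failed": 40},
    }
    ERROR_THRESHOLD = 1.5  # percent, mirroring LoadParams.error_threshold

    write_operations = sum(s["write_total"] for s in summaries.values())
    write_errors = sum(s["write_failed"] for s in summaries.values())

    rate = write_errors / write_operations * 100  # 45 / 2000 * 100 = 2.25
    assert rate > ERROR_THRESHOLD  # aggregate 2.25% > 1.5%: the run would fail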