From a0587438c4ba1e6ccce2a1bc2f42a66c467a8d61 Mon Sep 17 00:00:00 2001
From: Andrey Berezin
Date: Wed, 31 May 2023 18:31:35 +0300
Subject: [PATCH] Add write-cache loss test

Signed-off-by: Andrey Berezin
---
 pytest_tests/testsuites/failovers/conftest.py |  37 +++++
 .../failovers/test_failover_storage.py        | 120 +++++++++++++++++-
 2 files changed, 154 insertions(+), 3 deletions(-)
 create mode 100644 pytest_tests/testsuites/failovers/conftest.py

diff --git a/pytest_tests/testsuites/failovers/conftest.py b/pytest_tests/testsuites/failovers/conftest.py
new file mode 100644
index 0000000..b1f0695
--- /dev/null
+++ b/pytest_tests/testsuites/failovers/conftest.py
@@ -0,0 +1,37 @@
+import random
+from datetime import datetime
+
+import allure
+import pytest
+from frostfs_testlib.storage.cluster import Cluster, ClusterNode
+from frostfs_testlib.storage.controllers import ShardsWatcher
+
+
+@pytest.fixture()
+@allure.title("Select random node for testing")
+def node_under_test(cluster: Cluster) -> ClusterNode:
+    """Pick one node of ``cluster`` at random for the test to act on.
+
+    The string form of the chosen node is attached to the Allure report
+    under the name "Selected node".
+    """
+    selected_node = random.choice(cluster.cluster_nodes)
+    allure.attach(f"{selected_node}", "Selected node", allure.attachment_type.TEXT)
+    return selected_node
+
+
+@pytest.fixture()
+@allure.title("Provide Shards watcher")
+def shards_watcher(node_under_test: ClusterNode) -> ShardsWatcher:
+    """Return a ``ShardsWatcher`` constructed for ``node_under_test``."""
+    watcher = ShardsWatcher(node_under_test)
+    return watcher
+
+
+@pytest.fixture()
+@allure.title("Test start time")
+def test_start_time() -> datetime:
+    """Return the time at which this fixture was set up, from ``datetime.utcnow()``."""
+    # utcnow() returns a naive datetime (no tzinfo attached).
+    start_time = datetime.utcnow()
+    return start_time
diff --git a/pytest_tests/testsuites/failovers/test_failover_storage.py b/pytest_tests/testsuites/failovers/test_failover_storage.py
index 3ab542b..ae40e0f 100644
--- a/pytest_tests/testsuites/failovers/test_failover_storage.py
+++ b/pytest_tests/testsuites/failovers/test_failover_storage.py
@@ -1,4 +1,5 @@
 import logging
+from datetime import datetime
 from time import sleep
 
 import allure
@@ -9,7 +10,11 @@ from frostfs_testlib.resources.common import MORPH_BLOCK_TIME
 from frostfs_testlib.resources.wellknown_acl import PUBLIC_ACL
 from frostfs_testlib.s3 import AwsCliClient, Boto3ClientWrapper, S3ClientWrapper, VersioningStatus
 from frostfs_testlib.shell import CommandOptions, Shell
-from frostfs_testlib.steps.cli.container import create_container
+from frostfs_testlib.steps.cli.container import (
+    StorageContainer,
+    StorageContainerInfo,
+    create_container,
+)
 from frostfs_testlib.steps.cli.object import get_object, put_object_to_random_node
 from frostfs_testlib.steps.node_management import (
     check_node_in_map,
@@ -20,8 +25,10 @@ from frostfs_testlib.steps.node_management import (
     wait_for_node_to_be_ready,
 )
 from frostfs_testlib.steps.s3 import s3_helper
-from frostfs_testlib.storage.cluster import Cluster, StorageNode
-from frostfs_testlib.storage.controllers import ClusterStateController
+from frostfs_testlib.storage.cluster import Cluster, ClusterNode, StorageNode
+from frostfs_testlib.storage.controllers import ClusterStateController, ShardsWatcher
+from frostfs_testlib.storage.dataclasses.storage_object_info import StorageObjectInfo
+from frostfs_testlib.storage.dataclasses.wallet import WalletInfo
 from frostfs_testlib.testing.cluster_test_base import ClusterTestBase
 from frostfs_testlib.testing.test_control import expect_not_raises
 from frostfs_testlib.utils import datetime_utils
@@ -580,3 +587,110 @@ class TestStorageDataLoss(ClusterTestBase):
         with allure.step("Delete bucket"):
             with expect_not_raises():
                 s3_client.delete_bucket(bucket)
+
+    @allure.title(
+        "Write cache loss on one node should not affect shards and should not produce errors in log"
+    )
+    @pytest.mark.write_cache_loss
+    def test_write_cache_loss_on_one_node(
+        self,
+        node_under_test: ClusterNode,
+        simple_object_size: int,
+        cluster_state_controller: ClusterStateController,
+        shards_watcher: ShardsWatcher,
+        default_wallet: str,
+        test_start_time: datetime,
+        after_run_return_all_stopped_services: str,
+    ):
+        """Check that losing the write cache of one node is harmless.
+
+        Steps: create a container whose placement rule filters on the
+        UN-LOCODE of ``node_under_test``, put five objects through that node,
+        take a shards snapshot, stop the storage service, delete the write
+        cache, start the service again, then verify that every object can be
+        fetched from the node, that the watcher reports no shards with new
+        errors, that every shard mode is "read-write", and that the host
+        logs contain no "no such file or directory" message since
+        ``test_start_time``.
+
+        The last three checks only record a message on failure; all recorded
+        messages are asserted together in the final step.
+        """
+        # Collected failure descriptions; asserted once at the end of the test.
+        exception_messages = []
+        # NOTE(review): the value of this fixture is defined outside this patch;
+        # presumably a human-readable note for the report - confirm.
+        allure.dynamic.description(after_run_return_all_stopped_services)
+
+        with allure.step(f"Create container on node {node_under_test}"):
+            locode = node_under_test.storage_node.get_un_locode()
+            placement_rule = f"""REP 1 IN X
+            CBF 1
+            SELECT 1 FROM C AS X
+            FILTER 'UN-LOCODE' EQ '{locode}' AS C"""
+            cid = create_container(
+                default_wallet,
+                self.shell,
+                node_under_test.storage_node.get_rpc_endpoint(),
+                rule=placement_rule,
+            )
+            container = StorageContainer(
+                StorageContainerInfo(cid, WalletInfo(default_wallet)),
+                self.shell,
+                cluster_state_controller.cluster,
+            )
+
+        with allure.step(f"Put couple objects to container on node {node_under_test}"):
+            storage_objects: list[StorageObjectInfo] = []
+            for _ in range(5):
+                storage_object = container.generate_object(
+                    simple_object_size, endpoint=node_under_test.storage_node.get_rpc_endpoint()
+                )
+                storage_objects.append(storage_object)
+
+        with allure.step("Take shards snapshot"):
+            shards_watcher.take_shards_snapshot()
+
+        with allure.step(f"Stop storage service on node {node_under_test}"):
+            cluster_state_controller.stop_storage_service(node_under_test)
+
+        with allure.step(f"Delete write cache from node {node_under_test}"):
+            node_under_test.storage_node.delete_write_cache()
+
+        with allure.step(f"Start storage service on node {node_under_test}"):
+            cluster_state_controller.start_storage_service(node_under_test)
+
+        with allure.step("Objects should be available"):
+            for storage_object in storage_objects:
+                get_object(
+                    storage_object.wallet_file_path,
+                    container.get_id(),
+                    storage_object.oid,
+                    self.shell,
+                    node_under_test.storage_node.get_rpc_endpoint(),
+                )
+
+        with allure.step("No shards should have new errors"):
+            # Second snapshot, taken after the restart; presumably compared against
+            # the earlier one by get_shards_with_new_errors() - confirm in ShardsWatcher.
+            shards_watcher.take_shards_snapshot()
+            shards_with_errors = shards_watcher.get_shards_with_new_errors()
+            if shards_with_errors:
+                exception_messages.append(f"Shards have new errors: {shards_with_errors}")
+
+        with allure.step("No shards should have degraded status"):
+            snapshot = shards_watcher.get_shards_snapshot()
+            for shard in snapshot:
+                status = snapshot[shard]["mode"]
+                if status != "read-write":
+                    exception_messages.append(f"Shard {shard} changed status to {status}")
+
+        with allure.step("No related errors should be in log"):
+            # The \W on each side requires a non-word character around the phrase.
+            if node_under_test.host.is_message_in_logs(
+                message_regex=r"\Wno such file or directory\W", since=test_start_time
+            ):
+                exception_messages.append(f"Node {node_under_test} have shard errors in logs")
+
+        with allure.step("Pass test if no errors found"):
+            assert not exception_messages, "\n".join(exception_messages)