Add write-cache loss test #51
2 changed files with 126 additions and 3 deletions
29
pytest_tests/testsuites/failovers/conftest.py
Normal file
29
pytest_tests/testsuites/failovers/conftest.py
Normal file
|
@ -0,0 +1,29 @@
|
|||
import random
|
||||
from datetime import datetime
|
||||
|
||||
import allure
|
||||
import pytest
|
||||
from frostfs_testlib.storage.cluster import Cluster, ClusterNode
|
||||
from frostfs_testlib.storage.controllers import ShardsWatcher
|
||||
|
||||
|
||||
@pytest.fixture()
@allure.title("Select random node for testing")
def node_under_test(cluster: Cluster) -> ClusterNode:
    """Pick one node of the cluster at random and record it in the report.

    Args:
        cluster: Cluster whose ``cluster_nodes`` are the candidates.

    Returns:
        The randomly chosen cluster node.
    """
    chosen = random.choice(cluster.cluster_nodes)
    # Attach the string form of the node so the report shows which one was used.
    allure.attach(str(chosen), "Selected node", allure.attachment_type.TEXT)
    return chosen
|
||||
|
||||
|
||||
@pytest.fixture()
@allure.title("Provide Shards watcher")
def shards_watcher(node_under_test: ClusterNode) -> ShardsWatcher:
    """Build a ``ShardsWatcher`` bound to the node selected for the test.

    Args:
        node_under_test: Node provided by the ``node_under_test`` fixture.

    Returns:
        A new ``ShardsWatcher`` constructed for that node.
    """
    return ShardsWatcher(node_under_test)
|
||||
|
||||
|
||||
@pytest.fixture()
@allure.title("Test start time")
def test_start_time() -> datetime:
    """Return the current UTC time as a naive ``datetime``.

    The fixture is evaluated when a test requests it, so the value marks the
    point from which that test may look for events (e.g. log messages).

    Returns:
        Naive ``datetime`` (``tzinfo`` is ``None``) expressed in UTC.
    """
    # Local import: the module-level import only brings in ``datetime``.
    from datetime import timezone

    # ``datetime.utcnow()`` is deprecated since Python 3.12. Take an aware UTC
    # timestamp and drop tzinfo so consumers keep receiving the same naive
    # UTC value they got before.
    return datetime.now(timezone.utc).replace(tzinfo=None)
|
|
@ -1,4 +1,5 @@
|
|||
import logging
|
||||
from datetime import datetime
|
||||
from time import sleep
|
||||
|
||||
import allure
|
||||
|
@ -9,7 +10,11 @@ from frostfs_testlib.resources.common import MORPH_BLOCK_TIME
|
|||
from frostfs_testlib.resources.wellknown_acl import PUBLIC_ACL
|
||||
from frostfs_testlib.s3 import AwsCliClient, Boto3ClientWrapper, S3ClientWrapper, VersioningStatus
|
||||
from frostfs_testlib.shell import CommandOptions, Shell
|
||||
from frostfs_testlib.steps.cli.container import create_container
|
||||
from frostfs_testlib.steps.cli.container import (
|
||||
StorageContainer,
|
||||
StorageContainerInfo,
|
||||
create_container,
|
||||
)
|
||||
from frostfs_testlib.steps.cli.object import get_object, put_object_to_random_node
|
||||
from frostfs_testlib.steps.node_management import (
|
||||
check_node_in_map,
|
||||
|
@ -20,8 +25,10 @@ from frostfs_testlib.steps.node_management import (
|
|||
wait_for_node_to_be_ready,
|
||||
)
|
||||
from frostfs_testlib.steps.s3 import s3_helper
|
||||
from frostfs_testlib.storage.cluster import Cluster, StorageNode
|
||||
from frostfs_testlib.storage.controllers import ClusterStateController
|
||||
from frostfs_testlib.storage.cluster import Cluster, ClusterNode, StorageNode
|
||||
from frostfs_testlib.storage.controllers import ClusterStateController, ShardsWatcher
|
||||
from frostfs_testlib.storage.dataclasses.storage_object_info import StorageObjectInfo
|
||||
from frostfs_testlib.storage.dataclasses.wallet import WalletInfo
|
||||
from frostfs_testlib.testing.cluster_test_base import ClusterTestBase
|
||||
from frostfs_testlib.testing.test_control import expect_not_raises
|
||||
from frostfs_testlib.utils import datetime_utils
|
||||
|
@ -580,3 +587,90 @@ class TestStorageDataLoss(ClusterTestBase):
|
|||
with allure.step("Delete bucket"):
|
||||
with expect_not_raises():
|
||||
s3_client.delete_bucket(bucket)
|
||||
|
||||
@allure.title(
    "Write cache loss on one node should not affect shards and should not produce errors in log"
)
@pytest.mark.write_cache_loss
def test_write_cache_loss_on_one_node(
    self,
    node_under_test: ClusterNode,
    simple_object_size: int,
    cluster_state_controller: ClusterStateController,
    shards_watcher: ShardsWatcher,
    default_wallet: str,
    test_start_time: datetime,
    after_run_return_all_stopped_services: str,
):
    """Delete the write cache of one node and check the node stays healthy.

    Scenario:
      1. Create a container whose placement policy filters on the UN-LOCODE
         of ``node_under_test`` and put five objects through that node.
      2. Take a shards snapshot, stop the storage service, delete the write
         cache and start the service again.
      3. Read every object back from the same node.
      4. Collect (rather than fail fast on) three kinds of problems: shards
         with new errors, shards whose mode is not ``read-write``, and
         "no such file or directory" messages logged since
         ``test_start_time``.
      5. Fail at the end with all collected messages, if any.
    """
    failures = []
    allure.dynamic.description(after_run_return_all_stopped_services)

    with allure.step(f"Create container on node {node_under_test}"):
        locode = node_under_test.storage_node.get_un_locode()
        # Adjacent literals: lines are joined by "\n" with no leading whitespace.
        placement_rule = (
            "REP 1 IN X\n"
            "CBF 1\n"
            "SELECT 1 FROM C AS X\n"
            f"FILTER 'UN-LOCODE' EQ '{locode}' AS C"
        )
        cid = create_container(
            default_wallet,
            self.shell,
            node_under_test.storage_node.get_rpc_endpoint(),
            rule=placement_rule,
        )
        container_info = StorageContainerInfo(cid, WalletInfo(default_wallet))
        container = StorageContainer(
            container_info,
            self.shell,
            cluster_state_controller.cluster,
        )

    with allure.step(f"Put couple objects to container on node {node_under_test}"):
        storage_objects: list[StorageObjectInfo] = [
            container.generate_object(
                simple_object_size, endpoint=node_under_test.storage_node.get_rpc_endpoint()
            )
            for _ in range(5)
        ]

    with allure.step("Take shards snapshot"):
        shards_watcher.take_shards_snapshot()

    with allure.step(f"Stop storage service on node {node_under_test}"):
        cluster_state_controller.stop_storage_service(node_under_test)

    with allure.step(f"Delete write cache from node {node_under_test}"):
        node_under_test.storage_node.delete_write_cache()

    with allure.step(f"Start storage service on node {node_under_test}"):
        cluster_state_controller.start_storage_service(node_under_test)

    with allure.step("Objects should be available"):
        for stored in storage_objects:
            get_object(
                stored.wallet_file_path,
                container.get_id(),
                stored.oid,
                self.shell,
                node_under_test.storage_node.get_rpc_endpoint(),
            )

    with allure.step("No shards should have new errors"):
        # A second snapshot is taken before asking the watcher for new errors.
        shards_watcher.take_shards_snapshot()
        if shards_with_errors := shards_watcher.get_shards_with_new_errors():
            failures.append(f"Shards have new errors: {shards_with_errors}")

    with allure.step("No shards should have degraded status"):
        snapshot = shards_watcher.get_shards_snapshot()
        for shard in snapshot:
            status = snapshot[shard]["mode"]
            if status == "read-write":
                continue
            failures.append(f"Shard {shard} changed status to {status}")

    with allure.step("No related errors should be in log"):
        missing_file_logged = node_under_test.host.is_message_in_logs(
            message_regex=r"\Wno such file or directory\W", since=test_start_time
        )
        if missing_file_logged:
            failures.append(f"Node {node_under_test} have shard errors in logs")

    with allure.step("Pass test if no errors found"):
        # Report every collected problem at once instead of only the first.
        assert not failures, "\n".join(failures)
|
||||
|
|
Loading…
Reference in a new issue