Add write-cache loss test

Signed-off-by: Andrey Berezin <a.berezin@yadro.com>
master
Andrey Berezin 2023-05-31 18:31:35 +03:00
parent 27cf9bb1bd
commit a0587438c4
2 changed files with 126 additions and 3 deletions

View File

@ -0,0 +1,29 @@
import random
from datetime import datetime
import allure
import pytest
from frostfs_testlib.storage.cluster import Cluster, ClusterNode
from frostfs_testlib.storage.controllers import ShardsWatcher
@pytest.fixture()
@allure.title("Select random node for testing")
def node_under_test(cluster: Cluster) -> ClusterNode:
    """Pick one node at random from ``cluster.cluster_nodes``.

    The string form of the chosen node is attached to the Allure report as a
    text attachment named "Selected node" before the node is returned.
    """
    chosen = random.choice(cluster.cluster_nodes)
    allure.attach(
        f"{chosen}",
        name="Selected node",
        attachment_type=allure.attachment_type.TEXT,
    )
    return chosen
@pytest.fixture()
@allure.title("Provide Shards watcher")
def shards_watcher(node_under_test: ClusterNode) -> ShardsWatcher:
    """Return a ``ShardsWatcher`` constructed for the node under test."""
    return ShardsWatcher(node_under_test)
@pytest.fixture()
@allure.title("Test start time")
def test_start_time() -> datetime:
    """Return the value of ``datetime.utcnow()`` taken when the fixture is set up."""
    return datetime.utcnow()

View File

@ -1,4 +1,5 @@
import logging
from datetime import datetime
from time import sleep
import allure
@ -9,7 +10,11 @@ from frostfs_testlib.resources.common import MORPH_BLOCK_TIME
from frostfs_testlib.resources.wellknown_acl import PUBLIC_ACL
from frostfs_testlib.s3 import AwsCliClient, Boto3ClientWrapper, S3ClientWrapper, VersioningStatus
from frostfs_testlib.shell import CommandOptions, Shell
from frostfs_testlib.steps.cli.container import create_container
from frostfs_testlib.steps.cli.container import (
StorageContainer,
StorageContainerInfo,
create_container,
)
from frostfs_testlib.steps.cli.object import get_object, put_object_to_random_node
from frostfs_testlib.steps.node_management import (
check_node_in_map,
@ -20,8 +25,10 @@ from frostfs_testlib.steps.node_management import (
wait_for_node_to_be_ready,
)
from frostfs_testlib.steps.s3 import s3_helper
from frostfs_testlib.storage.cluster import Cluster, StorageNode
from frostfs_testlib.storage.controllers import ClusterStateController
from frostfs_testlib.storage.cluster import Cluster, ClusterNode, StorageNode
from frostfs_testlib.storage.controllers import ClusterStateController, ShardsWatcher
from frostfs_testlib.storage.dataclasses.storage_object_info import StorageObjectInfo
from frostfs_testlib.storage.dataclasses.wallet import WalletInfo
from frostfs_testlib.testing.cluster_test_base import ClusterTestBase
from frostfs_testlib.testing.test_control import expect_not_raises
from frostfs_testlib.utils import datetime_utils
@ -580,3 +587,90 @@ class TestStorageDataLoss(ClusterTestBase):
with allure.step("Delete bucket"):
with expect_not_raises():
s3_client.delete_bucket(bucket)
@allure.title(
    "Write cache loss on one node should not affect shards and should not produce errors in log"
)
@pytest.mark.write_cache_loss
def test_write_cache_loss_on_one_node(
    self,
    node_under_test: ClusterNode,
    simple_object_size: int,
    cluster_state_controller: ClusterStateController,
    shards_watcher: ShardsWatcher,
    default_wallet: str,
    test_start_time: datetime,
    after_run_return_all_stopped_services: str,
):
    # Scenario: put objects into a container pinned to one node, stop that
    # node's storage service, delete its write cache, start the service again,
    # then check object availability, shard error counters, shard modes and
    # the node log.
    #
    # Check failures are accumulated instead of raised on the spot, so every
    # post-restart check runs before the single assert at the very end.
    failures = []
    allure.dynamic.description(after_run_return_all_stopped_services)

    with allure.step(f"Create container on node {node_under_test}"):
        locode = node_under_test.storage_node.get_un_locode()
        # The continuation lines below belong to the string literal itself,
        # so they are deliberately not indented with the surrounding code.
        placement_rule = f"""REP 1 IN X
CBF 1
SELECT 1 FROM C AS X
FILTER 'UN-LOCODE' EQ '{locode}' AS C"""
        cid = create_container(
            default_wallet,
            self.shell,
            node_under_test.storage_node.get_rpc_endpoint(),
            rule=placement_rule,
        )
        container_info = StorageContainerInfo(cid, WalletInfo(default_wallet))
        container = StorageContainer(
            container_info,
            self.shell,
            cluster_state_controller.cluster,
        )

    with allure.step(f"Put couple objects to container on node {node_under_test}"):
        # The endpoint is looked up again for every object, one call per put.
        storage_objects: list[StorageObjectInfo] = [
            container.generate_object(
                simple_object_size, endpoint=node_under_test.storage_node.get_rpc_endpoint()
            )
            for _ in range(5)
        ]

    with allure.step("Take shards snapshot"):
        # Snapshot taken before the write cache is removed.
        shards_watcher.take_shards_snapshot()

    with allure.step(f"Stop storage service on node {node_under_test}"):
        cluster_state_controller.stop_storage_service(node_under_test)

    with allure.step(f"Delete write cache from node {node_under_test}"):
        node_under_test.storage_node.delete_write_cache()

    with allure.step(f"Start storage service on node {node_under_test}"):
        cluster_state_controller.start_storage_service(node_under_test)

    with allure.step("Objects should be available"):
        for obj in storage_objects:
            get_object(
                obj.wallet_file_path,
                container.get_id(),
                obj.oid,
                self.shell,
                node_under_test.storage_node.get_rpc_endpoint(),
            )

    with allure.step("No shards should have new errors"):
        # Second snapshot, taken after the restart, before asking the watcher
        # which shards report new errors.
        shards_watcher.take_shards_snapshot()
        shards_with_errors = shards_watcher.get_shards_with_new_errors()
        if shards_with_errors:
            failures.append(f"Shards have new errors: {shards_with_errors}")

    with allure.step("No shards should have degraded status"):
        shard_states = shards_watcher.get_shards_snapshot()
        for shard in shard_states:
            status = shard_states[shard]["mode"]
            if status == "read-write":
                continue
            failures.append(f"Shard {shard} changed status to {status}")

    with allure.step("No related errors should be in log"):
        missing_file_logged = node_under_test.host.is_message_in_logs(
            message_regex=r"\Wno such file or directory\W", since=test_start_time
        )
        if missing_file_logged:
            failures.append(f"Node {node_under_test} have shard errors in logs")

    with allure.step("Pass test if no errors found"):
        assert not failures, "\n".join(failures)