Add write-cache loss test #51
2 changed files with 126 additions and 3 deletions
29
pytest_tests/testsuites/failovers/conftest.py
Normal file
29
pytest_tests/testsuites/failovers/conftest.py
Normal file
|
@ -0,0 +1,29 @@
|
||||||
|
import random
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
import allure
|
||||||
|
import pytest
|
||||||
|
from frostfs_testlib.storage.cluster import Cluster, ClusterNode
|
||||||
|
from frostfs_testlib.storage.controllers import ShardsWatcher
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture()
@allure.title("Select random node for testing")
def node_under_test(cluster: Cluster) -> ClusterNode:
    """Pick one node of the cluster at random for the test to operate on.

    Args:
        cluster: Cluster whose ``cluster_nodes`` collection is sampled.

    Returns:
        The randomly chosen cluster node.
    """
    chosen = random.choice(cluster.cluster_nodes)
    # Record the pick in the Allure report as a plain-text attachment.
    allure.attach(
        f"{chosen}",
        "Selected node",
        allure.attachment_type.TEXT,
    )
    return chosen
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture()
@allure.title("Provide Shards watcher")
def shards_watcher(node_under_test: ClusterNode) -> ShardsWatcher:
    """Build a ``ShardsWatcher`` bound to the node chosen by ``node_under_test``.

    Args:
        node_under_test: Cluster node the watcher is constructed for.

    Returns:
        A new ``ShardsWatcher`` instance for that node.
    """
    return ShardsWatcher(node_under_test)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture()
@allure.title("Test start time")
def test_start_time() -> datetime:
    """Return the moment the fixture is set up, as a naive UTC ``datetime``.

    Returns:
        Current UTC time with ``tzinfo`` stripped, i.e. the same kind of value
        ``datetime.utcnow()`` produces.
    """
    # Local import: the module header only brings in ``datetime`` itself.
    from datetime import timezone

    # ``datetime.utcnow()`` is deprecated since Python 3.12. Take an aware UTC
    # timestamp and drop tzinfo so consumers still receive a naive UTC value.
    return datetime.now(timezone.utc).replace(tzinfo=None)
|
|
@ -1,4 +1,5 @@
|
||||||
import logging
|
import logging
|
||||||
|
from datetime import datetime
|
||||||
from time import sleep
|
from time import sleep
|
||||||
|
|
||||||
import allure
|
import allure
|
||||||
|
@ -9,7 +10,11 @@ from frostfs_testlib.resources.common import MORPH_BLOCK_TIME
|
||||||
from frostfs_testlib.resources.wellknown_acl import PUBLIC_ACL
|
from frostfs_testlib.resources.wellknown_acl import PUBLIC_ACL
|
||||||
from frostfs_testlib.s3 import AwsCliClient, Boto3ClientWrapper, S3ClientWrapper, VersioningStatus
|
from frostfs_testlib.s3 import AwsCliClient, Boto3ClientWrapper, S3ClientWrapper, VersioningStatus
|
||||||
from frostfs_testlib.shell import CommandOptions, Shell
|
from frostfs_testlib.shell import CommandOptions, Shell
|
||||||
from frostfs_testlib.steps.cli.container import create_container
|
from frostfs_testlib.steps.cli.container import (
|
||||||
|
StorageContainer,
|
||||||
|
StorageContainerInfo,
|
||||||
|
create_container,
|
||||||
|
)
|
||||||
from frostfs_testlib.steps.cli.object import get_object, put_object_to_random_node
|
from frostfs_testlib.steps.cli.object import get_object, put_object_to_random_node
|
||||||
from frostfs_testlib.steps.node_management import (
|
from frostfs_testlib.steps.node_management import (
|
||||||
check_node_in_map,
|
check_node_in_map,
|
||||||
|
@ -20,8 +25,10 @@ from frostfs_testlib.steps.node_management import (
|
||||||
wait_for_node_to_be_ready,
|
wait_for_node_to_be_ready,
|
||||||
)
|
)
|
||||||
from frostfs_testlib.steps.s3 import s3_helper
|
from frostfs_testlib.steps.s3 import s3_helper
|
||||||
from frostfs_testlib.storage.cluster import Cluster, StorageNode
|
from frostfs_testlib.storage.cluster import Cluster, ClusterNode, StorageNode
|
||||||
from frostfs_testlib.storage.controllers import ClusterStateController
|
from frostfs_testlib.storage.controllers import ClusterStateController, ShardsWatcher
|
||||||
|
from frostfs_testlib.storage.dataclasses.storage_object_info import StorageObjectInfo
|
||||||
|
from frostfs_testlib.storage.dataclasses.wallet import WalletInfo
|
||||||
from frostfs_testlib.testing.cluster_test_base import ClusterTestBase
|
from frostfs_testlib.testing.cluster_test_base import ClusterTestBase
|
||||||
from frostfs_testlib.testing.test_control import expect_not_raises
|
from frostfs_testlib.testing.test_control import expect_not_raises
|
||||||
from frostfs_testlib.utils import datetime_utils
|
from frostfs_testlib.utils import datetime_utils
|
||||||
|
@ -580,3 +587,90 @@ class TestStorageDataLoss(ClusterTestBase):
|
||||||
with allure.step("Delete bucket"):
|
with allure.step("Delete bucket"):
|
||||||
with expect_not_raises():
|
with expect_not_raises():
|
||||||
s3_client.delete_bucket(bucket)
|
s3_client.delete_bucket(bucket)
|
||||||
|
|
||||||
|
@allure.title(
    "Write cache loss on one node should not affect shards and should not produce errors in log"
)
@pytest.mark.write_cache_loss
def test_write_cache_loss_on_one_node(
    self,
    node_under_test: ClusterNode,
    simple_object_size: int,
    cluster_state_controller: ClusterStateController,
    shards_watcher: ShardsWatcher,
    default_wallet: str,
    test_start_time: datetime,
    after_run_return_all_stopped_services: str,
):
    """Delete the write cache of a stopped node and verify the node comes back clean.

    Steps: create a container with a placement policy that filters on the
    node's UN-LOCODE, put five objects through the node, snapshot its shards,
    stop the storage service, delete the write cache and start the service
    again. Afterwards every object is fetched back from the node, and three
    soft checks run: no shard reports new errors, every shard is in
    ``read-write`` mode, and the node log has no "no such file or directory"
    entries since ``test_start_time``. Soft-check findings are accumulated and
    asserted together in the last step, so one failure does not hide the others.
    """
    failures = []
    allure.dynamic.description(after_run_return_all_stopped_services)

    with allure.step(f"Create container on node {node_under_test}"):
        locode = node_under_test.storage_node.get_un_locode()
        # One replica, selected only among nodes whose UN-LOCODE matches.
        placement_rule = "\n".join(
            (
                "REP 1 IN X",
                "CBF 1",
                "SELECT 1 FROM C AS X",
                f"FILTER 'UN-LOCODE' EQ '{locode}' AS C",
            )
        )
        cid = create_container(
            default_wallet,
            self.shell,
            node_under_test.storage_node.get_rpc_endpoint(),
            rule=placement_rule,
        )
        container_info = StorageContainerInfo(cid, WalletInfo(default_wallet))
        container = StorageContainer(
            container_info,
            self.shell,
            cluster_state_controller.cluster,
        )

    with allure.step(f"Put couple objects to container on node {node_under_test}"):
        storage_objects: list[StorageObjectInfo] = [
            container.generate_object(
                simple_object_size, endpoint=node_under_test.storage_node.get_rpc_endpoint()
            )
            for _ in range(5)
        ]

    with allure.step("Take shards snapshot"):
        # Baseline snapshot taken before the write cache is removed.
        shards_watcher.take_shards_snapshot()

    with allure.step(f"Stop storage service on node {node_under_test}"):
        cluster_state_controller.stop_storage_service(node_under_test)

    with allure.step(f"Delete write cache from node {node_under_test}"):
        node_under_test.storage_node.delete_write_cache()

    with allure.step(f"Start storage service on node {node_under_test}"):
        cluster_state_controller.start_storage_service(node_under_test)

    with allure.step("Objects should be available"):
        for obj in storage_objects:
            get_object(
                obj.wallet_file_path,
                container.get_id(),
                obj.oid,
                self.shell,
                node_under_test.storage_node.get_rpc_endpoint(),
            )

    with allure.step("No shards should have new errors"):
        # Second snapshot, taken after the restart, before asking for new errors.
        shards_watcher.take_shards_snapshot()
        errored_shards = shards_watcher.get_shards_with_new_errors()
        if errored_shards:
            failures.append(f"Shards have new errors: {errored_shards}")

    with allure.step("No shards should have degraded status"):
        for shard, shard_info in shards_watcher.get_shards_snapshot().items():
            mode = shard_info["mode"]
            if mode != "read-write":
                failures.append(f"Shard {shard} changed status to {mode}")

    with allure.step("No related errors should be in log"):
        log_has_errors = node_under_test.host.is_message_in_logs(
            message_regex=r"\Wno such file or directory\W", since=test_start_time
        )
        if log_has_errors:
            failures.append(f"Node {node_under_test} have shard errors in logs")

    with allure.step("Pass test if no errors found"):
        assert not failures, "\n".join(failures)
|
||||||
|
|
Loading…
Reference in a new issue