# Forked from TrueCloudLab/frostfs-testcases (424 lines, 18 KiB, Python).
import logging
|
|
import random
|
|
from time import sleep
|
|
|
|
import allure
|
|
import pytest
|
|
from frostfs_testlib import reporter
|
|
from frostfs_testlib.healthcheck.interfaces import Healthcheck
|
|
from frostfs_testlib.steps.cli.container import create_container
|
|
from frostfs_testlib.steps.cli.object import get_object, get_object_nodes, neo_go_query_height, put_object, put_object_to_random_node
|
|
from frostfs_testlib.storage.cluster import ClusterNode
|
|
from frostfs_testlib.storage.controllers import ClusterStateController
|
|
from frostfs_testlib.storage.dataclasses.object_size import ObjectSize
|
|
from frostfs_testlib.storage.dataclasses.storage_object_info import Interfaces, StorageObjectInfo
|
|
from frostfs_testlib.storage.dataclasses.wallet import WalletInfo
|
|
from frostfs_testlib.testing.cluster_test_base import ClusterTestBase
|
|
from frostfs_testlib.testing.parallel import parallel
|
|
from frostfs_testlib.utils.failover_utils import wait_object_replication
|
|
from frostfs_testlib.utils.file_utils import generate_file, get_file_hash
|
|
|
|
from ...helpers.container_request import PUBLIC_WITH_POLICY, REP_2_2_2_PUBLIC, requires_container
|
|
|
|
# Module logger, registered under the name "NeoLogger".
logger = logging.getLogger("NeoLogger")

# Storage node communication ports (plain and TLS), kept as strings.
STORAGE_NODE_COMMUNICATION_PORT = "8080"
STORAGE_NODE_COMMUNICATION_PORT_TLS = "8082"
PORTS_TO_BLOCK = [STORAGE_NODE_COMMUNICATION_PORT, STORAGE_NODE_COMMUNICATION_PORT_TLS]

# Nodes whose traffic is currently dropped by a test. Tests append to this
# list and the autouse ``restore_network`` fixture drains it on teardown.
blocked_nodes: list[ClusterNode] = []

# One entry per object created by the ``storage_objects`` fixture;
# ``None`` means "no attributes".
OBJECT_ATTRIBUTES = [
    None,
    {"key1": 1, "key2": "abc", "common_key": "common_value"},
    {"key1": 2, "common_key": "common_value"},
]
|
|
|
|
|
|
@pytest.mark.failover
@pytest.mark.failover_network
class TestFailoverNetwork(ClusterTestBase):
    """Network failover scenarios: dropped node traffic and downed data/internal interfaces."""

    @pytest.fixture(autouse=True)
    @allure.title("Restore network")
    def restore_network(self, healthcheck: Healthcheck, cluster_state_controller: ClusterStateController):
        """Teardown: restore traffic on every node still listed in ``blocked_nodes``.

        Applied to every test of the class (autouse). If at least one node had
        to be restored, ``healthcheck.storage_healthcheck`` is run through
        ``parallel`` over all cluster nodes afterwards.
        """
        yield
        with reporter.step(f"Count blocked nodes {len(blocked_nodes)}"):
            had_blocked_nodes = bool(blocked_nodes)
            # Iterate over a copy: the list is mutated inside the loop.
            for node in list(blocked_nodes):
                with reporter.step(f"Restore network for {node}"):
                    cluster_state_controller.restore_traffic(node=node)
                blocked_nodes.remove(node)
        if had_blocked_nodes:
            parallel(healthcheck.storage_healthcheck, self.cluster.cluster_nodes)

    @pytest.fixture()
    @allure.title("Restore drop traffic to system")
    def restore_down_interfaces(self, cluster_state_controller: ClusterStateController):
        """Teardown: call ``cluster_state_controller.restore_interfaces()`` after the test."""
        yield
        cluster_state_controller.restore_interfaces()

    @pytest.fixture()
    def storage_objects(
        self,
        simple_object_size: ObjectSize,
        container: str,
        default_wallet: WalletInfo,
    ) -> list[StorageObjectInfo]:
        """Put one generated file into ``container`` once per ``OBJECT_ATTRIBUTES`` entry.

        Returns:
            One ``StorageObjectInfo`` per stored object, filled with size,
            wallet, file path, file hash and the corresponding attributes entry.
        """
        file_path = generate_file(simple_object_size.value)
        file_hash = get_file_hash(file_path)

        storage_objects = []

        with reporter.step("Put object"):
            for attribute in OBJECT_ATTRIBUTES:
                # NOTE(review): ``attribute`` is recorded on the info object below but is
                # not passed to put_object_to_random_node - confirm whether that is intended.
                oid = put_object_to_random_node(
                    wallet=default_wallet,
                    path=file_path,
                    cid=container,
                    shell=self.shell,
                    cluster=self.cluster,
                )

                storage_object = StorageObjectInfo(cid=container, oid=oid)
                storage_object.size = simple_object_size.value
                storage_object.wallet = default_wallet
                storage_object.file_path = file_path
                storage_object.file_hash = file_hash
                storage_object.attributes = attribute

                storage_objects.append(storage_object)

        return storage_objects

    def _query_block_heights(self, cluster_nodes: list[ClusterNode]) -> dict:
        """Return the ``neo_go_query_height`` result of every node, keyed by cluster node."""
        return {
            cluster_node: neo_go_query_height(
                shell=cluster_node.host.get_shell(),
                endpoint=cluster_node.morph_chain.get_http_endpoint(),
            )
            for cluster_node in cluster_nodes
        }

    @allure.title("Block Storage node traffic")
    @requires_container(REP_2_2_2_PUBLIC)
    def test_block_storage_node_traffic(
        self,
        default_wallet: WalletInfo,
        container: str,
        require_multiple_hosts,
        simple_object_size: ObjectSize,
        cluster_state_controller: ClusterStateController,
    ):
        """
        Block storage nodes traffic (per the original description, using iptables)
        and wait for replication for objects.

        Traffic is dropped on every cluster node that does not hold the object,
        then restored; after each phase the object is fetched and its hash is
        compared with the hash of the source file.
        """
        wakeup_node_timeout = 10  # timeout to let nodes detect that traffic has blocked
        nodes_to_block_count = 2

        source_file_path = generate_file(simple_object_size.value)
        oid = put_object_to_random_node(default_wallet, source_file_path, container, self.shell, self.cluster)

        nodes = wait_object_replication(container, oid, 2, self.shell, self.cluster.storage_nodes)

        logger.info("Nodes are %s", nodes)
        nodes_to_block = nodes
        if nodes_to_block_count > len(nodes):
            # TODO: the intent of this logic is not clear, need to revisit
            # NOTE: random.choices samples with replacement, so a node may be picked twice.
            nodes_to_block = random.choices(nodes, k=nodes_to_block_count)

        # Storage nodes that do not hold the object; traffic is dropped on these.
        nodes_non_block = list(set(self.cluster.storage_nodes) - set(nodes_to_block))
        nodes_non_block_cluster = [
            cluster_node for cluster_node in self.cluster.cluster_nodes if cluster_node.storage_node in nodes_non_block
        ]
        # Loop-invariant: the storage nodes on which no traffic is dropped.
        untouched_storage_nodes = list(set(self.cluster.storage_nodes) - set(nodes_non_block))

        with reporter.step("Block traffic and check corrupted object"):
            for node in nodes_non_block_cluster:
                with reporter.step(f"Block incoming traffic at node {node}"):
                    # Registered before the drop so the teardown fixture restores it on failure.
                    blocked_nodes.append(node)
                    cluster_state_controller.drop_traffic(
                        node=node, wakeup_timeout=wakeup_node_timeout, name_interface="data", block_nodes=nodes_to_block
                    )

                with reporter.step(f"Check object is not stored on node {node}"):
                    new_nodes = wait_object_replication(
                        container,
                        oid,
                        2,
                        shell=self.shell,
                        nodes=untouched_storage_nodes,
                    )
                    assert node.storage_node not in new_nodes

                with reporter.step("Check object data is not corrupted"):
                    got_file_path = get_object(
                        default_wallet, container, oid, endpoint=new_nodes[0].get_rpc_endpoint(), shell=self.shell
                    )
                    assert get_file_hash(source_file_path) == get_file_hash(got_file_path)

            with reporter.step("Unblock incoming traffic"):
                for node in nodes_non_block_cluster:
                    with reporter.step(f"Unblock at host {node}"):
                        cluster_state_controller.restore_traffic(node=node)
                        # ``node`` is the very object appended above; list.remove takes
                        # exactly one argument, so remove it directly.
                        blocked_nodes.remove(node)
                sleep(wakeup_node_timeout)

            with reporter.step("Check object data is not corrupted"):
                new_nodes = wait_object_replication(container, oid, 2, shell=self.shell, nodes=self.cluster.storage_nodes)

                got_file_path = get_object(
                    default_wallet, container, oid, shell=self.shell, endpoint=new_nodes[0].get_rpc_endpoint()
                )
                assert get_file_hash(source_file_path) == get_file_hash(got_file_path)

    @pytest.mark.interfaces
    @allure.title("Block DATA interface node")
    @requires_container(PUBLIC_WITH_POLICY("REP 1 CBF 1", short_name="REP 1 CBF 1"))
    def test_block_data_interface(
        self,
        cluster_state_controller: ClusterStateController,
        default_wallet: WalletInfo,
        restore_down_interfaces: None,
        storage_objects: list[StorageObjectInfo],
    ):
        """Bring down the data interfaces of the nodes holding an object.

        Expects a GET through such a node to fail with the SDK client
        "context deadline exceeded" error, while a GET through a node that
        does not hold the object succeeds.
        """
        storage_object = storage_objects[0]

        with reporter.step("Search nodes with object"):
            nodes_with_object = get_object_nodes(
                cluster=self.cluster,
                cid=storage_object.cid,
                oid=storage_object.oid,
                alive_node=self.cluster.cluster_nodes[0],
            )

        with reporter.step("Get data interface to node"):
            config_interfaces = list(nodes_with_object[0].host.config.interfaces.keys())
            with reporter.step(f"Get data in {config_interfaces}"):
                data_interfaces = [interface for interface in config_interfaces if "data" in interface]

        with reporter.step("Block data interfaces for node"):
            for interface in data_interfaces:
                cluster_state_controller.down_interface(nodes=nodes_with_object, interface=interface)

        with reporter.step("Tick epoch and wait 2 block"):
            nodes_without_an_object = list(set(self.cluster.cluster_nodes) - set(nodes_with_object))
            self.tick_epochs(1, alive_node=nodes_without_an_object[0].storage_node, wait_block=2)

        with reporter.step("Get object for target nodes to data interfaces, expect false"):
            with pytest.raises(RuntimeError, match="can't create API client: can't init SDK client: context deadline exceeded"):
                get_object(
                    wallet=default_wallet,
                    cid=storage_object.cid,
                    oid=storage_object.oid,
                    shell=self.shell,
                    endpoint=nodes_with_object[0].storage_node.get_rpc_endpoint(),
                )

        with reporter.step("Get object others nodes, expect true"):
            get_object(
                wallet=default_wallet,
                cid=storage_object.cid,
                oid=storage_object.oid,
                shell=self.shell,
                endpoint=nodes_without_an_object[0].storage_node.get_rpc_endpoint(),
            )

        with reporter.step("Restore interface and tick 1 epoch, wait 2 block"):
            cluster_state_controller.restore_interfaces()
            self.tick_epochs(1, alive_node=nodes_without_an_object[0].storage_node, wait_block=2)

    @pytest.mark.interfaces
    @allure.title("Block INTERNAL interface node")
    @requires_container(PUBLIC_WITH_POLICY("REP 1 CBF 1", short_name="REP 1 CBF 1"))
    def test_block_internal_interface(
        self,
        cluster_state_controller: ClusterStateController,
        default_wallet: WalletInfo,
        restore_down_interfaces: None,
        storage_objects: list[StorageObjectInfo],
        simple_object_size: ObjectSize,
    ):
        """Bring down the internal interfaces of the nodes holding an object.

        Expects GET and PUT through a node that does not hold the object to
        fail with "rpc error", while GET and PUT through a node that holds it
        succeed.
        """
        storage_object = storage_objects[0]

        with reporter.step("Search nodes with object"):
            nodes_with_object = get_object_nodes(
                cluster=self.cluster,
                cid=storage_object.cid,
                oid=storage_object.oid,
                alive_node=self.cluster.cluster_nodes[0],
            )

        with reporter.step("Get internal interface to node"):
            config_interfaces = list(nodes_with_object[0].host.config.interfaces.keys())
            with reporter.step(f"Get internal in {config_interfaces}"):
                internal_interfaces = [interface for interface in config_interfaces if "internal" in interface]

        with reporter.step("Block internal interfaces for node"):
            for interface in internal_interfaces:
                cluster_state_controller.down_interface(nodes=nodes_with_object, interface=interface)

        with reporter.step("Tick epoch and wait 2 block"):
            nodes_without_an_object = list(set(self.cluster.cluster_nodes) - set(nodes_with_object))
            self.tick_epochs(1, alive_node=nodes_without_an_object[0].storage_node, wait_block=2)

        with reporter.step("Get object others node, expect false"):
            with pytest.raises(RuntimeError, match="rpc error"):
                get_object(
                    wallet=default_wallet,
                    cid=storage_object.cid,
                    oid=storage_object.oid,
                    shell=self.shell,
                    endpoint=nodes_without_an_object[0].storage_node.get_rpc_endpoint(),
                )

        with reporter.step("Put object, others node, expect false"):
            with pytest.raises(RuntimeError, match="rpc error"):
                put_object(
                    wallet=default_wallet,
                    path=storage_object.file_path,
                    cid=storage_object.cid,
                    shell=self.shell,
                    endpoint=nodes_without_an_object[0].storage_node.get_rpc_endpoint(),
                )

        with reporter.step("Get object nodes with object, expect true"):
            get_object(
                wallet=default_wallet,
                cid=storage_object.cid,
                oid=storage_object.oid,
                shell=self.shell,
                endpoint=nodes_with_object[0].storage_node.get_rpc_endpoint(),
            )

        with reporter.step("Put object nodes with object, expect true"):
            temp_file_path = generate_file(simple_object_size.value)
            put_object(
                wallet=default_wallet,
                path=temp_file_path,
                cid=storage_object.cid,
                shell=self.shell,
                endpoint=nodes_with_object[0].storage_node.get_rpc_endpoint(),
            )

        with reporter.step("Restore interface and tick 1 epoch, wait 2 block"):
            cluster_state_controller.restore_interfaces()
            self.tick_epochs(1, alive_node=nodes_without_an_object[0].storage_node, wait_block=2)

    @pytest.mark.interfaces
    @pytest.mark.failover_baremetal
    @pytest.mark.parametrize(
        "block_interface, other_interface",
        [(Interfaces.DATA_O, Interfaces.DATA_1), (Interfaces.DATA_1, Interfaces.DATA_O)],
    )
    @allure.title("Down data interfaces to all nodes(interface={block_interface})")
    def test_down_data_interface(
        self,
        require_multiple_interfaces,
        cluster_state_controller: ClusterStateController,
        default_wallet: WalletInfo,
        simple_object_size: ObjectSize,
        restore_down_interfaces: None,
        block_interface: Interfaces,
        other_interface: Interfaces,
    ):
        """Bring ``block_interface`` down on all nodes and work through ``other_interface``.

        Container creation, PUT and GET are all issued against the first
        cluster node's ``other_interface`` address.
        """
        cluster_nodes = self.cluster.cluster_nodes
        with reporter.step(f"Block {block_interface.value} interfaces"):
            cluster_state_controller.down_interface(cluster_nodes, block_interface.value)

        with reporter.step("Tick 1 epoch and wait 2 block for sync all nodes"):
            self.tick_epochs(1, alive_node=cluster_nodes[0].storage_node, wait_block=2)

        # Same endpoint for every request below: first address of the interface left up.
        data_address = cluster_nodes[0].get_data_interface(other_interface.value)[0]
        endpoint = f"{data_address}:{STORAGE_NODE_COMMUNICATION_PORT}"

        with reporter.step("Create container"):
            cid = create_container(
                wallet=default_wallet,
                shell=self.shell,
                endpoint=endpoint,
                rule="REP 4 CBF 1",
            )

        with reporter.step("Put object"):
            file_path = generate_file(simple_object_size.value)

            oid = put_object(
                wallet=default_wallet,
                path=file_path,
                cid=cid,
                shell=self.shell,
                endpoint=endpoint,
            )

        with reporter.step("Get object"):
            get_object(
                wallet=default_wallet,
                cid=cid,
                oid=oid,
                shell=self.shell,
                endpoint=endpoint,
            )

        with reporter.step("Restore interfaces all nodes"):
            cluster_state_controller.restore_interfaces()
            self.tick_epochs(1, alive_node=cluster_nodes[0].storage_node, wait_block=2)

    @pytest.mark.interfaces
    @pytest.mark.failover_baremetal
    @pytest.mark.parametrize("interface", [Interfaces.INTERNAL_0, Interfaces.INTERNAL_1])
    @allure.title("Down internal interfaces to all nodes(interface={interface})")
    def test_down_internal_interface(
        self,
        require_multiple_interfaces,
        cluster_state_controller: ClusterStateController,
        default_wallet: WalletInfo,
        simple_object_size: ObjectSize,
        restore_down_interfaces: None,
        interface: Interfaces,
    ):
        """Bring one internal interface down on all nodes and check the cluster keeps working.

        Container creation, PUT and GET go through the default RPC endpoint;
        afterwards every node's "Latest block" must be greater than it was
        before the interface went down.
        """
        cluster_nodes = self.cluster.cluster_nodes

        with reporter.step("Get block all nodes"):
            latest_block = self._query_block_heights(cluster_nodes)

        with reporter.step(f"Block {interface} interfaces"):
            cluster_state_controller.down_interface(cluster_nodes, interface.value)

        with reporter.step("Tick 1 epoch and wait 2 block for sync all nodes"):
            self.tick_epochs(1, alive_node=cluster_nodes[0].storage_node, wait_block=2)

        with reporter.step("Create container"):
            cid = create_container(
                wallet=default_wallet,
                shell=self.shell,
                endpoint=self.cluster.default_rpc_endpoint,
                rule="REP 4 CBF 1",
            )

        with reporter.step(f"Put object, after down {interface}"):
            file_path = generate_file(simple_object_size.value)

            oid = put_object(
                wallet=default_wallet,
                path=file_path,
                cid=cid,
                shell=self.shell,
                endpoint=self.cluster.default_rpc_endpoint,
            )

        with reporter.step("Get object"):
            get_object(
                wallet=default_wallet,
                cid=cid,
                oid=oid,
                shell=self.shell,
                endpoint=self.cluster.default_rpc_endpoint,
            )

        with reporter.step("Get actual block"):
            now_block = self._query_block_heights(cluster_nodes)

        with reporter.step("Compare block"):
            for cluster_node, current in now_block.items():
                old_height = latest_block[cluster_node]["Latest block"]
                new_height = current["Latest block"]
                with reporter.step(f"Node - {cluster_node.host_ip}, old block - {old_height}, now block - {new_height}"):
                    # NOTE(review): heights are presumably returned as strings (confirm against
                    # neo_go_query_height); compare numerically so "99" < "100" holds.
                    assert int(old_height) < int(new_height)

        with reporter.step("Restore interfaces all nodes"):
            cluster_state_controller.restore_interfaces()
            self.tick_epochs(1, alive_node=self.cluster.cluster_nodes[0].storage_node, wait_block=2)
|