Compare commits

..

3 commits

Author SHA1 Message Date
741102ec17 [#265] fix tests metrics object and logs 2024-07-02 16:23:02 +03:00
5d3d22f685 [#262] Added new test for EC policy
Signed-off-by: Dmitriy Zayakin <d.zayakin@yadro.com>
2024-07-01 15:41:14 +00:00
7d4792f49b [#260] fix tests logs 2024-07-01 10:26:21 +00:00
6 changed files with 674 additions and 74 deletions

View file

@ -1,8 +1,8 @@
[tool.isort] [tool.isort]
profile = "black" profile = "black"
src_paths = ["pytest_tests"] src_paths = ["pytest_tests"]
line_length = 120 line_length = 140
[tool.black] [tool.black]
line-length = 120 line-length = 140
target-version = ["py310"] target-version = ["py310"]

View file

@ -12,6 +12,7 @@ markers =
smoke: test runs in smoke testrun smoke: test runs in smoke testrun
# controlling markers # controlling markers
order: manual control of test order order: manual control of test order
logs_after_session: Make the last test in session
# functional markers # functional markers
maintenance: tests for change mode node maintenance: tests for change mode node
container: tests for container creation container: tests for container creation
@ -63,5 +64,6 @@ markers =
write_cache_loss: tests for write cache loss write_cache_loss: tests for write cache loss
time: time tests time: time tests
replication: replication tests replication: replication tests
ec_replication: replication EC
static_session_container: tests for a static session in a container static_session_container: tests for a static session in a container
shard: shard management tests shard: shard management tests

View file

@ -26,17 +26,17 @@ class TestLogsMetrics(ClusterTestBase):
cluster_state_controller.manager(ConfigStateManager).revert_all() cluster_state_controller.manager(ConfigStateManager).revert_all()
@wait_for_success(interval=10) @wait_for_success(interval=10)
def check_metrics_in_node(self, cluster_node: ClusterNode, restart_time: datetime, **metrics_greps): def check_metrics_in_node(self, cluster_node: ClusterNode, restart_time: datetime, log_priority: str = None, **metrics_greps):
counter_exp = self.get_count_logs_by_level(cluster_node, metrics_greps.get("level"), restart_time) counter_logs = self.get_count_logs_by_level(cluster_node, metrics_greps.get("level"), restart_time, log_priority)
counter_act = get_metrics_value(cluster_node, **metrics_greps) counter_metrics = get_metrics_value(cluster_node, **metrics_greps)
assert counter_act == counter_exp, f"Expected: {counter_exp}, Actual: {counter_act} in node: {cluster_node}" assert counter_logs == counter_metrics, f"counter_logs: {counter_logs}, counter_metrics: {counter_metrics} in node: {cluster_node}"
@staticmethod @staticmethod
def get_count_logs_by_level(cluster_node: ClusterNode, log_level: str, after_time: datetime): def get_count_logs_by_level(cluster_node: ClusterNode, log_level: str, after_time: datetime, log_priority: str):
count_logs = 0 count_logs = 0
try: try:
logs = cluster_node.host.get_filtered_logs(log_level, unit="frostfs-storage", since=after_time) logs = cluster_node.host.get_filtered_logs(log_level, unit="frostfs-storage", since=after_time, priority=log_priority)
result = re.findall(rf"Z\s+{log_level}\s+", logs) result = re.findall(rf"\s+{log_level}\s+", logs)
count_logs += len(result) count_logs += len(result)
except RuntimeError as e: except RuntimeError as e:
... ...
@ -49,7 +49,14 @@ class TestLogsMetrics(ClusterTestBase):
node = random.choice(cluster.cluster_nodes) node = random.choice(cluster.cluster_nodes)
with reporter.step(f"Check metrics count logs with level 'info'"): with reporter.step(f"Check metrics count logs with level 'info'"):
self.check_metrics_in_node(node, restart_time, command="frostfs_node_logger_entry_count", level="info") self.check_metrics_in_node(
node,
restart_time,
log_priority="6..6",
command="frostfs_node_logger_entry_count",
level="info",
dropped="false",
)
with reporter.step(f"Check metrics count logs with level 'error'"): with reporter.step(f"Check metrics count logs with level 'error'"):
self.check_metrics_in_node(node, restart_time, command="frostfs_node_logger_entry_count", level="error") self.check_metrics_in_node(node, restart_time, command="frostfs_node_logger_entry_count", level="error", dropped="false")

View file

@ -18,9 +18,7 @@ from frostfs_testlib.utils.file_utils import generate_file
class TestObjectMetrics(ClusterTestBase): class TestObjectMetrics(ClusterTestBase):
@allure.title("Object metrics of removed container (obj_size={object_size})") @allure.title("Object metrics of removed container (obj_size={object_size})")
def test_object_metrics_removed_container( def test_object_metrics_removed_container(self, object_size: ObjectSize, default_wallet: WalletInfo, cluster: Cluster):
self, object_size: ObjectSize, default_wallet: WalletInfo, cluster: Cluster
):
file_path = generate_file(object_size.value) file_path = generate_file(object_size.value)
placement_policy = "REP 2 IN X CBF 2 SELECT 2 FROM * AS X" placement_policy = "REP 2 IN X CBF 2 SELECT 2 FROM * AS X"
copies = 2 copies = 2
@ -33,11 +31,7 @@ class TestObjectMetrics(ClusterTestBase):
with reporter.step("Check metric appears in node where the object is located"): with reporter.step("Check metric appears in node where the object is located"):
object_storage_nodes = get_nodes_with_object(cid, oid, self.shell, cluster.storage_nodes) object_storage_nodes = get_nodes_with_object(cid, oid, self.shell, cluster.storage_nodes)
object_nodes = [ object_nodes = [cluster_node for cluster_node in cluster.cluster_nodes if cluster_node.storage_node in object_storage_nodes]
cluster_node
for cluster_node in cluster.cluster_nodes
if cluster_node.storage_node in object_storage_nodes
]
check_metrics_counter( check_metrics_counter(
object_nodes, object_nodes,
@ -54,23 +48,15 @@ class TestObjectMetrics(ClusterTestBase):
self.tick_epochs(epochs_to_tick=2, wait_block=2) self.tick_epochs(epochs_to_tick=2, wait_block=2)
with reporter.step("Check metrics of removed containers doesn't appear in the storage node"): with reporter.step("Check metrics of removed containers doesn't appear in the storage node"):
check_metrics_counter( check_metrics_counter(object_nodes, counter_exp=0, command="frostfs_node_engine_container_objects_total", cid=cid, type="user")
object_nodes, counter_exp=0, command="frostfs_node_engine_container_objects_total", cid=cid, type="user" check_metrics_counter(object_nodes, counter_exp=0, command="frostfs_node_engine_container_size_byte", cid=cid)
)
check_metrics_counter(
object_nodes, counter_exp=0, command="frostfs_node_engine_container_size_byte", cid=cid
)
for node in object_nodes: for node in object_nodes:
all_metrics = node.metrics.storage.get_all_metrics() all_metrics = node.metrics.storage.get_metrics_search_by_greps(command="frostfs_node_engine_container_size_byte")
assert ( assert cid not in all_metrics.stdout, "metrics of removed containers shouldn't appear in the storage node"
cid not in all_metrics.stdout
), "metrics of removed containers shouldn't appear in the storage node"
@allure.title("Object metrics, locked object (obj_size={object_size}, policy={placement_policy})") @allure.title("Object metrics, locked object (obj_size={object_size}, policy={placement_policy})")
@pytest.mark.parametrize( @pytest.mark.parametrize("placement_policy", ["REP 1 IN X CBF 1 SELECT 1 FROM * AS X", "REP 2 IN X CBF 2 SELECT 2 FROM * AS X"])
"placement_policy", ["REP 1 IN X CBF 1 SELECT 1 FROM * AS X", "REP 2 IN X CBF 2 SELECT 2 FROM * AS X"]
)
def test_object_metrics_blocked_object( def test_object_metrics_blocked_object(
self, object_size: ObjectSize, default_wallet: WalletInfo, cluster: Cluster, placement_policy: str self, object_size: ObjectSize, default_wallet: WalletInfo, cluster: Cluster, placement_policy: str
): ):
@ -92,14 +78,10 @@ class TestObjectMetrics(ClusterTestBase):
with reporter.step("Get current metrics for metric_type=user"): with reporter.step("Get current metrics for metric_type=user"):
objects_metric_counter = 0 objects_metric_counter = 0
for node in container_nodes: for node in container_nodes:
objects_metric_counter += get_metrics_value( objects_metric_counter += get_metrics_value(node, command="frostfs_node_engine_objects_total", type="user")
node, command="frostfs_node_engine_objects_total", type="user"
)
with reporter.step("Put object to container node"): with reporter.step("Put object to container node"):
oid = put_object( oid = put_object(default_wallet, file_path, cid, self.shell, container_nodes[0].storage_node.get_rpc_endpoint())
default_wallet, file_path, cid, self.shell, container_nodes[0].storage_node.get_rpc_endpoint()
)
with reporter.step(f"Check metric user 'the counter should increase by {metric_step}'"): with reporter.step(f"Check metric user 'the counter should increase by {metric_step}'"):
objects_metric_counter += metric_step objects_metric_counter += metric_step
@ -137,9 +119,7 @@ class TestObjectMetrics(ClusterTestBase):
) )
with reporter.step("Put object and lock it to next epoch"): with reporter.step("Put object and lock it to next epoch"):
oid = put_object( oid = put_object(default_wallet, file_path, cid, self.shell, container_nodes[0].storage_node.get_rpc_endpoint())
default_wallet, file_path, cid, self.shell, container_nodes[0].storage_node.get_rpc_endpoint()
)
current_epoch = self.get_epoch() current_epoch = self.get_epoch()
lock_object( lock_object(
default_wallet, default_wallet,
@ -266,9 +246,7 @@ class TestObjectMetrics(ClusterTestBase):
with reporter.step("Get current metrics for each nodes"): with reporter.step("Get current metrics for each nodes"):
objects_metric_counter: dict[ClusterNode:int] = {} objects_metric_counter: dict[ClusterNode:int] = {}
for node in self.cluster.cluster_nodes: for node in self.cluster.cluster_nodes:
objects_metric_counter[node] = get_metrics_value( objects_metric_counter[node] = get_metrics_value(node, command="frostfs_node_engine_objects_total", type="user")
node, command="frostfs_node_engine_objects_total", type="user"
)
with reporter.step("Put object"): with reporter.step("Put object"):
oid = put_object(default_wallet, file_path, cid, self.shell, self.cluster.default_rpc_endpoint) oid = put_object(default_wallet, file_path, cid, self.shell, self.cluster.default_rpc_endpoint)
@ -276,16 +254,12 @@ class TestObjectMetrics(ClusterTestBase):
with reporter.step("Get object nodes"): with reporter.step("Get object nodes"):
object_storage_nodes = get_nodes_with_object(cid, oid, self.shell, self.cluster.storage_nodes) object_storage_nodes = get_nodes_with_object(cid, oid, self.shell, self.cluster.storage_nodes)
object_nodes = [ object_nodes = [
cluster_node cluster_node for cluster_node in self.cluster.cluster_nodes if cluster_node.storage_node in object_storage_nodes
for cluster_node in self.cluster.cluster_nodes
if cluster_node.storage_node in object_storage_nodes
] ]
with reporter.step(f"Check metrics in object nodes 'the counter should increase by {copies}'"): with reporter.step(f"Check metrics in object nodes 'the counter should increase by {copies}'"):
counter_exp = sum(objects_metric_counter[node] for node in object_nodes) + copies counter_exp = sum(objects_metric_counter[node] for node in object_nodes) + copies
check_metrics_counter( check_metrics_counter(object_nodes, counter_exp=counter_exp, command="frostfs_node_engine_objects_total", type="user")
object_nodes, counter_exp=counter_exp, command="frostfs_node_engine_objects_total", type="user"
)
check_metrics_counter( check_metrics_counter(
object_nodes, object_nodes,
counter_exp=copies, counter_exp=copies,
@ -304,9 +278,7 @@ class TestObjectMetrics(ClusterTestBase):
with reporter.step(f"Check metric in alive nodes 'the counter should increase'"): with reporter.step(f"Check metric in alive nodes 'the counter should increase'"):
counter_exp = sum(objects_metric_counter[node] for node in alive_nodes) counter_exp = sum(objects_metric_counter[node] for node in alive_nodes)
check_metrics_counter( check_metrics_counter(alive_nodes, ">=", counter_exp, command="frostfs_node_engine_objects_total", type="user")
alive_nodes, ">=", counter_exp, command="frostfs_node_engine_objects_total", type="user"
)
with reporter.step("Start node"): with reporter.step("Start node"):
cluster_state_controller.start_node_host(node_to_stop) cluster_state_controller.start_node_host(node_to_stop)

View file

@ -0,0 +1,578 @@
import json
from dataclasses import dataclass
import allure
import pytest
import yaml
from frostfs_testlib import reporter
from frostfs_testlib.cli import FrostfsAdm, FrostfsCli
from frostfs_testlib.cli.netmap_parser import NetmapParser
from frostfs_testlib.credentials.interfaces import User
from frostfs_testlib.resources.cli import FROSTFS_ADM_CONFIG_PATH, FROSTFS_ADM_EXEC, FROSTFS_CLI_EXEC
from frostfs_testlib.shell import Shell
from frostfs_testlib.steps.cli.object import get_object, put_object
from frostfs_testlib.storage.cluster import Cluster, ClusterNode, StorageNode
from frostfs_testlib.storage.controllers import ClusterStateController
from frostfs_testlib.storage.controllers.state_managers.config_state_manager import ConfigStateManager
from frostfs_testlib.storage.dataclasses.object_size import ObjectSize
from frostfs_testlib.storage.dataclasses.storage_object_info import NodeNetmapInfo
from frostfs_testlib.testing.cluster_test_base import ClusterTestBase
from frostfs_testlib.utils.cli_utils import parse_netmap_output
from frostfs_testlib.utils.file_utils import generate_file, get_file_hash
from pytest_tests.resources.common import HOSTING_CONFIG_FILE
def pytest_generate_tests(metafunc: pytest.Metafunc) -> None:
if "ec_policy" not in metafunc.fixturenames:
return
with open(HOSTING_CONFIG_FILE, "r") as file:
hosting_config = yaml.full_load(file)
node_count = len(hosting_config["hosts"])
ec_map = {
4: ["EC 1.1", "EC 2.1", "EC 3.1"],
8: ["EC 5.3", "EC 3.2", "EC 7.1", "EC 4.4"],
16: ["EC 12.4", "EC 8.4", "EC 5.3", "EC 4.4"],
100: ["EC 12.4", "EC 8.4", "EC 5.3", "EC 4.4"],
}
metafunc.parametrize("ec_policy, node_count", ((ec_policy, node_count) for ec_policy in ec_map[node_count]))
@dataclass
class Chunk:
def __init__(self, object_id: str, required_nodes: list, confirmed_nodes: list, ec_parent_object_id: str, ec_index: int) -> None:
self.object_id = object_id
self.required_nodes = required_nodes
self.confirmed_nodes = confirmed_nodes
self.ec_parent_object_id = ec_parent_object_id
self.ec_index = ec_index
def __str__(self) -> str:
return self.object_id
@allure.title("Initialized local FrostfsCli")
@pytest.fixture()
def frostfs_local_cli(client_shell: Shell, default_user: User) -> FrostfsCli:
return FrostfsCli(client_shell, frostfs_cli_exec_path=FROSTFS_CLI_EXEC, config_file=default_user.wallet.config_path)
@allure.title("Initialized remote FrostfsAdm")
@pytest.fixture
def frostfs_remote_adm(cluster: Cluster) -> FrostfsAdm:
node = cluster.cluster_nodes[0]
shell = node.host.get_shell()
return FrostfsAdm(shell, frostfs_adm_exec_path=FROSTFS_ADM_EXEC, config_file=FROSTFS_ADM_CONFIG_PATH)
@pytest.mark.replication
@pytest.mark.ec_replication
class TestECReplication(ClusterTestBase):
@allure.title("Restore chunk maximum params in network params ")
@pytest.fixture
def restore_network_config(self, frostfs_remote_adm: FrostfsAdm) -> None:
yield
frostfs_remote_adm.morph.set_config(set_key_value='"MaxECDataCount=12" "MaxECParityCount=5"')
@reporter.step("Get object nodes output ")
def get_object_nodes(self, cli: FrostfsCli, cid: str, oid: str, endpoint: str = None) -> dict:
if not endpoint:
endpoint = self.cluster.default_rpc_endpoint
return json.loads(cli.object.nodes(endpoint, cid, oid=oid, json=True).stdout)
@reporter.step("Get all chunks object ")
def get_all_chunks_object(self, cli: FrostfsCli, cid: str, oid: str, endpoint: str = None) -> list[Chunk]:
chunks = self.get_object_nodes(cli, cid, oid, endpoint)
return [Chunk(**chunk) for chunk in chunks["data_objects"]]
@reporter.step("Get parity chunk ")
def get_parity_chunk_object(self, cli: FrostfsCli, cid: str, oid: str, endpoint: str = None) -> Chunk:
chunks = self.get_object_nodes(cli, cid, oid, endpoint)["data_objects"]
return Chunk(**chunks[-1])
@reporter.step("Get data chunk ")
def get_data_chunk_object(self, cli: FrostfsCli, cid: str, oid: str, endpoint: str = None) -> Chunk:
chunks = self.get_object_nodes(cli, cid, oid, endpoint)["data_objects"]
return Chunk(**chunks[0])
@reporter.step("Search node without chunks ")
def search_node_not_chunks(self, chunks: list[Chunk], local_cli: FrostfsCli, endpoint: str = None) -> list[ClusterNode]:
if not endpoint:
self.cluster.default_rpc_endpoint
netmap = parse_netmap_output(local_cli.netmap.snapshot(endpoint).stdout)
chunks_node_key = []
for chunk in chunks:
chunks_node_key.extend(chunk.confirmed_nodes)
for node_info in netmap.copy():
if node_info.node_id in chunks_node_key and node_info in netmap:
netmap.remove(node_info)
result = []
for node_info in netmap:
for cluster_node in self.cluster.cluster_nodes:
if node_info.node == cluster_node.host_ip:
result.append(cluster_node)
return result
@reporter.step("Create container, policy={policy}")
def create_container(self, user_cli: FrostfsCli, endpoint: str, policy: str) -> str:
return user_cli.container.create(endpoint, policy=policy, await_mode=True).stdout.split(" ")[1].strip().split("\n")[0]
@reporter.step("Search node chunk {chunk}")
def get_chunk_node(self, frostfs_cli: FrostfsCli, chunk: Chunk) -> tuple[ClusterNode, NodeNetmapInfo]:
netmap = parse_netmap_output(frostfs_cli.netmap.snapshot(self.cluster.default_rpc_endpoint).stdout)
for node_info in netmap:
if node_info.node_id in chunk.confirmed_nodes:
for cluster_node in self.cluster.cluster_nodes:
if cluster_node.host_ip == node_info.node:
return (cluster_node, node_info)
@reporter.step("Check replication chunks={total_chunks} chunks ")
def check_replication(self, total_chunks: int, local_cli: FrostfsCli, cid: str, oid: str) -> bool:
object_nodes_info = local_cli.object.nodes(self.cluster.default_rpc_endpoint, cid, oid=oid, json=True).stdout
object_nodes_info = json.loads(object_nodes_info)
return len(object_nodes_info["data_objects"]) == total_chunks
@allure.title("Disable Policer on all nodes")
@pytest.fixture()
def disable_policer(
self,
cluster_state_controller: ClusterStateController,
) -> None:
with reporter.step(f"Disable policer for nodes"):
cluster_state_controller.manager(ConfigStateManager).set_on_all_nodes(
service_type=StorageNode, values={"policer": {"unsafe_disable": True}}
)
yield
with reporter.step(f"Enable policer for nodes"):
cluster_state_controller.start_stopped_hosts()
cluster_state_controller.manager(ConfigStateManager).revert_all()
@allure.title("Create container with EC policy (size={object_size.value})")
def test_create_container_with_ec_policy(
self,
default_user: User,
frostfs_local_cli: FrostfsCli,
object_size: ObjectSize,
) -> None:
test_file = generate_file(object_size.value)
rep_count = 3
if object_size.name == "complex":
rep_count *= 4
with reporter.step("Create container."):
cid = self.create_container(frostfs_local_cli, self.cluster.default_rpc_endpoint, "EC 2.1")
with reporter.step("Put object in container."):
oid = put_object(default_user.wallet, test_file, cid, self.shell, self.cluster.default_rpc_endpoint)
with reporter.step("Check replication chunks."):
assert self.check_replication(rep_count, frostfs_local_cli, cid, oid)
@allure.title("Lose node with chunk data")
@pytest.mark.failover
def test_lose_node_with_data_chunk(
self,
frostfs_local_cli: FrostfsCli,
default_user: User,
simple_object_size: ObjectSize,
cluster_state_controller: ClusterStateController,
disable_policer: None,
) -> None:
with reporter.step("Create container."):
cid = self.create_container(frostfs_local_cli, self.cluster.default_rpc_endpoint, "EC 3.1")
with reporter.step("Put object in container."):
test_file = generate_file(simple_object_size.value)
oid = put_object(default_user.wallet, test_file, cid, self.shell, self.cluster.default_rpc_endpoint)
with reporter.step("Check chunk replication on 4 nodes."):
assert self.check_replication(4, frostfs_local_cli, cid, oid)
with reporter.step("Search node data chunk"):
chunk = self.get_data_chunk_object(frostfs_local_cli, cid, oid)
chunk_node = self.get_chunk_node(frostfs_local_cli, chunk)[0]
with reporter.step("Stop node with data chunk."):
cluster_state_controller.stop_node_host(chunk_node, "hard")
with reporter.step("Get object"):
node = list(set(self.cluster.cluster_nodes) - {chunk_node})[0]
get_object(default_user.wallet, cid, oid, self.shell, node.storage_node.get_rpc_endpoint())
with reporter.step("Start stopped node, and check replication chunks."):
cluster_state_controller.start_node_host(chunk_node)
assert self.check_replication(4, frostfs_local_cli, cid, oid)
@allure.title("Lose node with chunk parity")
@pytest.mark.failover
def test_lose_node_with_parity_chunk(
self,
frostfs_local_cli: FrostfsCli,
default_user: User,
simple_object_size: ObjectSize,
cluster_state_controller: ClusterStateController,
disable_policer: None,
) -> None:
with reporter.step("Create container."):
cid = self.create_container(frostfs_local_cli, self.cluster.default_rpc_endpoint, "EC 3.1")
with reporter.step("Put object in container."):
test_file = generate_file(simple_object_size.value)
oid = put_object(default_user.wallet, test_file, cid, self.shell, self.cluster.default_rpc_endpoint)
with reporter.step("Check chunk replication on 4 nodes."):
assert self.check_replication(4, frostfs_local_cli, cid, oid)
with reporter.step("Search node with parity chunk"):
chunk = self.get_parity_chunk_object(frostfs_local_cli, cid, oid)
chunk_node = self.get_chunk_node(frostfs_local_cli, chunk)[0]
with reporter.step("Stop node parity chunk."):
cluster_state_controller.stop_node_host(chunk_node, "hard")
with reporter.step("Get object, expect success."):
node = list(set(self.cluster.cluster_nodes) - {chunk_node})[0]
get_object(default_user.wallet, cid, oid, self.shell, node.storage_node.get_rpc_endpoint())
with reporter.step("Start stoped node, and check replication chunks."):
cluster_state_controller.start_node_host(chunk_node)
assert self.check_replication(4, frostfs_local_cli, cid, oid)
@allure.title("Lose nodes with chunk data and parity")
@pytest.mark.failover
def test_lose_nodes_data_chunk_and_parity(
self,
frostfs_local_cli: FrostfsCli,
default_user: User,
simple_object_size: ObjectSize,
cluster_state_controller: ClusterStateController,
disable_policer: None,
) -> None:
with reporter.step("Create container."):
cid = self.create_container(frostfs_local_cli, self.cluster.default_rpc_endpoint, "EC 3.1")
with reporter.step("Put object in container."):
test_file = generate_file(simple_object_size.value)
oid = put_object(default_user.wallet, test_file, cid, self.shell, self.cluster.default_rpc_endpoint)
with reporter.step("Check count chunks, expect 4."):
assert self.check_replication(4, frostfs_local_cli, cid, oid)
with reporter.step("Search node data chunk and node parity chunk"):
data_chunk = self.get_data_chunk_object(frostfs_local_cli, cid, oid)
node_data_chunk = self.get_chunk_node(frostfs_local_cli, data_chunk)[0]
parity_chunk = self.get_parity_chunk_object(frostfs_local_cli, cid, oid)
node_parity_chunk = self.get_chunk_node(frostfs_local_cli, parity_chunk)[0]
with reporter.step("Stop node with data chunk."):
cluster_state_controller.stop_node_host(node_data_chunk, "hard")
with reporter.step("Get object"):
node = list(set(self.cluster.cluster_nodes) - {node_data_chunk, node_parity_chunk})[0]
get_object(default_user.wallet, cid, oid, self.shell, node.storage_node.get_rpc_endpoint())
with reporter.step("Start stopped host and check chunks."):
cluster_state_controller.start_node_host(node_data_chunk)
assert self.check_replication(4, frostfs_local_cli, cid, oid)
with reporter.step("Stop node with parity chunk and one all node."):
cluster_state_controller.stop_node_host(node_data_chunk, "hard")
cluster_state_controller.stop_node_host(node_parity_chunk, "hard")
with reporter.step("Get object, expect error."):
with pytest.raises(RuntimeError):
get_object(default_user.wallet, cid, oid, self.shell, node.storage_node.get_rpc_endpoint())
with reporter.step("Start stopped nodes and check replication chunk."):
cluster_state_controller.start_stopped_hosts()
assert self.check_replication(4, frostfs_local_cli, cid, oid)
@allure.title("Policer work with chunk")
@pytest.mark.failover
def test_work_policer_with_nodes(
self,
simple_object_size: ObjectSize,
frostfs_local_cli: FrostfsCli,
default_user: User,
cluster_state_controller: ClusterStateController,
) -> None:
with reporter.step("Create container."):
cid = self.create_container(frostfs_local_cli, self.cluster.default_rpc_endpoint, "EC 2.1")
with reporter.step("Put object on container."):
test_file = generate_file(simple_object_size.value)
oid = put_object(default_user.wallet, test_file, cid, self.shell, self.cluster.default_rpc_endpoint)
with reporter.step("Check count chunks nodes on 3."):
assert self.check_replication(3, frostfs_local_cli, cid, oid)
with reporter.step("Stop node with chunk."):
data_chunk = self.get_data_chunk_object(frostfs_local_cli, cid, oid)
first_all_chunks = self.get_all_chunks_object(frostfs_local_cli, cid, oid)
node_data_chunk = self.get_chunk_node(frostfs_local_cli, data_chunk)[0]
cluster_state_controller.stop_node_host(node_data_chunk, "hard")
with reporter.step("Check replication chunk with different node."):
alive_endpoint = list(set(self.cluster.cluster_nodes) - {node_data_chunk})[0].storage_node.get_rpc_endpoint()
node = self.search_node_not_chunks(first_all_chunks, frostfs_local_cli, endpoint=alive_endpoint)[0]
second_all_chunks = self.get_all_chunks_object(frostfs_local_cli, cid, oid, node.storage_node.get_rpc_endpoint())
with reporter.step("Check that oid no change."):
oid_chunk_check = [chunk for chunk in second_all_chunks if data_chunk.object_id == chunk.object_id]
assert len(oid_chunk_check) > 0
with reporter.step("Start stopped host, and check delete 4 chunk."):
cluster_state_controller.start_node_host(node_data_chunk)
all_chunks_after_start_node = self.get_all_chunks_object(frostfs_local_cli, cid, oid)
assert len(all_chunks_after_start_node) == 3
@allure.title("EC X.Y combinations (nodes={node_count},policy={ec_policy},size={object_size.name})")
def test_create_container_with_difference_count_nodes(
self,
node_count: int,
ec_policy: str,
object_size: ObjectSize,
default_user: User,
frostfs_local_cli: FrostfsCli,
) -> None:
with reporter.step("Create container."):
expected_chunks = int(ec_policy.split(" ")[1].split(".")[0]) + int(ec_policy.split(" ")[1].split(".")[1])
if "complex" in object_size.name:
expected_chunks *= 4
cid = self.create_container(frostfs_local_cli, self.cluster.default_rpc_endpoint, ec_policy)
with reporter.step("Put object in container."):
test_file = generate_file(object_size.value)
oid = put_object(default_user.wallet, test_file, cid, self.shell, self.cluster.default_rpc_endpoint)
with reporter.step("Check count object chunks."):
chunks = self.get_all_chunks_object(frostfs_local_cli, cid, oid, self.cluster.default_rpc_endpoint)
assert len(chunks) == expected_chunks
with reporter.step("get object and check hash."):
file_with_node = get_object(default_user.wallet, cid, oid, self.shell, self.cluster.default_rpc_endpoint)
assert get_file_hash(test_file) == get_file_hash(file_with_node)
@allure.title("Request PUT with copies_number flag")
def test_put_object_with_copies_number(
self,
frostfs_local_cli: FrostfsCli,
default_user: User,
simple_object_size: ObjectSize,
) -> None:
with reporter.step("Create container."):
cid = self.create_container(frostfs_local_cli, self.cluster.default_rpc_endpoint, "EC 2.1")
with reporter.step("Put object in container with copies number = 1"):
test_file = generate_file(simple_object_size.value)
oid = put_object(default_user.wallet, test_file, cid, self.shell, self.cluster.default_rpc_endpoint, copies_number=1)
with reporter.step("Check that count chunks > 1."):
chunks = self.get_all_chunks_object(frostfs_local_cli, cid, oid, self.cluster.default_rpc_endpoint)
assert len(chunks) > 1
@allure.title("Request PUT and 1 node off")
@pytest.mark.failover
def test_put_object_with_off_cnr_node(
self,
frostfs_local_cli: FrostfsCli,
cluster_state_controller: ClusterStateController,
default_user: User,
simple_object_size: ObjectSize,
) -> None:
with reporter.step("Create container."):
cid = self.create_container(frostfs_local_cli, self.cluster.default_rpc_endpoint, "EC 3.1")
with reporter.step("Stop one node in container nodes"):
cluster_state_controller.stop_node_host(self.cluster.cluster_nodes[1], "hard")
with reporter.step("Put object in container, expect error."):
test_file = generate_file(simple_object_size.value)
with pytest.raises(RuntimeError, match="put single object on client"):
put_object(default_user.wallet, test_file, cid, self.shell, self.cluster.default_rpc_endpoint)
@allure.title("Request DELETE (size={object_size.name})")
@pytest.mark.failover
def test_delete_object_in_ec_cnr(
self,
default_user: User,
frostfs_local_cli: FrostfsCli,
object_size: ObjectSize,
cluster_state_controller: ClusterStateController,
) -> None:
with reporter.step("Create container."):
cid = self.create_container(frostfs_local_cli, self.cluster.default_rpc_endpoint, "EC 2.1")
with reporter.step("Put object in container."):
test_file = generate_file(object_size.value)
oid = put_object(default_user.wallet, test_file, cid, self.shell, self.cluster.default_rpc_endpoint)
with reporter.step("Check object chunks nodes."):
chunks_object = self.get_all_chunks_object(frostfs_local_cli, cid, oid, self.cluster.default_rpc_endpoint)
replication_count = 3 if object_size.name == "simple" else 3 * 4
assert len(chunks_object) == replication_count
with reporter.step("Delete object"):
frostfs_local_cli.object.delete(self.cluster.default_rpc_endpoint, cid, oid)
with reporter.step("Check that delete all chunks."):
for chunk in chunks_object:
with pytest.raises(RuntimeError, match="object already removed"):
frostfs_local_cli.object.head(self.cluster.default_rpc_endpoint, cid, chunk.object_id)
with reporter.step("Put second object."):
oid_second = put_object(default_user.wallet, test_file, cid, self.shell, self.cluster.default_rpc_endpoint)
with reporter.step("Check second object chunks nodes."):
chunks_second_object = self.get_all_chunks_object(frostfs_local_cli, cid, oid_second, self.cluster.default_rpc_endpoint)
assert len(chunks_second_object) == replication_count
with reporter.step("Stop nodes with chunk."):
chunk_node = self.get_chunk_node(frostfs_local_cli, chunks_second_object[0])
cluster_state_controller.stop_node_host(chunk_node[0], "hard")
with reporter.step("Delete second object"):
frostfs_local_cli.object.delete(self.cluster.default_rpc_endpoint, cid, oid_second)
with reporter.step("Check that delete all chunk second object."):
for chunk in chunks_second_object:
with pytest.raises(RuntimeError, match="object already removed"):
frostfs_local_cli.object.head(self.cluster.default_rpc_endpoint, cid, chunk.object_id)
@allure.title("Request LOCK (size={object_size.name})")
@pytest.mark.failover
def test_lock_object_in_ec_cnr(
self,
frostfs_local_cli: FrostfsCli,
object_size: ObjectSize,
default_user: User,
cluster_state_controller: ClusterStateController,
) -> None:
with reporter.step("Create container."):
cid = self.create_container(frostfs_local_cli, self.cluster.default_rpc_endpoint, "EC 2.1")
with reporter.step("Put object in container."):
test_file = generate_file(object_size.value)
oid = put_object(default_user.wallet, test_file, cid, self.shell, self.cluster.default_rpc_endpoint)
with reporter.step("Check object chunks nodes."):
chunks_object = self.get_all_chunks_object(frostfs_local_cli, cid, oid, self.cluster.default_rpc_endpoint)
replication_count = 3 if object_size.name == "simple" else 3 * 4
assert len(chunks_object) == replication_count
with reporter.step("Put LOCK in object."):
epoch = frostfs_local_cli.netmap.epoch(self.cluster.default_rpc_endpoint).stdout.strip()
frostfs_local_cli.object.lock(self.cluster.default_rpc_endpoint, cid, oid, expire_at=int(epoch) + 5).stdout
with reporter.step("Check LOCK in object"):
chunks = frostfs_local_cli.object.head(self.cluster.default_rpc_endpoint, cid, oid, raw=True).stdout.strip().split(" ")
oids_chunks = [chunk.strip() for chunk in chunks if len(chunk) > 35]
for chunk_id in oids_chunks:
with pytest.raises(RuntimeError, match="could not delete objects"):
frostfs_local_cli.object.delete(self.cluster.default_rpc_endpoint, cid, chunk_id)
with reporter.step("Stop chunk node."):
chunk_node = self.get_chunk_node(frostfs_local_cli, chunks_object[0])
cluster_state_controller.stop_node_host(chunk_node[0], "hard")
cluster_state_controller.start_node_host(chunk_node[0])
with reporter.step("Check LOCK in object."):
chunks = self.get_all_chunks_object(frostfs_local_cli, cid, oid, self.cluster.default_rpc_endpoint)
for chunk_id in oids_chunks:
with pytest.raises(RuntimeError, match="could not delete objects"):
frostfs_local_cli.object.delete(self.cluster.default_rpc_endpoint, cid, chunk_id)
@allure.title("Output MaxEC* params in frostfscli (type={type_shards})")
@pytest.mark.parametrize("type_shards", ["Maximum count of data shards", "Maximum count of parity shards"])
def test_maxec_info_with_output_cli(self, frostfs_local_cli: FrostfsCli, type_shards: str) -> None:
with reporter.step("Get and check params"):
net_info = frostfs_local_cli.netmap.netinfo(self.cluster.default_rpc_endpoint).stdout
assert type_shards in net_info
@allure.title("Change MaxEC*Count params")
def test_change_max_data_shards_params(
self,
frostfs_remote_adm: FrostfsAdm,
frostfs_local_cli: FrostfsCli,
restore_network_config: None,
) -> None:
with reporter.step("Get now params MaxECDataCount and MaxECParityCount"):
node_netinfo = NetmapParser.netinfo(frostfs_local_cli.netmap.netinfo(self.cluster.default_rpc_endpoint).stdout)
with reporter.step("Change params"):
frostfs_remote_adm.morph.set_config(set_key_value='"MaxECDataCount=5" "MaxECParityCount=3"')
with reporter.step("Get update params"):
update_net_info = NetmapParser.netinfo(frostfs_local_cli.netmap.netinfo(self.cluster.default_rpc_endpoint).stdout)
with reporter.step("Check old and new params difference"):
assert (
update_net_info.maximum_count_of_data_shards not in node_netinfo.maximum_count_of_data_shards
and update_net_info.maximum_count_of_parity_shards not in node_netinfo.maximum_count_of_parity_shards
)
@allure.title("Check maximum count data and parity shards")
def test_change_over_max_parity_shards_params(
self,
frostfs_remote_adm: FrostfsAdm,
) -> None:
with reporter.step("Change over maximum params shards count."):
with pytest.raises(RuntimeError, match="MaxECDataCount and MaxECParityCount must be <= 256"):
frostfs_remote_adm.morph.set_config(set_key_value='"MaxECDataCount=130" "MaxECParityCount=130"')
@allure.title("Create container with EC policy and SELECT (SELECT={select})")
@pytest.mark.parametrize("select", [2, 4])
def test_create_container_with_select(
self,
select: int,
frostfs_local_cli: FrostfsCli,
) -> None:
with reporter.step("Create container"):
policy = f"EC 1.1 CBF 1 SELECT {select} FROM *"
cid = self.create_container(frostfs_local_cli, self.cluster.default_rpc_endpoint, policy)
with reporter.step("Check container nodes decomposed"):
container_nodes = frostfs_local_cli.container.search_node(self.cluster.default_rpc_endpoint, cid).stdout.strip().split("\n")[1:]
assert len(container_nodes) == select
@allure.title("Create container with EC policy and CBF (CBF={cbf})")
@pytest.mark.parametrize("cbf, expected_nodes", [(1, 2), (2, 4)])
def test_create_container_with_cbf(
self,
cbf: int,
expected_nodes: int,
frostfs_local_cli: FrostfsCli,
) -> None:
with reporter.step("Create container."):
policy = f"EC 1.1 CBF {cbf}"
cid = self.create_container(frostfs_local_cli, self.cluster.default_rpc_endpoint, policy)
with reporter.step("Check expected container nodes."):
container_nodes = frostfs_local_cli.container.search_node(self.cluster.default_rpc_endpoint, cid).stdout.strip().split("\n")[1:]
assert len(container_nodes) == expected_nodes
@allure.title("Create container with EC policy and FILTER")
def test_create_container_with_filter(
self,
default_user: User,
frostfs_local_cli: FrostfsCli,
simple_object_size: ObjectSize,
) -> None:
with reporter.step("Create Container."):
policy = "EC 1.1 IN RUS SELECT 2 FROM RU AS RUS FILTER Country EQ Russia AS RU"
cid = self.create_container(frostfs_local_cli, self.cluster.default_rpc_endpoint, policy)
with reporter.step("Put object in container."):
test_file = generate_file(simple_object_size.value)
oid = put_object(default_user.wallet, test_file, cid, self.shell, self.cluster.default_rpc_endpoint)
with reporter.step("Check object is decomposed exclusively on Russian nodes"):
data_chunk = self.get_data_chunk_object(frostfs_local_cli, cid, oid, self.cluster.default_rpc_endpoint)
parity_chunk = self.get_parity_chunk_object(frostfs_local_cli, cid, oid, self.cluster.default_rpc_endpoint)
node_data_chunk = self.get_chunk_node(frostfs_local_cli, data_chunk)
node_parity_chunk = self.get_chunk_node(frostfs_local_cli, parity_chunk)
for node in [node_data_chunk[1], node_parity_chunk[1]]:
assert "Russia" in node.country

View file

@ -1,7 +1,7 @@
import os import os
import shutil import shutil
import time import time
from datetime import datetime from datetime import datetime, timezone
import allure import allure
import pytest import pytest
@ -20,23 +20,61 @@ def pytest_generate_tests(metafunc: pytest.Metafunc):
) )
@pytest.mark.logs_after_session
class TestLogs: class TestLogs:
@allure.title("Check logs from frostfs-testcases with marks '{request.config.option.markexpr}'")
@pytest.mark.order(1000) @pytest.mark.order(1000)
def test_logs_after_session( @allure.title("Check logs from frostfs-testcases with marks '{request.config.option.markexpr}' - search errors")
def test_logs_search_errors(self, temp_directory: str, cluster: Cluster, session_start_time: datetime, request: pytest.FixtureRequest):
end_time = datetime.now(timezone.utc)
logs_dir = os.path.join(temp_directory, "logs")
if not os.path.exists(logs_dir):
os.makedirs(logs_dir)
issues_regex = r"\bpanic\b|\boom\b|too many|insufficient funds|insufficient amount of gas|cannot assign requested address|\bunable to process\b"
exclude_filter = r"too many requests"
log_level_priority = "3" # will include 0-3 priority logs (0: emergency 1: alerts 2: critical 3: errors)
time.sleep(2)
futures = parallel(
self._collect_logs_on_host,
cluster.hosts,
logs_dir,
issues_regex,
session_start_time,
end_time,
exclude_filter,
priority=log_level_priority,
)
hosts_with_problems = [future.result() for future in futures if not future.exception() and future.result() is not None]
if hosts_with_problems:
self._attach_logs(logs_dir)
assert not hosts_with_problems, f"The following hosts contains critical errors in system logs: {', '.join(hosts_with_problems)}"
@pytest.mark.order(1001)
@allure.title("Check logs from frostfs-testcases with marks '{request.config.option.markexpr}' - identify sensitive data")
def test_logs_identify_sensitive_data(
self, temp_directory: str, cluster: Cluster, session_start_time: datetime, request: pytest.FixtureRequest self, temp_directory: str, cluster: Cluster, session_start_time: datetime, request: pytest.FixtureRequest
): ):
""" end_time = datetime.now(timezone.utc)
This test automatically added to any test run to check logs from cluster for critical errors.
"""
end_time = datetime.utcnow()
logs_dir = os.path.join(temp_directory, "logs") logs_dir = os.path.join(temp_directory, "logs")
if not os.path.exists(logs_dir):
os.makedirs(logs_dir) os.makedirs(logs_dir)
# Using \b here because 'oom' and 'panic' can sometimes be found in OID or CID
issues_regex = r"\bpanic\b|\boom\b|too many|insufficient funds|insufficient amount of gas|wallet passwd|secret \bkey\b|access \bkey\b|cannot assign requested address" _regex = {
exclude_filter = r"too many requests" "authorization_basic": r"basic [a-zA-Z0-9=:_\+\/-]{16,100}",
"authorization_bearer": r"bearer [a-zA-Z0-9_\-\.=:_\+\/]{16,100}",
"access_token": r"\"access_token\":\"[0-9a-z]{16}\$[0-9a-f]{32}\"",
"api_token": r"\"api_token\":\"(xox[a-zA-Z]-[a-zA-Z0-9-]+)\"",
"yadro_access_token": r"[a-zA-Z0-9_-]*:[a-zA-Z0-9_\-]+@yadro\.com*",
"SSH_privKey": r"([-]+BEGIN [^\s]+ PRIVATE KEY[-]+[\s]*[^-]*[-]+END [^\s]+ PRIVATE KEY[-]+)",
"possible_Creds": r"(?i)(" r"password\s*[`=:]+\s*[^\s]+|" r"password is\s*[`=:]+\s*[^\s]+|" r"passwd\s*[`=:]+\s*[^\s]+)",
}
issues_regex = "|".join(_regex.values())
exclude_filter = r"COMMAND=\|--\sBoot\s"
time.sleep(2) time.sleep(2)
@ -50,21 +88,24 @@ class TestLogs:
exclude_filter, exclude_filter,
) )
hosts_with_problems = [ hosts_with_problems = [future.result() for future in futures if not future.exception() and future.result() is not None]
future.result() for future in futures if not future.exception() and future.result() is not None
]
if hosts_with_problems: if hosts_with_problems:
self._attach_logs(logs_dir) self._attach_logs(logs_dir)
assert ( assert not hosts_with_problems, f"The following hosts contains sensitive data in system logs: {', '.join(hosts_with_problems)}"
not hosts_with_problems
), f"The following hosts contains contain critical errors in system logs: {', '.join(hosts_with_problems)}"
def _collect_logs_on_host( def _collect_logs_on_host(
self, host: Host, logs_dir: str, regex: str, since: datetime, until: datetime, exclude_filter: str self,
host: Host,
logs_dir: str,
regex: str,
since: datetime,
until: datetime,
exclude_filter: str,
priority: str = None,
): ):
with reporter.step(f"Get logs from {host.config.address}"): with reporter.step(f"Get logs from {host.config.address}"):
logs = host.get_filtered_logs(filter_regex=regex, since=since, until=until, exclude_filter=exclude_filter) logs = host.get_filtered_logs(filter_regex=regex, since=since, until=until, exclude_filter=exclude_filter, priority=priority)
if not logs: if not logs:
return None return None