[#265] fix tests metrics object and logs

This commit is contained in:
Ilyas Niyazov 2024-07-02 16:23:02 +03:00
parent 5d3d22f685
commit 741102ec17
3 changed files with 39 additions and 77 deletions

View file

@ -26,17 +26,17 @@ class TestLogsMetrics(ClusterTestBase):
cluster_state_controller.manager(ConfigStateManager).revert_all() cluster_state_controller.manager(ConfigStateManager).revert_all()
@wait_for_success(interval=10) @wait_for_success(interval=10)
def check_metrics_in_node(self, cluster_node: ClusterNode, restart_time: datetime, **metrics_greps): def check_metrics_in_node(self, cluster_node: ClusterNode, restart_time: datetime, log_priority: str = None, **metrics_greps):
counter_exp = self.get_count_logs_by_level(cluster_node, metrics_greps.get("level"), restart_time) counter_logs = self.get_count_logs_by_level(cluster_node, metrics_greps.get("level"), restart_time, log_priority)
counter_act = get_metrics_value(cluster_node, **metrics_greps) counter_metrics = get_metrics_value(cluster_node, **metrics_greps)
assert counter_act == counter_exp, f"Expected: {counter_exp}, Actual: {counter_act} in node: {cluster_node}" assert counter_logs == counter_metrics, f"counter_logs: {counter_logs}, counter_metrics: {counter_metrics} in node: {cluster_node}"
@staticmethod @staticmethod
def get_count_logs_by_level(cluster_node: ClusterNode, log_level: str, after_time: datetime): def get_count_logs_by_level(cluster_node: ClusterNode, log_level: str, after_time: datetime, log_priority: str):
count_logs = 0 count_logs = 0
try: try:
logs = cluster_node.host.get_filtered_logs(log_level, unit="frostfs-storage", since=after_time) logs = cluster_node.host.get_filtered_logs(log_level, unit="frostfs-storage", since=after_time, priority=log_priority)
result = re.findall(rf"Z\s+{log_level}\s+", logs) result = re.findall(rf"\s+{log_level}\s+", logs)
count_logs += len(result) count_logs += len(result)
except RuntimeError as e: except RuntimeError as e:
... ...
@ -49,7 +49,14 @@ class TestLogsMetrics(ClusterTestBase):
node = random.choice(cluster.cluster_nodes) node = random.choice(cluster.cluster_nodes)
with reporter.step(f"Check metrics count logs with level 'info'"): with reporter.step(f"Check metrics count logs with level 'info'"):
self.check_metrics_in_node(node, restart_time, command="frostfs_node_logger_entry_count", level="info") self.check_metrics_in_node(
node,
restart_time,
log_priority="6..6",
command="frostfs_node_logger_entry_count",
level="info",
dropped="false",
)
with reporter.step(f"Check metrics count logs with level 'error'"): with reporter.step(f"Check metrics count logs with level 'error'"):
self.check_metrics_in_node(node, restart_time, command="frostfs_node_logger_entry_count", level="error") self.check_metrics_in_node(node, restart_time, command="frostfs_node_logger_entry_count", level="error", dropped="false")

View file

@ -18,9 +18,7 @@ from frostfs_testlib.utils.file_utils import generate_file
class TestObjectMetrics(ClusterTestBase): class TestObjectMetrics(ClusterTestBase):
@allure.title("Object metrics of removed container (obj_size={object_size})") @allure.title("Object metrics of removed container (obj_size={object_size})")
def test_object_metrics_removed_container( def test_object_metrics_removed_container(self, object_size: ObjectSize, default_wallet: WalletInfo, cluster: Cluster):
self, object_size: ObjectSize, default_wallet: WalletInfo, cluster: Cluster
):
file_path = generate_file(object_size.value) file_path = generate_file(object_size.value)
placement_policy = "REP 2 IN X CBF 2 SELECT 2 FROM * AS X" placement_policy = "REP 2 IN X CBF 2 SELECT 2 FROM * AS X"
copies = 2 copies = 2
@ -33,11 +31,7 @@ class TestObjectMetrics(ClusterTestBase):
with reporter.step("Check metric appears in node where the object is located"): with reporter.step("Check metric appears in node where the object is located"):
object_storage_nodes = get_nodes_with_object(cid, oid, self.shell, cluster.storage_nodes) object_storage_nodes = get_nodes_with_object(cid, oid, self.shell, cluster.storage_nodes)
object_nodes = [ object_nodes = [cluster_node for cluster_node in cluster.cluster_nodes if cluster_node.storage_node in object_storage_nodes]
cluster_node
for cluster_node in cluster.cluster_nodes
if cluster_node.storage_node in object_storage_nodes
]
check_metrics_counter( check_metrics_counter(
object_nodes, object_nodes,
@ -54,23 +48,15 @@ class TestObjectMetrics(ClusterTestBase):
self.tick_epochs(epochs_to_tick=2, wait_block=2) self.tick_epochs(epochs_to_tick=2, wait_block=2)
with reporter.step("Check metrics of removed containers doesn't appear in the storage node"): with reporter.step("Check metrics of removed containers doesn't appear in the storage node"):
check_metrics_counter( check_metrics_counter(object_nodes, counter_exp=0, command="frostfs_node_engine_container_objects_total", cid=cid, type="user")
object_nodes, counter_exp=0, command="frostfs_node_engine_container_objects_total", cid=cid, type="user" check_metrics_counter(object_nodes, counter_exp=0, command="frostfs_node_engine_container_size_byte", cid=cid)
)
check_metrics_counter(
object_nodes, counter_exp=0, command="frostfs_node_engine_container_size_byte", cid=cid
)
for node in object_nodes: for node in object_nodes:
all_metrics = node.metrics.storage.get_all_metrics() all_metrics = node.metrics.storage.get_metrics_search_by_greps(command="frostfs_node_engine_container_size_byte")
assert ( assert cid not in all_metrics.stdout, "metrics of removed containers shouldn't appear in the storage node"
cid not in all_metrics.stdout
), "metrics of removed containers shouldn't appear in the storage node"
@allure.title("Object metrics, locked object (obj_size={object_size}, policy={placement_policy})") @allure.title("Object metrics, locked object (obj_size={object_size}, policy={placement_policy})")
@pytest.mark.parametrize( @pytest.mark.parametrize("placement_policy", ["REP 1 IN X CBF 1 SELECT 1 FROM * AS X", "REP 2 IN X CBF 2 SELECT 2 FROM * AS X"])
"placement_policy", ["REP 1 IN X CBF 1 SELECT 1 FROM * AS X", "REP 2 IN X CBF 2 SELECT 2 FROM * AS X"]
)
def test_object_metrics_blocked_object( def test_object_metrics_blocked_object(
self, object_size: ObjectSize, default_wallet: WalletInfo, cluster: Cluster, placement_policy: str self, object_size: ObjectSize, default_wallet: WalletInfo, cluster: Cluster, placement_policy: str
): ):
@ -92,14 +78,10 @@ class TestObjectMetrics(ClusterTestBase):
with reporter.step("Get current metrics for metric_type=user"): with reporter.step("Get current metrics for metric_type=user"):
objects_metric_counter = 0 objects_metric_counter = 0
for node in container_nodes: for node in container_nodes:
objects_metric_counter += get_metrics_value( objects_metric_counter += get_metrics_value(node, command="frostfs_node_engine_objects_total", type="user")
node, command="frostfs_node_engine_objects_total", type="user"
)
with reporter.step("Put object to container node"): with reporter.step("Put object to container node"):
oid = put_object( oid = put_object(default_wallet, file_path, cid, self.shell, container_nodes[0].storage_node.get_rpc_endpoint())
default_wallet, file_path, cid, self.shell, container_nodes[0].storage_node.get_rpc_endpoint()
)
with reporter.step(f"Check metric user 'the counter should increase by {metric_step}'"): with reporter.step(f"Check metric user 'the counter should increase by {metric_step}'"):
objects_metric_counter += metric_step objects_metric_counter += metric_step
@ -137,9 +119,7 @@ class TestObjectMetrics(ClusterTestBase):
) )
with reporter.step("Put object and lock it to next epoch"): with reporter.step("Put object and lock it to next epoch"):
oid = put_object( oid = put_object(default_wallet, file_path, cid, self.shell, container_nodes[0].storage_node.get_rpc_endpoint())
default_wallet, file_path, cid, self.shell, container_nodes[0].storage_node.get_rpc_endpoint()
)
current_epoch = self.get_epoch() current_epoch = self.get_epoch()
lock_object( lock_object(
default_wallet, default_wallet,
@ -266,9 +246,7 @@ class TestObjectMetrics(ClusterTestBase):
with reporter.step("Get current metrics for each nodes"): with reporter.step("Get current metrics for each nodes"):
objects_metric_counter: dict[ClusterNode:int] = {} objects_metric_counter: dict[ClusterNode:int] = {}
for node in self.cluster.cluster_nodes: for node in self.cluster.cluster_nodes:
objects_metric_counter[node] = get_metrics_value( objects_metric_counter[node] = get_metrics_value(node, command="frostfs_node_engine_objects_total", type="user")
node, command="frostfs_node_engine_objects_total", type="user"
)
with reporter.step("Put object"): with reporter.step("Put object"):
oid = put_object(default_wallet, file_path, cid, self.shell, self.cluster.default_rpc_endpoint) oid = put_object(default_wallet, file_path, cid, self.shell, self.cluster.default_rpc_endpoint)
@ -276,16 +254,12 @@ class TestObjectMetrics(ClusterTestBase):
with reporter.step("Get object nodes"): with reporter.step("Get object nodes"):
object_storage_nodes = get_nodes_with_object(cid, oid, self.shell, self.cluster.storage_nodes) object_storage_nodes = get_nodes_with_object(cid, oid, self.shell, self.cluster.storage_nodes)
object_nodes = [ object_nodes = [
cluster_node cluster_node for cluster_node in self.cluster.cluster_nodes if cluster_node.storage_node in object_storage_nodes
for cluster_node in self.cluster.cluster_nodes
if cluster_node.storage_node in object_storage_nodes
] ]
with reporter.step(f"Check metrics in object nodes 'the counter should increase by {copies}'"): with reporter.step(f"Check metrics in object nodes 'the counter should increase by {copies}'"):
counter_exp = sum(objects_metric_counter[node] for node in object_nodes) + copies counter_exp = sum(objects_metric_counter[node] for node in object_nodes) + copies
check_metrics_counter( check_metrics_counter(object_nodes, counter_exp=counter_exp, command="frostfs_node_engine_objects_total", type="user")
object_nodes, counter_exp=counter_exp, command="frostfs_node_engine_objects_total", type="user"
)
check_metrics_counter( check_metrics_counter(
object_nodes, object_nodes,
counter_exp=copies, counter_exp=copies,
@ -304,9 +278,7 @@ class TestObjectMetrics(ClusterTestBase):
with reporter.step(f"Check metric in alive nodes 'the counter should increase'"): with reporter.step(f"Check metric in alive nodes 'the counter should increase'"):
counter_exp = sum(objects_metric_counter[node] for node in alive_nodes) counter_exp = sum(objects_metric_counter[node] for node in alive_nodes)
check_metrics_counter( check_metrics_counter(alive_nodes, ">=", counter_exp, command="frostfs_node_engine_objects_total", type="user")
alive_nodes, ">=", counter_exp, command="frostfs_node_engine_objects_total", type="user"
)
with reporter.step("Start node"): with reporter.step("Start node"):
cluster_state_controller.start_node_host(node_to_stop) cluster_state_controller.start_node_host(node_to_stop)

View file

@ -24,9 +24,7 @@ def pytest_generate_tests(metafunc: pytest.Metafunc):
class TestLogs: class TestLogs:
@pytest.mark.order(1000) @pytest.mark.order(1000)
@allure.title("Check logs from frostfs-testcases with marks '{request.config.option.markexpr}' - search errors") @allure.title("Check logs from frostfs-testcases with marks '{request.config.option.markexpr}' - search errors")
def test_logs_search_errors( def test_logs_search_errors(self, temp_directory: str, cluster: Cluster, session_start_time: datetime, request: pytest.FixtureRequest):
self, temp_directory: str, cluster: Cluster, session_start_time: datetime, request: pytest.FixtureRequest
):
end_time = datetime.now(timezone.utc) end_time = datetime.now(timezone.utc)
logs_dir = os.path.join(temp_directory, "logs") logs_dir = os.path.join(temp_directory, "logs")
if not os.path.exists(logs_dir): if not os.path.exists(logs_dir):
@ -49,20 +47,14 @@ class TestLogs:
priority=log_level_priority, priority=log_level_priority,
) )
hosts_with_problems = [ hosts_with_problems = [future.result() for future in futures if not future.exception() and future.result() is not None]
future.result() for future in futures if not future.exception() and future.result() is not None
]
if hosts_with_problems: if hosts_with_problems:
self._attach_logs(logs_dir) self._attach_logs(logs_dir)
assert ( assert not hosts_with_problems, f"The following hosts contains critical errors in system logs: {', '.join(hosts_with_problems)}"
not hosts_with_problems
), f"The following hosts contains critical errors in system logs: {', '.join(hosts_with_problems)}"
@pytest.mark.order(1001) @pytest.mark.order(1001)
@allure.title( @allure.title("Check logs from frostfs-testcases with marks '{request.config.option.markexpr}' - identify sensitive data")
"Check logs from frostfs-testcases with marks '{request.config.option.markexpr}' - identify sensitive data"
)
def test_logs_identify_sensitive_data( def test_logs_identify_sensitive_data(
self, temp_directory: str, cluster: Cluster, session_start_time: datetime, request: pytest.FixtureRequest self, temp_directory: str, cluster: Cluster, session_start_time: datetime, request: pytest.FixtureRequest
): ):
@ -78,14 +70,11 @@ class TestLogs:
"api_token": r"\"api_token\":\"(xox[a-zA-Z]-[a-zA-Z0-9-]+)\"", "api_token": r"\"api_token\":\"(xox[a-zA-Z]-[a-zA-Z0-9-]+)\"",
"yadro_access_token": r"[a-zA-Z0-9_-]*:[a-zA-Z0-9_\-]+@yadro\.com*", "yadro_access_token": r"[a-zA-Z0-9_-]*:[a-zA-Z0-9_\-]+@yadro\.com*",
"SSH_privKey": r"([-]+BEGIN [^\s]+ PRIVATE KEY[-]+[\s]*[^-]*[-]+END [^\s]+ PRIVATE KEY[-]+)", "SSH_privKey": r"([-]+BEGIN [^\s]+ PRIVATE KEY[-]+[\s]*[^-]*[-]+END [^\s]+ PRIVATE KEY[-]+)",
"possible_Creds": r"(?i)(" "possible_Creds": r"(?i)(" r"password\s*[`=:]+\s*[^\s]+|" r"password is\s*[`=:]+\s*[^\s]+|" r"passwd\s*[`=:]+\s*[^\s]+)",
r"password\s*[`=:]+\s*[^\s]+|"
r"password is\s*[`=:]*\s*[^\s]+|"
r"passwd\s*[`=:]+\s*[^\s]+)",
} }
issues_regex = "|".join(_regex.values()) issues_regex = "|".join(_regex.values())
exclude_filter = "COMMAND=" exclude_filter = r"COMMAND=\|--\sBoot\s"
time.sleep(2) time.sleep(2)
@ -99,15 +88,11 @@ class TestLogs:
exclude_filter, exclude_filter,
) )
hosts_with_problems = [ hosts_with_problems = [future.result() for future in futures if not future.exception() and future.result() is not None]
future.result() for future in futures if not future.exception() and future.result() is not None
]
if hosts_with_problems: if hosts_with_problems:
self._attach_logs(logs_dir) self._attach_logs(logs_dir)
assert ( assert not hosts_with_problems, f"The following hosts contains sensitive data in system logs: {', '.join(hosts_with_problems)}"
not hosts_with_problems
), f"The following hosts contains sensitive data in system logs: {', '.join(hosts_with_problems)}"
def _collect_logs_on_host( def _collect_logs_on_host(
self, self,
@ -120,9 +105,7 @@ class TestLogs:
priority: str = None, priority: str = None,
): ):
with reporter.step(f"Get logs from {host.config.address}"): with reporter.step(f"Get logs from {host.config.address}"):
logs = host.get_filtered_logs( logs = host.get_filtered_logs(filter_regex=regex, since=since, until=until, exclude_filter=exclude_filter, priority=priority)
filter_regex=regex, since=since, until=until, exclude_filter=exclude_filter, priority=priority
)
if not logs: if not logs:
return None return None