[#265] fix tests metrics object and logs

This commit is contained in:
Ilyas Niyazov 2024-07-02 16:23:02 +03:00
parent 5d3d22f685
commit 741102ec17
3 changed files with 39 additions and 77 deletions

View file

@ -26,17 +26,17 @@ class TestLogsMetrics(ClusterTestBase):
cluster_state_controller.manager(ConfigStateManager).revert_all()
@wait_for_success(interval=10)
def check_metrics_in_node(self, cluster_node: ClusterNode, restart_time: datetime, **metrics_greps):
counter_exp = self.get_count_logs_by_level(cluster_node, metrics_greps.get("level"), restart_time)
counter_act = get_metrics_value(cluster_node, **metrics_greps)
assert counter_act == counter_exp, f"Expected: {counter_exp}, Actual: {counter_act} in node: {cluster_node}"
def check_metrics_in_node(self, cluster_node: ClusterNode, restart_time: datetime, log_priority: str = None, **metrics_greps):
counter_logs = self.get_count_logs_by_level(cluster_node, metrics_greps.get("level"), restart_time, log_priority)
counter_metrics = get_metrics_value(cluster_node, **metrics_greps)
assert counter_logs == counter_metrics, f"counter_logs: {counter_logs}, counter_metrics: {counter_metrics} in node: {cluster_node}"
@staticmethod
def get_count_logs_by_level(cluster_node: ClusterNode, log_level: str, after_time: datetime):
def get_count_logs_by_level(cluster_node: ClusterNode, log_level: str, after_time: datetime, log_priority: str):
count_logs = 0
try:
logs = cluster_node.host.get_filtered_logs(log_level, unit="frostfs-storage", since=after_time)
result = re.findall(rf"Z\s+{log_level}\s+", logs)
logs = cluster_node.host.get_filtered_logs(log_level, unit="frostfs-storage", since=after_time, priority=log_priority)
result = re.findall(rf"\s+{log_level}\s+", logs)
count_logs += len(result)
except RuntimeError as e:
...
@ -49,7 +49,14 @@ class TestLogsMetrics(ClusterTestBase):
node = random.choice(cluster.cluster_nodes)
with reporter.step(f"Check metrics count logs with level 'info'"):
self.check_metrics_in_node(node, restart_time, command="frostfs_node_logger_entry_count", level="info")
self.check_metrics_in_node(
node,
restart_time,
log_priority="6..6",
command="frostfs_node_logger_entry_count",
level="info",
dropped="false",
)
with reporter.step(f"Check metrics count logs with level 'error'"):
self.check_metrics_in_node(node, restart_time, command="frostfs_node_logger_entry_count", level="error")
self.check_metrics_in_node(node, restart_time, command="frostfs_node_logger_entry_count", level="error", dropped="false")

View file

@ -18,9 +18,7 @@ from frostfs_testlib.utils.file_utils import generate_file
class TestObjectMetrics(ClusterTestBase):
@allure.title("Object metrics of removed container (obj_size={object_size})")
def test_object_metrics_removed_container(
self, object_size: ObjectSize, default_wallet: WalletInfo, cluster: Cluster
):
def test_object_metrics_removed_container(self, object_size: ObjectSize, default_wallet: WalletInfo, cluster: Cluster):
file_path = generate_file(object_size.value)
placement_policy = "REP 2 IN X CBF 2 SELECT 2 FROM * AS X"
copies = 2
@ -33,11 +31,7 @@ class TestObjectMetrics(ClusterTestBase):
with reporter.step("Check metric appears in node where the object is located"):
object_storage_nodes = get_nodes_with_object(cid, oid, self.shell, cluster.storage_nodes)
object_nodes = [
cluster_node
for cluster_node in cluster.cluster_nodes
if cluster_node.storage_node in object_storage_nodes
]
object_nodes = [cluster_node for cluster_node in cluster.cluster_nodes if cluster_node.storage_node in object_storage_nodes]
check_metrics_counter(
object_nodes,
@ -54,23 +48,15 @@ class TestObjectMetrics(ClusterTestBase):
self.tick_epochs(epochs_to_tick=2, wait_block=2)
with reporter.step("Check metrics of removed containers doesn't appear in the storage node"):
check_metrics_counter(
object_nodes, counter_exp=0, command="frostfs_node_engine_container_objects_total", cid=cid, type="user"
)
check_metrics_counter(
object_nodes, counter_exp=0, command="frostfs_node_engine_container_size_byte", cid=cid
)
check_metrics_counter(object_nodes, counter_exp=0, command="frostfs_node_engine_container_objects_total", cid=cid, type="user")
check_metrics_counter(object_nodes, counter_exp=0, command="frostfs_node_engine_container_size_byte", cid=cid)
for node in object_nodes:
all_metrics = node.metrics.storage.get_all_metrics()
assert (
cid not in all_metrics.stdout
), "metrics of removed containers shouldn't appear in the storage node"
all_metrics = node.metrics.storage.get_metrics_search_by_greps(command="frostfs_node_engine_container_size_byte")
assert cid not in all_metrics.stdout, "metrics of removed containers shouldn't appear in the storage node"
@allure.title("Object metrics, locked object (obj_size={object_size}, policy={placement_policy})")
@pytest.mark.parametrize(
"placement_policy", ["REP 1 IN X CBF 1 SELECT 1 FROM * AS X", "REP 2 IN X CBF 2 SELECT 2 FROM * AS X"]
)
@pytest.mark.parametrize("placement_policy", ["REP 1 IN X CBF 1 SELECT 1 FROM * AS X", "REP 2 IN X CBF 2 SELECT 2 FROM * AS X"])
def test_object_metrics_blocked_object(
self, object_size: ObjectSize, default_wallet: WalletInfo, cluster: Cluster, placement_policy: str
):
@ -92,14 +78,10 @@ class TestObjectMetrics(ClusterTestBase):
with reporter.step("Get current metrics for metric_type=user"):
objects_metric_counter = 0
for node in container_nodes:
objects_metric_counter += get_metrics_value(
node, command="frostfs_node_engine_objects_total", type="user"
)
objects_metric_counter += get_metrics_value(node, command="frostfs_node_engine_objects_total", type="user")
with reporter.step("Put object to container node"):
oid = put_object(
default_wallet, file_path, cid, self.shell, container_nodes[0].storage_node.get_rpc_endpoint()
)
oid = put_object(default_wallet, file_path, cid, self.shell, container_nodes[0].storage_node.get_rpc_endpoint())
with reporter.step(f"Check metric user 'the counter should increase by {metric_step}'"):
objects_metric_counter += metric_step
@ -137,9 +119,7 @@ class TestObjectMetrics(ClusterTestBase):
)
with reporter.step("Put object and lock it to next epoch"):
oid = put_object(
default_wallet, file_path, cid, self.shell, container_nodes[0].storage_node.get_rpc_endpoint()
)
oid = put_object(default_wallet, file_path, cid, self.shell, container_nodes[0].storage_node.get_rpc_endpoint())
current_epoch = self.get_epoch()
lock_object(
default_wallet,
@ -266,9 +246,7 @@ class TestObjectMetrics(ClusterTestBase):
with reporter.step("Get current metrics for each nodes"):
objects_metric_counter: dict[ClusterNode:int] = {}
for node in self.cluster.cluster_nodes:
objects_metric_counter[node] = get_metrics_value(
node, command="frostfs_node_engine_objects_total", type="user"
)
objects_metric_counter[node] = get_metrics_value(node, command="frostfs_node_engine_objects_total", type="user")
with reporter.step("Put object"):
oid = put_object(default_wallet, file_path, cid, self.shell, self.cluster.default_rpc_endpoint)
@ -276,16 +254,12 @@ class TestObjectMetrics(ClusterTestBase):
with reporter.step("Get object nodes"):
object_storage_nodes = get_nodes_with_object(cid, oid, self.shell, self.cluster.storage_nodes)
object_nodes = [
cluster_node
for cluster_node in self.cluster.cluster_nodes
if cluster_node.storage_node in object_storage_nodes
cluster_node for cluster_node in self.cluster.cluster_nodes if cluster_node.storage_node in object_storage_nodes
]
with reporter.step(f"Check metrics in object nodes 'the counter should increase by {copies}'"):
counter_exp = sum(objects_metric_counter[node] for node in object_nodes) + copies
check_metrics_counter(
object_nodes, counter_exp=counter_exp, command="frostfs_node_engine_objects_total", type="user"
)
check_metrics_counter(object_nodes, counter_exp=counter_exp, command="frostfs_node_engine_objects_total", type="user")
check_metrics_counter(
object_nodes,
counter_exp=copies,
@ -304,9 +278,7 @@ class TestObjectMetrics(ClusterTestBase):
with reporter.step(f"Check metric in alive nodes 'the counter should increase'"):
counter_exp = sum(objects_metric_counter[node] for node in alive_nodes)
check_metrics_counter(
alive_nodes, ">=", counter_exp, command="frostfs_node_engine_objects_total", type="user"
)
check_metrics_counter(alive_nodes, ">=", counter_exp, command="frostfs_node_engine_objects_total", type="user")
with reporter.step("Start node"):
cluster_state_controller.start_node_host(node_to_stop)

View file

@ -24,9 +24,7 @@ def pytest_generate_tests(metafunc: pytest.Metafunc):
class TestLogs:
@pytest.mark.order(1000)
@allure.title("Check logs from frostfs-testcases with marks '{request.config.option.markexpr}' - search errors")
def test_logs_search_errors(
self, temp_directory: str, cluster: Cluster, session_start_time: datetime, request: pytest.FixtureRequest
):
def test_logs_search_errors(self, temp_directory: str, cluster: Cluster, session_start_time: datetime, request: pytest.FixtureRequest):
end_time = datetime.now(timezone.utc)
logs_dir = os.path.join(temp_directory, "logs")
if not os.path.exists(logs_dir):
@ -49,20 +47,14 @@ class TestLogs:
priority=log_level_priority,
)
hosts_with_problems = [
future.result() for future in futures if not future.exception() and future.result() is not None
]
hosts_with_problems = [future.result() for future in futures if not future.exception() and future.result() is not None]
if hosts_with_problems:
self._attach_logs(logs_dir)
assert (
not hosts_with_problems
), f"The following hosts contains critical errors in system logs: {', '.join(hosts_with_problems)}"
assert not hosts_with_problems, f"The following hosts contains critical errors in system logs: {', '.join(hosts_with_problems)}"
@pytest.mark.order(1001)
@allure.title(
"Check logs from frostfs-testcases with marks '{request.config.option.markexpr}' - identify sensitive data"
)
@allure.title("Check logs from frostfs-testcases with marks '{request.config.option.markexpr}' - identify sensitive data")
def test_logs_identify_sensitive_data(
self, temp_directory: str, cluster: Cluster, session_start_time: datetime, request: pytest.FixtureRequest
):
@ -78,14 +70,11 @@ class TestLogs:
"api_token": r"\"api_token\":\"(xox[a-zA-Z]-[a-zA-Z0-9-]+)\"",
"yadro_access_token": r"[a-zA-Z0-9_-]*:[a-zA-Z0-9_\-]+@yadro\.com*",
"SSH_privKey": r"([-]+BEGIN [^\s]+ PRIVATE KEY[-]+[\s]*[^-]*[-]+END [^\s]+ PRIVATE KEY[-]+)",
"possible_Creds": r"(?i)("
r"password\s*[`=:]+\s*[^\s]+|"
r"password is\s*[`=:]*\s*[^\s]+|"
r"passwd\s*[`=:]+\s*[^\s]+)",
"possible_Creds": r"(?i)(" r"password\s*[`=:]+\s*[^\s]+|" r"password is\s*[`=:]+\s*[^\s]+|" r"passwd\s*[`=:]+\s*[^\s]+)",
}
issues_regex = "|".join(_regex.values())
exclude_filter = "COMMAND="
exclude_filter = r"COMMAND=\|--\sBoot\s"
time.sleep(2)
@ -99,15 +88,11 @@ class TestLogs:
exclude_filter,
)
hosts_with_problems = [
future.result() for future in futures if not future.exception() and future.result() is not None
]
hosts_with_problems = [future.result() for future in futures if not future.exception() and future.result() is not None]
if hosts_with_problems:
self._attach_logs(logs_dir)
assert (
not hosts_with_problems
), f"The following hosts contains sensitive data in system logs: {', '.join(hosts_with_problems)}"
assert not hosts_with_problems, f"The following hosts contains sensitive data in system logs: {', '.join(hosts_with_problems)}"
def _collect_logs_on_host(
self,
@ -120,9 +105,7 @@ class TestLogs:
priority: str = None,
):
with reporter.step(f"Get logs from {host.config.address}"):
logs = host.get_filtered_logs(
filter_regex=regex, since=since, until=until, exclude_filter=exclude_filter, priority=priority
)
logs = host.get_filtered_logs(filter_regex=regex, since=since, until=until, exclude_filter=exclude_filter, priority=priority)
if not logs:
return None