Compare commits: master...fix-metric (2 commits)

Commits: 3d49c2c6fa, 96d7488f6d

2 changed files with 54 additions and 29 deletions
@@ -392,12 +392,12 @@ def session_start_time(configure_testlib):
     return start_time


-@allure.title("[Autouse/Session] After deploy healthcheck")
-@pytest.fixture(scope="session", autouse=True)
-@run_optionally(optionals.OPTIONAL_AUTOUSE_FIXTURES_ENABLED)
-def after_deploy_healthcheck(cluster: Cluster):
-    with reporter.step("Wait for cluster readiness after deploy"):
-        parallel(readiness_on_node, cluster.cluster_nodes)
+# @allure.title("[Autouse/Session] After deploy healthcheck")
+# @pytest.fixture(scope="session", autouse=True)
+# @run_optionally(optionals.OPTIONAL_AUTOUSE_FIXTURES_ENABLED)
+# def after_deploy_healthcheck(cluster: Cluster):
+#     with reporter.step("Wait for cluster readiness after deploy"):
+#         parallel(readiness_on_node, cluster.cluster_nodes)


 @wait_for_success(60 * SERVICE_ACTIVE_TIME * 3, 60, title="Wait for {cluster_node} readiness")
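The context line kept at the end of this hunk relies on the testlib retry decorator. As a rough sketch only (the real `wait_for_success` lives in frostfs_testlib and its implementation is not part of this diff; the reading of the arguments as "total timeout in seconds" and "polling interval" is an assumption), a decorator of this shape keeps re-running the wrapped readiness check every `interval` seconds until it succeeds or the overall timeout, here 60 * SERVICE_ACTIVE_TIME * 3 seconds, runs out:

import time
from functools import wraps


def wait_for_success(max_wait_time: int, interval: int, title: str = None):
    """Illustrative retry decorator, not the frostfs_testlib implementation."""

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            deadline = time.monotonic() + max_wait_time
            while True:
                try:
                    # Success: the readiness check returned without raising.
                    return func(*args, **kwargs)
                except Exception:
                    if time.monotonic() >= deadline:
                        # Out of time: surface the last failure to the caller.
                        raise
                    time.sleep(interval)

        return wrapper

    return decorator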
@@ -1,12 +1,11 @@
-import random
-import re
+import time
 from datetime import datetime, timezone

 import allure
 import pytest
 from frostfs_testlib import reporter
 from frostfs_testlib.steps.metrics import get_metrics_value
-from frostfs_testlib.storage.cluster import Cluster, ClusterNode
+from frostfs_testlib.storage.cluster import ClusterNode
 from frostfs_testlib.storage.controllers.cluster_state_controller import ClusterStateController
 from frostfs_testlib.storage.controllers.state_managers.config_state_manager import ConfigStateManager
 from frostfs_testlib.storage.dataclasses.frostfs_services import StorageNode
@@ -23,40 +22,59 @@ class TestLogsMetrics(ClusterTestBase):
     def restart_storage_service(self, cluster_state_controller: ClusterStateController) -> datetime:
         config_manager = cluster_state_controller.manager(ConfigStateManager)
         config_manager.csc.stop_services_of_type(StorageNode)
-        restart_time = datetime.now(timezone.utc)
         config_manager.csc.start_services_of_type(StorageNode)
-        return restart_time

     @wait_for_success(interval=10)
     def check_metrics_in_node(self, cluster_node: ClusterNode, restart_time: datetime, log_priority: str = None, **metrics_greps):
         current_time = datetime.now(timezone.utc)
         counter_metrics = get_metrics_value(cluster_node, **metrics_greps)
-        counter_logs = self.get_count_logs_by_level(cluster_node, metrics_greps.get("level"), restart_time, current_time, log_priority)
-        assert counter_logs == counter_metrics, f"counter_logs: {counter_logs}, counter_metrics: {counter_metrics} in node: {cluster_node}"
+        counter_logs = self.get_logs_count_by_level(cluster_node, metrics_greps.get("level"), restart_time, current_time, log_priority)
+        if (counter_metrics + counter_logs) < 1000:
+            assert (
+                counter_logs == counter_metrics
+            ), f"counter_logs: {counter_logs}, counter_metrics: {counter_metrics} in node: {cluster_node}"
+        else:
+            # for big values check permissible deviation
+            assert self.are_numbers_similar(
+                counter_logs, counter_metrics
+            ), f"counter_logs: {counter_logs}, counter_metrics: {counter_metrics} in node: {cluster_node}"

     @staticmethod
-    def get_count_logs_by_level(cluster_node: ClusterNode, log_level: str, after_time: datetime, until_time: datetime, log_priority: str):
-        count_logs = 0
+    def are_numbers_similar(num1, num2, tolerance_percentage=1):
+        """
+        If the difference between the numbers is within the permissible deviation, the numbers are considered similar.
+        """
+        # Calculate the permissible deviation
+        average = (num1 + num2) / 2
+        tolerance = average * (tolerance_percentage / 100)
+
+        # Calculate the real difference
+        difference = abs(num1 - num2)
+        return difference <= tolerance
+
+    @staticmethod
+    def get_logs_count_by_level(node: ClusterNode, level: str, since: datetime, until: datetime, priority: str):
         try:
-            logs = cluster_node.host.get_filtered_logs(
-                log_level, unit="frostfs-storage", since=after_time, until=until_time, priority=log_priority
+            result = node.host.get_filtered_logs(
+                filter_regex=level, unit="frostfs-storage", since=since, until=until, priority=priority, calc_count=True
             )
-            result = re.findall(rf"Z\s+{log_level}\s+", logs)
-            count_logs += len(result)
-        except RuntimeError as e:
-            ...
-        return count_logs
+            return int(result)
+        except Exception as e:
+            return e

     @allure.title("Metrics for the log counter")
-    def test_log_counter_metrics(self, cluster_state_controller: ClusterStateController, revert_all):
-        restart_time = self.restart_storage_service(cluster_state_controller)
-        with reporter.step("Select random node"):
-            node = random.choice(self.cluster.cluster_nodes)
+    def test_log_counter_metrics(self, cluster_state_controller: ClusterStateController, node_under_test: ClusterNode, revert_all):
+        with reporter.step("Restart frostfs-node to reset log metrics"):
+            self.restart_storage_service(cluster_state_controller)
+
+        with reporter.step("Wait until the number of logs in frostfs-storage stabilizes"):
+            restart_time = datetime.now(timezone.utc)
+            time.sleep(10)

         with reporter.step(f"Check metrics count logs with level 'info'"):
             self.check_metrics_in_node(
-                node,
-                restart_time,
+                node_under_test,
+                restart_time=restart_time,
                 log_priority="6..6",
                 command="frostfs_node_logger_entry_count",
                 level="info",
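To make the new tolerance-based assertion concrete, here is a small standalone illustration of the `are_numbers_similar` logic added above (the counter values are invented for the example; the default tolerance is 1% of the average of the two numbers):

def are_numbers_similar(num1, num2, tolerance_percentage=1):
    # Same logic as in the diff: the two counters are "similar" when their
    # difference does not exceed tolerance_percentage percent of their average.
    average = (num1 + num2) / 2
    tolerance = average * (tolerance_percentage / 100)
    return abs(num1 - num2) <= tolerance


# With the default 1% tolerance:
assert are_numbers_similar(100_000, 100_500)      # difference 500 <= ~1002.5
assert not are_numbers_similar(100_000, 102_000)  # difference 2000 > ~1010

Small totals (counter_metrics + counter_logs below 1000) are still compared for exact equality; the tolerance branch only applies to large counters, presumably to absorb the few log lines that can be written in the short window between reading the metric and reading the journal.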
@@ -64,4 +82,11 @@ class TestLogsMetrics(ClusterTestBase):
             )

         with reporter.step(f"Check metrics count logs with level 'error'"):
-            self.check_metrics_in_node(node, restart_time, command="frostfs_node_logger_entry_count", level="error", dropped="false")
+            self.check_metrics_in_node(
+                node_under_test,
+                restart_time=restart_time,
+                # log_priority="3..3", TODO: add filter log_priority after https://j.yadro.com/browse/OBJECT-8581
+                command="frostfs_node_logger_entry_count",
+                level="error",
+                dropped="false",
+            )
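A note on the `log_priority` values: `"6..6"` for the info check and the commented-out `"3..3"` for errors look like journald priority ranges, where 6 is `info` and 3 is `err` in standard syslog numbering. Assuming `get_filtered_logs(..., priority=..., calc_count=True)` ultimately filters the systemd journal (this diff does not show its implementation), the count could be approximated as in the sketch below; the `frostfs-storage` unit name comes from the diff, while the helper name `count_journal_entries` and everything else is illustrative:

import subprocess
from datetime import datetime


def count_journal_entries(level_regex: str, since: datetime, until: datetime, priority: str = "6..6") -> int:
    # Illustrative only: count frostfs-storage journal lines that match a level
    # regex within a time window and a priority range (6..6 = info, 3..3 = err).
    cmd = [
        "journalctl",
        "--unit", "frostfs-storage",
        "--since", since.strftime("%Y-%m-%d %H:%M:%S"),
        "--until", until.strftime("%Y-%m-%d %H:%M:%S"),
        "--priority", priority,
        "--grep", level_regex,
        "--output", "cat",
        "--no-pager",
    ]
    result = subprocess.run(cmd, capture_output=True, text=True, check=False)
    return len(result.stdout.splitlines())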