import random
import re

import allure
import pytest
from frostfs_testlib import reporter
from frostfs_testlib.healthcheck.interfaces import Healthcheck
from frostfs_testlib.steps.cli.container import create_container, delete_container, get_container, list_containers
from frostfs_testlib.steps.cli.object import get_object, head_object, put_object, search_object
from frostfs_testlib.steps.cli.tree import get_tree_list
from frostfs_testlib.storage.cluster import Cluster, ClusterNode
from frostfs_testlib.storage.controllers.cluster_state_controller import ClusterStateController
from frostfs_testlib.storage.controllers.state_managers.config_state_manager import ConfigStateManager
from frostfs_testlib.storage.dataclasses.frostfs_services import StorageNode
from frostfs_testlib.storage.dataclasses.object_size import ObjectSize
from frostfs_testlib.storage.dataclasses.wallet import WalletInfo
from frostfs_testlib.testing.cluster_test_base import ClusterTestBase
from frostfs_testlib.testing.test_control import wait_for_success
from frostfs_testlib.utils.file_utils import generate_file

# Metric queried by every test in this module.
GRPC_HANDLED_TOTAL = "grpc_server_handled_total"

# Placement policy used for every container created by these tests.
PLACEMENT_POLICY = "REP 2 IN X CBF 1 SELECT 4 FROM * AS X"

# Captures the integer that follows the closing brace of a label set, e.g. `{...} 12`.
# NOTE(review): only a leading run of digits is captured, so a sample rendered as
# `1e+06` or `1.5` would be counted as 1 -- confirm the counters stay plain integers.
_SAMPLE_VALUE_RE = re.compile(r"}\s(\d+)")


class TestGRPCMetrics(ClusterTestBase):
    """Checks that gRPC request counters grow when the matching API calls are made."""

    @pytest.fixture
    def disable_policer(self, cluster_state_controller: ClusterStateController):
        """Set `policer:unsafe_disable` on all storage nodes and revert the config on teardown."""
        config_manager = cluster_state_controller.manager(ConfigStateManager)
        config_manager.set_on_all_nodes(StorageNode, {"policer:unsafe_disable": "true"})

        yield

        cluster_state_controller.manager(ConfigStateManager).revert_all()

    @wait_for_success(interval=10)
    def check_metrics_in_node(self, cluster_node: ClusterNode, counter_exp: int, **metrics_greps: str) -> None:
        """Assert that the summed metric value on `cluster_node` equals `counter_exp`.

        Args:
            cluster_node: node whose metrics are queried.
            counter_exp: expected total of all matched samples.
            **metrics_greps: filters forwarded unchanged to `get_metrics_value`.

        Raises:
            AssertionError: when the actual total differs from `counter_exp`.
        """
        counter_act = self.get_metrics_value(cluster_node, **metrics_greps)
        assert counter_act == counter_exp, f"Expected: {counter_exp}, Actual: {counter_act} in node: {cluster_node}"

    def get_metrics_value(self, node: ClusterNode, **metrics_greps: str) -> int:
        """Return the sum of all metric samples on `node` matching `metrics_greps`.

        Args:
            node: node whose storage metrics are queried.
            **metrics_greps: filters forwarded unchanged to `get_metrics_search_by_greps`.

        Returns:
            The summed sample values, or 0 when the query raises `RuntimeError`.
        """
        try:
            command_result = node.metrics.storage.get_metrics_search_by_greps(**metrics_greps)
        except RuntimeError:
            # NOTE(review): presumably raised when nothing matches the greps yet, which is
            # why it is treated as a zero counter -- confirm against the metrics helper.
            return 0

        return self.calc_metrics_count_from_stdout(command_result.stdout)

    @staticmethod
    def calc_metrics_count_from_stdout(metric_result_stdout: str) -> int:
        """Sum every integer sample value found in the metrics output.

        Args:
            metric_result_stdout: raw text returned by the metrics query.

        Returns:
            The total of all captured values; 0 when nothing matches.
        """
        return sum(map(int, _SAMPLE_VALUE_RE.findall(metric_result_stdout)))

    @allure.title("GRPC metrics container operations")
    def test_grpc_metrics_container_operations(self, default_wallet: WalletInfo, cluster: Cluster):
        """Container Put, Get and List each raise their ContainerService counter by one."""
        with reporter.step("Select random node"):
            node = random.choice(cluster.cluster_nodes)

        with reporter.step("Get current gRPC metrics for method 'Put'"):
            metrics_counter_put = self.get_metrics_value(
                node, command=GRPC_HANDLED_TOTAL, service="ContainerService", method="Put"
            )

        with reporter.step(f"Create container with policy {PLACEMENT_POLICY}"):
            cid = create_container(default_wallet, self.shell, node.storage_node.get_rpc_endpoint(), PLACEMENT_POLICY)

        with reporter.step("Check gRPC metrics method 'Put', 'the counter should increase by 1'"):
            metrics_counter_put += 1
            self.check_metrics_in_node(
                node, metrics_counter_put, command=GRPC_HANDLED_TOTAL, service="ContainerService", method="Put"
            )

        with reporter.step("Get current gRPC metrics for method 'Get'"):
            metrics_counter_get = self.get_metrics_value(
                node, command=GRPC_HANDLED_TOTAL, service="ContainerService", method="Get"
            )

        with reporter.step("Get container"):
            get_container(default_wallet, cid, self.shell, node.storage_node.get_rpc_endpoint())

        with reporter.step("Check gRPC metrics method=Get, 'the counter should increase by 1'"):
            metrics_counter_get += 1
            self.check_metrics_in_node(
                node, metrics_counter_get, command=GRPC_HANDLED_TOTAL, service="ContainerService", method="Get"
            )

        with reporter.step("Get current gRPC metrics for method 'List'"):
            metrics_counter_list = self.get_metrics_value(
                node, command=GRPC_HANDLED_TOTAL, service="ContainerService", method="List"
            )

        with reporter.step("Get container list"):
            list_containers(default_wallet, self.shell, node.storage_node.get_rpc_endpoint())

        with reporter.step("Check gRPC metrics method=List, 'the counter should increase by 1'"):
            metrics_counter_list += 1
            self.check_metrics_in_node(
                node,
                metrics_counter_list,
                command=GRPC_HANDLED_TOTAL,
                service="ContainerService",
                method="List",
            )

        with reporter.step("Delete container"):
            delete_container(default_wallet, cid, self.shell, self.cluster.default_rpc_endpoint)

    @allure.title("GRPC metrics object operations")
    def test_grpc_metrics_object_operations(
        self, simple_object_size: ObjectSize, default_wallet: WalletInfo, cluster: Cluster, disable_policer
    ):
        """Object Put, Get, Search and Head each raise their ObjectService counter by one."""
        file_path = generate_file(simple_object_size.value)

        with reporter.step("Select random node"):
            node = random.choice(cluster.cluster_nodes)

        with reporter.step(f"Create container with policy {PLACEMENT_POLICY}"):
            cid = create_container(default_wallet, self.shell, node.storage_node.get_rpc_endpoint(), PLACEMENT_POLICY)

        with reporter.step("Get current gRPC metrics for method 'Put'"):
            metrics_counter_put = self.get_metrics_value(
                node, command=GRPC_HANDLED_TOTAL, service="ObjectService", method="Put"
            )

        with reporter.step("Put object to selected node"):
            oid = put_object(default_wallet, file_path, cid, self.shell, node.storage_node.get_rpc_endpoint())

        with reporter.step("Check gRPC metrics method 'Put', 'the counter should increase by 1'"):
            metrics_counter_put += 1
            self.check_metrics_in_node(
                node, metrics_counter_put, command=GRPC_HANDLED_TOTAL, service="ObjectService", method="Put"
            )

        with reporter.step("Get current gRPC metrics for method 'Get'"):
            metrics_counter_get = self.get_metrics_value(
                node, command=GRPC_HANDLED_TOTAL, service="ObjectService", method="Get"
            )

        with reporter.step("Get object"):
            get_object(default_wallet, cid, oid, self.shell, node.storage_node.get_rpc_endpoint())

        with reporter.step("Check gRPC metrics method=Get, 'the counter should increase by 1'"):
            metrics_counter_get += 1
            self.check_metrics_in_node(
                node, metrics_counter_get, command=GRPC_HANDLED_TOTAL, service="ObjectService", method="Get"
            )

        with reporter.step("Get current gRPC metrics for method 'Search'"):
            metrics_counter_search = self.get_metrics_value(
                node, command=GRPC_HANDLED_TOTAL, service="ObjectService", method="Search"
            )

        with reporter.step("Search object"):
            search_object(default_wallet, cid, self.shell, node.storage_node.get_rpc_endpoint())

        with reporter.step("Check gRPC metrics method=Search, 'the counter should increase by 1'"):
            metrics_counter_search += 1
            self.check_metrics_in_node(
                node,
                metrics_counter_search,
                command=GRPC_HANDLED_TOTAL,
                service="ObjectService",
                method="Search",
            )

        with reporter.step("Get current gRPC metrics for method 'Head'"):
            metrics_counter_head = self.get_metrics_value(
                node, command=GRPC_HANDLED_TOTAL, service="ObjectService", method="Head"
            )

        with reporter.step("Head object"):
            head_object(default_wallet, cid, oid, self.shell, node.storage_node.get_rpc_endpoint())

        with reporter.step("Check gRPC metrics method=Head, 'the counter should increase by 1'"):
            metrics_counter_head += 1
            self.check_metrics_in_node(
                node, metrics_counter_head, command=GRPC_HANDLED_TOTAL, service="ObjectService", method="Head"
            )

        with reporter.step("Delete container"):
            delete_container(default_wallet, cid, self.shell, self.cluster.default_rpc_endpoint)

    @allure.title("GRPC metrics Tree healthcheck")
    def test_grpc_metrics_tree_service(self, cluster: Cluster, healthcheck: Healthcheck):
        """A Tree healthcheck query raises the TreeService Healthcheck counter."""
        with reporter.step("Select random node"):
            node = random.choice(cluster.cluster_nodes)

        with reporter.step("Get current gRPC metrics for Healthcheck"):
            metrics_counter = self.get_metrics_value(
                node, command=GRPC_HANDLED_TOTAL, service="TreeService", method="Healthcheck"
            )

        with reporter.step("Query Tree healthcheck status"):
            healthcheck.tree_healthcheck(node)

        with reporter.step("Check gRPC metrics for Healthcheck, 'the counter should increase'"):
            metrics_counter_new = self.get_metrics_value(
                node, command=GRPC_HANDLED_TOTAL, service="TreeService", method="Healthcheck"
            )
            # Only growth is asserted here, not an exact increment of one.
            assert metrics_counter_new > metrics_counter, "the metrics has not increased"

    @allure.title("GRPC metrics Tree list")
    def test_grpc_metrics_tree_list(self, default_wallet: WalletInfo, cluster: Cluster):
        """A Tree List query raises the TreeService TreeList counter by one."""
        with reporter.step("Select random node"):
            node = random.choice(cluster.cluster_nodes)

        with reporter.step(f"Create container with policy {PLACEMENT_POLICY}"):
            cid = create_container(default_wallet, self.shell, node.storage_node.get_rpc_endpoint(), PLACEMENT_POLICY)

        with reporter.step("Get current gRPC metrics for Tree List"):
            metrics_counter = self.get_metrics_value(
                node, command=GRPC_HANDLED_TOTAL, service="TreeService", method="TreeList"
            )

        with reporter.step("Query Tree List"):
            get_tree_list(default_wallet, cid, self.shell, node.storage_node.get_rpc_endpoint())

        with reporter.step("Check gRPC metrics for Tree List, 'the counter should increase by 1'"):
            metrics_counter += 1
            self.check_metrics_in_node(
                node, metrics_counter, command=GRPC_HANDLED_TOTAL, service="TreeService", method="TreeList"
            )

        with reporter.step("Delete container"):
            delete_container(default_wallet, cid, self.shell, self.cluster.default_rpc_endpoint)