# frostfs-testcases/pytest_tests/testsuites/metrics/test_grpc_metrics.py
#
# 225 lines
# 11 KiB
# Python

import random
import re
import allure
import pytest
from frostfs_testlib import reporter
from frostfs_testlib.healthcheck.interfaces import Healthcheck
from frostfs_testlib.steps.cli.container import create_container, delete_container, get_container, list_containers
from frostfs_testlib.steps.cli.object import get_object, head_object, put_object, search_object
from frostfs_testlib.steps.cli.tree import get_tree_list
from frostfs_testlib.storage.cluster import Cluster, ClusterNode
from frostfs_testlib.storage.controllers.cluster_state_controller import ClusterStateController
from frostfs_testlib.storage.controllers.state_managers.config_state_manager import ConfigStateManager
from frostfs_testlib.storage.dataclasses.frostfs_services import StorageNode
from frostfs_testlib.storage.dataclasses.object_size import ObjectSize
from frostfs_testlib.storage.dataclasses.wallet import WalletInfo
from frostfs_testlib.testing.cluster_test_base import ClusterTestBase
from frostfs_testlib.testing.test_control import wait_for_success
from frostfs_testlib.utils.file_utils import generate_file
class TestGRPCMetrics(ClusterTestBase):
    """Check that a storage node's gRPC counters react to container, object and tree requests.

    Every test picks a random cluster node, reads the ``grpc_server_handled_total``
    counter for one service/method pair on that node, sends the matching request to
    the same node and then verifies that the counter moved as expected.
    """

    # Placement policy used by every test that creates a container.
    _PLACEMENT_POLICY = "REP 2 IN X CBF 1 SELECT 4 FROM * AS X"
    # Metric name passed as the ``command`` grep in every metrics lookup.
    _GRPC_HANDLED_TOTAL = "grpc_server_handled_total"

    @pytest.fixture
    def disable_policer(self, cluster_state_controller: ClusterStateController):
        """Set ``policer:unsafe_disable`` to ``"true"`` on all storage nodes, reverting after the test."""
        config_manager = cluster_state_controller.manager(ConfigStateManager)
        config_manager.set_on_all_nodes(StorageNode, {"policer:unsafe_disable": "true"})
        yield
        cluster_state_controller.manager(ConfigStateManager).revert_all()

    @wait_for_success(interval=10)
    def check_metrics_in_node(self, cluster_node: ClusterNode, counter_exp: int, **metrics_greps: str):
        """Assert that the summed metric value on ``cluster_node`` equals ``counter_exp``.

        The method is wrapped in ``wait_for_success(interval=10)``; presumably the
        assertion is retried until it passes or the helper gives up — confirm
        against the ``wait_for_success`` implementation.

        Args:
            cluster_node: Node whose metrics are inspected.
            counter_exp: Expected counter value.
            **metrics_greps: Grep filters forwarded to :meth:`get_metrics_value`.

        Raises:
            AssertionError: If the actual value differs from ``counter_exp``.
        """
        counter_act = self.get_metrics_value(cluster_node, **metrics_greps)
        assert counter_act == counter_exp, f"Expected: {counter_exp}, Actual: {counter_act} in node: {cluster_node}"

    def get_metrics_value(self, node: ClusterNode, **metrics_greps: str) -> int:
        """Return the sum of all metric samples on ``node`` that match ``metrics_greps``.

        Args:
            node: Node whose storage metrics are queried.
            **metrics_greps: Grep filters forwarded to ``get_metrics_search_by_greps``.

        Returns:
            The summed sample values, or ``0`` when the lookup raises ``RuntimeError``.
        """
        try:
            command_result = node.metrics.storage.get_metrics_search_by_greps(**metrics_greps)
        except RuntimeError:
            # Best effort: a failed lookup is treated as "counter is still zero".
            # NOTE(review): presumably raised when the grep finds no matching sample — confirm.
            return 0
        return self.calc_metrics_count_from_stdout(command_result.stdout)

    @staticmethod
    def calc_metrics_count_from_stdout(metric_result_stdout: str) -> int:
        """Sum the sample values found in metrics text output.

        A sample value is the numeric token that follows the closing ``}`` of a
        label set and a single whitespace character.

        Args:
            metric_result_stdout: Raw metrics text.

        Returns:
            Sum of all sample values, each truncated to an integer; ``0`` if none match.
        """
        # Metric values are floats and may be rendered with a fraction or an exponent
        # (e.g. ``1e+06``), so capture the whole numeric token rather than only its
        # leading digits.
        sample_tokens = re.findall(r"}\s(\d+(?:\.\d+)?(?:[eE][+-]?\d+)?)", metric_result_stdout)

        total = 0
        for token in sample_tokens:
            # Plain digit strings go through int() directly to stay exact for huge values.
            total += int(token) if token.isdigit() else int(float(token))
        return total

    @allure.title("GRPC metrics container operations")
    def test_grpc_metrics_container_operations(self, default_wallet: WalletInfo, cluster: Cluster):
        """Verify the ContainerService Put, Get and List counters each grow by one per request."""
        placement_policy = self._PLACEMENT_POLICY

        with reporter.step("Select random node"):
            node = random.choice(cluster.cluster_nodes)

        with reporter.step("Get current gRPC metrics for method 'Put'"):
            metrics_counter_put = self.get_metrics_value(
                node, command=self._GRPC_HANDLED_TOTAL, service="ContainerService", method="Put"
            )

        with reporter.step(f"Create container with policy {placement_policy}"):
            cid = create_container(default_wallet, self.shell, node.storage_node.get_rpc_endpoint(), placement_policy)

        with reporter.step("Check gRPC metrics method 'Put', 'the counter should increase by 1'"):
            metrics_counter_put += 1
            self.check_metrics_in_node(
                node, metrics_counter_put, command=self._GRPC_HANDLED_TOTAL, service="ContainerService", method="Put"
            )

        with reporter.step("Get current gRPC metrics for method 'Get'"):
            metrics_counter_get = self.get_metrics_value(
                node, command=self._GRPC_HANDLED_TOTAL, service="ContainerService", method="Get"
            )

        with reporter.step("Get container"):
            get_container(default_wallet, cid, self.shell, node.storage_node.get_rpc_endpoint())

        with reporter.step("Check gRPC metrics method=Get, 'the counter should increase by 1'"):
            metrics_counter_get += 1
            self.check_metrics_in_node(
                node, metrics_counter_get, command=self._GRPC_HANDLED_TOTAL, service="ContainerService", method="Get"
            )

        with reporter.step("Get current gRPC metrics for method 'List'"):
            metrics_counter_list = self.get_metrics_value(
                node, command=self._GRPC_HANDLED_TOTAL, service="ContainerService", method="List"
            )

        with reporter.step("Get container list"):
            list_containers(default_wallet, self.shell, node.storage_node.get_rpc_endpoint())

        with reporter.step("Check gRPC metrics method=List, 'the counter should increase by 1'"):
            metrics_counter_list += 1
            self.check_metrics_in_node(
                node,
                metrics_counter_list,
                command=self._GRPC_HANDLED_TOTAL,
                service="ContainerService",
                method="List",
            )

        with reporter.step("Delete container"):
            delete_container(default_wallet, cid, self.shell, self.cluster.default_rpc_endpoint)

    @allure.title("GRPC metrics object operations")
    def test_grpc_metrics_object_operations(
        self, simple_object_size: ObjectSize, default_wallet: WalletInfo, cluster: Cluster, disable_policer
    ):
        """Verify the ObjectService Put, Get, Search and Head counters each grow by one per request.

        Runs with the ``disable_policer`` fixture active.
        """
        file_path = generate_file(simple_object_size.value)
        placement_policy = self._PLACEMENT_POLICY

        with reporter.step("Select random node"):
            node = random.choice(cluster.cluster_nodes)

        with reporter.step(f"Create container with policy {placement_policy}"):
            cid = create_container(default_wallet, self.shell, node.storage_node.get_rpc_endpoint(), placement_policy)

        with reporter.step("Get current gRPC metrics for method 'Put'"):
            metrics_counter_put = self.get_metrics_value(
                node, command=self._GRPC_HANDLED_TOTAL, service="ObjectService", method="Put"
            )

        with reporter.step("Put object to selected node"):
            oid = put_object(default_wallet, file_path, cid, self.shell, node.storage_node.get_rpc_endpoint())

        with reporter.step("Check gRPC metrics method 'Put', 'the counter should increase by 1'"):
            metrics_counter_put += 1
            self.check_metrics_in_node(
                node, metrics_counter_put, command=self._GRPC_HANDLED_TOTAL, service="ObjectService", method="Put"
            )

        with reporter.step("Get current gRPC metrics for method 'Get'"):
            metrics_counter_get = self.get_metrics_value(
                node, command=self._GRPC_HANDLED_TOTAL, service="ObjectService", method="Get"
            )

        with reporter.step("Get object"):
            get_object(default_wallet, cid, oid, self.shell, node.storage_node.get_rpc_endpoint())

        with reporter.step("Check gRPC metrics method=Get, 'the counter should increase by 1'"):
            metrics_counter_get += 1
            self.check_metrics_in_node(
                node, metrics_counter_get, command=self._GRPC_HANDLED_TOTAL, service="ObjectService", method="Get"
            )

        with reporter.step("Get current gRPC metrics for method 'Search'"):
            metrics_counter_search = self.get_metrics_value(
                node, command=self._GRPC_HANDLED_TOTAL, service="ObjectService", method="Search"
            )

        with reporter.step("Search object"):
            search_object(default_wallet, cid, self.shell, node.storage_node.get_rpc_endpoint())

        with reporter.step("Check gRPC metrics method=Search, 'the counter should increase by 1'"):
            metrics_counter_search += 1
            self.check_metrics_in_node(
                node,
                metrics_counter_search,
                command=self._GRPC_HANDLED_TOTAL,
                service="ObjectService",
                method="Search",
            )

        with reporter.step("Get current gRPC metrics for method 'Head'"):
            metrics_counter_head = self.get_metrics_value(
                node, command=self._GRPC_HANDLED_TOTAL, service="ObjectService", method="Head"
            )

        with reporter.step("Head object"):
            head_object(default_wallet, cid, oid, self.shell, node.storage_node.get_rpc_endpoint())

        with reporter.step("Check gRPC metrics method=Head, 'the counter should increase by 1'"):
            metrics_counter_head += 1
            self.check_metrics_in_node(
                node, metrics_counter_head, command=self._GRPC_HANDLED_TOTAL, service="ObjectService", method="Head"
            )

        with reporter.step("Delete container"):
            delete_container(default_wallet, cid, self.shell, self.cluster.default_rpc_endpoint)

    @allure.title("GRPC metrics Tree healthcheck")
    def test_grpc_metrics_tree_service(self, cluster: Cluster, healthcheck: Healthcheck):
        """Verify the TreeService Healthcheck counter grows after a tree healthcheck query."""
        with reporter.step("Select random node"):
            node = random.choice(cluster.cluster_nodes)

        with reporter.step("Get current gRPC metrics for Healthcheck"):
            metrics_counter = self.get_metrics_value(
                node, command=self._GRPC_HANDLED_TOTAL, service="TreeService", method="Healthcheck"
            )

        with reporter.step("Query Tree healthcheck status"):
            healthcheck.tree_healthcheck(node)

        with reporter.step("Check gRPC metrics for Healthcheck, 'the counter should increase'"):
            # NOTE(review): unlike the other tests this reads the counter once, without the
            # retrying check, and only requires a strict increase rather than exactly +1.
            metrics_counter_new = self.get_metrics_value(
                node, command=self._GRPC_HANDLED_TOTAL, service="TreeService", method="Healthcheck"
            )
            assert metrics_counter_new > metrics_counter, "the metrics has not increased"

    @allure.title("GRPC metrics Tree list")
    def test_grpc_metrics_tree_list(self, default_wallet: WalletInfo, cluster: Cluster):
        """Verify the TreeService TreeList counter grows by one after a tree list query."""
        placement_policy = self._PLACEMENT_POLICY

        with reporter.step("Select random node"):
            node = random.choice(cluster.cluster_nodes)

        with reporter.step(f"Create container with policy {placement_policy}"):
            cid = create_container(default_wallet, self.shell, node.storage_node.get_rpc_endpoint(), placement_policy)

        with reporter.step("Get current gRPC metrics for Tree List"):
            metrics_counter = self.get_metrics_value(
                node, command=self._GRPC_HANDLED_TOTAL, service="TreeService", method="TreeList"
            )

        with reporter.step("Query Tree List"):
            get_tree_list(default_wallet, cid, self.shell, node.storage_node.get_rpc_endpoint())

        with reporter.step("Check gRPC metrics for Tree List, 'the counter should increase by 1'"):
            metrics_counter += 1
            self.check_metrics_in_node(
                node, metrics_counter, command=self._GRPC_HANDLED_TOTAL, service="TreeService", method="TreeList"
            )

        with reporter.step("Delete container"):
            delete_container(default_wallet, cid, self.shell, self.cluster.default_rpc_endpoint)