[#256] refactore metrics tests #256
6 changed files with 345 additions and 372 deletions
|
@ -1,47 +1,21 @@
|
|||
import math
|
||||
import re
|
||||
|
||||
import allure
|
||||
import pytest
|
||||
from frostfs_testlib import reporter
|
||||
from frostfs_testlib.steps.cli.container import create_container, delete_container
|
||||
from frostfs_testlib.steps.cli.object import delete_object, get_object_nodes, put_object_to_random_node
|
||||
from frostfs_testlib.storage.cluster import Cluster, ClusterNode
|
||||
from frostfs_testlib.steps.cli.container import create_container
|
||||
from frostfs_testlib.steps.cli.object import delete_object, put_object_to_random_node
|
||||
from frostfs_testlib.steps.metrics import check_metrics_counter
|
||||
from frostfs_testlib.steps.storage_policy import get_nodes_with_object
|
||||
from frostfs_testlib.storage.cluster import Cluster
|
||||
from frostfs_testlib.storage.dataclasses.object_size import ObjectSize
|
||||
from frostfs_testlib.storage.dataclasses.wallet import WalletInfo
|
||||
from frostfs_testlib.testing.cluster_test_base import ClusterTestBase
|
||||
from frostfs_testlib.testing.test_control import wait_for_success
|
||||
from frostfs_testlib.utils.file_utils import generate_file
|
||||
|
||||
|
||||
@pytest.mark.container
|
||||
class TestContainerMetrics(ClusterTestBase):
|
||||
@wait_for_success(interval=10)
|
||||
def check_sum_counter_metrics_in_nodes(
|
||||
self, cluster_nodes: list[ClusterNode], cid: str, phy_exp: int, logic_exp: int, user_exp: int
|
||||
):
|
||||
counter_phy = 0
|
||||
counter_logic = 0
|
||||
counter_user = 0
|
||||
for cluster_node in cluster_nodes:
|
||||
metric_result = cluster_node.metrics.storage.get_metric_container(f"container_objects_total", cid)
|
||||
counter_phy += self.get_count_metric_type_from_stdout(metric_result.stdout, "phy")
|
||||
counter_logic += self.get_count_metric_type_from_stdout(metric_result.stdout, "logic")
|
||||
counter_user += self.get_count_metric_type_from_stdout(metric_result.stdout, "user")
|
||||
|
||||
assert counter_phy == phy_exp, f"Expected metric Phy={phy_exp}, Actual: {counter_phy} in nodes: {cluster_nodes}"
|
||||
assert (
|
||||
counter_logic == logic_exp
|
||||
), f"Expected metric logic={logic_exp}, Actual: {counter_logic} in nodes: {cluster_nodes}"
|
||||
assert (
|
||||
counter_user == user_exp
|
||||
), f"Expected metric User={user_exp}, Actual: {counter_user} in nodes: {cluster_nodes}"
|
||||
|
||||
@staticmethod
|
||||
def get_count_metric_type_from_stdout(metric_result_stdout: str, metric_type: str):
|
||||
result = re.findall(rf'type="{metric_type}"}}\s(\d+)', metric_result_stdout)
|
||||
return sum(map(int, result))
|
||||
|
||||
@allure.title("Container metrics (obj_size={object_size})")
|
||||
def test_container_metrics(
|
||||
self, object_size: ObjectSize, max_object_size: int, default_wallet: WalletInfo, cluster: Cluster
|
||||
|
@ -57,15 +31,10 @@ class TestContainerMetrics(ClusterTestBase):
|
|||
link_object = 1
|
||||
|
||||
with reporter.step(f"Create container with policy {placement_policy}"):
|
||||
cid = create_container(
|
||||
default_wallet,
|
||||
rule=placement_policy,
|
||||
shell=self.shell,
|
||||
endpoint=self.cluster.default_rpc_endpoint,
|
||||
)
|
||||
cid = create_container(default_wallet, self.shell, cluster.default_rpc_endpoint, placement_policy)
|
||||
|
||||
with reporter.step("Put object to random node"):
|
||||
storage_object_id = put_object_to_random_node(
|
||||
oid = put_object_to_random_node(
|
||||
wallet=default_wallet,
|
||||
path=file_path,
|
||||
cid=cid,
|
||||
|
@ -73,25 +42,46 @@ class TestContainerMetrics(ClusterTestBase):
|
|||
cluster=cluster,
|
||||
)
|
||||
|
||||
with reporter.step("Get object nodes"):
|
||||
object_storage_nodes = get_nodes_with_object(cid, oid, self.shell, cluster.storage_nodes)
|
||||
object_nodes = [
|
||||
cluster_node
|
||||
for cluster_node in cluster.cluster_nodes
|
||||
if cluster_node.storage_node in object_storage_nodes
|
||||
]
|
||||
|
||||
with reporter.step("Check metric appears in node where the object is located"):
|
||||
object_nodes = get_object_nodes(
|
||||
cluster=cluster, cid=cid, oid=storage_object_id, alive_node=cluster.cluster_nodes[0]
|
||||
count_metrics = (object_chunks + head_object + link_object) * copies
|
||||
check_metrics_counter(
|
||||
object_nodes, counter_exp=count_metrics, command="container_objects_total", cid=cid, type="phy"
|
||||
)
|
||||
count_metrics_exp = (object_chunks + head_object + link_object) * copies
|
||||
self.check_sum_counter_metrics_in_nodes(
|
||||
object_nodes, cid, phy_exp=count_metrics_exp, logic_exp=count_metrics_exp, user_exp=copies
|
||||
check_metrics_counter(
|
||||
object_nodes, counter_exp=count_metrics, command="container_objects_total", cid=cid, type="logic"
|
||||
)
|
||||
check_metrics_counter(
|
||||
object_nodes, counter_exp=copies, command="container_objects_total", cid=cid, type="user"
|
||||
)
|
||||
|
||||
with reporter.step("Delete file, wait until gc remove object"):
|
||||
delete_object(default_wallet, cid, storage_object_id, self.shell, self.cluster.default_rpc_endpoint)
|
||||
count_metrics_exp = len(object_nodes)
|
||||
self.check_sum_counter_metrics_in_nodes(
|
||||
object_nodes, cid, phy_exp=count_metrics_exp, logic_exp=count_metrics_exp, user_exp=0
|
||||
delete_object(default_wallet, cid, oid, self.shell, cluster.default_rpc_endpoint)
|
||||
|
||||
with reporter.step(f"Check container metrics 'the counter should equal {len(object_nodes)}' in object nodes"):
|
||||
check_metrics_counter(
|
||||
object_nodes, counter_exp=len(object_nodes), command="container_objects_total", cid=cid, type="phy"
|
||||
)
|
||||
check_metrics_counter(
|
||||
object_nodes, counter_exp=len(object_nodes), command="container_objects_total", cid=cid, type="logic"
|
||||
)
|
||||
check_metrics_counter(object_nodes, counter_exp=0, command="container_objects_total", cid=cid, type="user")
|
||||
|
||||
with reporter.step("Check metrics(Phy, Logic, User) in each nodes"):
|
||||
# Phy and Logic metrics are 4, because in rule 'CBF 2 SELECT 2 FROM', cbf2*sel2=4
|
||||
self.check_sum_counter_metrics_in_nodes(cluster.cluster_nodes, cid, phy_exp=4, logic_exp=4, user_exp=0)
|
||||
|
||||
with reporter.step("Delete container"):
|
||||
delete_container(default_wallet, cid, shell=self.shell, endpoint=self.cluster.default_rpc_endpoint)
|
||||
check_metrics_counter(
|
||||
cluster.cluster_nodes, counter_exp=4, command="container_objects_total", cid=cid, type="phy"
|
||||
)
|
||||
check_metrics_counter(
|
||||
cluster.cluster_nodes, counter_exp=4, command="container_objects_total", cid=cid, type="logic"
|
||||
)
|
||||
check_metrics_counter(
|
||||
cluster.cluster_nodes, counter_exp=0, command="container_objects_total", cid=cid, type="user"
|
||||
)
|
||||
|
|
|
@ -2,9 +2,12 @@ import random
|
|||
import re
|
||||
|
||||
import allure
|
||||
import pytest
|
||||
from frostfs_testlib import reporter
|
||||
from frostfs_testlib.steps.cli.container import create_container, delete_container
|
||||
from frostfs_testlib.steps.cli.object import delete_object, get_object_nodes, put_object, put_object_to_random_node
|
||||
from frostfs_testlib.steps.cli.container import create_container
|
||||
from frostfs_testlib.steps.cli.object import delete_object, put_object, put_object_to_random_node
|
||||
from frostfs_testlib.steps.metrics import check_metrics_counter, get_metrics_value
|
||||
from frostfs_testlib.steps.storage_policy import get_nodes_with_object
|
||||
from frostfs_testlib.storage.cluster import Cluster, ClusterNode
|
||||
from frostfs_testlib.storage.dataclasses.object_size import ObjectSize
|
||||
from frostfs_testlib.storage.dataclasses.wallet import WalletInfo
|
||||
|
@ -40,10 +43,9 @@ class TestGarbageCollectorMetrics(ClusterTestBase):
|
|||
with reporter.step("Get current garbage collector metrics for each nodes"):
|
||||
metrics_counter = {}
|
||||
for node in cluster.cluster_nodes:
|
||||
command_result = node.metrics.storage.get_metrics_search_by_greps(
|
||||
command="frostfs_node_garbage_collector_marked_for_removal_objects_total"
|
||||
metrics_counter[node] = get_metrics_value(
|
||||
node, command="frostfs_node_garbage_collector_marked_for_removal_objects_total"
|
||||
)
|
||||
metrics_counter[node] = self.calc_metrics_count_from_stdout(command_result.stdout)
|
||||
|
||||
with reporter.step(f"Create container with policy {placement_policy}"):
|
||||
cid = create_container(default_wallet, self.shell, cluster.default_rpc_endpoint, placement_policy)
|
||||
|
@ -60,23 +62,29 @@ class TestGarbageCollectorMetrics(ClusterTestBase):
|
|||
)
|
||||
|
||||
with reporter.step("Get object nodes"):
|
||||
object_nodes = get_object_nodes(cluster, cid, oid, cluster.cluster_nodes[0])
|
||||
object_storage_nodes = get_nodes_with_object(cid, oid, self.shell, cluster.storage_nodes)
|
||||
object_nodes = [
|
||||
cluster_node
|
||||
for cluster_node in cluster.cluster_nodes
|
||||
if cluster_node.storage_node in object_storage_nodes
|
||||
]
|
||||
|
||||
with reporter.step("Tick Epoch"):
|
||||
self.tick_epochs(epochs_to_tick=2, wait_block=2)
|
||||
|
||||
with reporter.step(f"Check garbage collector metrics 'the counter should increase by {metrics_step}'"):
|
||||
with reporter.step(
|
||||
f"Check garbage collector metrics 'the counter should increase by {metrics_step}' in object nodes"
|
||||
):
|
||||
for node in object_nodes:
|
||||
metrics_counter[node] += metrics_step
|
||||
|
||||
for node, counter in metrics_counter.items():
|
||||
self.check_metrics_in_node(
|
||||
node, counter, command="frostfs_node_garbage_collector_marked_for_removal_objects_total"
|
||||
check_metrics_counter(
|
||||
[node],
|
||||
counter_exp=counter,
|
||||
command="frostfs_node_garbage_collector_marked_for_removal_objects_total",
|
||||
)
|
||||
|
||||
with reporter.step("Delete container"):
|
||||
delete_container(default_wallet, cid, self.shell, cluster.default_rpc_endpoint)
|
||||
|
||||
@allure.title("Garbage collector delete object")
|
||||
def test_garbage_collector_metrics_deleted_objects(
|
||||
self, simple_object_size: ObjectSize, default_wallet: WalletInfo, cluster: Cluster
|
||||
|
@ -89,10 +97,7 @@ class TestGarbageCollectorMetrics(ClusterTestBase):
|
|||
node = random.choice(cluster.cluster_nodes)
|
||||
|
||||
with reporter.step("Get current garbage collector metrics for selected node"):
|
||||
command_result = node.metrics.storage.get_metrics_search_by_greps(
|
||||
command="frostfs_node_garbage_collector_deleted_objects_total"
|
||||
)
|
||||
metrics_counter = self.calc_metrics_count_from_stdout(command_result.stdout)
|
||||
metrics_counter = get_metrics_value(node, command="frostfs_node_garbage_collector_deleted_objects_total")
|
||||
|
||||
with reporter.step(f"Create container with policy {placement_policy}"):
|
||||
cid = create_container(default_wallet, self.shell, node.storage_node.get_rpc_endpoint(), placement_policy)
|
||||
|
@ -105,9 +110,6 @@ class TestGarbageCollectorMetrics(ClusterTestBase):
|
|||
|
||||
with reporter.step(f"Check garbage collector metrics 'the counter should increase by {metrics_step}'"):
|
||||
metrics_counter += metrics_step
|
||||
self.check_metrics_in_node(
|
||||
node, metrics_counter, command="frostfs_node_garbage_collector_deleted_objects_total"
|
||||
check_metrics_counter(
|
||||
[node], counter_exp=metrics_counter, command="frostfs_node_garbage_collector_deleted_objects_total"
|
||||
)
|
||||
|
||||
with reporter.step("Delete container"):
|
||||
delete_container(default_wallet, cid, self.shell, cluster.default_rpc_endpoint)
|
||||
|
|
|
@ -1,21 +1,20 @@
|
|||
import random
|
||||
import re
|
||||
|
||||
import allure
|
||||
import pytest
|
||||
from frostfs_testlib import reporter
|
||||
from frostfs_testlib.healthcheck.interfaces import Healthcheck
|
||||
from frostfs_testlib.steps.cli.container import create_container, delete_container, get_container, list_containers
|
||||
from frostfs_testlib.steps.cli.container import create_container, get_container, list_containers
|
||||
from frostfs_testlib.steps.cli.object import get_object, head_object, put_object, search_object
|
||||
from frostfs_testlib.steps.cli.tree import get_tree_list
|
||||
from frostfs_testlib.storage.cluster import Cluster, ClusterNode
|
||||
from frostfs_testlib.steps.metrics import check_metrics_counter, get_metrics_value
|
||||
from frostfs_testlib.storage.cluster import Cluster
|
||||
from frostfs_testlib.storage.controllers.cluster_state_controller import ClusterStateController
|
||||
from frostfs_testlib.storage.controllers.state_managers.config_state_manager import ConfigStateManager
|
||||
from frostfs_testlib.storage.dataclasses.frostfs_services import StorageNode
|
||||
from frostfs_testlib.storage.dataclasses.object_size import ObjectSize
|
||||
from frostfs_testlib.storage.dataclasses.wallet import WalletInfo
|
||||
from frostfs_testlib.testing.cluster_test_base import ClusterTestBase
|
||||
from frostfs_testlib.testing.test_control import wait_for_success
|
||||
from frostfs_testlib.utils.file_utils import generate_file
|
||||
|
||||
|
||||
|
@ -27,25 +26,6 @@ class TestGRPCMetrics(ClusterTestBase):
|
|||
yield
|
||||
cluster_state_controller.manager(ConfigStateManager).revert_all()
|
||||
|
||||
@wait_for_success(interval=10)
|
||||
def check_metrics_in_node(self, cluster_node: ClusterNode, counter_exp: int, **metrics_greps: str):
|
||||
counter_act = self.get_metrics_value(cluster_node, **metrics_greps)
|
||||
assert counter_act == counter_exp, f"Expected: {counter_exp}, Actual: {counter_act} in node: {cluster_node}"
|
||||
|
||||
def get_metrics_value(self, node: ClusterNode, **metrics_greps: str):
|
||||
try:
|
||||
command_result = node.metrics.storage.get_metrics_search_by_greps(**metrics_greps)
|
||||
metrics_counter = self.calc_metrics_count_from_stdout(command_result.stdout)
|
||||
except RuntimeError as e:
|
||||
metrics_counter = 0
|
||||
|
||||
return metrics_counter
|
||||
|
||||
@staticmethod
|
||||
def calc_metrics_count_from_stdout(metric_result_stdout: str):
|
||||
result = re.findall(r"}\s(\d+)", metric_result_stdout)
|
||||
return sum(map(int, result))
|
||||
|
||||
@allure.title("GRPC metrics container operations")
|
||||
def test_grpc_metrics_container_operations(self, default_wallet: WalletInfo, cluster: Cluster):
|
||||
placement_policy = "REP 2 IN X CBF 1 SELECT 4 FROM * AS X"
|
||||
|
@ -54,7 +34,7 @@ class TestGRPCMetrics(ClusterTestBase):
|
|||
node = random.choice(cluster.cluster_nodes)
|
||||
|
||||
with reporter.step("Get current gRPC metrics for method 'Put'"):
|
||||
metrics_counter_put = self.get_metrics_value(
|
||||
metrics_counter_put = get_metrics_value(
|
||||
node, command="grpc_server_handled_total", service="ContainerService", method="Put"
|
||||
)
|
||||
|
||||
|
@ -63,12 +43,16 @@ class TestGRPCMetrics(ClusterTestBase):
|
|||
|
||||
with reporter.step(f"Check gRPC metrics method 'Put', 'the counter should increase by 1'"):
|
||||
metrics_counter_put += 1
|
||||
self.check_metrics_in_node(
|
||||
node, metrics_counter_put, command="grpc_server_handled_total", service="ContainerService", method="Put"
|
||||
check_metrics_counter(
|
||||
[node],
|
||||
counter_exp=metrics_counter_put,
|
||||
command="grpc_server_handled_total",
|
||||
service="ContainerService",
|
||||
method="Put",
|
||||
)
|
||||
|
||||
with reporter.step("Get current gRPC metrics for method 'Get'"):
|
||||
metrics_counter_get = self.get_metrics_value(
|
||||
metrics_counter_get = get_metrics_value(
|
||||
node, command="grpc_server_handled_total", service="ContainerService", method="Get"
|
||||
)
|
||||
|
||||
|
@ -77,12 +61,16 @@ class TestGRPCMetrics(ClusterTestBase):
|
|||
|
||||
with reporter.step(f"Check gRPC metrics method=Get, 'the counter should increase by 1'"):
|
||||
metrics_counter_get += 1
|
||||
self.check_metrics_in_node(
|
||||
node, metrics_counter_get, command="grpc_server_handled_total", service="ContainerService", method="Get"
|
||||
check_metrics_counter(
|
||||
[node],
|
||||
counter_exp=metrics_counter_get,
|
||||
command="grpc_server_handled_total",
|
||||
service="ContainerService",
|
||||
method="Get",
|
||||
)
|
||||
|
||||
with reporter.step("Get current gRPC metrics for method 'List'"):
|
||||
metrics_counter_list = self.get_metrics_value(
|
||||
metrics_counter_list = get_metrics_value(
|
||||
node, command="grpc_server_handled_total", service="ContainerService", method="List"
|
||||
)
|
||||
|
||||
|
@ -91,17 +79,14 @@ class TestGRPCMetrics(ClusterTestBase):
|
|||
|
||||
with reporter.step(f"Check gRPC metrics method=List, 'the counter should increase by 1'"):
|
||||
metrics_counter_list += 1
|
||||
self.check_metrics_in_node(
|
||||
node,
|
||||
metrics_counter_list,
|
||||
check_metrics_counter(
|
||||
[node],
|
||||
counter_exp=metrics_counter_list,
|
||||
command="grpc_server_handled_total",
|
||||
service="ContainerService",
|
||||
method="List",
|
||||
)
|
||||
|
||||
with reporter.step("Delete container"):
|
||||
delete_container(default_wallet, cid, self.shell, self.cluster.default_rpc_endpoint)
|
||||
|
||||
@allure.title("GRPC metrics object operations")
|
||||
def test_grpc_metrics_object_operations(
|
||||
self, simple_object_size: ObjectSize, default_wallet: WalletInfo, cluster: Cluster, disable_policer
|
||||
|
@ -116,7 +101,7 @@ class TestGRPCMetrics(ClusterTestBase):
|
|||
cid = create_container(default_wallet, self.shell, node.storage_node.get_rpc_endpoint(), placement_policy)
|
||||
|
||||
with reporter.step("Get current gRPC metrics for method 'Put'"):
|
||||
metrics_counter_put = self.get_metrics_value(
|
||||
metrics_counter_put = get_metrics_value(
|
||||
node, command="grpc_server_handled_total", service="ObjectService", method="Put"
|
||||
)
|
||||
|
||||
|
@ -125,12 +110,16 @@ class TestGRPCMetrics(ClusterTestBase):
|
|||
|
||||
with reporter.step(f"Check gRPC metrics method 'Put', 'the counter should increase by 1'"):
|
||||
metrics_counter_put += 1
|
||||
self.check_metrics_in_node(
|
||||
node, metrics_counter_put, command="grpc_server_handled_total", service="ObjectService", method="Put"
|
||||
check_metrics_counter(
|
||||
[node],
|
||||
counter_exp=metrics_counter_put,
|
||||
command="grpc_server_handled_total",
|
||||
service="ObjectService",
|
||||
method="Put",
|
||||
)
|
||||
|
||||
with reporter.step("Get current gRPC metrics for method 'Get'"):
|
||||
metrics_counter_get = self.get_metrics_value(
|
||||
metrics_counter_get = get_metrics_value(
|
||||
node, command="grpc_server_handled_total", service="ObjectService", method="Get"
|
||||
)
|
||||
|
||||
|
@ -139,12 +128,16 @@ class TestGRPCMetrics(ClusterTestBase):
|
|||
|
||||
with reporter.step(f"Check gRPC metrics method=Get, 'the counter should increase by 1'"):
|
||||
metrics_counter_get += 1
|
||||
self.check_metrics_in_node(
|
||||
node, metrics_counter_get, command="grpc_server_handled_total", service="ObjectService", method="Get"
|
||||
check_metrics_counter(
|
||||
[node],
|
||||
counter_exp=metrics_counter_get,
|
||||
command="grpc_server_handled_total",
|
||||
service="ObjectService",
|
||||
method="Get",
|
||||
)
|
||||
|
||||
with reporter.step("Get current gRPC metrics for method 'Search'"):
|
||||
metrics_counter_search = self.get_metrics_value(
|
||||
metrics_counter_search = get_metrics_value(
|
||||
node, command="grpc_server_handled_total", service="ObjectService", method="Search"
|
||||
)
|
||||
|
||||
|
@ -153,16 +146,16 @@ class TestGRPCMetrics(ClusterTestBase):
|
|||
|
||||
with reporter.step(f"Check gRPC metrics method=Search, 'the counter should increase by 1'"):
|
||||
metrics_counter_search += 1
|
||||
self.check_metrics_in_node(
|
||||
node,
|
||||
metrics_counter_search,
|
||||
check_metrics_counter(
|
||||
[node],
|
||||
counter_exp=metrics_counter_search,
|
||||
command="grpc_server_handled_total",
|
||||
service="ObjectService",
|
||||
method="Search",
|
||||
)
|
||||
|
||||
with reporter.step("Get current gRPC metrics for method 'Head'"):
|
||||
metrics_counter_head = self.get_metrics_value(
|
||||
metrics_counter_head = get_metrics_value(
|
||||
node, command="grpc_server_handled_total", service="ObjectService", method="Head"
|
||||
)
|
||||
|
||||
|
@ -171,20 +164,21 @@ class TestGRPCMetrics(ClusterTestBase):
|
|||
|
||||
with reporter.step(f"Check gRPC metrics method=Head, 'the counter should increase by 1'"):
|
||||
metrics_counter_head += 1
|
||||
self.check_metrics_in_node(
|
||||
node, metrics_counter_head, command="grpc_server_handled_total", service="ObjectService", method="Head"
|
||||
check_metrics_counter(
|
||||
[node],
|
||||
counter_exp=metrics_counter_head,
|
||||
command="grpc_server_handled_total",
|
||||
service="ObjectService",
|
||||
method="Head",
|
||||
)
|
||||
|
||||
with reporter.step("Delete container"):
|
||||
delete_container(default_wallet, cid, self.shell, self.cluster.default_rpc_endpoint)
|
||||
|
||||
@allure.title("GRPC metrics Tree healthcheck")
|
||||
def test_grpc_metrics_tree_service(self, cluster: Cluster, healthcheck: Healthcheck):
|
||||
with reporter.step("Select random node"):
|
||||
node = random.choice(cluster.cluster_nodes)
|
||||
|
||||
with reporter.step("Get current gRPC metrics for Healthcheck"):
|
||||
metrics_counter = self.get_metrics_value(
|
||||
metrics_counter = get_metrics_value(
|
||||
node, command="grpc_server_handled_total", service="TreeService", method="Healthcheck"
|
||||
)
|
||||
|
||||
|
@ -192,10 +186,14 @@ class TestGRPCMetrics(ClusterTestBase):
|
|||
healthcheck.tree_healthcheck(node)
|
||||
|
||||
with reporter.step(f"Check gRPC metrics for Healthcheck, 'the counter should increase'"):
|
||||
metrics_counter_new = self.get_metrics_value(
|
||||
node, command="grpc_server_handled_total", service="TreeService", method="Healthcheck"
|
||||
check_metrics_counter(
|
||||
[node],
|
||||
">",
|
||||
metrics_counter,
|
||||
command="grpc_server_handled_total",
|
||||
service="TreeService",
|
||||
method="Healthcheck",
|
||||
)
|
||||
assert metrics_counter_new > metrics_counter, "the metrics has not increased"
|
||||
|
||||
@allure.title("GRPC metrics Tree list")
|
||||
def test_grpc_metrics_tree_list(self, default_wallet: WalletInfo, cluster: Cluster):
|
||||
|
@ -208,7 +206,7 @@ class TestGRPCMetrics(ClusterTestBase):
|
|||
cid = create_container(default_wallet, self.shell, node.storage_node.get_rpc_endpoint(), placement_policy)
|
||||
|
||||
with reporter.step("Get current gRPC metrics for Tree List"):
|
||||
metrics_counter = self.get_metrics_value(
|
||||
metrics_counter = get_metrics_value(
|
||||
node, command="grpc_server_handled_total", service="TreeService", method="TreeList"
|
||||
)
|
||||
|
||||
|
@ -217,9 +215,10 @@ class TestGRPCMetrics(ClusterTestBase):
|
|||
|
||||
with reporter.step(f"Check gRPC metrics for Tree List, 'the counter should increase by 1'"):
|
||||
metrics_counter += 1
|
||||
self.check_metrics_in_node(
|
||||
node, metrics_counter, command="grpc_server_handled_total", service="TreeService", method="TreeList"
|
||||
check_metrics_counter(
|
||||
[node],
|
||||
counter_exp=metrics_counter,
|
||||
command="grpc_server_handled_total",
|
||||
service="TreeService",
|
||||
method="TreeList",
|
||||
)
|
||||
|
||||
with reporter.step("Delete container"):
|
||||
delete_container(default_wallet, cid, self.shell, self.cluster.default_rpc_endpoint)
|
||||
|
|
|
@ -1,11 +1,11 @@
|
|||
import random
|
||||
import re
|
||||
from datetime import datetime
|
||||
from datetime import datetime, timezone
|
||||
|
||||
import allure
|
||||
import pytest
|
||||
from frostfs_testlib import reporter
|
||||
from frostfs_testlib.shell import Shell
|
||||
from frostfs_testlib.steps.metrics import get_metrics_value
|
||||
from frostfs_testlib.storage.cluster import Cluster, ClusterNode
|
||||
from frostfs_testlib.storage.controllers.cluster_state_controller import ClusterStateController
|
||||
from frostfs_testlib.storage.controllers.state_managers.config_state_manager import ConfigStateManager
|
||||
|
@ -16,59 +16,40 @@ from frostfs_testlib.testing.test_control import wait_for_success
|
|||
|
||||
class TestLogsMetrics(ClusterTestBase):
|
||||
@pytest.fixture
|
||||
def restart_storage_service(self, cluster_state_controller: ClusterStateController) -> str:
|
||||
def restart_storage_service(self, cluster_state_controller: ClusterStateController) -> datetime:
|
||||
config_manager = cluster_state_controller.manager(ConfigStateManager)
|
||||
config_manager.csc.stop_services_of_type(StorageNode)
|
||||
restart_time = datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")
|
||||
restart_time = datetime.now(timezone.utc)
|
||||
config_manager.csc.start_services_of_type(StorageNode)
|
||||
yield restart_time
|
||||
|
||||
cluster_state_controller.manager(ConfigStateManager).revert_all()
|
||||
|
||||
@wait_for_success(interval=10)
|
||||
def check_metrics_in_node(self, cluster_node: ClusterNode, counter_exp: int, **metrics_greps: str):
|
||||
counter_act = self.get_metrics_value(cluster_node, **metrics_greps)
|
||||
def check_metrics_in_node(self, cluster_node: ClusterNode, restart_time: datetime, **metrics_greps):
|
||||
counter_exp = self.get_count_logs_by_level(cluster_node, metrics_greps.get("level"), restart_time)
|
||||
counter_act = get_metrics_value(cluster_node, **metrics_greps)
|
||||
assert counter_act == counter_exp, f"Expected: {counter_exp}, Actual: {counter_act} in node: {cluster_node}"
|
||||
|
||||
def get_metrics_value(self, node: ClusterNode, **metrics_greps: str):
|
||||
try:
|
||||
command_result = node.metrics.storage.get_metrics_search_by_greps(**metrics_greps)
|
||||
metrics_counter = self.calc_metrics_count_from_stdout(command_result.stdout)
|
||||
except RuntimeError as e:
|
||||
metrics_counter = 0
|
||||
|
||||
return metrics_counter
|
||||
|
||||
@staticmethod
|
||||
def calc_metrics_count_from_stdout(metric_result_stdout: str):
|
||||
result = re.findall(r"}\s(\d+)", metric_result_stdout)
|
||||
return sum(map(int, result))
|
||||
|
||||
@staticmethod
|
||||
def get_count_logs_by_level(shell: Shell, log_level: str, after_time: str):
|
||||
def get_count_logs_by_level(cluster_node: ClusterNode, log_level: str, after_time: datetime):
|
||||
count_logs = 0
|
||||
try:
|
||||
logs = shell.exec(f"journalctl -u frostfs-storage --grep='{log_level}' --since '{after_time}'")
|
||||
result = re.findall(rf"Z\s+{log_level}\s+", logs.stdout)
|
||||
logs = cluster_node.host.get_filtered_logs(log_level, unit="frostfs-storage", since=after_time)
|
||||
result = re.findall(rf"Z\s+{log_level}\s+", logs)
|
||||
count_logs += len(result)
|
||||
except RuntimeError as e:
|
||||
...
|
||||
return count_logs
|
||||
|
||||
@allure.title("Metrics for the log counter")
|
||||
def test_log_counter_metrics(self, cluster: Cluster, restart_storage_service: str):
|
||||
def test_log_counter_metrics(self, cluster: Cluster, restart_storage_service: datetime):
|
||||
restart_time = restart_storage_service
|
||||
with reporter.step("Select random node"):
|
||||
node = random.choice(cluster.cluster_nodes)
|
||||
|
||||
with reporter.step("Get count logs from journalctl with level 'info'"):
|
||||
count_logs_info = self.get_count_logs_by_level(node.host.get_shell(), "info", restart_time)
|
||||
|
||||
with reporter.step(f"Check metrics count logs with level 'info'"):
|
||||
self.check_metrics_in_node(node, count_logs_info, command="frostfs_node_logger_entry_count", level="info")
|
||||
|
||||
with reporter.step("Get count logs from journalctl with level 'error'"):
|
||||
count_logs_error = self.get_count_logs_by_level(node.host.get_shell(), "error", restart_time)
|
||||
self.check_metrics_in_node(node, restart_time, command="frostfs_node_logger_entry_count", level="info")
|
||||
|
||||
with reporter.step(f"Check metrics count logs with level 'error'"):
|
||||
self.check_metrics_in_node(node, count_logs_error, command="frostfs_node_logger_entry_count", level="error")
|
||||
self.check_metrics_in_node(node, restart_time, command="frostfs_node_logger_entry_count", level="error")
|
||||
|
|
|
@ -5,92 +5,19 @@ import allure
|
|||
import pytest
|
||||
from frostfs_testlib import reporter
|
||||
from frostfs_testlib.steps.cli.container import create_container, delete_container, search_nodes_with_container
|
||||
from frostfs_testlib.steps.cli.object import (
|
||||
delete_object,
|
||||
get_object_nodes,
|
||||
lock_object,
|
||||
put_object,
|
||||
put_object_to_random_node,
|
||||
)
|
||||
from frostfs_testlib.steps.cli.object import delete_object, lock_object, put_object, put_object_to_random_node
|
||||
from frostfs_testlib.steps.metrics import check_metrics_counter, get_metrics_value
|
||||
from frostfs_testlib.steps.storage_policy import get_nodes_with_object
|
||||
from frostfs_testlib.storage.cluster import Cluster, ClusterNode
|
||||
from frostfs_testlib.storage.controllers.cluster_state_controller import ClusterStateController
|
||||
from frostfs_testlib.storage.dataclasses.object_size import ObjectSize
|
||||
from frostfs_testlib.storage.dataclasses.wallet import WalletInfo
|
||||
from frostfs_testlib.testing.cluster_test_base import ClusterTestBase
|
||||
from frostfs_testlib.testing.test_control import wait_for_success
|
||||
from frostfs_testlib.utils.file_utils import generate_file
|
||||
|
||||
|
||||
class TestObjectMetrics(ClusterTestBase):
|
||||
@wait_for_success(interval=10)
|
||||
def check_metrics_by_type(
|
||||
self, cluster_nodes: list[ClusterNode], metric_command: str, grep_by: str, metric_type: str, counter_exp: int
|
||||
):
|
||||
counter_act = 0
|
||||
for cluster_node in cluster_nodes:
|
||||
try:
|
||||
metric_result = cluster_node.metrics.storage.get_metrics_search_by_greps(
|
||||
command=metric_command, grep_by=grep_by
|
||||
)
|
||||
counter_act += self.calc_metrics_count_from_stdout(metric_result.stdout, metric_type)
|
||||
except RuntimeError as e:
|
||||
...
|
||||
assert (
|
||||
counter_act == counter_exp
|
||||
), f"Expected metric {metric_type}={counter_exp}, Actual: {counter_act} in nodes: {cluster_nodes}"
|
||||
|
||||
@staticmethod
|
||||
def calc_metrics_count_from_stdout(metric_result_stdout: str, metric_type: str):
|
||||
result = re.findall(rf'type="{metric_type}"}}\s(\d+)', metric_result_stdout)
|
||||
return sum(map(int, result))
|
||||
|
||||
@wait_for_success(interval=10)
|
||||
def check_object_metrics_total_and_container(
|
||||
self, cluster_nodes: list[ClusterNode], cid: str, objects_metric_total: int, objects_metric_container: int
|
||||
):
|
||||
self.check_metrics_by_type(
|
||||
cluster_nodes,
|
||||
"frostfs_node_engine_objects_total",
|
||||
grep_by="user",
|
||||
metric_type="user",
|
||||
counter_exp=objects_metric_total,
|
||||
)
|
||||
|
||||
objects_metric_container_act = 0
|
||||
for node in cluster_nodes:
|
||||
try:
|
||||
metrics_container = node.metrics.storage.get_metrics_search_by_greps(
|
||||
command="frostfs_node_engine_container_objects_total", cid=cid, type="user"
|
||||
)
|
||||
objects_metric_container_act += self.calc_metrics_count_from_stdout(
|
||||
metrics_container.stdout, metric_type="user"
|
||||
)
|
||||
except RuntimeError as e:
|
||||
...
|
||||
assert (
|
||||
objects_metric_container_act == objects_metric_container
|
||||
), f"Expected {objects_metric_container} objects in container"
|
||||
|
||||
@wait_for_success(max_wait_time=120, interval=10)
|
||||
def check_object_metrics_container(
|
||||
self, cluster_nodes: list[ClusterNode], cid: str, objects_metric_container_exp: int
|
||||
):
|
||||
objects_metric_container_act = 0
|
||||
for node in cluster_nodes:
|
||||
try:
|
||||
metrics_container = node.metrics.storage.get_metrics_search_by_greps(
|
||||
command="frostfs_node_engine_container_objects_total", cid=cid, type="user"
|
||||
)
|
||||
objects_metric_container_act += self.calc_metrics_count_from_stdout(
|
||||
metrics_container.stdout, metric_type="user"
|
||||
)
|
||||
except RuntimeError as e:
|
||||
...
|
||||
assert (
|
||||
objects_metric_container_act == objects_metric_container_exp
|
||||
), f"Expected {objects_metric_container_exp} objects in container"
|
||||
|
||||
@allure.title("Object metrics of removed container")
|
||||
@allure.title("Object metrics of removed container (obj_size={object_size})")
|
||||
def test_object_metrics_removed_container(
|
||||
self, object_size: ObjectSize, default_wallet: WalletInfo, cluster: Cluster
|
||||
):
|
||||
|
@ -99,28 +26,26 @@ class TestObjectMetrics(ClusterTestBase):
|
|||
copies = 2
|
||||
|
||||
with reporter.step(f"Create container with policy {placement_policy}"):
|
||||
cid = create_container(
|
||||
default_wallet,
|
||||
rule=placement_policy,
|
||||
shell=self.shell,
|
||||
endpoint=self.cluster.default_rpc_endpoint,
|
||||
)
|
||||
cid = create_container(default_wallet, self.shell, cluster.default_rpc_endpoint, placement_policy)
|
||||
|
||||
with reporter.step("Put object to random node"):
|
||||
storage_object_id = put_object_to_random_node(
|
||||
wallet=default_wallet,
|
||||
path=file_path,
|
||||
cid=cid,
|
||||
shell=self.shell,
|
||||
cluster=cluster,
|
||||
)
|
||||
oid = put_object_to_random_node(default_wallet, file_path, cid, self.shell, cluster)
|
||||
|
||||
with reporter.step("Check metric appears in node where the object is located"):
|
||||
object_nodes = get_object_nodes(
|
||||
cluster=cluster, cid=cid, oid=storage_object_id, alive_node=cluster.cluster_nodes[0]
|
||||
)
|
||||
object_storage_nodes = get_nodes_with_object(cid, oid, self.shell, cluster.storage_nodes)
|
||||
object_nodes = [
|
||||
cluster_node
|
||||
for cluster_node in cluster.cluster_nodes
|
||||
if cluster_node.storage_node in object_storage_nodes
|
||||
]
|
||||
|
||||
self.check_metrics_by_type(object_nodes, "frostfs_node_engine_container_objects_total", cid, "user", copies)
|
||||
check_metrics_counter(
|
||||
object_nodes,
|
||||
counter_exp=copies,
|
||||
command="frostfs_node_engine_container_objects_total",
|
||||
cid=cid,
|
||||
type="user",
|
||||
)
|
||||
|
||||
with reporter.step("Delete container"):
|
||||
delete_container(default_wallet, cid, shell=self.shell, endpoint=self.cluster.default_rpc_endpoint)
|
||||
|
@ -129,13 +54,20 @@ class TestObjectMetrics(ClusterTestBase):
|
|||
self.tick_epochs(epochs_to_tick=2, wait_block=2)
|
||||
|
||||
with reporter.step("Check metrics of removed containers doesn't appear in the storage node"):
|
||||
self.check_metrics_by_type(object_nodes, "frostfs_node_engine_container_objects_total", cid, "user", 0)
|
||||
check_metrics_counter(
|
||||
object_nodes, counter_exp=0, command="frostfs_node_engine_container_objects_total", cid=cid, type="user"
|
||||
)
|
||||
check_metrics_counter(
|
||||
object_nodes, counter_exp=0, command="frostfs_node_engine_container_size_byte", cid=cid
|
||||
)
|
||||
|
||||
for node in object_nodes:
|
||||
with pytest.raises(RuntimeError):
|
||||
node.metrics.storage.get_metric_container(f"frostfs_node_engine_container_size_byte", cid)
|
||||
all_metrics = node.metrics.storage.get_all_metrics()
|
||||
assert (
|
||||
cid not in all_metrics.stdout
|
||||
), "metrics of removed containers shouldn't appear in the storage node"
|
||||
|
||||
@allure.title("Object metrics, locked object, (policy={placement_policy})")
|
||||
@allure.title("Object metrics, locked object (obj_size={object_size}, policy={placement_policy})")
|
||||
@pytest.mark.parametrize(
|
||||
"placement_policy", ["REP 1 IN X CBF 1 SELECT 1 FROM * AS X", "REP 2 IN X CBF 2 SELECT 2 FROM * AS X"]
|
||||
)
|
||||
|
@ -146,12 +78,7 @@ class TestObjectMetrics(ClusterTestBase):
|
|||
metric_step = int(re.search(r"REP\s(\d+)", placement_policy).group(1))
|
||||
|
||||
with reporter.step(f"Create container with policy {placement_policy}"):
|
||||
cid = create_container(
|
||||
wallet=default_wallet,
|
||||
rule=placement_policy,
|
||||
shell=self.shell,
|
||||
endpoint=self.cluster.default_rpc_endpoint,
|
||||
)
|
||||
cid = create_container(default_wallet, self.shell, cluster.default_rpc_endpoint, placement_policy)
|
||||
|
||||
with reporter.step("Search container nodes"):
|
||||
container_nodes = search_nodes_with_container(
|
||||
|
@ -165,34 +92,51 @@ class TestObjectMetrics(ClusterTestBase):
|
|||
with reporter.step("Get current metrics for metric_type=user"):
|
||||
objects_metric_counter = 0
|
||||
for node in container_nodes:
|
||||
metric_objects_total = node.metrics.storage.get_metrics_search_by_greps(
|
||||
command="frostfs_node_engine_objects_total", type="user"
|
||||
)
|
||||
objects_metric_counter += self.calc_metrics_count_from_stdout(
|
||||
metric_objects_total.stdout, metric_type="user"
|
||||
objects_metric_counter += get_metrics_value(
|
||||
node, command="frostfs_node_engine_objects_total", type="user"
|
||||
)
|
||||
|
||||
with reporter.step("Put object to container node"):
|
||||
oid = put_object(
|
||||
wallet=default_wallet,
|
||||
path=file_path,
|
||||
cid=cid,
|
||||
shell=self.shell,
|
||||
endpoint=container_nodes[0].storage_node.get_rpc_endpoint(),
|
||||
default_wallet, file_path, cid, self.shell, container_nodes[0].storage_node.get_rpc_endpoint()
|
||||
)
|
||||
|
||||
with reporter.step(f"Check metric user 'the counter should increase by {metric_step}'"):
|
||||
objects_metric_counter += metric_step
|
||||
self.check_object_metrics_total_and_container(container_nodes, cid, objects_metric_counter, metric_step)
|
||||
check_metrics_counter(
|
||||
container_nodes,
|
||||
counter_exp=objects_metric_counter,
|
||||
command="frostfs_node_engine_objects_total",
|
||||
type="user",
|
||||
)
|
||||
check_metrics_counter(
|
||||
container_nodes,
|
||||
counter_exp=metric_step,
|
||||
command="frostfs_node_engine_container_objects_total",
|
||||
cid=cid,
|
||||
type="user",
|
||||
)
|
||||
|
||||
with reporter.step("Delete object"):
|
||||
delete_object(default_wallet, cid, oid, self.shell, self.cluster.default_rpc_endpoint)
|
||||
|
||||
with reporter.step(f"Check metric user 'the counter should decrease by {metric_step}'"):
|
||||
objects_metric_counter -= metric_step
|
||||
self.check_object_metrics_total_and_container(container_nodes, cid, objects_metric_counter, 0)
|
||||
check_metrics_counter(
|
||||
container_nodes,
|
||||
counter_exp=objects_metric_counter,
|
||||
command="frostfs_node_engine_objects_total",
|
||||
type="user",
|
||||
)
|
||||
check_metrics_counter(
|
||||
container_nodes,
|
||||
counter_exp=0,
|
||||
command="frostfs_node_engine_container_objects_total",
|
||||
cid=cid,
|
||||
type="user",
|
||||
)
|
||||
|
||||
with reporter.step("Put object and lock it"):
|
||||
with reporter.step("Put object and lock it to next epoch"):
|
||||
oid = put_object(
|
||||
default_wallet, file_path, cid, self.shell, container_nodes[0].storage_node.get_rpc_endpoint()
|
||||
)
|
||||
|
@ -208,18 +152,47 @@ class TestObjectMetrics(ClusterTestBase):
|
|||
|
||||
with reporter.step(f"Check metric user 'the counter should increase by {metric_step}'"):
|
||||
objects_metric_counter += metric_step
|
||||
self.check_object_metrics_total_and_container(container_nodes, cid, objects_metric_counter, metric_step)
|
||||
check_metrics_counter(
|
||||
container_nodes,
|
||||
counter_exp=objects_metric_counter,
|
||||
command="frostfs_node_engine_objects_total",
|
||||
type="user",
|
||||
)
|
||||
check_metrics_counter(
|
||||
container_nodes,
|
||||
counter_exp=metric_step,
|
||||
command="frostfs_node_engine_container_objects_total",
|
||||
cid=cid,
|
||||
type="user",
|
||||
)
|
||||
|
||||
with reporter.step(f"Wait until remove locking 'the counter doesn't change'"):
|
||||
self.tick_epochs(epochs_to_tick=2)
|
||||
self.check_object_metrics_total_and_container(container_nodes, cid, objects_metric_counter, metric_step)
|
||||
check_metrics_counter(
|
||||
container_nodes,
|
||||
counter_exp=objects_metric_counter,
|
||||
command="frostfs_node_engine_objects_total",
|
||||
type="user",
|
||||
)
|
||||
|
||||
with reporter.step("Delete object"):
|
||||
delete_object(default_wallet, cid, oid, self.shell, self.cluster.default_rpc_endpoint)
|
||||
|
||||
with reporter.step(f"Check metric user 'the counter should decrease by {metric_step}'"):
|
||||
objects_metric_counter -= metric_step
|
||||
self.check_object_metrics_total_and_container(container_nodes, cid, objects_metric_counter, 0)
|
||||
check_metrics_counter(
|
||||
container_nodes,
|
||||
counter_exp=objects_metric_counter,
|
||||
command="frostfs_node_engine_objects_total",
|
||||
type="user",
|
||||
)
|
||||
check_metrics_counter(
|
||||
container_nodes,
|
||||
counter_exp=0,
|
||||
command="frostfs_node_engine_container_objects_total",
|
||||
cid=cid,
|
||||
type="user",
|
||||
)
|
||||
|
||||
with reporter.step("Put object with expire_at"):
|
||||
current_epoch = self.get_epoch()
|
||||
|
@ -234,23 +207,43 @@ class TestObjectMetrics(ClusterTestBase):
|
|||
|
||||
with reporter.step(f"Check metric user 'the counter should increase by {metric_step}'"):
|
||||
objects_metric_counter += metric_step
|
||||
self.check_object_metrics_total_and_container(container_nodes, cid, objects_metric_counter, metric_step)
|
||||
check_metrics_counter(
|
||||
container_nodes,
|
||||
counter_exp=objects_metric_counter,
|
||||
command="frostfs_node_engine_objects_total",
|
||||
type="user",
|
||||
)
|
||||
check_metrics_counter(
|
||||
container_nodes,
|
||||
counter_exp=metric_step,
|
||||
command="frostfs_node_engine_container_objects_total",
|
||||
cid=cid,
|
||||
type="user",
|
||||
)
|
||||
|
||||
with reporter.step("Tick Epoch"):
|
||||
self.tick_epochs(epochs_to_tick=2)
|
||||
|
||||
with reporter.step(f"Check metric user 'the counter should decrease by {metric_step}'"):
|
||||
objects_metric_counter -= metric_step
|
||||
self.check_object_metrics_total_and_container(container_nodes, cid, objects_metric_counter, 0)
|
||||
check_metrics_counter(
|
||||
container_nodes,
|
||||
counter_exp=objects_metric_counter,
|
||||
command="frostfs_node_engine_objects_total",
|
||||
type="user",
|
||||
)
|
||||
check_metrics_counter(
|
||||
container_nodes,
|
||||
counter_exp=0,
|
||||
command="frostfs_node_engine_container_objects_total",
|
||||
cid=cid,
|
||||
type="user",
|
||||
)
|
||||
|
||||
with reporter.step("Delete container"):
|
||||
delete_container(default_wallet, cid, shell=self.shell, endpoint=self.cluster.default_rpc_endpoint)
|
||||
|
||||
@allure.title("Object metrics, stop the node")
|
||||
@allure.title("Object metrics, stop the node (obj_size={object_size})")
|
||||
def test_object_metrics_stop_node(
|
||||
self,
|
||||
object_size: ObjectSize,
|
||||
max_object_size: int,
|
||||
default_wallet: WalletInfo,
|
||||
cluster_state_controller: ClusterStateController,
|
||||
):
|
||||
|
@ -259,60 +252,70 @@ class TestObjectMetrics(ClusterTestBase):
|
|||
copies = 2
|
||||
|
||||
with reporter.step(f"Create container with policy {placement_policy}"):
|
||||
cid = create_container(
|
||||
wallet=default_wallet,
|
||||
rule=placement_policy,
|
||||
shell=self.shell,
|
||||
endpoint=self.cluster.default_rpc_endpoint,
|
||||
)
|
||||
cid = create_container(default_wallet, self.shell, self.cluster.default_rpc_endpoint, placement_policy)
|
||||
|
||||
with reporter.step("Search container nodes"):
|
||||
container_nodes = search_nodes_with_container(
|
||||
wallet=default_wallet,
|
||||
with reporter.step(f"Check object metrics in container 'should be zero'"):
|
||||
check_metrics_counter(
|
||||
self.cluster.cluster_nodes,
|
||||
counter_exp=0,
|
||||
command="frostfs_node_engine_container_objects_total",
|
||||
type="user",
|
||||
cid=cid,
|
||||
shell=self.shell,
|
||||
endpoint=self.cluster.default_rpc_endpoint,
|
||||
cluster=self.cluster,
|
||||
)
|
||||
|
||||
with reporter.step("Get current metrics for container nodes"):
|
||||
objects_metric_counter = 0
|
||||
for node in container_nodes:
|
||||
metric_objects_total = node.metrics.storage.get_metrics_search_by_greps(
|
||||
command="frostfs_node_engine_objects_total", type="user"
|
||||
)
|
||||
objects_metric_counter += self.calc_metrics_count_from_stdout(
|
||||
metric_objects_total.stdout, metric_type="user"
|
||||
with reporter.step("Get current metrics for each nodes"):
|
||||
objects_metric_counter: dict[ClusterNode:int] = {}
|
||||
for node in self.cluster.cluster_nodes:
|
||||
objects_metric_counter[node] = get_metrics_value(
|
||||
node, command="frostfs_node_engine_objects_total", type="user"
|
||||
)
|
||||
|
||||
with reporter.step("Put object to container node"):
|
||||
oid = put_object(
|
||||
wallet=default_wallet,
|
||||
path=file_path,
|
||||
cid=cid,
|
||||
shell=self.shell,
|
||||
endpoint=container_nodes[0].storage_node.get_rpc_endpoint(),
|
||||
)
|
||||
with reporter.step("Put object"):
|
||||
oid = put_object(default_wallet, file_path, cid, self.shell, self.cluster.default_rpc_endpoint)
|
||||
|
||||
with reporter.step(f"Check metric in container nodes 'the counter should increase by {copies}'"):
|
||||
objects_metric_counter += copies
|
||||
self.check_object_metrics_total_and_container(container_nodes, cid, objects_metric_counter, copies)
|
||||
with reporter.step("Get object nodes"):
|
||||
object_storage_nodes = get_nodes_with_object(cid, oid, self.shell, self.cluster.storage_nodes)
|
||||
object_nodes = [
|
||||
cluster_node
|
||||
for cluster_node in self.cluster.cluster_nodes
|
||||
if cluster_node.storage_node in object_storage_nodes
|
||||
]
|
||||
|
||||
with reporter.step(f"Check metrics in object nodes 'the counter should increase by {copies}'"):
|
||||
counter_exp = sum(objects_metric_counter[node] for node in object_nodes) + copies
|
||||
check_metrics_counter(
|
||||
object_nodes, counter_exp=counter_exp, command="frostfs_node_engine_objects_total", type="user"
|
||||
)
|
||||
check_metrics_counter(
|
||||
object_nodes,
|
||||
counter_exp=copies,
|
||||
command="frostfs_node_engine_container_objects_total",
|
||||
type="user",
|
||||
cid=cid,
|
||||
)
|
||||
|
||||
with reporter.step(f"Select node to stop"):
|
||||
node_to_stop = container_nodes[0]
|
||||
alive_nodes = [node for node in container_nodes if node != node_to_stop]
|
||||
node_to_stop = random.choice(object_nodes)
|
||||
alive_nodes = set(object_nodes).difference({node_to_stop})
|
||||
|
||||
with reporter.step(f"Stop the node, wait until the object is replicated to another node"):
|
||||
cluster_state_controller.stop_node_host(node_to_stop, "hard")
|
||||
objects_metric_counter[node_to_stop] += 1
|
||||
|
||||
with reporter.step(f"Check metric in alive nodes 'the counter should increase by 1'"):
|
||||
self.check_object_metrics_container(alive_nodes, cid, copies)
|
||||
with reporter.step(f"Check metric in alive nodes 'the counter should increase'"):
|
||||
counter_exp = sum(objects_metric_counter[node] for node in alive_nodes)
|
||||
check_metrics_counter(
|
||||
alive_nodes, ">=", counter_exp, command="frostfs_node_engine_objects_total", type="user"
|
||||
)
|
||||
|
||||
with reporter.step("Start node"):
|
||||
cluster_state_controller.start_node_host(node_to_stop)
|
||||
|
||||
with reporter.step(f"Check metric in container nodes 'the counter doesn't change'"):
|
||||
self.check_object_metrics_total_and_container(container_nodes, cid, objects_metric_counter, copies)
|
||||
|
||||
with reporter.step("Delete container"):
|
||||
delete_container(default_wallet, cid, shell=self.shell, endpoint=self.cluster.default_rpc_endpoint)
|
||||
with reporter.step(f"Check metric in restarted node, 'the counter doesn't change'"):
|
||||
check_metrics_counter(
|
||||
object_nodes,
|
||||
counter_exp=copies,
|
||||
command="frostfs_node_engine_container_objects_total",
|
||||
type="user",
|
||||
cid=cid,
|
||||
)
|
||||
|
|
|
@ -4,11 +4,13 @@ import re
|
|||
import allure
|
||||
import pytest
|
||||
from frostfs_testlib import reporter
|
||||
from frostfs_testlib.resources.error_patterns import OBJECT_NOT_FOUND
|
||||
from frostfs_testlib.resources.wellknown_acl import EACL_PUBLIC_READ_WRITE
|
||||
from frostfs_testlib.shell import Shell
|
||||
from frostfs_testlib.steps.cli.container import create_container, delete_container
|
||||
from frostfs_testlib.steps.cli.object import get_object, get_object_nodes, put_object
|
||||
from frostfs_testlib.steps.cli.container import create_container
|
||||
from frostfs_testlib.steps.cli.object import get_object, put_object
|
||||
from frostfs_testlib.steps.metrics import check_metrics_counter
|
||||
from frostfs_testlib.steps.node_management import node_shard_list, node_shard_set_mode
|
||||
from frostfs_testlib.steps.storage_policy import get_nodes_with_object
|
||||
from frostfs_testlib.storage.cluster import Cluster, ClusterNode
|
||||
from frostfs_testlib.storage.controllers import ShardsWatcher
|
||||
from frostfs_testlib.storage.dataclasses.wallet import WalletInfo
|
||||
|
@ -45,28 +47,13 @@ class TestShardMetrics(ClusterTestBase):
|
|||
watcher.set_shard_mode(shard["shard_id"], mode="read-write")
|
||||
watcher.await_for_all_shards_status(status="read-write")
|
||||
|
||||
@wait_for_success(interval=10)
|
||||
def check_metrics_in_node(self, cluster_node: ClusterNode, counter_exp: int, **metrics_greps: str):
|
||||
counter_act = 0
|
||||
try:
|
||||
metric_result = cluster_node.metrics.storage.get_metrics_search_by_greps(**metrics_greps)
|
||||
counter_act += self.calc_metrics_count_from_stdout(metric_result.stdout)
|
||||
except RuntimeError as e:
|
||||
...
|
||||
assert counter_act == counter_exp, f"Expected: {counter_exp}, Actual: {counter_act} in node: {cluster_node}"
|
||||
|
||||
@staticmethod
|
||||
def calc_metrics_count_from_stdout(metric_result_stdout: str):
|
||||
result = re.findall(r"}\s(\d+)", metric_result_stdout)
|
||||
return sum(map(int, result))
|
||||
|
||||
@staticmethod
|
||||
def get_error_count_from_logs(shell: Shell, object_path: str, object_name: str):
|
||||
def get_error_count_from_logs(cluster_node: ClusterNode, object_path: str, object_name: str):
|
||||
error_count = 0
|
||||
try:
|
||||
logs = shell.exec(f"journalctl -u frostfs-storage --grep='error count' --no-pager")
|
||||
logs = cluster_node.host.get_filtered_logs("error count", unit="frostfs-storage")
|
||||
# search error logs for current object
|
||||
for error_line in logs.stdout.split("\n"):
|
||||
for error_line in logs.split("\n"):
|
||||
if object_path in error_line and object_name in error_line:
|
||||
result = re.findall(r'"error\scount":\s(\d+)', error_line)
|
||||
error_count += sum(map(int, result))
|
||||
|
@ -106,17 +93,21 @@ class TestShardMetrics(ClusterTestBase):
|
|||
node_shard_set_mode(node.storage_node, shard1, "read-only")
|
||||
|
||||
with reporter.step(f"Check shard metrics, 'the mode will change to 'READ_ONLY'"):
|
||||
self.check_metrics_in_node(
|
||||
node, metrics_counter, command="frostfs_node_engine_mode_info", mode="READ_ONLY", shard_id=shard1
|
||||
check_metrics_counter(
|
||||
[node],
|
||||
counter_exp=metrics_counter,
|
||||
command="frostfs_node_engine_mode_info",
|
||||
mode="READ_ONLY",
|
||||
shard_id=shard1,
|
||||
)
|
||||
|
||||
with reporter.step("Shard2 set to mode 'degraded-read-only'"):
|
||||
node_shard_set_mode(node.storage_node, shard2, "degraded-read-only")
|
||||
|
||||
with reporter.step(f"Check shard metrics, 'the mode will change to 'DEGRADED_READ_ONLY'"):
|
||||
self.check_metrics_in_node(
|
||||
node,
|
||||
metrics_counter,
|
||||
check_metrics_counter(
|
||||
[node],
|
||||
counter_exp=metrics_counter,
|
||||
command="frostfs_node_engine_mode_info",
|
||||
mode="DEGRADED_READ_ONLY",
|
||||
shard_id=shard2,
|
||||
|
@ -126,9 +117,14 @@ class TestShardMetrics(ClusterTestBase):
|
|||
for shard in [shard1, shard2]:
|
||||
node_shard_set_mode(node.storage_node, shard, "read-write")
|
||||
|
||||
with reporter.step(f"Check shard metrics, 'the mode will change to 'READ_WRITE'"):
|
||||
self.check_metrics_in_node(
|
||||
node, metrics_counter, command="frostfs_node_engine_mode_info", mode="READ_WRITE", shard_id=shard
|
||||
with reporter.step(f"Check shard metrics, 'the mode will change to 'READ_WRITE'"):
|
||||
for shard in [shard1, shard2]:
|
||||
check_metrics_counter(
|
||||
[node],
|
||||
counter_exp=metrics_counter,
|
||||
command="frostfs_node_engine_mode_info",
|
||||
mode="READ_WRITE",
|
||||
shard_id=shard,
|
||||
)
|
||||
|
||||
@allure.title("Metric for error count on shard")
|
||||
|
@ -141,17 +137,22 @@ class TestShardMetrics(ClusterTestBase):
|
|||
cid = create_container(
|
||||
wallet=default_wallet,
|
||||
shell=self.shell,
|
||||
endpoint=self.cluster.default_rpc_endpoint,
|
||||
endpoint=cluster.default_rpc_endpoint,
|
||||
rule="REP 1 CBF 1",
|
||||
basic_acl=EACL_PUBLIC_READ_WRITE,
|
||||
)
|
||||
|
||||
with reporter.step("Put object"):
|
||||
oid = put_object(default_wallet, file_path, cid, self.shell, self.cluster.default_rpc_endpoint)
|
||||
oid = put_object(default_wallet, file_path, cid, self.shell, cluster.default_rpc_endpoint)
|
||||
|
||||
with reporter.step("Get object nodes"):
|
||||
object_nodes = get_object_nodes(cluster, cid, oid, cluster.cluster_nodes[0])
|
||||
node = object_nodes[0]
|
||||
object_storage_nodes = get_nodes_with_object(cid, oid, self.shell, cluster.storage_nodes)
|
||||
object_nodes = [
|
||||
cluster_node
|
||||
for cluster_node in cluster.cluster_nodes
|
||||
if cluster_node.storage_node in object_storage_nodes
|
||||
]
|
||||
node = random.choice(object_nodes)
|
||||
|
||||
with reporter.step("Search object in system."):
|
||||
object_path, object_name = self.get_object_path_and_name_file(oid, cid, node)
|
||||
|
@ -160,7 +161,7 @@ class TestShardMetrics(ClusterTestBase):
|
|||
node.host.get_shell().exec(f"chmod a-r {object_path}/{object_name}")
|
||||
|
||||
with reporter.step("Get object, expect error"):
|
||||
with pytest.raises(RuntimeError):
|
||||
with pytest.raises(RuntimeError, match=OBJECT_NOT_FOUND):
|
||||
get_object(
|
||||
wallet=default_wallet,
|
||||
cid=cid,
|
||||
|
@ -170,10 +171,7 @@ class TestShardMetrics(ClusterTestBase):
|
|||
)
|
||||
|
||||
with reporter.step(f"Get shard error count from logs"):
|
||||
counter = self.get_error_count_from_logs(node.host.get_shell(), object_path, object_name)
|
||||
counter = self.get_error_count_from_logs(node, object_path, object_name)
|
||||
|
||||
with reporter.step(f"Check shard error metrics"):
|
||||
self.check_metrics_in_node(node, counter, command="frostfs_node_engine_errors_total")
|
||||
|
||||
with reporter.step("Delete container"):
|
||||
delete_container(default_wallet, cid, self.shell, cluster.default_rpc_endpoint)
|
||||
check_metrics_counter([node], counter_exp=counter, command="frostfs_node_engine_errors_total")
|
||||
|
|
Loading…
Reference in a new issue