Add Maintenance tests

Signed-off-by: Dmitriy Zayakin <d.zayakin@yadro.com>
bug/log_analyzer_fixing
Dmitriy Zayakin 2023-11-20 13:49:54 +03:00
parent 7da11807da
commit b0c9502bd3
2 changed files with 342 additions and 5 deletions

View File

@ -13,6 +13,7 @@ markers =
# controlling markers
no_healthcheck: skip healthcheck for this test
# functional markers
maintenance: tests for change mode node
container: tests for container creation
grpc_api: standard gRPC API tests
grpc_control: tests related to using frostfs-cli control commands

View File

@ -1,16 +1,30 @@
import logging
import os
import random
from datetime import datetime
from time import sleep
import allure
import pytest
from frostfs_testlib.resources.common import MORPH_BLOCK_TIME
from frostfs_testlib.cli import FrostfsCli
from frostfs_testlib.cli.netmap_parser import NetmapParser
from frostfs_testlib.resources.cli import FROSTFS_CLI_EXEC
from frostfs_testlib.resources.common import DEFAULT_WALLET_CONFIG, MORPH_BLOCK_TIME
from frostfs_testlib.resources.wellknown_acl import PUBLIC_ACL
from frostfs_testlib.s3 import S3ClientWrapper, VersioningStatus
from frostfs_testlib.shell import CommandOptions
from frostfs_testlib.steps.cli.container import StorageContainer, StorageContainerInfo, create_container
from frostfs_testlib.steps.cli.object import get_object, put_object_to_random_node
from frostfs_testlib.steps.cli.container import (
StorageContainer,
StorageContainerInfo,
create_container,
search_nodes_with_container,
)
from frostfs_testlib.steps.cli.object import (
delete_object,
get_object,
put_object,
put_object_to_random_node,
search_object,
)
from frostfs_testlib.steps.node_management import (
check_node_in_map,
check_node_not_in_map,
@ -23,7 +37,7 @@ from frostfs_testlib.steps.s3 import s3_helper
from frostfs_testlib.storage.cluster import Cluster, ClusterNode, S3Gate, StorageNode
from frostfs_testlib.storage.controllers import ClusterStateController, ShardsWatcher
from frostfs_testlib.storage.dataclasses.object_size import ObjectSize
from frostfs_testlib.storage.dataclasses.storage_object_info import StorageObjectInfo
from frostfs_testlib.storage.dataclasses.storage_object_info import ModeNode, StorageObjectInfo
from frostfs_testlib.storage.dataclasses.wallet import WalletInfo
from frostfs_testlib.testing.cluster_test_base import ClusterTestBase
from frostfs_testlib.testing.test_control import expect_not_raises
@ -773,3 +787,325 @@ class TestStorageDataLoss(ClusterTestBase):
with allure.step("Delete bucket"):
s3_client.delete_bucket(bucket)
@pytest.mark.maintenance
class TestMaintenanceMode(ClusterTestBase):
change_node: ClusterNode = None
@pytest.fixture()
@allure.title("Init Frostfs CLI remote")
def frostfs_cli_remote(self, node_under_test: ClusterNode) -> FrostfsCli:
host = node_under_test.host
service_config = host.get_service_config(node_under_test.storage_node.name)
wallet_path = service_config.attributes["wallet_path"]
wallet_password = service_config.attributes["wallet_password"]
shell = host.get_shell()
wallet_config_path = f"/tmp/{node_under_test.storage_node.name}-config.yaml"
wallet_config = f'wallet: {wallet_path}\npassword: "{wallet_password}"'
shell.exec(f"echo '{wallet_config}' > {wallet_config_path}")
cli = FrostfsCli(shell=shell, frostfs_cli_exec_path=FROSTFS_CLI_EXEC, config_file=wallet_config_path)
return cli
@pytest.fixture()
@allure.title("Init Frostfs CLI remote")
def frostfs_cli(self) -> FrostfsCli:
cli = FrostfsCli(shell=self.shell, frostfs_cli_exec_path=FROSTFS_CLI_EXEC, config_file=DEFAULT_WALLET_CONFIG)
return cli
@pytest.fixture()
def restore_online_mode_node(self, cluster_state_controller: ClusterStateController, default_wallet: str):
yield
cluster_state_controller.set_mode_node(cluster_node=self.change_node, wallet=default_wallet, status="online")
self.tick_epoch(wait_block=2)
def basic_operations(self, wallet, cid, oid, shell, endpoint, matchs, object_size):
file_path = generate_file(object_size)
default_kw = {"wallet": wallet, "cid": cid, "shell": shell, "endpoint": endpoint}
operations = {
get_object: {"oid": oid},
search_object: {},
delete_object: {"oid": oid},
put_object: {"path": file_path},
}
for index, operation, kw in enumerate(operations.items()):
with allure.step(f"Run {operation.__name__} object, waiting response - {matchs[index]}"):
default_kw.update(kw)
if operation == search_object and "Found" in matchs[index]:
operation(**default_kw)
continue
with pytest.raises(RuntimeError, match=matchs[index]):
operation(**default_kw)
os.remove(file_path)
@allure.title("Test of basic node operations in maintenance mode")
def test_maintenance_mode(
self,
default_wallet: str,
simple_object_size: ObjectSize,
cluster_state_controller: ClusterStateController,
restore_online_mode_node: None,
):
with allure.step("Create container and create\put object"):
cid = create_container(
wallet=default_wallet,
shell=self.shell,
endpoint=self.cluster.default_rpc_endpoint,
rule="REP 1 CBF 1",
)
node = search_nodes_with_container(
wallet=default_wallet,
cid=cid,
shell=self.shell,
endpoint=self.cluster.default_rpc_endpoint,
cluster=self.cluster,
)
self.change_node = node[0]
file_path = generate_file(simple_object_size.value)
oid = put_object(
wallet=default_wallet,
path=file_path,
cid=cid,
shell=self.shell,
endpoint=self.change_node.storage_node.get_rpc_endpoint(),
)
with allure.step("Enable MaintenanceModeAllowed:"):
cluster_state_controller.set_mode_node(
cluster_node=self.change_node, wallet=default_wallet, status="maintenance"
)
other_nodes = list(set(self.cluster.cluster_nodes) - set(node))
node_and_match = {
self.change_node: ["node is under maintenance"] * 4,
other_nodes[0]: [
"object not found",
"Found 0 objects",
"object not found",
"node is under maintenance",
],
}
with allure.step("Run basic operations"):
for cluster_node, matchs in node_and_match.items():
self.basic_operations(
wallet=default_wallet,
cid=cid,
oid=oid,
shell=self.shell,
endpoint=cluster_node.storage_node.get_rpc_endpoint(),
matchs=matchs,
object_size=simple_object_size.value,
)
@pytest.mark.sanity
@allure.title("MAINTENANCE and OFFLINE mode transitions")
def test_mode_transitions(
self,
cluster_state_controller: ClusterStateController,
node_under_test: ClusterNode,
default_wallet: str,
frostfs_cli: FrostfsCli,
restore_online_mode_node: None,
):
self.change_node = node_under_test
cluster_nodes = list(set(self.cluster.cluster_nodes) - {node_under_test})
with allure.step("Tick epoch"):
self.tick_epochs(epochs_to_tick=2, alive_node=cluster_nodes[0].storage_node, wait_block=2)
with allure.step("Set node mode to offline"):
cluster_state_controller.set_mode_node(
cluster_node=node_under_test,
wallet=default_wallet,
status=ModeNode.OFFLINE.value,
)
with allure.step("Tick epoch to update the network map"):
self.tick_epochs(epochs_to_tick=2, alive_node=cluster_nodes[0].storage_node, wait_block=2)
with ((allure.step("Check node mode = offline, after update the network map"))):
netmap = frostfs_cli.netmap.snapshot(
rpc_endpoint=cluster_nodes[0].storage_node.get_rpc_endpoint(), wallet=default_wallet
).stdout
netmap = NetmapParser.snapshot_all_nodes(netmap)
assert node_under_test.host_ip not in [
netmap_node.node for netmap_node in netmap
], f"Node {node_under_test.host_ip} not in state offline"
with allure.step("Restart storage service"):
cluster_state_controller.stop_storage_service(node_under_test)
cluster_state_controller.start_storage_service(node_under_test)
with allure.step("Tick epoch after restart storage service and set mode to offline"):
self.tick_epochs(epochs_to_tick=2, alive_node=cluster_nodes[0].storage_node, wait_block=2)
with (allure.step("Check node mode = online, after restart storage service")):
netmap = frostfs_cli.netmap.snapshot(
rpc_endpoint=cluster_nodes[0].storage_node.get_rpc_endpoint(), wallet=default_wallet
).stdout
node_state = NetmapParser.snapshot_one_node(netmap, node_under_test).node_status
assert node_state == ModeNode.ONLINE.value.upper(), (
f"Node actual state - {node_state}, " f"expect - {ModeNode.ONLINE.value}"
)
with allure.step("Tick epoch"):
self.tick_epochs(epochs_to_tick=2, alive_node=cluster_nodes[0].storage_node, wait_block=2)
with allure.step("Set node mode to maintenance"):
cluster_state_controller.set_mode_node(
cluster_node=node_under_test, wallet=default_wallet, status=ModeNode.MAINTENANCE.value
)
with allure.step("Restart storage service"):
cluster_state_controller.stop_storage_service(node_under_test)
cluster_state_controller.start_storage_service(node_under_test)
with allure.step("Tick epoch after restart storage service"):
self.tick_epochs(epochs_to_tick=2, alive_node=cluster_nodes[0].storage_node, wait_block=2)
with allure.step("Check node mode = maintenance, after restart storage service and tick epoch"):
netmap = frostfs_cli.netmap.snapshot(
rpc_endpoint=cluster_nodes[0].storage_node.get_rpc_endpoint(), wallet=default_wallet
).stdout
node_state = NetmapParser.snapshot_one_node(netmap, node_under_test).node_status
assert node_state == ModeNode.MAINTENANCE.value.upper(), (
f"Node actual state - {node_state}, " f"expect - {ModeNode.MAINTENANCE.value}"
)
with allure.step("Tick epoch"):
self.tick_epochs(epochs_to_tick=2, alive_node=cluster_nodes[0].storage_node, wait_block=2)
with allure.step("Set node mode to offline"):
netmap = frostfs_cli.netmap.snapshot(
rpc_endpoint=cluster_nodes[0].storage_node.get_rpc_endpoint(), wallet=default_wallet
).stdout
node_state = NetmapParser.snapshot_one_node(netmap, node_under_test).node_status
assert node_state == ModeNode.OFFLINE.value.upper(), (
f"Node actual state - {node_state}, " f"expect - {ModeNode.OFFLINE.value}"
)
with allure.step("Stop storage service"):
cluster_state_controller.stop_storage_service(node_under_test)
with allure.step("Tick epoch"):
self.tick_epochs(epochs_to_tick=2, alive_node=cluster_nodes[0].storage_node, wait_block=2)
with allure.step("Start storage service"):
cluster_state_controller.start_storage_service(node_under_test)
with allure.step("Check node mode = offline, after tick epoch and start storage service"):
netmap = frostfs_cli.netmap.snapshot(
rpc_endpoint=cluster_nodes[0].storage_node.get_rpc_endpoint(), wallet=default_wallet
).stdout
node_state = NetmapParser.snapshot_one_node(netmap, node_under_test).node_status
assert node_state == ModeNode.OFFLINE.value.upper(), (
f"Node actual state - {node_state}, " f"expect - {ModeNode.OFFLINE.value}"
)
with allure.step("Tick epoch"):
self.tick_epochs(epochs_to_tick=2, alive_node=cluster_nodes[0].storage_node, wait_block=2)
with allure.step("Check node mode = online, after start storage service and tick epoch"):
netmap = frostfs_cli.netmap.snapshot(
rpc_endpoint=cluster_nodes[0].storage_node.get_rpc_endpoint(), wallet=default_wallet
).stdout
node_state = NetmapParser.snapshot_one_node(netmap, node_under_test).node_status
assert node_state == ModeNode.ONLINE.value.upper(), (
f"Node actual state - {node_state}, " f"expect - {ModeNode.ONLINE.value}"
)
with allure.step("Tick epoch"):
self.tick_epochs(epochs_to_tick=2, alive_node=cluster_nodes[0].storage_node, wait_block=2)
with allure.step("Set node mode to maintenance"):
cluster_state_controller.set_mode_node(
cluster_node=node_under_test, wallet=default_wallet, status=ModeNode.MAINTENANCE.value
)
with allure.step("Stop storage service"):
cluster_state_controller.stop_storage_service(node_under_test)
with allure.step("Tick epoch"):
self.tick_epochs(epochs_to_tick=2, alive_node=cluster_nodes[0].storage_node, wait_block=2)
with allure.step("Start storage service"):
cluster_state_controller.start_storage_service(node_under_test)
with allure.step("Check node mode = maintenance"):
netmap = frostfs_cli.netmap.snapshot(
rpc_endpoint=cluster_nodes[0].storage_node.get_rpc_endpoint(), wallet=default_wallet
).stdout
node_state = NetmapParser.snapshot_one_node(netmap, node_under_test).node_status
assert (
node_state == ModeNode.MAINTENANCE.value.upper()
), f"Node actual state - {node_state}, expect - {ModeNode.MAINTENANCE.value}"
with allure.step("Tick epoch"):
self.tick_epochs(epochs_to_tick=2, alive_node=cluster_nodes[0].storage_node, wait_block=2)
with allure.step("Check node mode = maintenance"):
netmap = frostfs_cli.netmap.snapshot(
rpc_endpoint=cluster_nodes[0].storage_node.get_rpc_endpoint(), wallet=default_wallet
).stdout
node_state = NetmapParser.snapshot_one_node(netmap, node_under_test).node_status
assert node_state == ModeNode.MAINTENANCE.value.upper(), (
f"Node actual state - {node_state}, " f"expect - {ModeNode.MAINTENANCE.value}"
)
@allure.title("A node cannot go into maintenance if maintenance is prohibited globally in the network")
def test_maintenance_globally_forbidden(
self,
cluster_state_controller: ClusterStateController,
node_under_test: ClusterNode,
frostfs_cli_remote: FrostfsCli,
frostfs_cli: FrostfsCli,
default_wallet: str,
restore_online_mode_node: None,
):
self.change_node = node_under_test
control_endpoint = node_under_test.service(StorageNode).get_control_endpoint()
with allure.step("Set MaintenanceModeAllowed = false"):
cluster_state_controller.set_maintenance_mode_allowed("false", node_under_test)
with allure.step("Set status node - maintenance"):
with pytest.raises(RuntimeError, match="maintenance mode is not allowed by the network"):
frostfs_cli_remote.control.set_status(endpoint=control_endpoint, status="maintenance")
with allure.step("Set MaintenanceModeAllowed = true"):
cluster_state_controller.set_maintenance_mode_allowed("true", node_under_test)
with allure.step("Set status node - maintenance"):
output = frostfs_cli_remote.control.set_status(endpoint=control_endpoint, status="maintenance")
assert "update request successfully sent" in output.stdout, f"Response = {output}"
with allure.step("Tick epoch"):
self.tick_epoch(wait_block=2)
with allure.step("Check state node = maintenance "):
netmap_node = NetmapParser.snapshot_one_node(
frostfs_cli.netmap.snapshot(
rpc_endpoint=node_under_test.storage_node.get_rpc_endpoint(), wallet=default_wallet
).stdout,
node_under_test,
)
assert (
netmap_node.node_status == ModeNode.MAINTENANCE.value.upper()
), f"Node actual state - {netmap_node.node_status}, expect - {ModeNode.MAINTENANCE.value}"
with allure.step("Set status node - online "):
frostfs_cli_remote.control.set_status(endpoint=control_endpoint, status="online")
with allure.step("Tick epoch"):
self.tick_epoch()
with allure.step("Check state node: online"):
netmap_node = NetmapParser.snapshot_one_node(
frostfs_cli.netmap.snapshot(
rpc_endpoint=node_under_test.storage_node.get_rpc_endpoint(), wallet=default_wallet
).stdout,
node_under_test,
)
assert (
netmap_node.node_status == ModeNode.ONLINE.value.upper()
), f"Node actual state - {netmap_node.node_status}, expect - {ModeNode.ONLINE.value}"