Add Maintenance tests

Signed-off-by: Dmitriy Zayakin <d.zayakin@yadro.com>
This commit is contained in:
Dmitriy Zayakin 2023-11-20 13:49:54 +03:00
parent 7da11807da
commit b0c9502bd3
2 changed files with 342 additions and 5 deletions

View file

@ -13,6 +13,7 @@ markers =
# controlling markers # controlling markers
no_healthcheck: skip healthcheck for this test no_healthcheck: skip healthcheck for this test
# functional markers # functional markers
maintenance: tests for change mode node
container: tests for container creation container: tests for container creation
grpc_api: standard gRPC API tests grpc_api: standard gRPC API tests
grpc_control: tests related to using frostfs-cli control commands grpc_control: tests related to using frostfs-cli control commands

View file

@ -1,16 +1,30 @@
import logging import logging
import os
import random import random
from datetime import datetime from datetime import datetime
from time import sleep from time import sleep
import allure import allure
import pytest import pytest
from frostfs_testlib.resources.common import MORPH_BLOCK_TIME from frostfs_testlib.cli import FrostfsCli
from frostfs_testlib.cli.netmap_parser import NetmapParser
from frostfs_testlib.resources.cli import FROSTFS_CLI_EXEC
from frostfs_testlib.resources.common import DEFAULT_WALLET_CONFIG, MORPH_BLOCK_TIME
from frostfs_testlib.resources.wellknown_acl import PUBLIC_ACL from frostfs_testlib.resources.wellknown_acl import PUBLIC_ACL
from frostfs_testlib.s3 import S3ClientWrapper, VersioningStatus from frostfs_testlib.s3 import S3ClientWrapper, VersioningStatus
from frostfs_testlib.shell import CommandOptions from frostfs_testlib.steps.cli.container import (
from frostfs_testlib.steps.cli.container import StorageContainer, StorageContainerInfo, create_container StorageContainer,
from frostfs_testlib.steps.cli.object import get_object, put_object_to_random_node StorageContainerInfo,
create_container,
search_nodes_with_container,
)
from frostfs_testlib.steps.cli.object import (
delete_object,
get_object,
put_object,
put_object_to_random_node,
search_object,
)
from frostfs_testlib.steps.node_management import ( from frostfs_testlib.steps.node_management import (
check_node_in_map, check_node_in_map,
check_node_not_in_map, check_node_not_in_map,
@ -23,7 +37,7 @@ from frostfs_testlib.steps.s3 import s3_helper
from frostfs_testlib.storage.cluster import Cluster, ClusterNode, S3Gate, StorageNode from frostfs_testlib.storage.cluster import Cluster, ClusterNode, S3Gate, StorageNode
from frostfs_testlib.storage.controllers import ClusterStateController, ShardsWatcher from frostfs_testlib.storage.controllers import ClusterStateController, ShardsWatcher
from frostfs_testlib.storage.dataclasses.object_size import ObjectSize from frostfs_testlib.storage.dataclasses.object_size import ObjectSize
from frostfs_testlib.storage.dataclasses.storage_object_info import StorageObjectInfo from frostfs_testlib.storage.dataclasses.storage_object_info import ModeNode, StorageObjectInfo
from frostfs_testlib.storage.dataclasses.wallet import WalletInfo from frostfs_testlib.storage.dataclasses.wallet import WalletInfo
from frostfs_testlib.testing.cluster_test_base import ClusterTestBase from frostfs_testlib.testing.cluster_test_base import ClusterTestBase
from frostfs_testlib.testing.test_control import expect_not_raises from frostfs_testlib.testing.test_control import expect_not_raises
@ -773,3 +787,325 @@ class TestStorageDataLoss(ClusterTestBase):
with allure.step("Delete bucket"): with allure.step("Delete bucket"):
s3_client.delete_bucket(bucket) s3_client.delete_bucket(bucket)
@pytest.mark.maintenance
class TestMaintenanceMode(ClusterTestBase):
change_node: ClusterNode = None
@pytest.fixture()
@allure.title("Init Frostfs CLI remote")
def frostfs_cli_remote(self, node_under_test: ClusterNode) -> FrostfsCli:
host = node_under_test.host
service_config = host.get_service_config(node_under_test.storage_node.name)
wallet_path = service_config.attributes["wallet_path"]
wallet_password = service_config.attributes["wallet_password"]
shell = host.get_shell()
wallet_config_path = f"/tmp/{node_under_test.storage_node.name}-config.yaml"
wallet_config = f'wallet: {wallet_path}\npassword: "{wallet_password}"'
shell.exec(f"echo '{wallet_config}' > {wallet_config_path}")
cli = FrostfsCli(shell=shell, frostfs_cli_exec_path=FROSTFS_CLI_EXEC, config_file=wallet_config_path)
return cli
@pytest.fixture()
@allure.title("Init Frostfs CLI remote")
def frostfs_cli(self) -> FrostfsCli:
cli = FrostfsCli(shell=self.shell, frostfs_cli_exec_path=FROSTFS_CLI_EXEC, config_file=DEFAULT_WALLET_CONFIG)
return cli
@pytest.fixture()
def restore_online_mode_node(self, cluster_state_controller: ClusterStateController, default_wallet: str):
yield
cluster_state_controller.set_mode_node(cluster_node=self.change_node, wallet=default_wallet, status="online")
self.tick_epoch(wait_block=2)
def basic_operations(self, wallet, cid, oid, shell, endpoint, matchs, object_size):
file_path = generate_file(object_size)
default_kw = {"wallet": wallet, "cid": cid, "shell": shell, "endpoint": endpoint}
operations = {
get_object: {"oid": oid},
search_object: {},
delete_object: {"oid": oid},
put_object: {"path": file_path},
}
for index, operation, kw in enumerate(operations.items()):
with allure.step(f"Run {operation.__name__} object, waiting response - {matchs[index]}"):
default_kw.update(kw)
if operation == search_object and "Found" in matchs[index]:
operation(**default_kw)
continue
with pytest.raises(RuntimeError, match=matchs[index]):
operation(**default_kw)
os.remove(file_path)
@allure.title("Test of basic node operations in maintenance mode")
def test_maintenance_mode(
self,
default_wallet: str,
simple_object_size: ObjectSize,
cluster_state_controller: ClusterStateController,
restore_online_mode_node: None,
):
with allure.step("Create container and create\put object"):
cid = create_container(
wallet=default_wallet,
shell=self.shell,
endpoint=self.cluster.default_rpc_endpoint,
rule="REP 1 CBF 1",
)
node = search_nodes_with_container(
wallet=default_wallet,
cid=cid,
shell=self.shell,
endpoint=self.cluster.default_rpc_endpoint,
cluster=self.cluster,
)
self.change_node = node[0]
file_path = generate_file(simple_object_size.value)
oid = put_object(
wallet=default_wallet,
path=file_path,
cid=cid,
shell=self.shell,
endpoint=self.change_node.storage_node.get_rpc_endpoint(),
)
with allure.step("Enable MaintenanceModeAllowed:"):
cluster_state_controller.set_mode_node(
cluster_node=self.change_node, wallet=default_wallet, status="maintenance"
)
other_nodes = list(set(self.cluster.cluster_nodes) - set(node))
node_and_match = {
self.change_node: ["node is under maintenance"] * 4,
other_nodes[0]: [
"object not found",
"Found 0 objects",
"object not found",
"node is under maintenance",
],
}
with allure.step("Run basic operations"):
for cluster_node, matchs in node_and_match.items():
self.basic_operations(
wallet=default_wallet,
cid=cid,
oid=oid,
shell=self.shell,
endpoint=cluster_node.storage_node.get_rpc_endpoint(),
matchs=matchs,
object_size=simple_object_size.value,
)
@pytest.mark.sanity
@allure.title("MAINTENANCE and OFFLINE mode transitions")
def test_mode_transitions(
self,
cluster_state_controller: ClusterStateController,
node_under_test: ClusterNode,
default_wallet: str,
frostfs_cli: FrostfsCli,
restore_online_mode_node: None,
):
self.change_node = node_under_test
cluster_nodes = list(set(self.cluster.cluster_nodes) - {node_under_test})
with allure.step("Tick epoch"):
self.tick_epochs(epochs_to_tick=2, alive_node=cluster_nodes[0].storage_node, wait_block=2)
with allure.step("Set node mode to offline"):
cluster_state_controller.set_mode_node(
cluster_node=node_under_test,
wallet=default_wallet,
status=ModeNode.OFFLINE.value,
)
with allure.step("Tick epoch to update the network map"):
self.tick_epochs(epochs_to_tick=2, alive_node=cluster_nodes[0].storage_node, wait_block=2)
with ((allure.step("Check node mode = offline, after update the network map"))):
netmap = frostfs_cli.netmap.snapshot(
rpc_endpoint=cluster_nodes[0].storage_node.get_rpc_endpoint(), wallet=default_wallet
).stdout
netmap = NetmapParser.snapshot_all_nodes(netmap)
assert node_under_test.host_ip not in [
netmap_node.node for netmap_node in netmap
], f"Node {node_under_test.host_ip} not in state offline"
with allure.step("Restart storage service"):
cluster_state_controller.stop_storage_service(node_under_test)
cluster_state_controller.start_storage_service(node_under_test)
with allure.step("Tick epoch after restart storage service and set mode to offline"):
self.tick_epochs(epochs_to_tick=2, alive_node=cluster_nodes[0].storage_node, wait_block=2)
with (allure.step("Check node mode = online, after restart storage service")):
netmap = frostfs_cli.netmap.snapshot(
rpc_endpoint=cluster_nodes[0].storage_node.get_rpc_endpoint(), wallet=default_wallet
).stdout
node_state = NetmapParser.snapshot_one_node(netmap, node_under_test).node_status
assert node_state == ModeNode.ONLINE.value.upper(), (
f"Node actual state - {node_state}, " f"expect - {ModeNode.ONLINE.value}"
)
with allure.step("Tick epoch"):
self.tick_epochs(epochs_to_tick=2, alive_node=cluster_nodes[0].storage_node, wait_block=2)
with allure.step("Set node mode to maintenance"):
cluster_state_controller.set_mode_node(
cluster_node=node_under_test, wallet=default_wallet, status=ModeNode.MAINTENANCE.value
)
with allure.step("Restart storage service"):
cluster_state_controller.stop_storage_service(node_under_test)
cluster_state_controller.start_storage_service(node_under_test)
with allure.step("Tick epoch after restart storage service"):
self.tick_epochs(epochs_to_tick=2, alive_node=cluster_nodes[0].storage_node, wait_block=2)
with allure.step("Check node mode = maintenance, after restart storage service and tick epoch"):
netmap = frostfs_cli.netmap.snapshot(
rpc_endpoint=cluster_nodes[0].storage_node.get_rpc_endpoint(), wallet=default_wallet
).stdout
node_state = NetmapParser.snapshot_one_node(netmap, node_under_test).node_status
assert node_state == ModeNode.MAINTENANCE.value.upper(), (
f"Node actual state - {node_state}, " f"expect - {ModeNode.MAINTENANCE.value}"
)
with allure.step("Tick epoch"):
self.tick_epochs(epochs_to_tick=2, alive_node=cluster_nodes[0].storage_node, wait_block=2)
with allure.step("Set node mode to offline"):
netmap = frostfs_cli.netmap.snapshot(
rpc_endpoint=cluster_nodes[0].storage_node.get_rpc_endpoint(), wallet=default_wallet
).stdout
node_state = NetmapParser.snapshot_one_node(netmap, node_under_test).node_status
assert node_state == ModeNode.OFFLINE.value.upper(), (
f"Node actual state - {node_state}, " f"expect - {ModeNode.OFFLINE.value}"
)
with allure.step("Stop storage service"):
cluster_state_controller.stop_storage_service(node_under_test)
with allure.step("Tick epoch"):
self.tick_epochs(epochs_to_tick=2, alive_node=cluster_nodes[0].storage_node, wait_block=2)
with allure.step("Start storage service"):
cluster_state_controller.start_storage_service(node_under_test)
with allure.step("Check node mode = offline, after tick epoch and start storage service"):
netmap = frostfs_cli.netmap.snapshot(
rpc_endpoint=cluster_nodes[0].storage_node.get_rpc_endpoint(), wallet=default_wallet
).stdout
node_state = NetmapParser.snapshot_one_node(netmap, node_under_test).node_status
assert node_state == ModeNode.OFFLINE.value.upper(), (
f"Node actual state - {node_state}, " f"expect - {ModeNode.OFFLINE.value}"
)
with allure.step("Tick epoch"):
self.tick_epochs(epochs_to_tick=2, alive_node=cluster_nodes[0].storage_node, wait_block=2)
with allure.step("Check node mode = online, after start storage service and tick epoch"):
netmap = frostfs_cli.netmap.snapshot(
rpc_endpoint=cluster_nodes[0].storage_node.get_rpc_endpoint(), wallet=default_wallet
).stdout
node_state = NetmapParser.snapshot_one_node(netmap, node_under_test).node_status
assert node_state == ModeNode.ONLINE.value.upper(), (
f"Node actual state - {node_state}, " f"expect - {ModeNode.ONLINE.value}"
)
with allure.step("Tick epoch"):
self.tick_epochs(epochs_to_tick=2, alive_node=cluster_nodes[0].storage_node, wait_block=2)
with allure.step("Set node mode to maintenance"):
cluster_state_controller.set_mode_node(
cluster_node=node_under_test, wallet=default_wallet, status=ModeNode.MAINTENANCE.value
)
with allure.step("Stop storage service"):
cluster_state_controller.stop_storage_service(node_under_test)
with allure.step("Tick epoch"):
self.tick_epochs(epochs_to_tick=2, alive_node=cluster_nodes[0].storage_node, wait_block=2)
with allure.step("Start storage service"):
cluster_state_controller.start_storage_service(node_under_test)
with allure.step("Check node mode = maintenance"):
netmap = frostfs_cli.netmap.snapshot(
rpc_endpoint=cluster_nodes[0].storage_node.get_rpc_endpoint(), wallet=default_wallet
).stdout
node_state = NetmapParser.snapshot_one_node(netmap, node_under_test).node_status
assert (
node_state == ModeNode.MAINTENANCE.value.upper()
), f"Node actual state - {node_state}, expect - {ModeNode.MAINTENANCE.value}"
with allure.step("Tick epoch"):
self.tick_epochs(epochs_to_tick=2, alive_node=cluster_nodes[0].storage_node, wait_block=2)
with allure.step("Check node mode = maintenance"):
netmap = frostfs_cli.netmap.snapshot(
rpc_endpoint=cluster_nodes[0].storage_node.get_rpc_endpoint(), wallet=default_wallet
).stdout
node_state = NetmapParser.snapshot_one_node(netmap, node_under_test).node_status
assert node_state == ModeNode.MAINTENANCE.value.upper(), (
f"Node actual state - {node_state}, " f"expect - {ModeNode.MAINTENANCE.value}"
)
@allure.title("A node cannot go into maintenance if maintenance is prohibited globally in the network")
def test_maintenance_globally_forbidden(
self,
cluster_state_controller: ClusterStateController,
node_under_test: ClusterNode,
frostfs_cli_remote: FrostfsCli,
frostfs_cli: FrostfsCli,
default_wallet: str,
restore_online_mode_node: None,
):
self.change_node = node_under_test
control_endpoint = node_under_test.service(StorageNode).get_control_endpoint()
with allure.step("Set MaintenanceModeAllowed = false"):
cluster_state_controller.set_maintenance_mode_allowed("false", node_under_test)
with allure.step("Set status node - maintenance"):
with pytest.raises(RuntimeError, match="maintenance mode is not allowed by the network"):
frostfs_cli_remote.control.set_status(endpoint=control_endpoint, status="maintenance")
with allure.step("Set MaintenanceModeAllowed = true"):
cluster_state_controller.set_maintenance_mode_allowed("true", node_under_test)
with allure.step("Set status node - maintenance"):
output = frostfs_cli_remote.control.set_status(endpoint=control_endpoint, status="maintenance")
assert "update request successfully sent" in output.stdout, f"Response = {output}"
with allure.step("Tick epoch"):
self.tick_epoch(wait_block=2)
with allure.step("Check state node = maintenance "):
netmap_node = NetmapParser.snapshot_one_node(
frostfs_cli.netmap.snapshot(
rpc_endpoint=node_under_test.storage_node.get_rpc_endpoint(), wallet=default_wallet
).stdout,
node_under_test,
)
assert (
netmap_node.node_status == ModeNode.MAINTENANCE.value.upper()
), f"Node actual state - {netmap_node.node_status}, expect - {ModeNode.MAINTENANCE.value}"
with allure.step("Set status node - online "):
frostfs_cli_remote.control.set_status(endpoint=control_endpoint, status="online")
with allure.step("Tick epoch"):
self.tick_epoch()
with allure.step("Check state node: online"):
netmap_node = NetmapParser.snapshot_one_node(
frostfs_cli.netmap.snapshot(
rpc_endpoint=node_under_test.storage_node.get_rpc_endpoint(), wallet=default_wallet
).stdout,
node_under_test,
)
assert (
netmap_node.node_status == ModeNode.ONLINE.value.upper()
), f"Node actual state - {netmap_node.node_status}, expect - {ModeNode.ONLINE.value}"