Add tests with empty map #22
4 changed files with 279 additions and 20 deletions

@@ -1,4 +1,5 @@
 import random
+import pathlib
 import re
 from dataclasses import dataclass
 from typing import Any

@@ -110,7 +111,14 @@ class InnerRingNode(NodeBase):
     since frostfs network will still treat it as "node"
     """

-    pass
+    def get_netmap_cleaner_threshold(self) -> str:
+        config_file = self.get_remote_config_path()
+        contents = self.host.get_shell().exec(f"cat {config_file}").stdout
+
+        config = yaml.safe_load(contents)
+        value = config["netmap_cleaner"]["threshold"]
+
+        return value


 class S3Gate(NodeBase):
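
For context, a minimal sketch of the kind of configuration fragment get_netmap_cleaner_threshold expects and how it is parsed. Only the netmap_cleaner.threshold path comes from the method above; the surrounding YAML keys and values are assumptions for illustration.

import yaml

# Hypothetical inner-ring config fragment; only the netmap_cleaner.threshold
# path is taken from the method above, the rest is illustrative.
sample_config = """
netmap_cleaner:
  enabled: true
  threshold: 3
"""

config = yaml.safe_load(sample_config)
print(config["netmap_cleaner"]["threshold"])  # -> 3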

@@ -2,17 +2,24 @@ import logging
 import random
 import re
 import time
+from time import sleep
 from dataclasses import dataclass
 from typing import Optional

 import allure
-from frostfs_testlib.cli import FrostfsCli
+from frostfs_testlib.cli import FrostfsAdm, FrostfsCli
 from frostfs_testlib.shell import Shell
 from frostfs_testlib.utils import datetime_utils

 from pytest_tests.helpers.cluster import Cluster, StorageNode
 from pytest_tests.helpers.epoch import tick_epoch
-from pytest_tests.resources.common import FROSTFS_CLI_EXEC, MORPH_BLOCK_TIME
+from pytest_tests.resources.common import (
+    FROSTFS_CLI_EXEC,
+    FROSTFS_ADM_CONFIG_PATH,
+    FROSTFS_ADM_EXEC,
+    FROSTFS_CLI_EXEC,
+    MORPH_BLOCK_TIME,
+)

 logger = logging.getLogger("NeoLogger")

@@ -211,6 +218,62 @@ def check_node_in_map(
         node_netmap_key in snapshot
     ), f"Expected node with key {node_netmap_key} to be in network map"


+@allure.step("Check node {node} NOT in network map")
+def check_node_not_in_map(
+    node: StorageNode, shell: Shell, alive_node: Optional[StorageNode] = None
+) -> None:
+    alive_node = alive_node or node
+
+    node_netmap_key = node.get_wallet_public_key()
+    logger.info(f"Node ({node.label}) netmap key: {node_netmap_key}")
+
+    snapshot = get_netmap_snapshot(alive_node, shell)
+    assert (
+        node_netmap_key not in snapshot
+    ), f"Expected node with key {node_netmap_key} to be NOT in network map"
+
+
+@allure.step("Wait for node {node} is ready")
+def wait_for_node_to_be_ready(node: StorageNode) -> None:

    anikeev-yadro marked this conversation as resolved (Outdated)
    abereziny commented:
        We already have such method here:
        https://git.frostfs.info/TrueCloudLab/frostfs-testcases/src/branch/master/pytest_tests/testsuites/network/test_node_management.py#L455
    anikeev-yadro commented:
        method moved from test_node_management.py to node_management.py

+    timeout, attempts = 30, 6
+    for _ in range(attempts):
+        try:
+            health_check = storage_node_healthcheck(node)
+            if health_check.health_status == "READY":
+                return
+        except Exception as err:
+            logger.warning(f"Node {node} is not ready:\n{err}")
+        sleep(timeout)
+    raise AssertionError(
+        f"Node {node} hasn't gone to the READY state after {timeout * attempts} seconds"
+    )
+
+
+@allure.step("Remove nodes from network map through cli-adm morph command")
+def remove_nodes_from_map_morph(shell: Shell, cluster: Cluster, remove_nodes: list[StorageNode], alive_node: Optional[StorageNode] = None):
+    """
+    Move node to the Offline state in the candidates list and tick an epoch to update the netmap
+    using frostfs-adm
+
+    Args:
+        shell: local shell to make queries about current epoch. Remote shell will be used to tick new one
+        cluster: cluster instance under test
+        alive_node: node to send requests to (first node in cluster by default)
+        remove_nodes: list of nodes which would be removed from map
+    """
+
+    alive_node = alive_node if alive_node else remove_nodes[0]
+    remote_shell = alive_node.host.get_shell()
+
+    node_netmap_keys = list(map(StorageNode.get_wallet_public_key, remove_nodes))
+    logger.info(f"Nodes netmap keys are: {' '.join(node_netmap_keys)}")

    anikeev-yadro marked this conversation as resolved (Outdated)
    abereziny commented:
        "keys are"
    anikeev-yadro commented:
        fixed

+
+    if FROSTFS_ADM_EXEC and FROSTFS_ADM_CONFIG_PATH:
+        # If frostfs-adm is available, we use it to remove the nodes (to be consistent with UAT tests)
+        frostfsadm = FrostfsAdm(
+            shell=remote_shell,
+            frostfs_adm_exec_path=FROSTFS_ADM_EXEC,
+            config_file=FROSTFS_ADM_CONFIG_PATH,
+        )
+        frostfsadm.morph.remove_nodes(node_netmap_keys)


 def _run_control_command_with_retries(node: StorageNode, command: str, retries: int = 0) -> str:
     for attempt in range(1 + retries):  # original attempt + specified retries
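
To show how the new remove_nodes_from_map_morph helper is meant to be driven, here is a minimal sketch that mirrors the flow used later in test_stop_all_storage_nodes; the cluster and shell objects are assumed to come from the usual test fixtures, and the wrapper name is illustrative rather than part of the change itself.

def drop_all_nodes_from_netmap(cluster, shell):
    # Stop every storage node first, then remove their keys from the network
    # map via frostfs-adm. The helper only acts when FROSTFS_ADM_EXEC and
    # FROSTFS_ADM_CONFIG_PATH are configured.
    stopped = []
    for node in cluster.storage_nodes:
        node.stop_service()
        stopped.append(node)

    remove_nodes_from_map_morph(shell=shell, cluster=cluster, remove_nodes=stopped)
    # Nodes are later restarted and re-registered, see
    # return_nodes_after_stop_with_check_empty_map in the failover tests below.
    return stopped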

@@ -1,11 +1,15 @@
+import os
 import logging
+from time import sleep

 import allure
 import pytest
+from frostfs_testlib.analytics import test_case
 from frostfs_testlib.hosting import Host
 from frostfs_testlib.resources.common import PUBLIC_ACL
 from frostfs_testlib.shell import CommandOptions
+from frostfs_testlib.utils import datetime_utils
+from pytest_tests.resources.common import FROSTFS_CONTRACT_CACHE_TIMEOUT, MORPH_BLOCK_TIME
 from pytest_tests.helpers.cluster import Cluster, StorageNode
 from pytest_tests.helpers.container import create_container
 from pytest_tests.helpers.failover_utils import (

@@ -16,6 +20,34 @@ from pytest_tests.helpers.file_helper import generate_file, get_file_hash
 from pytest_tests.helpers.frostfs_verbs import get_object, put_object_to_random_node
 from pytest_tests.steps.cluster_test_base import ClusterTestBase

+from pytest_tests.helpers.node_management import (
+    check_node_in_map,
+    check_node_not_in_map,
+    exclude_node_from_network_map,
+    include_node_to_network_map,
+    stop_random_storage_nodes,
+    wait_for_node_to_be_ready,
+    remove_nodes_from_map_morph
+)
+
+from pytest_tests.helpers.s3_helper import (
+    check_objects_in_bucket
+)
+from pytest_tests.steps import s3_gate_object
+from pytest_tests.steps.s3_gate_base import TestS3GateBase
+
+from pytest_tests.helpers.aws_cli_client import AwsCliClient
+from pytest_tests.helpers.file_helper import (
+    generate_file,
+    get_file_hash,
+)
+
+from pytest_tests.helpers.node_management import (
+    check_node_in_map,
+    exclude_node_from_network_map,
+    include_node_to_network_map,
+)
+
 logger = logging.getLogger("NeoLogger")
 stopped_nodes: list[StorageNode] = []

@@ -173,3 +205,173 @@ class TestFailoverStorage(ClusterTestBase):
             wallet, cid, oid, shell=self.shell, endpoint=new_nodes[0].get_rpc_endpoint()
         )
         assert get_file_hash(source_file_path) == get_file_hash(got_file_path)
+
+
+def pytest_generate_tests(metafunc):
+    if "s3_client" in metafunc.fixturenames:
+        metafunc.parametrize("s3_client", ["aws cli", "boto3"], indirect=True)
+
+
+@pytest.mark.failover
+@pytest.mark.failover_empty_map
+class TestEmptyMap(TestS3GateBase):
+    """
+    A set of tests that make the network map empty and verify that objects can still be read afterwards
+    """
+
+    @allure.step("Teardown after EmptyMap offline test")
+    @pytest.fixture()
+    def empty_map_offline_teardown(self):
+        yield
+        with allure.step("Return all storage nodes to network map"):
+            for node in list(stopped_nodes):
+                include_node_to_network_map(
+                    node, node, shell=self.shell, cluster=self.cluster
+                )
+                stopped_nodes.remove(node)
+
+    @staticmethod
+    def object_key_from_file_path(full_path: str) -> str:
+        return os.path.basename(full_path)
+
+    @test_case.title("Test makes network map empty (offline all storage nodes)")
+    @test_case.priority(test_case.TestCasePriority.HIGH)
+    @test_case.suite_name("failovers")
+    @test_case.suite_section("test_failover_storage")
+    @pytest.mark.failover_empty_map_offlne
+    @allure.title("Test makes network map empty (offline all storage nodes)")
+    def test_offline_all_storage_nodes(self, bucket, simple_object_size, empty_map_offline_teardown):
+        """
+        The test makes the network map empty (sets offline status on all storage nodes), then returns all nodes to the map and checks that the object can be read through S3.
+
+        Steps:
+            1. Check that bucket is empty
+            2. PUT object into bucket
+            3. Check that object exists in bucket
+            4. Exclude all storage nodes from network map (set status OFFLINE)
+            5. Return all storage nodes to network map
+            6. Check that we can read object from step 2
+        Args:
+            bucket: bucket which contains tested object
+            simple_object_size: size of object
+        """
+        file_path = generate_file(simple_object_size)
+        file_name = self.object_key_from_file_path(file_path)
+        bucket_objects = [file_name]
+
+        objects_list = s3_gate_object.list_objects_s3(self.s3_client, bucket)
+        assert not objects_list, f"Expected empty bucket, got {objects_list}"
+
+        with allure.step("Put object into bucket"):
+            s3_gate_object.put_object_s3(self.s3_client, bucket, file_path)
+
+        with allure.step("Check that object exists in bucket"):
+            check_objects_in_bucket(self.s3_client, bucket, bucket_objects)
+
+        storage_nodes = self.cluster.storage_nodes
+        with allure.step("Exclude all storage nodes from network map"):
+            for node in storage_nodes:
+                exclude_node_from_network_map(
+                    node, node, shell=self.shell, cluster=self.cluster
+                )
+                stopped_nodes.append(node)
+
+        with allure.step("Return all storage nodes to network map"):
+            for node in storage_nodes:
+                include_node_to_network_map(
+                    node, node, shell=self.shell, cluster=self.cluster
+                )
+                stopped_nodes.remove(node)
+
+        with allure.step("Check that we can read object"):
+            check_objects_in_bucket(self.s3_client, bucket, bucket_objects)
+
+        objects_list = s3_gate_object.list_objects_s3(self.s3_client, bucket)
+        assert not objects_list, f"Expected empty bucket, got {objects_list}"
+
+    @allure.step("Teardown after EmptyMap stop service test")
+    @pytest.fixture()
+    def empty_map_stop_service_teardown(self):
+        yield
+        with allure.step("Return all storage nodes to network map"):
+            for node in list(list(stopped_nodes)):
+                with allure.step(f"Start node {node}"):
+                    node.start_service()
+                with allure.step(f"Waiting status ready for node {node}"):
+                    wait_for_node_to_be_ready(node)
+
+                sleep(datetime_utils.parse_time(MORPH_BLOCK_TIME))
+                self.tick_epochs(1)
+                check_node_in_map(node, shell=self.shell, alive_node=node)
+                stopped_nodes.remove(node)
+
+    @test_case.title("Test makes network map empty (stop storage service on all nodes)")
+    @test_case.priority(test_case.TestCasePriority.HIGH)
+    @test_case.suite_name("failovers")
+    @test_case.suite_section("test_failover_storage")
+    @pytest.mark.failover_empty_map_stop_service
+    @allure.title("Test makes network map empty (stop storage service on all nodes)")
+    def test_stop_all_storage_nodes(self, bucket, simple_object_size, empty_map_stop_service_teardown):
+        """
+        The test makes the network map empty (stops the storage service on all nodes,
+        then uses 'frostfs-adm morph delete-nodes' to delete the nodes from the map),
+        then starts all services and checks that the object can be read through S3.
+
+        Steps:
+            1. Check that bucket is empty
+            2. PUT object into bucket
+            3. Check that object exists in bucket
+            4. Exclude all storage nodes from network map (stop storage service
+               and manually exclude them from the map)
+            5. Return all storage nodes to network map
+            6. Check that we can read object from step 2
+        Args:
+            bucket: bucket which contains tested object
+            simple_object_size: size of object

    anikeev-yadro marked this conversation as resolved (Outdated)
    abereziny commented:
        Also need to ensure that nodes will be returned in case the test fails (a fixture which will call this method after the test ends)
    anikeev-yadro commented:
        added a teardown procedure to both tests

"""
|
||||||
|
file_path = generate_file(simple_object_size)
|
||||||
|
file_name = self.object_key_from_file_path(file_path)
|
||||||
|
bucket_objects = [file_name]
|
||||||
|
|
||||||
|
objects_list = s3_gate_object.list_objects_s3(self.s3_client, bucket)
|
||||||
|
assert not objects_list, f"Expected empty bucket, got {objects_list}"
|
||||||
|
|
||||||
|
with allure.step("Put object into bucket"):
|
||||||
|
s3_gate_object.put_object_s3(self.s3_client, bucket, file_path)
|
||||||
|
|
||||||
|
with allure.step("Check that object exists in bucket"):
|
||||||
|
check_objects_in_bucket(self.s3_client, bucket, bucket_objects)
|
||||||
|
|
||||||
|
with allure.step("Stop all storage nodes"):
|
||||||
|
for node in self.cluster.storage_nodes:
|
||||||
|
with allure.step(f"Stop storage service on node: {node}"):
|
||||||
|
node.stop_service()
|
||||||
|
stopped_nodes.append(node)
|
||||||
|
|
||||||
|
with allure.step(f"Remove all nodes from network map"):
|
||||||
|
remove_nodes_from_map_morph(shell=self.shell, cluster=self.cluster, remove_nodes=stopped_nodes)
|
||||||
|
|
||||||
|
with allure.step("Return all storage nodes to network map"):
|
||||||
|
self.return_nodes_after_stop_with_check_empty_map(stopped_nodes)
|
||||||
|
|
||||||
|
with allure.step("Check that object exists in bucket"):
|
||||||
|
check_objects_in_bucket(self.s3_client, bucket, bucket_objects)
|
||||||
|
|
||||||
|
@allure.step("Return all nodes to cluster with check empty map first")
|
||||||
|
def return_nodes_after_stop_with_check_empty_map(self, return_nodes = None) -> None:
|
||||||
|
first_node = True
|
||||||
|
for node in list(return_nodes):
|
||||||
|
with allure.step(f"Start node {node}"):
|
||||||
|
node.start_service()
|
||||||
|
with allure.step(f"Waiting status ready for node {node}"):
|
||||||
|
wait_for_node_to_be_ready(node)
|
||||||
|
|
||||||
|
with allure.step(f"We need to make sure that network map is empty"):
|
||||||
|
if first_node:
|
||||||
|
for check_node in list(return_nodes):
|
||||||
|
check_node_not_in_map(check_node, shell=self.shell, alive_node=node)
|
||||||
|
first_node = False
|
||||||
|
|
||||||
|
sleep(datetime_utils.parse_time(MORPH_BLOCK_TIME))
|
||||||
|
self.tick_epochs(1)
|
||||||
|
check_node_in_map(node, shell=self.shell, alive_node=node)
|
||||||
|
stopped_nodes.remove(node)
|
||||||
|
|
||||||
|
|
|
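
The reviewer's request above (return the nodes even if the test fails) is what the yield-style fixtures empty_map_offline_teardown and empty_map_stop_service_teardown in this file address. As a generic illustration of that pattern only, with hypothetical fixture and parameter names:

import pytest

@pytest.fixture()
def restore_nodes_teardown(cluster, shell):
    # Illustrative sketch: `cluster` and `shell` are assumed to be existing
    # fixtures. Everything after `yield` runs during teardown, even when the
    # test body raises, so nodes stashed in stopped_nodes are always returned
    # to the network map.
    yield
    for node in list(stopped_nodes):
        include_node_to_network_map(node, node, shell=shell, cluster=cluster)
        stopped_nodes.remove(node)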

@@ -32,6 +32,7 @@ from pytest_tests.helpers.node_management import (
     node_shard_set_mode,
     storage_node_healthcheck,
     storage_node_set_status,
+    wait_for_node_to_be_ready
 )
 from pytest_tests.helpers.storage_policy import get_nodes_with_object, get_simple_object_copies
 from pytest_tests.helpers.utility import (

@@ -109,7 +110,7 @@ class TestNodeManagement(ClusterTestBase):
         with allure.step(f"Start node {node}"):
             node.start_service()
         with allure.step(f"Waiting status ready for node {node}"):
-            self.wait_for_node_to_be_ready(node)
+            wait_for_node_to_be_ready(node)

         # We need to wait for node to establish notifications from morph-chain
         # Otherwise it will hang up when we will try to set status

@@ -451,21 +452,6 @@ class TestNodeManagement(ClusterTestBase):
                 f"Node {node} hasn't gone to the READY and ONLINE state after {timeout * attempts} second"
             )

-    @allure.step("Wait for node {node} is ready")
-    def wait_for_node_to_be_ready(self, node: StorageNode) -> None:
-        timeout, attempts = 30, 6
-        for _ in range(attempts):
-            try:
-                health_check = storage_node_healthcheck(node)
-                if health_check.health_status == "READY":
-                    return
-            except Exception as err:
-                logger.warning(f"Node {node} is not ready:\n{err}")
-            sleep(timeout)
-        raise AssertionError(
-            f"Node {node} hasn't gone to the READY state after {timeout * attempts} seconds"
-        )
-
     @allure.step("Wait for {expected_copies} object copies in the wallet")
     def wait_for_expected_object_copies(
     def wait_for_expected_object_copies(
         self, wallet: str, cid: str, oid: str, expected_copies: int = 2

Review comment: Looks like it should be moved to NodeBase class (every node has a config)
Reply: Deleted. It is already in the NodeBase class.
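
Following up on that exchange, here is a sketch of what a NodeBase-level config accessor could look like, written against the same calls the InnerRingNode method above already uses (get_remote_config_path() and host.get_shell()). This is only an illustration of the reviewer's suggestion, not the actual frostfs-testlib API; the helper name and signature are assumptions.

import yaml

def get_node_config_value(node, *keys: str):
    # Illustrative only: read the node's YAML config from its host and walk
    # down the requested keys, e.g.
    # get_node_config_value(ir_node, "netmap_cleaner", "threshold").
    config_file = node.get_remote_config_path()
    contents = node.host.get_shell().exec(f"cat {config_file}").stdout
    value = yaml.safe_load(contents)
    for key in keys:
        value = value[key]
    return value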