forked from TrueCloudLab/frostfs-testcases
Add tests with empty map
Signed-off-by: anikeev-yadro <a.anikeev@yadro.com>
parent c3947b0716 · commit eb464f422c
4 changed files with 279 additions and 20 deletions
pytest_tests/helpers/cluster.py

@@ -1,4 +1,5 @@
 import random
+import pathlib
 import re
 from dataclasses import dataclass
 from typing import Any
@@ -110,7 +111,14 @@ class InnerRingNode(NodeBase):
     since frostfs network will still treat it as "node"
     """

-    pass
+    def get_netmap_cleaner_threshold(self) -> str:
+        config_file = self.get_remote_config_path()
+        contents = self.host.get_shell().exec(f"cat {config_file}").stdout
+
+        config = yaml.safe_load(contents)
+        value = config["netmap_cleaner"]["threshold"]
+
+        return value


 class S3Gate(NodeBase):
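For readers skimming the diff: the new `get_netmap_cleaner_threshold` helper only cats the node's config file over a remote shell and reads one nested YAML key. A minimal standalone sketch of that parsing step (the config snippet below is a hypothetical shape implied by the keys above, not taken from a real node):

```python
import yaml

# Hypothetical node config, shaped like the keys the helper reads.
contents = """
netmap_cleaner:
  threshold: 3
"""

config = yaml.safe_load(contents)  # YAML -> nested dicts
value = config["netmap_cleaner"]["threshold"]
print(value)  # -> 3
```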
pytest_tests/helpers/node_management.py

@@ -2,17 +2,24 @@ import logging
 import random
 import re
 import time
+from time import sleep
 from dataclasses import dataclass
 from typing import Optional

 import allure
-from frostfs_testlib.cli import FrostfsCli
+from frostfs_testlib.cli import FrostfsAdm, FrostfsCli
 from frostfs_testlib.shell import Shell
 from frostfs_testlib.utils import datetime_utils

 from pytest_tests.helpers.cluster import Cluster, StorageNode
 from pytest_tests.helpers.epoch import tick_epoch
-from pytest_tests.resources.common import FROSTFS_CLI_EXEC, MORPH_BLOCK_TIME
+from pytest_tests.resources.common import (
+    FROSTFS_ADM_CONFIG_PATH,
+    FROSTFS_ADM_EXEC,
+    FROSTFS_CLI_EXEC,
+    MORPH_BLOCK_TIME,
+)

 logger = logging.getLogger("NeoLogger")
@@ -211,6 +218,62 @@ def check_node_in_map(
         node_netmap_key in snapshot
     ), f"Expected node with key {node_netmap_key} to be in network map"


+@allure.step("Check node {node} NOT in network map")
+def check_node_not_in_map(
+    node: StorageNode, shell: Shell, alive_node: Optional[StorageNode] = None
+) -> None:
+    alive_node = alive_node or node
+
+    node_netmap_key = node.get_wallet_public_key()
+    logger.info(f"Node ({node.label}) netmap key: {node_netmap_key}")
+
+    snapshot = get_netmap_snapshot(alive_node, shell)
+    assert (
+        node_netmap_key not in snapshot
+    ), f"Expected node with key {node_netmap_key} to be NOT in network map"
+
+
+@allure.step("Wait for node {node} to be ready")
+def wait_for_node_to_be_ready(node: StorageNode) -> None:
+    timeout, attempts = 30, 6
+    for _ in range(attempts):
+        try:
+            health_check = storage_node_healthcheck(node)
+            if health_check.health_status == "READY":
+                return
+        except Exception as err:
+            logger.warning(f"Node {node} is not ready:\n{err}")
+        sleep(timeout)
+    raise AssertionError(
+        f"Node {node} hasn't gone to the READY state after {timeout * attempts} seconds"
+    )
+
+
+@allure.step("Remove nodes from network map through frostfs-adm morph command")
+def remove_nodes_from_map_morph(
+    shell: Shell,
+    cluster: Cluster,
+    remove_nodes: list[StorageNode],
+    alive_node: Optional[StorageNode] = None,
+):
+    """
+    Move nodes to the Offline state in the candidates list and tick an epoch to update the netmap,
+    using frostfs-adm.
+
+    Args:
+        shell: local shell used to query the current epoch; a remote shell is used to tick a new one
+        cluster: cluster instance under test
+        alive_node: node to send requests to (first node in cluster by default)
+        remove_nodes: list of nodes to remove from the map
+    """
+
+    alive_node = alive_node if alive_node else remove_nodes[0]
+    remote_shell = alive_node.host.get_shell()
+
+    node_netmap_keys = list(map(StorageNode.get_wallet_public_key, remove_nodes))
+    logger.info(f"Nodes netmap keys are: {' '.join(node_netmap_keys)}")
+
+    if FROSTFS_ADM_EXEC and FROSTFS_ADM_CONFIG_PATH:
+        # If frostfs-adm is available, tick the epoch with it (to be consistent with UAT tests)
+        frostfsadm = FrostfsAdm(
+            shell=remote_shell,
+            frostfs_adm_exec_path=FROSTFS_ADM_EXEC,
+            config_file=FROSTFS_ADM_CONFIG_PATH,
+        )
+        frostfsadm.morph.remove_nodes(node_netmap_keys)
+
+
 def _run_control_command_with_retries(node: StorageNode, command: str, retries: int = 0) -> str:
     for attempt in range(1 + retries):  # original attempt + specified retries
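The new `wait_for_node_to_be_ready` helper is a plain poll-with-retry loop. The same pattern, stripped of the testlib types so it runs standalone (`check_ready`, the timings, and the log text are illustrative placeholders, not the helper's real signature):

```python
import logging
import time
from typing import Callable

logger = logging.getLogger("NeoLogger")


def wait_until_ready(check_ready: Callable[[], bool], timeout: int = 30, attempts: int = 6) -> None:
    # Poll check_ready() up to `attempts` times, sleeping `timeout` seconds between tries.
    for _ in range(attempts):
        try:
            if check_ready():
                return
        except Exception as err:  # a failing probe is retried, not fatal
            logger.warning(f"Not ready yet:\n{err}")
        time.sleep(timeout)
    raise AssertionError(f"Not READY after {timeout * attempts} seconds")


# Trivial usage: a probe that succeeds on the second attempt.
calls = iter([False, True])
wait_until_ready(lambda: next(calls), timeout=0, attempts=2)
```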
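Note also that `remove_nodes_from_map_morph` only acts when both frostfs-adm settings are configured; otherwise it returns silently, so callers cannot assume the nodes were actually removed. A toy version showing just that gate (the env-var lookup and the stubbed body are assumptions of this sketch):

```python
import os

# These come from pytest_tests.resources.common in the real suite;
# reading them from the environment here is an assumption for the sketch.
FROSTFS_ADM_EXEC = os.getenv("FROSTFS_ADM_EXEC")
FROSTFS_ADM_CONFIG_PATH = os.getenv("FROSTFS_ADM_CONFIG_PATH")


def remove_nodes_stub(node_netmap_keys: list[str]) -> None:
    if FROSTFS_ADM_EXEC and FROSTFS_ADM_CONFIG_PATH:
        # Real helper: FrostfsAdm(...).morph.remove_nodes(node_netmap_keys)
        print(f"would remove {len(node_netmap_keys)} keys via frostfs-adm")
    # When frostfs-adm is not configured, nothing happens.


remove_nodes_stub(["02deadbeef"])
```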
test_failover_storage.py

@@ -1,11 +1,15 @@
+import os
 import logging
+from time import sleep

 import allure
 import pytest
+from frostfs_testlib.analytics import test_case
 from frostfs_testlib.hosting import Host
 from frostfs_testlib.resources.common import PUBLIC_ACL
 from frostfs_testlib.shell import CommandOptions
+from frostfs_testlib.utils import datetime_utils
+from pytest_tests.resources.common import FROSTFS_CONTRACT_CACHE_TIMEOUT, MORPH_BLOCK_TIME
-
 from pytest_tests.helpers.cluster import Cluster, StorageNode
 from pytest_tests.helpers.container import create_container
 from pytest_tests.helpers.failover_utils import (
@@ -16,6 +20,34 @@ from pytest_tests.helpers.file_helper import generate_file, get_file_hash
 from pytest_tests.helpers.frostfs_verbs import get_object, put_object_to_random_node
 from pytest_tests.steps.cluster_test_base import ClusterTestBase

+from pytest_tests.helpers.node_management import (
+    check_node_in_map,
+    check_node_not_in_map,
+    exclude_node_from_network_map,
+    include_node_to_network_map,
+    stop_random_storage_nodes,
+    wait_for_node_to_be_ready,
+    remove_nodes_from_map_morph
+)
+
+from pytest_tests.helpers.s3_helper import (
+    check_objects_in_bucket
+)
+from pytest_tests.steps import s3_gate_object
+from pytest_tests.steps.s3_gate_base import TestS3GateBase
+
+from pytest_tests.helpers.aws_cli_client import AwsCliClient
+from pytest_tests.helpers.file_helper import (
+    generate_file,
+    get_file_hash,
+)
+
+from pytest_tests.helpers.node_management import (
+    check_node_in_map,
+    exclude_node_from_network_map,
+    include_node_to_network_map,
+)
+
 logger = logging.getLogger("NeoLogger")
 stopped_nodes: list[StorageNode] = []
@@ -173,3 +205,173 @@ class TestFailoverStorage(ClusterTestBase):
             wallet, cid, oid, shell=self.shell, endpoint=new_nodes[0].get_rpc_endpoint()
         )
         assert get_file_hash(source_file_path) == get_file_hash(got_file_path)
+
+
+def pytest_generate_tests(metafunc):
+    if "s3_client" in metafunc.fixturenames:
+        metafunc.parametrize("s3_client", ["aws cli", "boto3"], indirect=True)
+
+
+@pytest.mark.failover
+@pytest.mark.failover_empty_map
+class TestEmptyMap(TestS3GateBase):
+    """
+    A set of tests that make the network map empty and verify that objects can still be read afterwards.
+    """
+
+    @allure.step("Teardown after EmptyMap offline test")
+    @pytest.fixture()
+    def empty_map_offline_teardown(self):
+        yield
+        with allure.step("Return all storage nodes to network map"):
+            for node in list(stopped_nodes):
+                include_node_to_network_map(
+                    node, node, shell=self.shell, cluster=self.cluster
+                )
+                stopped_nodes.remove(node)
+
+    @staticmethod
+    def object_key_from_file_path(full_path: str) -> str:
+        return os.path.basename(full_path)
+
+    @test_case.title("Test makes network map empty (offline all storage nodes)")
+    @test_case.priority(test_case.TestCasePriority.HIGH)
+    @test_case.suite_name("failovers")
+    @test_case.suite_section("test_failover_storage")
+    @pytest.mark.failover_empty_map_offline
+    @allure.title("Test makes network map empty (offline all storage nodes)")
+    def test_offline_all_storage_nodes(self, bucket, simple_object_size, empty_map_offline_teardown):
+        """
+        The test makes the network map empty (sets the OFFLINE status on all storage nodes),
+        then returns all nodes to the map and checks that the object can be read through S3.
+
+        Steps:
+        1. Check that the bucket is empty
+        2. PUT an object into the bucket
+        3. Check that the object exists in the bucket
+        4. Exclude all storage nodes from the network map (set status OFFLINE)
+        5. Return all storage nodes to the network map
+        6. Check that the object from step 2 can be read
+
+        Args:
+            bucket: bucket which contains the tested object
+            simple_object_size: size of the object
+        """
+        file_path = generate_file(simple_object_size)
+        file_name = self.object_key_from_file_path(file_path)
+        bucket_objects = [file_name]
+
+        objects_list = s3_gate_object.list_objects_s3(self.s3_client, bucket)
+        assert not objects_list, f"Expected empty bucket, got {objects_list}"
+
+        with allure.step("Put object into bucket"):
+            s3_gate_object.put_object_s3(self.s3_client, bucket, file_path)
+
+        with allure.step("Check that object exists in bucket"):
+            check_objects_in_bucket(self.s3_client, bucket, bucket_objects)
+
+        storage_nodes = self.cluster.storage_nodes
+        with allure.step("Exclude all storage nodes from network map"):
+            for node in storage_nodes:
+                exclude_node_from_network_map(
+                    node, node, shell=self.shell, cluster=self.cluster
+                )
+                stopped_nodes.append(node)
+
+        with allure.step("Return all storage nodes to network map"):
+            for node in storage_nodes:
+                include_node_to_network_map(
+                    node, node, shell=self.shell, cluster=self.cluster
+                )
+                stopped_nodes.remove(node)
+
+        with allure.step("Check that we can read object"):
+            check_objects_in_bucket(self.s3_client, bucket, bucket_objects)
+
+        objects_list = s3_gate_object.list_objects_s3(self.s3_client, bucket)
+        assert not objects_list, f"Expected empty bucket, got {objects_list}"
+
+    @allure.step("Teardown after EmptyMap stop service test")
+    @pytest.fixture()
+    def empty_map_stop_service_teardown(self):
+        yield
+        with allure.step("Return all storage nodes to network map"):
+            for node in list(stopped_nodes):
+                with allure.step(f"Start node {node}"):
+                    node.start_service()
+                with allure.step(f"Waiting status ready for node {node}"):
+                    wait_for_node_to_be_ready(node)
+
+                sleep(datetime_utils.parse_time(MORPH_BLOCK_TIME))
+                self.tick_epochs(1)
+                check_node_in_map(node, shell=self.shell, alive_node=node)
+                stopped_nodes.remove(node)
+
+    @test_case.title("Test makes network map empty (stop storage service on all nodes)")
+    @test_case.priority(test_case.TestCasePriority.HIGH)
+    @test_case.suite_name("failovers")
+    @test_case.suite_section("test_failover_storage")
+    @pytest.mark.failover_empty_map_stop_service
+    @allure.title("Test makes network map empty (stop storage service on all nodes)")
+    def test_stop_all_storage_nodes(self, bucket, simple_object_size, empty_map_stop_service_teardown):
+        """
+        The test makes the network map empty (stops the storage service on all nodes,
+        then uses 'frostfs-adm morph delete-nodes' to delete the nodes from the map),
+        then starts all services again and checks that the object can be read through S3.
+
+        Steps:
+        1. Check that the bucket is empty
+        2. PUT an object into the bucket
+        3. Check that the object exists in the bucket
+        4. Exclude all storage nodes from the network map (stop the storage service
+           and manually exclude the nodes from the map)
+        5. Return all storage nodes to the network map
+        6. Check that the object from step 2 can be read
+
+        Args:
+            bucket: bucket which contains the tested object
+            simple_object_size: size of the object
+        """
+        file_path = generate_file(simple_object_size)
+        file_name = self.object_key_from_file_path(file_path)
+        bucket_objects = [file_name]
+
+        objects_list = s3_gate_object.list_objects_s3(self.s3_client, bucket)
+        assert not objects_list, f"Expected empty bucket, got {objects_list}"
+
+        with allure.step("Put object into bucket"):
+            s3_gate_object.put_object_s3(self.s3_client, bucket, file_path)
+
+        with allure.step("Check that object exists in bucket"):
+            check_objects_in_bucket(self.s3_client, bucket, bucket_objects)
+
+        with allure.step("Stop all storage nodes"):
+            for node in self.cluster.storage_nodes:
+                with allure.step(f"Stop storage service on node: {node}"):
+                    node.stop_service()
+                    stopped_nodes.append(node)
+
+        with allure.step("Remove all nodes from network map"):
+            remove_nodes_from_map_morph(
+                shell=self.shell, cluster=self.cluster, remove_nodes=stopped_nodes
+            )
+
+        with allure.step("Return all storage nodes to network map"):
+            self.return_nodes_after_stop_with_check_empty_map(stopped_nodes)
+
+        with allure.step("Check that object exists in bucket"):
+            check_objects_in_bucket(self.s3_client, bucket, bucket_objects)
+
+    @allure.step("Return all nodes to cluster with check empty map first")
+    def return_nodes_after_stop_with_check_empty_map(self, return_nodes=None) -> None:
+        first_node = True
+        for node in list(return_nodes):
+            with allure.step(f"Start node {node}"):
+                node.start_service()
+            with allure.step(f"Waiting status ready for node {node}"):
+                wait_for_node_to_be_ready(node)
+
+            with allure.step("Make sure that network map is empty"):
+                if first_node:
+                    for check_node in list(return_nodes):
+                        check_node_not_in_map(check_node, shell=self.shell, alive_node=node)
+                    first_node = False
+
+            sleep(datetime_utils.parse_time(MORPH_BLOCK_TIME))
+            self.tick_epochs(1)
+            check_node_in_map(node, shell=self.shell, alive_node=node)
+            stopped_nodes.remove(node)
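The `pytest_generate_tests` hook added above makes every test in the module run once per S3 client implementation; `indirect=True` routes each parameter through the `s3_client` fixture instead of passing the raw string to the test. A self-contained sketch of the mechanism (the fixture name and return values here are illustrative, not the testlib's):

```python
import pytest


def pytest_generate_tests(metafunc):
    # Parametrize any test in this module that requests the "client" fixture.
    if "client" in metafunc.fixturenames:
        metafunc.parametrize("client", ["aws cli", "boto3"], indirect=True)


@pytest.fixture()
def client(request):
    # With indirect=True the parameter arrives as request.param,
    # letting the fixture build the matching client object.
    return f"client-for-{request.param}"


def test_client_is_built(client):  # runs twice: "aws cli", "boto3"
    assert client.startswith("client-for-")
```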
test_node_management.py

@@ -32,6 +32,7 @@ from pytest_tests.helpers.node_management import (
     node_shard_set_mode,
     storage_node_healthcheck,
     storage_node_set_status,
+    wait_for_node_to_be_ready
 )
 from pytest_tests.helpers.storage_policy import get_nodes_with_object, get_simple_object_copies
 from pytest_tests.helpers.utility import (
@@ -109,7 +110,7 @@ class TestNodeManagement(ClusterTestBase):
             with allure.step(f"Start node {node}"):
                 node.start_service()
             with allure.step(f"Waiting status ready for node {node}"):
-                self.wait_for_node_to_be_ready(node)
+                wait_for_node_to_be_ready(node)

         # We need to wait for node to establish notifications from morph-chain
         # Otherwise it will hang up when we will try to set status
@@ -451,21 +452,6 @@ class TestNodeManagement(ClusterTestBase):
             f"Node {node} hasn't gone to the READY and ONLINE state after {timeout * attempts} second"
         )

-    @allure.step("Wait for node {node} is ready")
-    def wait_for_node_to_be_ready(self, node: StorageNode) -> None:
-        timeout, attempts = 30, 6
-        for _ in range(attempts):
-            try:
-                health_check = storage_node_healthcheck(node)
-                if health_check.health_status == "READY":
-                    return
-            except Exception as err:
-                logger.warning(f"Node {node} is not ready:\n{err}")
-            sleep(timeout)
-        raise AssertionError(
-            f"Node {node} hasn't gone to the READY state after {timeout * attempts} seconds"
-        )
-
     @allure.step("Wait for {expected_copies} object copies in the wallet")
     def wait_for_expected_object_copies(
         self, wallet: str, cid: str, oid: str, expected_copies: int = 2
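These last hunks are a refactor rather than new behavior: `wait_for_node_to_be_ready` moves from a `TestNodeManagement` method to a module-level helper in `pytest_tests.helpers.node_management`, so the new `TestEmptyMap` suite can reuse it. Schematically (bodies elided, names kept from the diff):

```python
# Before: the helper is reachable only through the test class.
class TestNodeManagement:
    def wait_for_node_to_be_ready(self, node) -> None: ...

    def test_restart_node(self, node):
        self.wait_for_node_to_be_ready(node)


# After: a free function any suite can import.
def wait_for_node_to_be_ready(node) -> None: ...


class TestNodeManagement:
    def test_restart_node(self, node):
        wait_for_node_to_be_ready(node)
```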