import logging
import os
import random
from datetime import datetime
from time import sleep

import allure
import pytest
from frostfs_testlib.cli import FrostfsCli
from frostfs_testlib.cli.netmap_parser import NetmapParser
from frostfs_testlib.resources.cli import FROSTFS_CLI_EXEC
from frostfs_testlib.resources.common import DEFAULT_WALLET_CONFIG, MORPH_BLOCK_TIME
from frostfs_testlib.resources.wellknown_acl import PUBLIC_ACL
from frostfs_testlib.s3 import S3ClientWrapper, VersioningStatus
from frostfs_testlib.steps.cli.container import (
    StorageContainer,
    StorageContainerInfo,
    create_container,
    search_nodes_with_container,
)
from frostfs_testlib.steps.cli.object import (
    delete_object,
    get_object,
    put_object,
    put_object_to_random_node,
    search_object,
)
from frostfs_testlib.steps.node_management import (
    check_node_in_map,
    check_node_not_in_map,
    exclude_node_from_network_map,
    include_node_to_network_map,
    remove_nodes_from_map_morph,
    wait_for_node_to_be_ready,
)
from frostfs_testlib.steps.s3 import s3_helper
from frostfs_testlib.storage.cluster import Cluster, ClusterNode, S3Gate, StorageNode
from frostfs_testlib.storage.controllers import ClusterStateController, ShardsWatcher
from frostfs_testlib.storage.dataclasses.object_size import ObjectSize
from frostfs_testlib.storage.dataclasses.storage_object_info import ModeNode, StorageObjectInfo
from frostfs_testlib.storage.dataclasses.wallet import WalletInfo
from frostfs_testlib.testing.cluster_test_base import ClusterTestBase
from frostfs_testlib.testing.test_control import expect_not_raises
from frostfs_testlib.utils import datetime_utils
from frostfs_testlib.utils.failover_utils import wait_object_replication
from frostfs_testlib.utils.file_keeper import FileKeeper
from frostfs_testlib.utils.file_utils import generate_file, get_file_hash


logger = logging.getLogger("NeoLogger")

stopped_nodes: list[StorageNode] = []
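

# The fixtures below clean up after failover scenarios: files changed during a test are
# rolled back, and hosts or services stopped by a test are started again on teardown.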
@pytest.fixture(scope="function")
@allure.title("Provide File Keeper")
def file_keeper():
    keeper = FileKeeper()
    yield keeper
    keeper.restore_files()


@allure.step("Return all stopped hosts")
@pytest.fixture(scope="function", autouse=True)
def after_run_return_all_stopped_hosts(cluster_state_controller: ClusterStateController):
    yield
    cluster_state_controller.start_stopped_hosts()


@allure.step("Return all stopped services after test")
@pytest.fixture(scope="function")
def after_run_return_all_stopped_services(cluster_state_controller: ClusterStateController):
    yield
    cluster_state_controller.start_all_stopped_services()
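

# Failover scenarios that stop entire hosts or individual services and check that
# objects remain available and uncorrupted afterwards.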
@pytest.mark.failover
@pytest.mark.failover_storage
class TestFailoverStorage(ClusterTestBase):
    @allure.title("Shutdown and start node (stop_mode={stop_mode})")
    @pytest.mark.parametrize("stop_mode", ["hard", "soft"])
    @pytest.mark.failover_reboot
    def test_lose_storage_node_host(
        self,
        default_wallet,
        stop_mode: str,
        require_multiple_hosts,
        simple_object_size: ObjectSize,
        cluster: Cluster,
        cluster_state_controller: ClusterStateController,
    ):
        wallet = default_wallet
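        # REP 2 IN X CBF 2 SELECT 2 FROM * AS X: two replicas on two nodes selected
        # from the whole network map (container backup factor 2).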
        placement_rule = "REP 2 IN X CBF 2 SELECT 2 FROM * AS X"
        source_file_path = generate_file(simple_object_size.value)
        stopped_hosts_nodes = []

        with allure.step("Create container and put object"):
            cid = create_container(
                wallet,
                shell=self.shell,
                endpoint=self.cluster.default_rpc_endpoint,
                rule=placement_rule,
                basic_acl=PUBLIC_ACL,
            )
            oid = put_object_to_random_node(wallet, source_file_path, cid, shell=self.shell, cluster=self.cluster)

        with allure.step("Wait for replication and get nodes with object"):
            nodes_with_object = wait_object_replication(cid, oid, 2, shell=self.shell, nodes=self.cluster.storage_nodes)

        with allure.step("Stop 2 nodes with the object and wait for replication one by one"):
            for storage_node in random.sample(nodes_with_object, 2):
                stopped_hosts_nodes.append(storage_node)

                cluster_node = cluster.node(storage_node)
                cluster_state_controller.stop_node_host(cluster_node, stop_mode)

                replicated_nodes = wait_object_replication(
                    cid,
                    oid,
                    2,
                    shell=self.shell,
                    nodes=list(set(self.cluster.storage_nodes) - {*stopped_hosts_nodes}),
                )

        with allure.step("Check object data is not corrupted"):
            got_file_path = get_object(
                wallet, cid, oid, endpoint=replicated_nodes[0].get_rpc_endpoint(), shell=self.shell
            )
            assert get_file_hash(source_file_path) == get_file_hash(got_file_path)

        with allure.step("Return all hosts"):
            cluster_state_controller.start_stopped_hosts()

        with allure.step("Check object data is not corrupted"):
            replicated_nodes = wait_object_replication(cid, oid, 2, shell=self.shell, nodes=self.cluster.storage_nodes)
            got_file_path = get_object(
                wallet, cid, oid, shell=self.shell, endpoint=replicated_nodes[0].get_rpc_endpoint()
            )
            assert get_file_hash(source_file_path) == get_file_hash(got_file_path)

    @allure.title("Do not ignore unhealthy tree endpoints (s3_client={s3_client})")
    def test_unhealthy_tree(
        self,
        s3_client: S3ClientWrapper,
        simple_object_size: ObjectSize,
        cluster_state_controller: ClusterStateController,
        after_run_return_all_stopped_services,
    ):
        default_node = self.cluster.cluster_nodes[0]

        with allure.step("Turn S3 GW off on default node"):
            cluster_state_controller.stop_service_of_type(default_node, S3Gate)

        with allure.step("Turn off storage on default node"):
            cluster_state_controller.stop_service_of_type(default_node, StorageNode)

        with allure.step("Turn on S3 GW on default node"):
            cluster_state_controller.start_service_of_type(default_node, S3Gate)

        with allure.step("Turn on storage on default node"):
            cluster_state_controller.start_service_of_type(default_node, StorageNode)

        with allure.step("Create bucket with REP 1 SELECT 1 policy"):
            bucket = s3_client.create_bucket(
                location_constraint="load-1-1",
            )

        file_path = generate_file(simple_object_size.value)
        file_name = s3_helper.object_key_from_file_path(file_path)

        with allure.step("Put object into bucket"):
            s3_client.put_object(bucket, file_path)
            s3_helper.check_objects_in_bucket(s3_client, bucket, expected_objects=[file_name])

        with allure.step("Turn off all storage nodes except default"):
            for node in self.cluster.cluster_nodes[1:]:
                with allure.step(f"Stop storage service on node: {node}"):
                    cluster_state_controller.stop_service_of_type(node, StorageNode)

        with allure.step("Check that object is available"):
            s3_helper.check_objects_in_bucket(s3_client, bucket, expected_objects=[file_name])

        with allure.step("Start storage nodes"):
            cluster_state_controller.start_all_stopped_services()


@pytest.mark.failover
@pytest.mark.failover_empty_map
class TestEmptyMap(ClusterTestBase):
    """
    A set of tests that make the network map empty and verify that objects can still be read afterwards.
    """

    @allure.step("Teardown after EmptyMap offline test")
    @pytest.fixture()
    def empty_map_offline_teardown(self):
        yield
        with allure.step("Return all storage nodes to network map"):
            for node in list(stopped_nodes):
                include_node_to_network_map(node, node, shell=self.shell, cluster=self.cluster)
                stopped_nodes.remove(node)

    @pytest.mark.failover_empty_map_offline
    @allure.title("Empty network map via offline all storage nodes (s3_client={s3_client})")
    def test_offline_all_storage_nodes(
        self,
        s3_client: S3ClientWrapper,
        bucket: str,
        simple_object_size: ObjectSize,
        empty_map_offline_teardown,
    ):
        """
        The test empties the network map (sets OFFLINE status on all storage nodes), then returns
        all nodes to the map and checks that the object can still be read through S3.

        Steps:
        1. Check that bucket is empty
        2. PUT object into bucket
        3. Check that object exists in bucket
        4. Exclude all storage nodes from network map (set status OFFLINE)
        5. Return all storage nodes to network map
        6. Check that we can read object from step 2

        Args:
            bucket: bucket which contains tested object
            simple_object_size: size of object
        """
        file_path = generate_file(simple_object_size.value)
        file_name = s3_helper.object_key_from_file_path(file_path)
        bucket_objects = [file_name]

        objects_list = s3_client.list_objects(bucket)
        assert not objects_list, f"Expected empty bucket, got {objects_list}"

        with allure.step("Put object into bucket"):
            s3_client.put_object(bucket, file_path)

        with allure.step("Check that object exists in bucket"):
            s3_helper.check_objects_in_bucket(s3_client, bucket, bucket_objects)

        storage_nodes = self.cluster.storage_nodes
        with allure.step("Exclude all storage nodes from network map"):
            for node in storage_nodes:
                stopped_nodes.append(node)
                exclude_node_from_network_map(node, node, shell=self.shell, cluster=self.cluster)

        with allure.step("Return all storage nodes to network map"):
            for node in storage_nodes:
                include_node_to_network_map(node, node, shell=self.shell, cluster=self.cluster)
                stopped_nodes.remove(node)

        with allure.step("Check that we can read object"):
            s3_helper.check_objects_in_bucket(s3_client, bucket, bucket_objects)

    @allure.step("Teardown after EmptyMap stop service test")
    @pytest.fixture()
    def empty_map_stop_service_teardown(self, cluster_state_controller: ClusterStateController):
        yield
        with allure.step("Return all storage nodes to network map"):
            cluster_state_controller.start_all_stopped_services()
            for node in stopped_nodes:
                check_node_in_map(node, shell=self.shell, alive_node=node)

    @pytest.mark.failover_empty_map_stop_service
    @allure.title("Empty network map via stop all storage services (s3_client={s3_client})")
    def test_stop_all_storage_nodes(
        self,
        s3_client: S3ClientWrapper,
        bucket: str,
        simple_object_size: ObjectSize,
        empty_map_stop_service_teardown,
        cluster_state_controller: ClusterStateController,
    ):
        """
        The test empties the network map (stops the storage service on all nodes, then uses
        'frostfs-adm morph delete-nodes' to delete the nodes from the map), then starts all
        services again and checks that the object can still be read through S3.

        Steps:
        1. Check that bucket is empty
        2. PUT object into bucket
        3. Check that object exists in bucket
        4. Exclude all storage nodes from network map (stop storage service and manually exclude them from the map)
        5. Return all storage nodes to network map
        6. Check that we can read object from step 2

        Args:
            bucket: bucket which contains tested object
            simple_object_size: size of object
        """
        file_path = generate_file(simple_object_size.value)
        file_name = s3_helper.object_key_from_file_path(file_path)
        bucket_objects = [file_name]

        objects_list = s3_client.list_objects(bucket)
        assert not objects_list, f"Expected empty bucket, got {objects_list}"

        with allure.step("Put object into bucket"):
            s3_client.put_object(bucket, file_path)

        with allure.step("Check that object exists in bucket"):
            s3_helper.check_objects_in_bucket(s3_client, bucket, bucket_objects)

        with allure.step("Stop all storage nodes"):
            cluster_state_controller.stop_services_of_type(StorageNode)

        with allure.step("Remove all nodes from network map"):
            remove_nodes_from_map_morph(
                shell=self.shell, cluster=self.cluster, remove_nodes=self.cluster.services(StorageNode)
            )

        with allure.step("Return all storage nodes to network map"):
            self.return_nodes_after_stop_with_check_empty_map(cluster_state_controller)

        with allure.step("Check that object exists in bucket"):
            s3_helper.check_objects_in_bucket(s3_client, bucket, bucket_objects)

    @allure.step("Return all nodes to cluster with check empty map first")
    def return_nodes_after_stop_with_check_empty_map(self, cluster_state_controller: ClusterStateController) -> None:
        first_node = self.cluster.cluster_nodes[0].service(StorageNode)

        with allure.step("Start first node and check network map"):
            cluster_state_controller.start_service_of_type(self.cluster.cluster_nodes[0], StorageNode)
            wait_for_node_to_be_ready(first_node)

            for check_node in self.cluster.storage_nodes:
                check_node_not_in_map(check_node, shell=self.shell, alive_node=first_node)

        for node in self.cluster.cluster_nodes[1:]:
            storage_node = node.service(StorageNode)

            cluster_state_controller.start_service_of_type(node, StorageNode)
            wait_for_node_to_be_ready(storage_node)

            sleep(datetime_utils.parse_time(MORPH_BLOCK_TIME))
            self.tick_epochs(1)

            check_node_in_map(storage_node, shell=self.shell, alive_node=first_node)

    @allure.title("Object loss from fstree/blobovnicza (versioning=enabled, s3_client={s3_client})")
    def test_s3_fstree_blobovnicza_loss_versioning_on(
        self,
        s3_client: S3ClientWrapper,
        simple_object_size: ObjectSize,
        cluster_state_controller: ClusterStateController,
    ):
        bucket = s3_client.create_bucket()
        s3_helper.set_bucket_versioning(s3_client, bucket, VersioningStatus.ENABLED)

        file_path = generate_file(simple_object_size.value)
        file_name = s3_helper.object_key_from_file_path(file_path)

        object_versions = []
        with allure.step("Put object into one bucket"):
            put_object = s3_client.put_object(bucket, file_path)
            s3_helper.check_objects_in_bucket(s3_client, bucket, expected_objects=[file_name])
            object_versions.append(put_object)

        with allure.step("Stop all storage nodes"):
            cluster_state_controller.stop_services_of_type(StorageNode)

        with allure.step("Delete blobovnicza and fstree from all nodes"):
            for node in self.cluster.storage_nodes:
                node.delete_blobovnicza()
                node.delete_fstree()

        with allure.step("Start all storage nodes"):
            cluster_state_controller.start_all_stopped_services()

        # need to get Delete Marker first
        with allure.step("Delete the object from the bucket"):
            delete_object = s3_client.delete_object(bucket, file_name)
            object_versions.append(delete_object["VersionId"])

        # and now delete all versions of object (including Delete Markers)
        with allure.step("Delete all versions of the object from the bucket"):
            for version in object_versions:
                delete_object = s3_client.delete_object(bucket, file_name, version_id=version)

        with allure.step("Delete bucket"):
            s3_client.delete_bucket(bucket)

    @allure.title("Object loss from fstree/blobovnicza (versioning=disabled, s3_client={s3_client})")
    def test_s3_fstree_blobovnicza_loss_versioning_off(
        self,
        s3_client: S3ClientWrapper,
        simple_object_size: ObjectSize,
        cluster_state_controller: ClusterStateController,
    ):
        bucket = s3_client.create_bucket()

        file_path = generate_file(simple_object_size.value)
        file_name = s3_helper.object_key_from_file_path(file_path)

        with allure.step("Put object into one bucket"):
            s3_client.put_object(bucket, file_path)
            s3_helper.check_objects_in_bucket(s3_client, bucket, expected_objects=[file_name])

        with allure.step("Stop all storage nodes"):
            cluster_state_controller.stop_services_of_type(StorageNode)

        with allure.step("Delete blobovnicza and fstree from all nodes"):
            for node in self.cluster.storage_nodes:
                node.delete_blobovnicza()
                node.delete_fstree()

        with allure.step("Start all storage nodes"):
            cluster_state_controller.start_all_stopped_services()

        with allure.step("Delete the object from the bucket"):
            s3_client.delete_object(bucket, file_name)

        with allure.step("Delete bucket"):
            s3_client.delete_bucket(bucket)

    @pytest.mark.skip(reason="Need to increase cache lifetime")
    @pytest.mark.parametrize(
        # versioning should NOT be VersioningStatus.SUSPENDED, it needs to be undefined
        "versioning_status",
        [VersioningStatus.ENABLED, VersioningStatus.UNDEFINED],
    )
    @allure.title(
        "After Pilorama.db loss on all nodes list objects should return nothing in second listing (versioning_status={versioning_status}, s3_client={s3_client})"
    )
    def test_s3_pilorama_loss(
        self,
        s3_client: S3ClientWrapper,
        simple_object_size: ObjectSize,
        versioning_status: VersioningStatus,
        cluster_state_controller: ClusterStateController,
    ):
        bucket = s3_client.create_bucket()
        s3_helper.set_bucket_versioning(s3_client, bucket, versioning_status)

        file_path = generate_file(simple_object_size.value)
        file_name = s3_helper.object_key_from_file_path(file_path)

        with allure.step("Put object into one bucket"):
            s3_client.put_object(bucket, file_path)
            s3_helper.check_objects_in_bucket(s3_client, bucket, expected_objects=[file_name])

        with allure.step("Stop all storage nodes"):
            cluster_state_controller.stop_services_of_type(StorageNode)

        with allure.step("Delete pilorama.db from all nodes"):
            for node in self.cluster.storage_nodes:
                node.delete_pilorama()

        with allure.step("Start all storage nodes"):
            cluster_state_controller.start_all_stopped_services()
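
        # The first listing is expected to still succeed (presumably served from the tree cache,
        # see the skip reason above); the second one should be empty because pilorama.db is gone.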
        with allure.step("Check list objects first time"):
            objects_list = s3_client.list_objects(bucket)
            assert objects_list, "Expected non-empty bucket"

        with allure.step("Check list objects second time"):
            objects_list = s3_client.list_objects(bucket)
            assert not objects_list, f"Expected empty bucket, got {objects_list}"

        with allure.step("Delete bucket"):
            s3_client.delete_bucket(bucket)


@pytest.mark.failover
@pytest.mark.failover_data_loss
class TestStorageDataLoss(ClusterTestBase):
    @allure.step("Get list of all piloramas on node")
    def get_piloramas_list(self, node: StorageNode) -> list:
        data_directory_path = node.get_data_directory()

        cmd = f"sudo ls -1 {data_directory_path}/meta*/pilorama*"
        shell = node.host.get_shell()
        stdout = shell.exec(cmd).stdout

        piloramas = stdout.split("\n")
        return piloramas

    @allure.title(
        "After metabase loss on all nodes operations on objects and buckets should be still available via S3 (s3_client={s3_client})"
    )
    @pytest.mark.metabase_loss
    def test_metabase_loss(
        self,
        s3_client: S3ClientWrapper,
        simple_object_size: ObjectSize,
        complex_object_size: ObjectSize,
        cluster_state_controller: ClusterStateController,
        after_run_return_all_stopped_services: str,
        file_keeper: FileKeeper,
    ):
        allure.dynamic.description(after_run_return_all_stopped_services)

        with allure.step("Create bucket"):
            bucket = s3_client.create_bucket()

        with allure.step("Put objects into bucket"):
            simple_object_path = generate_file(simple_object_size.value)
            simple_object_key = s3_helper.object_key_from_file_path(simple_object_path)

            complex_object_path = generate_file(complex_object_size.value)
            complex_object_key = s3_helper.object_key_from_file_path(complex_object_path)

            s3_client.put_object(bucket, simple_object_path)
            s3_client.put_object(bucket, complex_object_path)

        with allure.step("Check objects are in bucket"):
            s3_helper.check_objects_in_bucket(
                s3_client, bucket, expected_objects=[simple_object_key, complex_object_key]
            )

        with allure.step("Stop storage services on all nodes"):
            cluster_state_controller.stop_services_of_type(StorageNode)

        with allure.step("Delete metabase from all nodes"):
            for node in cluster_state_controller.cluster.storage_nodes:
                node.delete_metabase()
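
        # Any shard config changed below is registered with file_keeper, so the original
        # configuration is restored on teardown.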
        with allure.step("Enable resync_metabase option for storage services"):
            for storage_node in cluster_state_controller.cluster.storage_nodes:
                with allure.step(f"Enable resync_metabase option for {storage_node}"):
                    config_file_path, config = storage_node.get_shards_config()
                    if not config["storage"]["shard"]["default"]["resync_metabase"]:
                        file_keeper.add(storage_node, config_file_path)
                        config["storage"]["shard"]["default"]["resync_metabase"] = True
                        storage_node.save_config(config, config_file_path)

        with allure.step("Start storage services on all nodes"):
            cluster_state_controller.start_all_stopped_services()

        with allure.step("Wait for tree rebalance"):
            # TODO: Use product metric when we have proper ones for this check
            sleep(30)

        with allure.step("Delete objects from bucket"):
            with allure.step("Delete simple object from bucket"):
                with expect_not_raises():
                    s3_client.delete_object(bucket, simple_object_key)

            with allure.step("Delete complex object from bucket"):
                with expect_not_raises():
                    s3_client.delete_object(bucket, complex_object_key)

        with allure.step("Delete bucket"):
            with expect_not_raises():
                s3_client.delete_bucket(bucket)

    @allure.title("Write cache loss on one node should not affect shards and should not produce errors in log")
    @pytest.mark.write_cache_loss
    def test_write_cache_loss_on_one_node(
        self,
        node_under_test: ClusterNode,
        simple_object_size: ObjectSize,
        cluster_state_controller: ClusterStateController,
        shards_watcher: ShardsWatcher,
        default_wallet: str,
        test_start_time: datetime,
        after_run_return_all_stopped_services: str,
    ):
        exception_messages = []
        allure.dynamic.description(after_run_return_all_stopped_services)

        with allure.step(f"Create container on node {node_under_test}"):
            locode = node_under_test.storage_node.get_un_locode()
            placement_rule = f"""REP 1 IN X
            CBF 1
            SELECT 1 FROM C AS X
            FILTER 'UN-LOCODE' EQ '{locode}' AS C"""
            cid = create_container(
                default_wallet,
                self.shell,
                node_under_test.storage_node.get_rpc_endpoint(),
                rule=placement_rule,
            )
            container = StorageContainer(
                StorageContainerInfo(cid, WalletInfo(default_wallet)),
                self.shell,
                cluster_state_controller.cluster,
            )

        with allure.step(f"Put a couple of objects to container on node {node_under_test}"):
            storage_objects: list[StorageObjectInfo] = []
            for _ in range(5):
                storage_object = container.generate_object(
                    simple_object_size.value,
                    endpoint=node_under_test.storage_node.get_rpc_endpoint(),
                )
                storage_objects.append(storage_object)

        with allure.step("Take shards snapshot"):
            shards_watcher.take_shards_snapshot()

        with allure.step(f"Stop storage service on node {node_under_test}"):
            cluster_state_controller.stop_service_of_type(node_under_test, StorageNode)

        with allure.step(f"Delete write cache from node {node_under_test}"):
            node_under_test.storage_node.delete_write_cache()

        with allure.step(f"Start storage service on node {node_under_test}"):
            cluster_state_controller.start_all_stopped_services()

        with allure.step("Objects should be available"):
            for storage_object in storage_objects:
                get_object(
                    storage_object.wallet_file_path,
                    container.get_id(),
                    storage_object.oid,
                    self.shell,
                    node_under_test.storage_node.get_rpc_endpoint(),
                )

        with allure.step("No shards should have new errors"):
            shards_watcher.take_shards_snapshot()
            shards_with_errors = shards_watcher.get_shards_with_new_errors()
            if shards_with_errors:
                exception_messages.append(f"Shards have new errors: {shards_with_errors}")

        with allure.step("No shards should have degraded status"):
            snapshot = shards_watcher.get_shards_snapshot()
            for shard in snapshot:
                status = snapshot[shard]["mode"]
                if status != "read-write":
                    exception_messages.append(f"Shard {shard} changed status to {status}")

        with allure.step("No related errors should be in log"):
            if node_under_test.host.is_message_in_logs(
                message_regex=r"\Wno such file or directory\W", since=test_start_time
            ):
                exception_messages.append(f"Node {node_under_test} has shard errors in logs")

        with allure.step("Pass test if no errors found"):
            assert not exception_messages, "\n".join(exception_messages)

    @allure.title(
        "Loss of one node should trigger use of tree and storage service in another node (s3_client={s3_client})"
    )
    def test_s3_one_endpoint_loss(
        self,
        bucket,
        s3_client: S3ClientWrapper,
        simple_object_size: ObjectSize,
        after_run_return_all_stopped_services,
        cluster_state_controller: ClusterStateController,
    ):
        # TODO: need to check that s3 gate is connected to localhost (such metric will be supported in 1.3)
        with allure.step("Stop one node and wait for the S3 gate to rebalance its connection to a storage service"):
            current_node = self.cluster.cluster_nodes[0]
            cluster_state_controller.stop_service_of_type(current_node, StorageNode)
            # waiting for rebalance connection of s3 gate to storage service
            sleep(60)

        file_path = generate_file(simple_object_size.value)
        file_name = s3_helper.object_key_from_file_path(file_path)

        with allure.step("Put object into one bucket"):
            s3_client.put_object(bucket, file_path)
            s3_helper.check_objects_in_bucket(s3_client, bucket, expected_objects=[file_name])

    @allure.title("After Pilorama.db loss on one node object is retrievable (s3_client={s3_client})")
    def test_s3_one_pilorama_loss(
        self,
        s3_client: S3ClientWrapper,
        simple_object_size: ObjectSize,
        cluster_state_controller: ClusterStateController,
    ):
        bucket = s3_client.create_bucket(
            location_constraint="load-1-4",
            grant_read="uri=http://acs.amazonaws.com/groups/global/AllUsers",
        )
        s3_helper.set_bucket_versioning(s3_client, bucket, VersioningStatus.ENABLED)

        with allure.step("Check bucket versioning"):
            bucket_versioning = s3_client.get_bucket_versioning_status(bucket)
            assert bucket_versioning == "Enabled", "Bucket should have enabled versioning"

        file_path = generate_file(simple_object_size.value)
        file_name = s3_helper.object_key_from_file_path(file_path)

        object_versions = []
        with allure.step("Put object into one bucket"):
            put_object = s3_client.put_object(bucket, file_path)
            s3_helper.check_objects_in_bucket(s3_client, bucket, expected_objects=[file_name])
            object_versions.append(put_object)

        node_to_check = self.cluster.storage_nodes[0]

        piloramas_list_before_removing = {}
        with allure.step("Get list of all pilorama.db"):
            piloramas_list_before_removing = self.get_piloramas_list(node_to_check)

        with allure.step("Stop all storage nodes"):
            cluster_state_controller.stop_services_of_type(StorageNode)

        with allure.step("Delete pilorama.db from one node"):
            node_to_check.delete_pilorama()

        with allure.step("Start all storage nodes"):
            cluster_state_controller.start_all_stopped_services()

        with allure.step("Tick epoch to trigger sync and then wait for 2 minutes"):
            self.tick_epochs(1)
            sleep(120)

        piloramas_list_after_removing = {}
        with allure.step("Get list of all pilorama.db after sync"):
            piloramas_list_after_removing = self.get_piloramas_list(node_to_check)
            assert piloramas_list_after_removing == piloramas_list_before_removing, "List of pilorama.db is different"

        with allure.step("Check bucket versioning"):
            bucket_versioning = s3_client.get_bucket_versioning_status(bucket)
            assert bucket_versioning == "Enabled", "Bucket should have enabled versioning"

        with allure.step("Check list objects"):
            objects_list = s3_client.list_objects(bucket)
            assert objects_list, "Expected non-empty bucket"

        with allure.step("Delete the object from the bucket"):
            delete_object = s3_client.delete_object(bucket, file_name)
            assert "DeleteMarker" in delete_object.keys(), "Delete Marker not found"

        with allure.step("Check list objects"):
            objects_list = s3_client.list_objects_versions(bucket)
            assert objects_list, "Expected non-empty bucket"
            object_versions.append(delete_object["VersionId"])

        # and now delete all versions of object (including Delete Markers)
        with allure.step("Delete all versions of the object from the bucket"):
            for version in object_versions:
                delete_object = s3_client.delete_object(bucket, file_name, version_id=version)

        with allure.step("Check list objects"):
            objects_list = s3_client.list_objects_versions(bucket)
            assert not objects_list, "Expected empty bucket"

        with allure.step("Delete bucket"):
            s3_client.delete_bucket(bucket)


@pytest.mark.maintenance
class TestMaintenanceMode(ClusterTestBase):
    change_node: ClusterNode = None

    @pytest.fixture()
    @allure.title("Init Frostfs CLI remote")
    def frostfs_cli_remote(self, node_under_test: ClusterNode) -> FrostfsCli:
        host = node_under_test.host
        service_config = host.get_service_config(node_under_test.storage_node.name)
        wallet_path = service_config.attributes["wallet_path"]
        wallet_password = service_config.attributes["wallet_password"]

        shell = host.get_shell()
        wallet_config_path = f"/tmp/{node_under_test.storage_node.name}-config.yaml"
        wallet_config = f'wallet: {wallet_path}\npassword: "{wallet_password}"'
        shell.exec(f"echo '{wallet_config}' > {wallet_config_path}")

        cli = FrostfsCli(shell=shell, frostfs_cli_exec_path=FROSTFS_CLI_EXEC, config_file=wallet_config_path)
        return cli

    @pytest.fixture()
    @allure.title("Init Frostfs CLI")
    def frostfs_cli(self) -> FrostfsCli:
        cli = FrostfsCli(shell=self.shell, frostfs_cli_exec_path=FROSTFS_CLI_EXEC, config_file=DEFAULT_WALLET_CONFIG)
        return cli

    @pytest.fixture()
    def restore_online_mode_node(self, cluster_state_controller: ClusterStateController, default_wallet: str):
        yield
        cluster_state_controller.set_mode_node(cluster_node=self.change_node, wallet=default_wallet, status="online")
        self.tick_epoch(wait_block=2)
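
    # basic_operations runs get, search, delete and put against the given endpoint; each call
    # is expected to fail with the matching error message, except a search whose expected
    # message contains "Found", which must succeed.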
    def basic_operations(self, wallet, cid, oid, shell, endpoint, matches, object_size):
        file_path = generate_file(object_size)
        default_kw = {"wallet": wallet, "cid": cid, "shell": shell, "endpoint": endpoint}
        operations = {
            get_object: {"oid": oid},
            search_object: {},
            delete_object: {"oid": oid},
            put_object: {"path": file_path},
        }

        for index, (operation, kw) in enumerate(operations.items()):
            with allure.step(f"Run {operation.__name__} object, waiting response - {matches[index]}"):
                default_kw.update(kw)
                if operation == search_object and "Found" in matches[index]:
                    operation(**default_kw)
                    continue

                with pytest.raises(RuntimeError, match=matches[index]):
                    operation(**default_kw)

        os.remove(file_path)

    @allure.title("Test of basic node operations in maintenance mode")
    def test_maintenance_mode(
        self,
        default_wallet: str,
        simple_object_size: ObjectSize,
        cluster_state_controller: ClusterStateController,
        restore_online_mode_node: None,
    ):
        with allure.step("Create container and put object"):
            cid = create_container(
                wallet=default_wallet,
                shell=self.shell,
                endpoint=self.cluster.default_rpc_endpoint,
                rule="REP 1 CBF 1",
            )
            node = search_nodes_with_container(
                wallet=default_wallet,
                cid=cid,
                shell=self.shell,
                endpoint=self.cluster.default_rpc_endpoint,
                cluster=self.cluster,
            )
            self.change_node = node[0]

            file_path = generate_file(simple_object_size.value)
            oid = put_object(
                wallet=default_wallet,
                path=file_path,
                cid=cid,
                shell=self.shell,
                endpoint=self.change_node.storage_node.get_rpc_endpoint(),
            )

        with allure.step("Enable MaintenanceModeAllowed and set node status to maintenance"):
            cluster_state_controller.set_mode_node(
                cluster_node=self.change_node, wallet=default_wallet, status="maintenance"
            )

        other_nodes = list(set(self.cluster.cluster_nodes) - set(node))
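        # Expected responses per node, in the order the operations run in basic_operations:
        # get, search, delete, put.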
        node_and_match = {
            self.change_node: ["node is under maintenance"] * 4,
            other_nodes[0]: [
                "object not found",
                "Found 0 objects",
                "object not found",
                "node is under maintenance",
            ],
        }

        with allure.step("Run basic operations"):
            for cluster_node, matches in node_and_match.items():
                self.basic_operations(
                    wallet=default_wallet,
                    cid=cid,
                    oid=oid,
                    shell=self.shell,
                    endpoint=cluster_node.storage_node.get_rpc_endpoint(),
                    matches=matches,
                    object_size=simple_object_size.value,
                )

    @pytest.mark.sanity
    @allure.title("MAINTENANCE and OFFLINE mode transitions")
    def test_mode_transitions(
        self,
        cluster_state_controller: ClusterStateController,
        node_under_test: ClusterNode,
        default_wallet: str,
        frostfs_cli: FrostfsCli,
        restore_online_mode_node: None,
    ):
        self.change_node = node_under_test
        cluster_nodes = list(set(self.cluster.cluster_nodes) - {node_under_test})
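        # Epochs are always ticked via a node that stays online, since node_under_test
        # is taken offline or into maintenance during the test.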
        with allure.step("Tick epoch"):
            self.tick_epochs(epochs_to_tick=2, alive_node=cluster_nodes[0].storage_node, wait_block=2)

        with allure.step("Set node mode to offline"):
            cluster_state_controller.set_mode_node(
                cluster_node=node_under_test,
                wallet=default_wallet,
                status=ModeNode.OFFLINE.value,
            )

        with allure.step("Tick epoch to update the network map"):
            self.tick_epochs(epochs_to_tick=2, alive_node=cluster_nodes[0].storage_node, wait_block=2)

        with allure.step("Check node mode = offline, after updating the network map"):
            netmap = frostfs_cli.netmap.snapshot(
                rpc_endpoint=cluster_nodes[0].storage_node.get_rpc_endpoint(), wallet=default_wallet
            ).stdout
            netmap = NetmapParser.snapshot_all_nodes(netmap)
            assert node_under_test.host_ip not in [
                netmap_node.node for netmap_node in netmap
            ], f"Node {node_under_test.host_ip} is expected to be offline (absent from the network map)"

        with allure.step("Restart storage service"):
            cluster_state_controller.stop_storage_service(node_under_test)
            cluster_state_controller.start_storage_service(node_under_test)

        with allure.step("Tick epoch after restarting the storage service that was set to offline"):
            self.tick_epochs(epochs_to_tick=2, alive_node=cluster_nodes[0].storage_node, wait_block=2)

        with allure.step("Check node mode = online, after restart of the storage service"):
            netmap = frostfs_cli.netmap.snapshot(
                rpc_endpoint=cluster_nodes[0].storage_node.get_rpc_endpoint(), wallet=default_wallet
            ).stdout
            node_state = NetmapParser.snapshot_one_node(netmap, node_under_test).node_status
            assert node_state == ModeNode.ONLINE.value.upper(), (
                f"Node actual state - {node_state}, expected - {ModeNode.ONLINE.value}"
            )

        with allure.step("Tick epoch"):
            self.tick_epochs(epochs_to_tick=2, alive_node=cluster_nodes[0].storage_node, wait_block=2)

        with allure.step("Set node mode to maintenance"):
            cluster_state_controller.set_mode_node(
                cluster_node=node_under_test, wallet=default_wallet, status=ModeNode.MAINTENANCE.value
            )

        with allure.step("Restart storage service"):
            cluster_state_controller.stop_storage_service(node_under_test)
            cluster_state_controller.start_storage_service(node_under_test)

        with allure.step("Tick epoch after restarting the storage service"):
            self.tick_epochs(epochs_to_tick=2, alive_node=cluster_nodes[0].storage_node, wait_block=2)

        with allure.step("Check node mode = maintenance, after restarting the storage service and ticking the epoch"):
            netmap = frostfs_cli.netmap.snapshot(
                rpc_endpoint=cluster_nodes[0].storage_node.get_rpc_endpoint(), wallet=default_wallet
            ).stdout
            node_state = NetmapParser.snapshot_one_node(netmap, node_under_test).node_status
            assert node_state == ModeNode.MAINTENANCE.value.upper(), (
                f"Node actual state - {node_state}, expected - {ModeNode.MAINTENANCE.value}"
            )

        with allure.step("Tick epoch"):
            self.tick_epochs(epochs_to_tick=2, alive_node=cluster_nodes[0].storage_node, wait_block=2)

        with allure.step("Check node mode = offline after ticking the epoch"):
            netmap = frostfs_cli.netmap.snapshot(
                rpc_endpoint=cluster_nodes[0].storage_node.get_rpc_endpoint(), wallet=default_wallet
            ).stdout
            node_state = NetmapParser.snapshot_one_node(netmap, node_under_test).node_status
            assert node_state == ModeNode.OFFLINE.value.upper(), (
                f"Node actual state - {node_state}, expected - {ModeNode.OFFLINE.value}"
            )

        with allure.step("Stop storage service"):
            cluster_state_controller.stop_storage_service(node_under_test)

        with allure.step("Tick epoch"):
            self.tick_epochs(epochs_to_tick=2, alive_node=cluster_nodes[0].storage_node, wait_block=2)

        with allure.step("Start storage service"):
            cluster_state_controller.start_storage_service(node_under_test)

        with allure.step("Check node mode = offline, after ticking the epoch and starting the storage service"):
            netmap = frostfs_cli.netmap.snapshot(
                rpc_endpoint=cluster_nodes[0].storage_node.get_rpc_endpoint(), wallet=default_wallet
            ).stdout
            node_state = NetmapParser.snapshot_one_node(netmap, node_under_test).node_status
            assert node_state == ModeNode.OFFLINE.value.upper(), (
                f"Node actual state - {node_state}, expected - {ModeNode.OFFLINE.value}"
            )

        with allure.step("Tick epoch"):
            self.tick_epochs(epochs_to_tick=2, alive_node=cluster_nodes[0].storage_node, wait_block=2)

        with allure.step("Check node mode = online, after starting the storage service and ticking the epoch"):
            netmap = frostfs_cli.netmap.snapshot(
                rpc_endpoint=cluster_nodes[0].storage_node.get_rpc_endpoint(), wallet=default_wallet
            ).stdout
            node_state = NetmapParser.snapshot_one_node(netmap, node_under_test).node_status
            assert node_state == ModeNode.ONLINE.value.upper(), (
                f"Node actual state - {node_state}, expected - {ModeNode.ONLINE.value}"
            )

        with allure.step("Tick epoch"):
            self.tick_epochs(epochs_to_tick=2, alive_node=cluster_nodes[0].storage_node, wait_block=2)

        with allure.step("Set node mode to maintenance"):
            cluster_state_controller.set_mode_node(
                cluster_node=node_under_test, wallet=default_wallet, status=ModeNode.MAINTENANCE.value
            )

        with allure.step("Stop storage service"):
            cluster_state_controller.stop_storage_service(node_under_test)

        with allure.step("Tick epoch"):
            self.tick_epochs(epochs_to_tick=2, alive_node=cluster_nodes[0].storage_node, wait_block=2)

        with allure.step("Start storage service"):
            cluster_state_controller.start_storage_service(node_under_test)

        with allure.step("Check node mode = maintenance"):
            netmap = frostfs_cli.netmap.snapshot(
                rpc_endpoint=cluster_nodes[0].storage_node.get_rpc_endpoint(), wallet=default_wallet
            ).stdout
            node_state = NetmapParser.snapshot_one_node(netmap, node_under_test).node_status
            assert node_state == ModeNode.MAINTENANCE.value.upper(), (
                f"Node actual state - {node_state}, expected - {ModeNode.MAINTENANCE.value}"
            )

        with allure.step("Tick epoch"):
            self.tick_epochs(epochs_to_tick=2, alive_node=cluster_nodes[0].storage_node, wait_block=2)

        with allure.step("Check node mode = maintenance"):
            netmap = frostfs_cli.netmap.snapshot(
                rpc_endpoint=cluster_nodes[0].storage_node.get_rpc_endpoint(), wallet=default_wallet
            ).stdout
            node_state = NetmapParser.snapshot_one_node(netmap, node_under_test).node_status
            assert node_state == ModeNode.MAINTENANCE.value.upper(), (
                f"Node actual state - {node_state}, expected - {ModeNode.MAINTENANCE.value}"
            )

    @allure.title("A node cannot go into maintenance if maintenance is prohibited globally in the network")
    def test_maintenance_globally_forbidden(
        self,
        cluster_state_controller: ClusterStateController,
        node_under_test: ClusterNode,
        frostfs_cli_remote: FrostfsCli,
        frostfs_cli: FrostfsCli,
        default_wallet: str,
        restore_online_mode_node: None,
    ):
        self.change_node = node_under_test
        control_endpoint = node_under_test.service(StorageNode).get_control_endpoint()
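        # Status changes below go through the node's own control endpoint, using the remote
        # CLI configured with that node's service wallet (frostfs_cli_remote fixture).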
        with allure.step("Set MaintenanceModeAllowed = false"):
            cluster_state_controller.set_maintenance_mode_allowed("false", node_under_test)

        with allure.step("Try to set node status to maintenance"):
            with pytest.raises(RuntimeError, match="maintenance mode is not allowed by the network"):
                frostfs_cli_remote.control.set_status(endpoint=control_endpoint, status="maintenance")

        with allure.step("Set MaintenanceModeAllowed = true"):
            cluster_state_controller.set_maintenance_mode_allowed("true", node_under_test)

        with allure.step("Set node status to maintenance"):
            output = frostfs_cli_remote.control.set_status(endpoint=control_endpoint, status="maintenance")
            assert "update request successfully sent" in output.stdout, f"Response = {output}"

        with allure.step("Tick epoch"):
            self.tick_epoch(wait_block=2)

        with allure.step("Check node state = maintenance"):
            netmap_node = NetmapParser.snapshot_one_node(
                frostfs_cli.netmap.snapshot(
                    rpc_endpoint=node_under_test.storage_node.get_rpc_endpoint(), wallet=default_wallet
                ).stdout,
                node_under_test,
            )
            assert netmap_node.node_status == ModeNode.MAINTENANCE.value.upper(), (
                f"Node actual state - {netmap_node.node_status}, expected - {ModeNode.MAINTENANCE.value}"
            )

        with allure.step("Set node status to online"):
            frostfs_cli_remote.control.set_status(endpoint=control_endpoint, status="online")

        with allure.step("Tick epoch"):
            self.tick_epoch()

        with allure.step("Check node state = online"):
            netmap_node = NetmapParser.snapshot_one_node(
                frostfs_cli.netmap.snapshot(
                    rpc_endpoint=node_under_test.storage_node.get_rpc_endpoint(), wallet=default_wallet
                ).stdout,
                node_under_test,
            )
            assert netmap_node.node_status == ModeNode.ONLINE.value.upper(), (
                f"Node actual state - {netmap_node.node_status}, expected - {ModeNode.ONLINE.value}"
            )