[#118] Add after-deploy healthcheck

Signed-off-by: Andrey Berezin <a.berezin@yadro.com>
This commit is contained in:
Andrey Berezin 2023-10-23 17:11:53 +03:00 committed by Andrey Berezin
parent 283149f837
commit f159cd89f3

View file

@ -1,12 +1,13 @@
import logging import logging
import os import os
import shutil import shutil
from datetime import datetime from datetime import datetime, timedelta, timezone
from typing import Optional from typing import Optional
import allure import allure
import pytest import pytest
import yaml import yaml
from dateutil import parser
from frostfs_testlib.hosting import Hosting from frostfs_testlib.hosting import Hosting
from frostfs_testlib.reporter import AllureHandler, get_reporter from frostfs_testlib.reporter import AllureHandler, get_reporter
from frostfs_testlib.resources.common import ( from frostfs_testlib.resources.common import (
@ -24,17 +25,18 @@ from frostfs_testlib.steps.node_management import storage_node_healthcheck
from frostfs_testlib.steps.s3 import s3_helper from frostfs_testlib.steps.s3 import s3_helper
from frostfs_testlib.storage.cluster import Cluster, ClusterNode from frostfs_testlib.storage.cluster import Cluster, ClusterNode
from frostfs_testlib.storage.controllers.cluster_state_controller import ClusterStateController from frostfs_testlib.storage.controllers.cluster_state_controller import ClusterStateController
from frostfs_testlib.storage.dataclasses.frostfs_services import StorageNode
from frostfs_testlib.storage.dataclasses.object_size import ObjectSize from frostfs_testlib.storage.dataclasses.object_size import ObjectSize
from frostfs_testlib.storage.dataclasses.wallet import WalletFactory, WalletInfo from frostfs_testlib.storage.dataclasses.wallet import WalletFactory, WalletInfo
from frostfs_testlib.testing.cluster_test_base import ClusterTestBase from frostfs_testlib.testing.cluster_test_base import ClusterTestBase
from frostfs_testlib.testing.parallel import parallel from frostfs_testlib.testing.parallel import parallel
from frostfs_testlib.testing.test_control import wait_for_success
from frostfs_testlib.utils import env_utils, version_utils from frostfs_testlib.utils import env_utils, version_utils
from pytest_tests.resources.common import HOSTING_CONFIG_FILE, TEST_CYCLES_COUNT from pytest_tests.resources.common import HOSTING_CONFIG_FILE, TEST_CYCLES_COUNT
logger = logging.getLogger("NeoLogger") logger = logging.getLogger("NeoLogger")
# Add logs check test even if it's not fit to mark selectors # Add logs check test even if it's not fit to mark selectors
def pytest_configure(config: pytest.Config): def pytest_configure(config: pytest.Config):
markers = config.option.markexpr markers = config.option.markexpr
@ -192,9 +194,7 @@ def s3_client(
policy=s3_policy, policy=s3_policy,
container_placement_policy=auth_container_placement_policy, container_placement_policy=auth_container_placement_policy,
) )
containers_list = list_containers( containers_list = list_containers(wallet.path, shell=client_shell, endpoint=cluster.default_rpc_endpoint)
wallet.path, shell=client_shell, endpoint=cluster.default_rpc_endpoint
)
assert cid in containers_list, f"Expected cid {cid} in {containers_list}" assert cid in containers_list, f"Expected cid {cid} in {containers_list}"
s3_client_cls = request.param s3_client_cls = request.param
@ -232,7 +232,7 @@ def two_buckets(s3_client: S3ClientWrapper):
s3_helper.delete_bucket_with_objects(s3_client, bucket_name) s3_helper.delete_bucket_with_objects(s3_client, bucket_name)
@allure.step("Check binary versions") @allure.step("[Autouse/Session] Check binary versions")
@pytest.fixture(scope="session", autouse=True) @pytest.fixture(scope="session", autouse=True)
def check_binary_versions(hosting: Hosting, client_shell: Shell, request: pytest.FixtureRequest): def check_binary_versions(hosting: Hosting, client_shell: Shell, request: pytest.FixtureRequest):
local_versions = version_utils.get_local_binaries_versions(client_shell) local_versions = version_utils.get_local_binaries_versions(client_shell)
@ -269,7 +269,42 @@ def session_start_time():
return start_time return start_time
@allure.title("Run health check for all nodes") @allure.title("[Autouse/Session] After deploy healthcheck")
@pytest.fixture(scope="session", autouse=True)
def after_deploy_healthcheck(cluster: Cluster):
    """Session-wide autouse fixture: run the readiness check on every cluster node.

    ``readiness_on_node`` is dispatched through ``parallel`` over
    ``cluster.cluster_nodes`` inside a single allure step.
    """
    with allure.step("Wait for cluster readiness after deploy"):
        nodes_to_check = cluster.cluster_nodes
        parallel(readiness_on_node, nodes_to_check)
# Minimum time, in minutes, the storage service must have been active.
SERVICE_ACTIVE_TIME = 15


@wait_for_success(60 * SERVICE_ACTIVE_TIME * 2, 1)
@allure.step("Check readiness on node {cluster_node}")
def readiness_on_node(cluster_node: ClusterNode):
    """Assert the storage service on ``cluster_node`` is active and has been for a while.

    Two checks are executed through the node's host shell:

    1. ``systemctl is-active <service>`` must print exactly ``active``.
    2. The service's ``ActiveEnterTimestamp`` must lie more than
       ``SERVICE_ACTIVE_TIME`` minutes before the current UTC time.

    The function is wrapped in ``wait_for_success(60 * SERVICE_ACTIVE_TIME * 2, 1)``;
    presumably these are the overall timeout and retry interval in seconds
    (confirm against frostfs_testlib).

    Args:
        cluster_node: Node whose ``StorageNode`` service is inspected.

    Raises:
        AssertionError: If the service is not active or has not been active
            long enough.
    """
    # TODO: Move to healthcheck classes
    svc_name = cluster_node.service(StorageNode).get_service_systemctl_name()

    with allure.step(f"Check service {svc_name} is active"):
        result = cluster_node.host.get_shell().exec(f"systemctl is-active {svc_name}")
        assert "active" == result.stdout.strip(), f"Service {svc_name} should be in active state"

    with allure.step(f"Check service {svc_name} is active more than {SERVICE_ACTIVE_TIME} minutes"):
        result = cluster_node.host.get_shell().exec(
            f"systemctl show {svc_name} --property ActiveEnterTimestamp | cut -d '=' -f 2"
        )
        # NOTE(review): if dateutil does not recognize the zone abbreviation in the
        # timestamp it returns a naive datetime, and the subtraction below raises
        # TypeError (aware - naive). Confirm the hosts report a zone dateutil knows.
        start_time = parser.parse(result.stdout.strip())
        current_time = datetime.now(tz=timezone.utc)
        active_time = current_time - start_time

        # timedelta.seconds drops the days component and wraps around for negative
        # durations (e.g. clock skew), so build the displayed value from
        # total_seconds() to keep the failure message truthful.
        whole_seconds = int(active_time.total_seconds())
        sign = "-" if whole_seconds < 0 else ""
        active_minutes, active_seconds = divmod(abs(whole_seconds), 60)

        assert active_time > timedelta(minutes=SERVICE_ACTIVE_TIME), (
            f"Service should be in active state more than {SERVICE_ACTIVE_TIME} minutes, "
            f"current {sign}{active_minutes}m:{active_seconds}s"
        )
@allure.title("[Autouse/Test] Run health check for all nodes")
@pytest.fixture(autouse=True) @pytest.fixture(autouse=True)
def run_health_check(cluster: Cluster, request: pytest.FixtureRequest): def run_health_check(cluster: Cluster, request: pytest.FixtureRequest):
if request.node.get_closest_marker("no_healthcheck"): if request.node.get_closest_marker("no_healthcheck"):