From f159cd89f3c3445cff8b34889b00084de2234931 Mon Sep 17 00:00:00 2001
From: Andrey Berezin
Date: Mon, 23 Oct 2023 17:11:53 +0300
Subject: [PATCH] [#118] Add after-deploy healthcheck

Signed-off-by: Andrey Berezin
---
 pytest_tests/testsuites/conftest.py | 49 ++++++++++++++++++++++++-----
 1 file changed, 42 insertions(+), 7 deletions(-)

diff --git a/pytest_tests/testsuites/conftest.py b/pytest_tests/testsuites/conftest.py
index 1dad1aa4..58965196 100644
--- a/pytest_tests/testsuites/conftest.py
+++ b/pytest_tests/testsuites/conftest.py
@@ -1,12 +1,13 @@
 import logging
 import os
 import shutil
-from datetime import datetime
+from datetime import datetime, timedelta, timezone
 from typing import Optional
 
 import allure
 import pytest
 import yaml
+from dateutil import parser
 from frostfs_testlib.hosting import Hosting
 from frostfs_testlib.reporter import AllureHandler, get_reporter
 from frostfs_testlib.resources.common import (
@@ -24,17 +25,18 @@ from frostfs_testlib.steps.node_management import storage_node_healthcheck
 from frostfs_testlib.steps.s3 import s3_helper
 from frostfs_testlib.storage.cluster import Cluster, ClusterNode
 from frostfs_testlib.storage.controllers.cluster_state_controller import ClusterStateController
+from frostfs_testlib.storage.dataclasses.frostfs_services import StorageNode
 from frostfs_testlib.storage.dataclasses.object_size import ObjectSize
 from frostfs_testlib.storage.dataclasses.wallet import WalletFactory, WalletInfo
 from frostfs_testlib.testing.cluster_test_base import ClusterTestBase
 from frostfs_testlib.testing.parallel import parallel
+from frostfs_testlib.testing.test_control import wait_for_success
 from frostfs_testlib.utils import env_utils, version_utils
 
 from pytest_tests.resources.common import HOSTING_CONFIG_FILE, TEST_CYCLES_COUNT
 
 logger = logging.getLogger("NeoLogger")
 
-
 # Add logs check test even if it's not fit to mark selectors
 def pytest_configure(config: pytest.Config):
     markers = config.option.markexpr
@@ -192,9 +194,7 @@ def s3_client(
         policy=s3_policy,
         container_placement_policy=auth_container_placement_policy,
     )
-    containers_list = list_containers(
-        wallet.path, shell=client_shell, endpoint=cluster.default_rpc_endpoint
-    )
+    containers_list = list_containers(wallet.path, shell=client_shell, endpoint=cluster.default_rpc_endpoint)
     assert cid in containers_list, f"Expected cid {cid} in {containers_list}"
 
     s3_client_cls = request.param
@@ -232,7 +232,7 @@ def two_buckets(s3_client: S3ClientWrapper):
         s3_helper.delete_bucket_with_objects(s3_client, bucket_name)
 
 
-@allure.step("Check binary versions")
+@allure.step("[Autouse/Session] Check binary versions")
 @pytest.fixture(scope="session", autouse=True)
 def check_binary_versions(hosting: Hosting, client_shell: Shell, request: pytest.FixtureRequest):
     local_versions = version_utils.get_local_binaries_versions(client_shell)
@@ -269,7 +269,42 @@ def session_start_time():
     return start_time
 
 
-@allure.title("Run health check for all nodes")
+@allure.title("[Autouse/Session] After deploy healthcheck")
+@pytest.fixture(scope="session", autouse=True)
+def after_deploy_healthcheck(cluster: Cluster):
+    with allure.step("Wait for cluster readiness after deploy"):
+        parallel(readiness_on_node, cluster.cluster_nodes)
+
+
+SERVICE_ACTIVE_TIME = 15
+
+
+@wait_for_success(60 * SERVICE_ACTIVE_TIME * 2, 1)
+@allure.step("Check readiness on node {cluster_node}")
+def readiness_on_node(cluster_node: ClusterNode):
+    # TODO: Move to healtcheck classes
+    svc_name = cluster_node.service(StorageNode).get_service_systemctl_name()
+    with allure.step(f"Check service {svc_name} is active"):
+        result = cluster_node.host.get_shell().exec(f"systemctl is-active {svc_name}")
+        assert "active" == result.stdout.strip(), f"Service {svc_name} should be in active state"
+
+    with allure.step(f"Check service {svc_name} is active more than {SERVICE_ACTIVE_TIME} minutes"):
+        result = cluster_node.host.get_shell().exec(
+            f"systemctl show {svc_name} --property ActiveEnterTimestamp | cut -d '=' -f 2"
+        )
+        start_time = parser.parse(result.stdout.strip())
+        current_time = datetime.now(tz=timezone.utc)
+        active_time = current_time - start_time
+
+        active_minutes = active_time.seconds // 60
+        active_seconds = active_time.seconds - active_minutes * 60
+
+        assert active_time > timedelta(
+            minutes=SERVICE_ACTIVE_TIME
+        ), f"Service should be in active state more than {SERVICE_ACTIVE_TIME} minutes, current {active_minutes}m:{active_seconds}s"
+
+
+@allure.title("[Autouse/Test] Run health check for all nodes")
 @pytest.fixture(autouse=True)
 def run_health_check(cluster: Cluster, request: pytest.FixtureRequest):
     if request.node.get_closest_marker("no_healthcheck"):