[#118] Add after-deploy healthcheck
Signed-off-by: Andrey Berezin <a.berezin@yadro.com>

parent 283149f837
commit f159cd89f3

1 changed file with 42 additions and 7 deletions
@@ -1,12 +1,13 @@
 import logging
 import os
 import shutil
-from datetime import datetime
+from datetime import datetime, timedelta, timezone
 from typing import Optional
 
 import allure
 import pytest
 import yaml
+from dateutil import parser
 from frostfs_testlib.hosting import Hosting
 from frostfs_testlib.reporter import AllureHandler, get_reporter
 from frostfs_testlib.resources.common import (
@@ -24,17 +25,18 @@ from frostfs_testlib.steps.node_management import storage_node_healthcheck
 from frostfs_testlib.steps.s3 import s3_helper
 from frostfs_testlib.storage.cluster import Cluster, ClusterNode
 from frostfs_testlib.storage.controllers.cluster_state_controller import ClusterStateController
+from frostfs_testlib.storage.dataclasses.frostfs_services import StorageNode
 from frostfs_testlib.storage.dataclasses.object_size import ObjectSize
 from frostfs_testlib.storage.dataclasses.wallet import WalletFactory, WalletInfo
 from frostfs_testlib.testing.cluster_test_base import ClusterTestBase
+from frostfs_testlib.testing.parallel import parallel
 from frostfs_testlib.testing.test_control import wait_for_success
 from frostfs_testlib.utils import env_utils, version_utils
 
 from pytest_tests.resources.common import HOSTING_CONFIG_FILE, TEST_CYCLES_COUNT
 
 logger = logging.getLogger("NeoLogger")
 
 
 # Add logs check test even if it's not fit to mark selectors
 def pytest_configure(config: pytest.Config):
     markers = config.option.markexpr
@@ -192,9 +194,7 @@ def s3_client(
         policy=s3_policy,
         container_placement_policy=auth_container_placement_policy,
     )
-    containers_list = list_containers(
-        wallet.path, shell=client_shell, endpoint=cluster.default_rpc_endpoint
-    )
+    containers_list = list_containers(wallet.path, shell=client_shell, endpoint=cluster.default_rpc_endpoint)
     assert cid in containers_list, f"Expected cid {cid} in {containers_list}"
 
     s3_client_cls = request.param
@@ -232,7 +232,7 @@ def two_buckets(s3_client: S3ClientWrapper):
         s3_helper.delete_bucket_with_objects(s3_client, bucket_name)
 
 
-@allure.step("Check binary versions")
+@allure.step("[Autouse/Session] Check binary versions")
 @pytest.fixture(scope="session", autouse=True)
 def check_binary_versions(hosting: Hosting, client_shell: Shell, request: pytest.FixtureRequest):
     local_versions = version_utils.get_local_binaries_versions(client_shell)
@@ -269,7 +269,42 @@ def session_start_time():
     return start_time
 
 
-@allure.title("Run health check for all nodes")
+@allure.title("[Autouse/Session] After deploy healthcheck")
+@pytest.fixture(scope="session", autouse=True)
+def after_deploy_healthcheck(cluster: Cluster):
+    with allure.step("Wait for cluster readiness after deploy"):
+        parallel(readiness_on_node, cluster.cluster_nodes)
+
+
+SERVICE_ACTIVE_TIME = 15
+
+
+@wait_for_success(60 * SERVICE_ACTIVE_TIME * 2, 1)
+@allure.step("Check readiness on node {cluster_node}")
+def readiness_on_node(cluster_node: ClusterNode):
+    # TODO: Move to healthcheck classes
+    svc_name = cluster_node.service(StorageNode).get_service_systemctl_name()
+    with allure.step(f"Check service {svc_name} is active"):
+        result = cluster_node.host.get_shell().exec(f"systemctl is-active {svc_name}")
+        assert "active" == result.stdout.strip(), f"Service {svc_name} should be in active state"
+
+    with allure.step(f"Check service {svc_name} is active more than {SERVICE_ACTIVE_TIME} minutes"):
+        result = cluster_node.host.get_shell().exec(
+            f"systemctl show {svc_name} --property ActiveEnterTimestamp | cut -d '=' -f 2"
+        )
+        start_time = parser.parse(result.stdout.strip())
+        current_time = datetime.now(tz=timezone.utc)
+        active_time = current_time - start_time
+
+        active_minutes = int(active_time.total_seconds()) // 60
+        active_seconds = int(active_time.total_seconds()) % 60
+
+        assert active_time > timedelta(
+            minutes=SERVICE_ACTIVE_TIME
+        ), f"Service should be in active state more than {SERVICE_ACTIVE_TIME} minutes, current {active_minutes}m:{active_seconds}s"
+
+
+@allure.title("[Autouse/Test] Run health check for all nodes")
 @pytest.fixture(autouse=True)
 def run_health_check(cluster: Cluster, request: pytest.FixtureRequest):
     if request.node.get_closest_marker("no_healthcheck"):
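
A note on usage: run_health_check above is an autouse fixture, so the per-test healthcheck guards every test by default; a test that deliberately runs against unhealthy nodes can opt out with the no_healthcheck marker the fixture reads via request.node.get_closest_marker. A hypothetical opt-out, assuming the marker is registered in the project's pytest configuration:

    import pytest

    # Hypothetical test: the no_healthcheck marker tells the autouse
    # run_health_check fixture to skip its per-node healthcheck.
    @pytest.mark.no_healthcheck
    def test_with_deliberately_stopped_node():
        ...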
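
For context, readiness_on_node is wrapped in @wait_for_success(60 * SERVICE_ACTIVE_TIME * 2, 1), i.e. its assertions are retried for up to 1800 seconds at a 1-second interval, giving a node that is still restarting time to become ready. A minimal sketch of that retry semantics (an illustration of the assumed behavior, not frostfs_testlib's actual implementation):

    import time

    def wait_for_success(max_wait_time: int = 60, interval: int = 1):
        # Retry the wrapped callable until it stops raising or the
        # deadline passes; re-raise the last error on timeout.
        def wrapper(func):
            def impl(*args, **kwargs):
                deadline = time.monotonic() + max_wait_time
                while True:
                    try:
                        return func(*args, **kwargs)
                    except Exception:
                        if time.monotonic() >= deadline:
                            raise
                        time.sleep(interval)
            return impl
        return wrapper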
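
The uptime check relies on systemctl show printing a line like ActiveEnterTimestamp=Tue 2023-07-11 10:15:02 UTC; after cut strips the key, dateutil parses the remainder into a timezone-aware datetime, which is why the current time is taken with datetime.now(tz=timezone.utc) before subtracting. A small illustration (the timestamp value is an assumed example of systemd's format):

    from datetime import datetime, timezone
    from dateutil import parser

    # Assumed sample of what remains after the `cut` in the diff above.
    start_time = parser.parse("Tue 2023-07-11 10:15:02 UTC")
    assert start_time.tzinfo is not None  # tz-aware, so the subtraction is valid
    active_time = datetime.now(tz=timezone.utc) - start_time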