Change log collection fixture to put all logs into archive
When collecting logs, we dump all of the logs into a directory, because it is RAM-intensive to keep the entire set of logs in memory, especially in a large cluster. And we attach logs to Allure not as individual files, but as a single zip archive, because it is more convenient to download, attach to bugs, etc. Signed-off-by: Vladimir Domnich <v.domnich@yadro.com>
This commit is contained in:
parent
94d6ec6b12
commit
7e31610462
2 changed files with 47 additions and 41 deletions
|
@ -1,5 +1,6 @@
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
|
import os
|
||||||
import re
|
import re
|
||||||
import time
|
import time
|
||||||
from contextlib import contextmanager
|
from contextlib import contextmanager
|
||||||
|
@ -75,16 +76,19 @@ class LocalDevEnvStorageServiceHelper:
|
||||||
def get_binaries_version(self) -> dict:
|
def get_binaries_version(self) -> dict:
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
def logs(self, since: Optional[datetime], until: Optional[datetime]) -> dict[str, str]:
|
def dump_logs(self, directory_path: str, since: Optional[datetime],
|
||||||
|
until: Optional[datetime]) -> None:
|
||||||
# All containers are running on the same host, so we can use 1st node to collect all logs
|
# All containers are running on the same host, so we can use 1st node to collect all logs
|
||||||
first_node_name = next(iter(NEOFS_NETMAP_DICT))
|
first_node_name = next(iter(NEOFS_NETMAP_DICT))
|
||||||
client = self._get_docker_client(first_node_name)
|
client = self._get_docker_client(first_node_name)
|
||||||
|
|
||||||
logs_by_container = {}
|
|
||||||
for container_name in self.ALL_CONTAINERS:
|
for container_name in self.ALL_CONTAINERS:
|
||||||
logs = client.logs(container_name, since=since, until=until)
|
logs = client.logs(container_name, since=since, until=until)
|
||||||
logs_by_container[container_name] = logs
|
|
||||||
return logs_by_container
|
# Dump logs to the directory
|
||||||
|
file_path = os.path.join(directory_path, f"{container_name}-log.txt")
|
||||||
|
with open(file_path, "wb") as file:
|
||||||
|
file.write(logs)
|
||||||
|
|
||||||
def _get_container_by_name(self, node_name: str, container_name: str) -> dict:
|
def _get_container_by_name(self, node_name: str, container_name: str) -> dict:
|
||||||
client = self._get_docker_client(node_name)
|
client = self._get_docker_client(node_name)
|
||||||
|
@ -119,14 +123,6 @@ class LocalDevEnvStorageServiceHelper:
|
||||||
class CloudVmStorageServiceHelper:
|
class CloudVmStorageServiceHelper:
|
||||||
STORAGE_SERVICE = "neofs-storage.service"
|
STORAGE_SERVICE = "neofs-storage.service"
|
||||||
|
|
||||||
ALL_SERVICES = [
|
|
||||||
"neofs-http.service",
|
|
||||||
"neofs-s3.service",
|
|
||||||
"neofs-storage.service",
|
|
||||||
"neofs-ir.service",
|
|
||||||
"neofs-morph-cn.service",
|
|
||||||
]
|
|
||||||
|
|
||||||
def stop_node(self, node_name: str, wait: bool = True) -> None:
|
def stop_node(self, node_name: str, wait: bool = True) -> None:
|
||||||
with _create_ssh_client(node_name) as ssh_client:
|
with _create_ssh_client(node_name) as ssh_client:
|
||||||
cmd = f"sudo systemctl stop {self.STORAGE_SERVICE}"
|
cmd = f"sudo systemctl stop {self.STORAGE_SERVICE}"
|
||||||
|
@ -223,21 +219,24 @@ class CloudVmStorageServiceHelper:
|
||||||
f'(mismatch on node {node_name})'
|
f'(mismatch on node {node_name})'
|
||||||
return version_map
|
return version_map
|
||||||
|
|
||||||
def logs(self, since: Optional[datetime], until: Optional[datetime]) -> dict[str, str]:
|
def dump_logs(self, directory_path: str, since: Optional[datetime],
|
||||||
logs_by_service_id = {}
|
until: Optional[datetime]) -> None:
|
||||||
for node_name in NEOFS_NETMAP_DICT:
|
for node_name, node_info in NEOFS_NETMAP_DICT.items():
|
||||||
with _create_ssh_client(node_name) as ssh_client:
|
with _create_ssh_client(node_name) as ssh_client:
|
||||||
for service_name in self.ALL_SERVICES:
|
# We do not filter out logs of neofs services, because system logs might contain
|
||||||
filters = " ".join([
|
# information that is useful for troubleshooting
|
||||||
f"--since '{since:%Y-%m-%d %H:%M:%S}'" if since else "",
|
filters = " ".join([
|
||||||
f"--until '{until:%Y-%m-%d %H:%M:%S}'" if until else "",
|
f"--since '{since:%Y-%m-%d %H:%M:%S}'" if since else "",
|
||||||
])
|
f"--until '{until:%Y-%m-%d %H:%M:%S}'" if until else "",
|
||||||
result = ssh_client.exec(f"journalctl -u {service_name} --no-pager {filters}")
|
])
|
||||||
logs = result.stdout
|
result = ssh_client.exec(f"journalctl --no-pager {filters}")
|
||||||
|
logs = result.stdout
|
||||||
|
|
||||||
service_id = f"{node_name}_{service_name}"
|
# Dump logs to the directory. We include node endpoint in file name, because almost
|
||||||
logs_by_service_id[service_id] = logs
|
# everywhere in Allure report we are logging endpoints rather than node names
|
||||||
return logs_by_service_id
|
file_path = os.path.join(directory_path, f"{node_name}-{node_info['rpc']}-log.txt")
|
||||||
|
with open(file_path, "w") as file:
|
||||||
|
file.write(logs)
|
||||||
|
|
||||||
|
|
||||||
class RemoteDevEnvStorageServiceHelper(LocalDevEnvStorageServiceHelper):
|
class RemoteDevEnvStorageServiceHelper(LocalDevEnvStorageServiceHelper):
|
||||||
|
|
|
@ -63,17 +63,34 @@ def _get_binaries_version_local(binaries: list) -> dict:
|
||||||
return env_out
|
return env_out
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope='session', autouse=True)
|
@pytest.fixture(scope='session')
|
||||||
@allure.title('Collect logs')
|
@allure.title('Prepare tmp directory')
|
||||||
def collect_logs():
|
def prepare_tmp_dir():
|
||||||
|
full_path = f'{os.getcwd()}/{ASSETS_DIR}'
|
||||||
|
shutil.rmtree(full_path, ignore_errors=True)
|
||||||
|
os.mkdir(full_path)
|
||||||
|
yield full_path
|
||||||
|
shutil.rmtree(full_path)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session", autouse=True)
|
||||||
|
@allure.title("Collect logs")
|
||||||
|
def collect_logs(prepare_tmp_dir):
|
||||||
start_time = datetime.utcnow()
|
start_time = datetime.utcnow()
|
||||||
yield
|
yield
|
||||||
end_time = datetime.utcnow()
|
end_time = datetime.utcnow()
|
||||||
|
|
||||||
|
# Dump logs to temp directory (because they might be too large to keep in RAM)
|
||||||
|
logs_dir = os.path.join(prepare_tmp_dir, "logs")
|
||||||
|
os.makedirs(logs_dir)
|
||||||
|
|
||||||
helper = get_storage_service_helper()
|
helper = get_storage_service_helper()
|
||||||
logs_by_service_id = helper.logs(since=start_time, until=end_time)
|
helper.dump_logs(logs_dir, since=start_time, until=end_time)
|
||||||
for service_id, logs in logs_by_service_id.items():
|
|
||||||
allure.attach(logs, f"logs_{service_id}", allure.attachment_type.TEXT)
|
# Zip all files and attach to Allure because it is more convenient to download a single
|
||||||
|
# zip with all logs rather than mess with individual logs files per service or node
|
||||||
|
logs_zip_file_path = shutil.make_archive(logs_dir, "zip", logs_dir)
|
||||||
|
allure.attach.file(logs_zip_file_path, name="logs.zip", extension="zip")
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope='session', autouse=True)
|
@pytest.fixture(scope='session', autouse=True)
|
||||||
|
@ -89,16 +106,6 @@ def run_health_check(collect_logs):
|
||||||
raise AssertionError(f'Nodes {failed_nodes} are not healthy')
|
raise AssertionError(f'Nodes {failed_nodes} are not healthy')
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope='session')
|
|
||||||
@allure.title('Prepare tmp directory')
|
|
||||||
def prepare_tmp_dir():
|
|
||||||
full_path = f'{os.getcwd()}/{ASSETS_DIR}'
|
|
||||||
shutil.rmtree(full_path, ignore_errors=True)
|
|
||||||
os.mkdir(full_path)
|
|
||||||
yield full_path
|
|
||||||
shutil.rmtree(full_path)
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope='session')
|
@pytest.fixture(scope='session')
|
||||||
@allure.title('Prepare wallet and deposit')
|
@allure.title('Prepare wallet and deposit')
|
||||||
def prepare_wallet_and_deposit(prepare_tmp_dir):
|
def prepare_wallet_and_deposit(prepare_tmp_dir):
|
||||||
|
|
Loading…
Reference in a new issue