diff --git a/src/agent/README.md b/src/agent/README.md index f89334d..55877d2 100644 --- a/src/agent/README.md +++ b/src/agent/README.md @@ -34,6 +34,15 @@ Agent 不再依赖配置文件;所有参数均由环境变量与主机名推 | `MASTER_ENDPOINT` | 是 | N/A | Master 基础地址,可写 `http://host:3000` 或 `host:3000`(自动补全 `http://`)。 | | `REPORT_INTERVAL_SECONDS` | 否 | `60` | 状态上报间隔(秒)。必须为正整数。 | | `AGENT_HOSTNAME` | 否 | `$(hostname)` | 覆盖容器内主机名,便于测试或特殊命名需求。 | +| `AGENT_ENV` | 否 | 来源于主机名 | 运行环境标识(如 `dev`、`prod`)。与 `AGENT_USER`、`AGENT_INSTANCE` 必须同时设置。 | +| `AGENT_USER` | 否 | 来源于主机名 | 归属用户或团队标识。与 `AGENT_ENV`、`AGENT_INSTANCE` 必须同时设置。 | +| `AGENT_INSTANCE` | 否 | 来源于主机名 | 实例编号或别名。与 `AGENT_ENV`、`AGENT_USER` 必须同时设置。 | + +主机名与元数据的解析优先级: + +1. 若设置 `AGENT_ENV` / `AGENT_USER` / `AGENT_INSTANCE` 且全部存在,则直接使用这些值。 +2. 否则按历史约定从主机名解析 `env-user-instance` 前缀。 +3. 如果两者都无法得到完整结果,Agent 启动会失败并提示需要提供上述环境变量。 派生路径: diff --git a/src/agent/app/collector.py b/src/agent/app/collector.py index 1b61caa..6c913df 100644 --- a/src/agent/app/collector.py +++ b/src/agent/app/collector.py @@ -18,13 +18,12 @@ _HOSTNAME_PATTERN = re.compile(r"^([^-]+)-([^-]+)-([^-]+)-.*$") def collect_metadata(config: AgentConfig) -> Dict[str, Any]: """汇总节点注册需要的静态信息。""" hostname = config.hostname - env, user, instance = _parse_hostname(hostname) meta = { "hostname": hostname, "ip": _detect_ip_address(), - "env": env, - "user": user, - "instance": instance, + "env": config.environment, + "user": config.user, + "instance": config.instance, "cpu_number": _detect_cpu_count(), "memory_in_bytes": _detect_memory_bytes(), "gpu_number": _detect_gpu_count(), diff --git a/src/agent/app/config.py b/src/agent/app/config.py index dae5d47..bf02cf5 100644 --- a/src/agent/app/config.py +++ b/src/agent/app/config.py @@ -14,6 +14,9 @@ DEFAULT_REPORT_INTERVAL_SECONDS: Final[int] = 60 @dataclass(frozen=True) class AgentConfig: hostname: str + environment: str + user: str + instance: str node_file: str version: str master_endpoint: str @@ -47,10 +50,51 @@ def _resolve_hostname() -> str: return os.environ.get("AGENT_HOSTNAME") or socket.gethostname() +def _resolve_metadata_fields(hostname: str) -> tuple[str, str, str]: + env = os.environ.get("AGENT_ENV") + user = os.environ.get("AGENT_USER") + instance = os.environ.get("AGENT_INSTANCE") + + if env and user and instance: + return env, user, instance + + if any([env, user, instance]): + LOGGER = None + try: + from .log import get_logger + + LOGGER = get_logger("argus.agent.config") + except Exception: # pragma: no cover - defensive + LOGGER = None + if LOGGER is not None: + LOGGER.warning( + "Incomplete metadata environment variables; falling back to hostname parsing", + extra={ + "has_env": bool(env), + "has_user": bool(user), + "has_instance": bool(instance), + }, + ) + env = user = instance = None + + from .collector import _parse_hostname # Local import to avoid circular dependency + + env, user, instance = _parse_hostname(hostname) + + if not all([env, user, instance]): + raise ValueError( + "Failed to determine metadata fields; set AGENT_ENV/USER/INSTANCE or use supported hostname pattern" + ) + + return env, user, instance + + def load_config() -> AgentConfig: """从环境变量推导配置,移除了外部配置文件依赖。""" hostname = _resolve_hostname() + environment, user, instance = _resolve_metadata_fields(hostname) + node_file = f"/private/argus/agent/{hostname}/node.json" health_dir = f"/private/argus/agent/{hostname}/health/" @@ -66,6 +110,9 @@ def load_config() -> AgentConfig: return AgentConfig( hostname=hostname, + environment=environment, + user=user, + instance=instance, node_file=node_file, version=VERSION, master_endpoint=master_endpoint, diff --git a/src/agent/dist/argus-agent b/src/agent/dist/argus-agent index 4fef67c..d5703fe 100755 Binary files a/src/agent/dist/argus-agent and b/src/agent/dist/argus-agent differ diff --git a/src/agent/tests/__init__.py b/src/agent/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/agent/tests/docker-compose.yml b/src/agent/tests/docker-compose.yml index e24e252..0dd4743 100644 --- a/src/agent/tests/docker-compose.yml +++ b/src/agent/tests/docker-compose.yml @@ -60,6 +60,36 @@ services: ipv4_address: 172.28.0.20 restart: always + agent_env: + image: ubuntu:22.04 + container_name: argus-agent-env-e2e + hostname: host_abc + depends_on: + - master + - bind + environment: + - MASTER_ENDPOINT=http://master.argus.com:3000 + - REPORT_INTERVAL_SECONDS=2 + - AGENT_ENV=prod + - AGENT_USER=ml + - AGENT_INSTANCE=node-3 + - AGENT_HOSTNAME=host_abc + - "ARGUS_BUILD_UID=${ARGUS_BUILD_UID:-2133}" + - "ARGUS_BUILD_GID=${ARGUS_BUILD_GID:-2015}" + volumes: + - ./private/argus/agent/host_abc:/private/argus/agent/host_abc + - ./private/argus/agent/host_abc/health:/private/argus/agent/host_abc/health + - ./private/argus/etc:/private/argus/etc + - ../dist/argus-agent:/usr/local/bin/argus-agent:ro + - ./scripts/agent_entrypoint.sh:/usr/local/bin/agent-entrypoint.sh:ro + - ../scripts/agent_deployment_verify.sh:/usr/local/bin/agent_deployment_verify.sh:ro + entrypoint: + - /usr/local/bin/agent-entrypoint.sh + networks: + default: + ipv4_address: 172.28.0.21 + restart: always + networks: default: driver: bridge diff --git a/src/agent/tests/scripts/00_e2e_test.sh b/src/agent/tests/scripts/00_e2e_test.sh index 9515d34..14e27f7 100755 --- a/src/agent/tests/scripts/00_e2e_test.sh +++ b/src/agent/tests/scripts/00_e2e_test.sh @@ -7,10 +7,10 @@ SCRIPTS=( "02_up.sh" "03_wait_and_assert_registration.sh" "04_write_health_files.sh" - "08_verify_agent.sh" - "05_assert_status_on_master.sh" - "06_restart_agent_and_reregister.sh" - "07_down.sh" + "05_verify_agent.sh" + "06_assert_status_on_master.sh" + "07_restart_agent_and_reregister.sh" + "08_down.sh" ) for script in "${SCRIPTS[@]}"; do diff --git a/src/agent/tests/scripts/02_up.sh b/src/agent/tests/scripts/02_up.sh index 56c4cda..d490a50 100755 --- a/src/agent/tests/scripts/02_up.sh +++ b/src/agent/tests/scripts/02_up.sh @@ -41,7 +41,7 @@ compose() { fi } -docker container rm -f argus-agent-e2e argus-master-agent-e2e argus-bind-agent-e2e >/dev/null 2>&1 || true +docker container rm -f argus-agent-e2e argus-agent-env-e2e argus-master-agent-e2e argus-bind-agent-e2e >/dev/null 2>&1 || true docker network rm tests_default >/dev/null 2>&1 || true diff --git a/src/agent/tests/scripts/03_wait_and_assert_registration.sh b/src/agent/tests/scripts/03_wait_and_assert_registration.sh index 7e9c127..8b0481b 100755 --- a/src/agent/tests/scripts/03_wait_and_assert_registration.sh +++ b/src/agent/tests/scripts/03_wait_and_assert_registration.sh @@ -6,11 +6,14 @@ TEST_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" TMP_ROOT="$TEST_ROOT/tmp" API_BASE="http://localhost:32300/api/v1/master" AGENT_HOSTNAME="dev-e2euser-e2einst-pod-0" +ENV_AGENT_HOSTNAME="host_abc" NODE_FILE="$TEST_ROOT/private/argus/agent/$AGENT_HOSTNAME/node.json" +ENV_NODE_FILE="$TEST_ROOT/private/argus/agent/$ENV_AGENT_HOSTNAME/node.json" mkdir -p "$TMP_ROOT" -node_id="" +primary_node_id="" +env_node_id="" for _ in {1..30}; do sleep 2 response=$(curl -sS "$API_BASE/nodes" || true) @@ -19,24 +22,49 @@ for _ in {1..30}; do fi list_file="$TMP_ROOT/nodes_list.json" echo "$response" > "$list_file" - node_id=$(python3 - "$list_file" <<'PY' + readarray -t node_ids < <(python3 - "$list_file" "$AGENT_HOSTNAME" "$ENV_AGENT_HOSTNAME" <<'PY' import json, sys + with open(sys.argv[1]) as handle: nodes = json.load(handle) -print(nodes[0]["id"] if nodes else "") + +target_primary = sys.argv[2] +target_env = sys.argv[3] + +primary_id = "" +env_id = "" + +for node in nodes: + if node.get("name") == target_primary: + primary_id = node.get("id", "") + if node.get("name") == target_env: + env_id = node.get("id", "") + +print(primary_id) +print(env_id) PY -) - if [[ -n "$node_id" ]]; then + ) + + primary_node_id="${node_ids[0]}" + env_node_id="${node_ids[1]}" + + if [[ -n "$primary_node_id" && -n "$env_node_id" ]]; then break fi done -if [[ -z "$node_id" ]]; then - echo "[ERROR] Agent did not register within timeout" >&2 +if [[ -z "$primary_node_id" ]]; then + echo "[ERROR] Primary agent did not register within timeout" >&2 exit 1 fi -echo "$node_id" > "$TMP_ROOT/node_id" +if [[ -z "$env_node_id" ]]; then + echo "[ERROR] Env-variable agent did not register within timeout" >&2 + exit 1 +fi + +echo "$primary_node_id" > "$TMP_ROOT/node_id" +echo "$env_node_id" > "$TMP_ROOT/node_id_host_abc" if [[ ! -f "$NODE_FILE" ]]; then echo "[ERROR] node.json not created at $NODE_FILE" >&2 @@ -50,8 +78,20 @@ with open(sys.argv[1]) as handle: assert "id" in node and node["id"], "node.json missing id" PY +if [[ ! -f "$ENV_NODE_FILE" ]]; then + echo "[ERROR] node.json not created at $ENV_NODE_FILE" >&2 + exit 1 +fi + +python3 - "$ENV_NODE_FILE" <<'PY' +import json, sys +with open(sys.argv[1]) as handle: + node = json.load(handle) +assert "id" in node and node["id"], "env agent node.json missing id" +PY + detail_file="$TMP_ROOT/initial_detail.json" -curl -sS "$API_BASE/nodes/$node_id" -o "$detail_file" +curl -sS "$API_BASE/nodes/$primary_node_id" -o "$detail_file" python3 - "$detail_file" "$TMP_ROOT/initial_ip" <<'PY' import json, sys, pathlib with open(sys.argv[1]) as handle: @@ -62,4 +102,5 @@ if not ip: pathlib.Path(sys.argv[2]).write_text(ip) PY -echo "[INFO] Agent registered with node id $node_id" +echo "[INFO] Agent registered with node id $primary_node_id" +echo "[INFO] Env-variable agent registered with node id $env_node_id" diff --git a/src/agent/tests/scripts/05_verify_agent.sh b/src/agent/tests/scripts/05_verify_agent.sh new file mode 100755 index 0000000..2d4d9b8 --- /dev/null +++ b/src/agent/tests/scripts/05_verify_agent.sh @@ -0,0 +1,60 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +TEST_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" +REPO_ROOT="$(cd "$TEST_ROOT/.." && pwd)" +VERIFY_SCRIPT="$REPO_ROOT/scripts/agent_deployment_verify.sh" +ENV_NODE_ID_FILE="$TEST_ROOT/tmp/node_id_host_abc" +PRIMARY_CONTAINER="argus-agent-e2e" +ENV_CONTAINER="argus-agent-env-e2e" +PRIMARY_HOSTNAME="dev-e2euser-e2einst-pod-0" +ENV_HOSTNAME="host_abc" + +if ! docker ps --format '{{.Names}}' | grep -q "^${PRIMARY_CONTAINER}$"; then + echo "[WARN] agent container not running; skip verification" + exit 0 +fi + +if docker exec -i "$PRIMARY_CONTAINER" bash -lc 'command -v curl >/dev/null && command -v jq >/dev/null'; then + echo "[INFO] curl/jq already installed in agent container" +else + echo "[INFO] Installing curl/jq in agent container" + docker exec -i "$PRIMARY_CONTAINER" bash -lc 'apt-get update >/dev/null 2>&1 && apt-get install -y curl jq >/dev/null 2>&1' || true +fi + +if [[ ! -f "$VERIFY_SCRIPT" ]]; then + echo "[ERROR] Verification script missing at $VERIFY_SCRIPT" >&2 + exit 1 +fi + +run_verifier() { + local container="$1" hostname="$2" + + if ! docker ps --format '{{.Names}}' | grep -q "^${container}$"; then + echo "[WARN] container $container not running; skip" + return + fi + + if ! docker exec -i "$container" bash -lc 'command -v /usr/local/bin/agent_deployment_verify.sh >/dev/null'; then + echo "[ERROR] /usr/local/bin/agent_deployment_verify.sh missing in $container" >&2 + exit 1 + fi + + echo "[INFO] Running verification for $hostname in $container" + docker exec -i "$container" env VERIFY_HOSTNAME="$hostname" /usr/local/bin/agent_deployment_verify.sh +} + +run_verifier "$PRIMARY_CONTAINER" "$PRIMARY_HOSTNAME" + +if docker ps --format '{{.Names}}' | grep -q "^${ENV_CONTAINER}$"; then + if docker exec -i "$ENV_CONTAINER" bash -lc 'command -v curl >/dev/null && command -v jq >/dev/null'; then + echo "[INFO] curl/jq already installed in env agent container" + else + echo "[INFO] Installing curl/jq in env agent container" + docker exec -i "$ENV_CONTAINER" bash -lc 'apt-get update >/dev/null 2>&1 && apt-get install -y curl jq >/dev/null 2>&1' || true + fi + run_verifier "$ENV_CONTAINER" "$ENV_HOSTNAME" +else + echo "[WARN] env-driven agent container not running; skip secondary verification" +fi diff --git a/src/agent/tests/scripts/05_assert_status_on_master.sh b/src/agent/tests/scripts/06_assert_status_on_master.sh similarity index 57% rename from src/agent/tests/scripts/05_assert_status_on_master.sh rename to src/agent/tests/scripts/06_assert_status_on_master.sh index b1b0a87..3c58426 100755 --- a/src/agent/tests/scripts/05_assert_status_on_master.sh +++ b/src/agent/tests/scripts/06_assert_status_on_master.sh @@ -6,6 +6,8 @@ TEST_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" TMP_ROOT="$TEST_ROOT/tmp" API_BASE="http://localhost:32300/api/v1/master" NODE_ID="$(cat "$TMP_ROOT/node_id")" +ENV_NODE_ID="$(cat "$TMP_ROOT/node_id_host_abc")" +ENV_HOSTNAME="host_abc" NODES_JSON="$TEST_ROOT/private/argus/metric/prometheus/nodes.json" success=false @@ -41,13 +43,36 @@ if [[ ! -f "$NODES_JSON" ]]; then exit 1 fi -python3 - "$NODES_JSON" <<'PY' +python3 - "$NODES_JSON" "$NODE_ID" "$ENV_NODE_ID" <<'PY' import json, sys with open(sys.argv[1]) as handle: nodes = json.load(handle) -assert len(nodes) == 1, nodes -entry = nodes[0] -assert entry["node_id"], entry + +expected_primary = sys.argv[2] +expected_env = sys.argv[3] + +ids = {entry.get("node_id") for entry in nodes} +assert expected_primary in ids, nodes +assert expected_env in ids, nodes +assert len(nodes) >= 2, nodes PY echo "[INFO] Master reflects agent health and nodes.json entries" + +env_detail_file="$TMP_ROOT/env_agent_detail.json" +curl -sS "$API_BASE/nodes/$ENV_NODE_ID" -o "$env_detail_file" +python3 - "$env_detail_file" "$ENV_HOSTNAME" <<'PY' +import json, sys +with open(sys.argv[1]) as handle: + node = json.load(handle) + +expected_name = sys.argv[2] + +assert node.get("name") == expected_name, node +meta = node.get("meta_data", {}) +assert meta.get("env") == "prod", meta +assert meta.get("user") == "ml", meta +assert meta.get("instance") == "node-3", meta +PY + +echo "[INFO] Env-variable agent reports expected metadata" diff --git a/src/agent/tests/scripts/06_restart_agent_and_reregister.sh b/src/agent/tests/scripts/07_restart_agent_and_reregister.sh similarity index 94% rename from src/agent/tests/scripts/06_restart_agent_and_reregister.sh rename to src/agent/tests/scripts/07_restart_agent_and_reregister.sh index 78c6322..9fa272e 100755 --- a/src/agent/tests/scripts/06_restart_agent_and_reregister.sh +++ b/src/agent/tests/scripts/07_restart_agent_and_reregister.sh @@ -10,6 +10,7 @@ AGENT_HOSTNAME="dev-e2euser-e2einst-pod-0" NETWORK_NAME="tests_default" NEW_AGENT_IP="172.28.0.200" ENTRYPOINT_SCRIPT="$SCRIPT_DIR/agent_entrypoint.sh" +VERIFY_SCRIPT="$TEST_ROOT/../scripts/agent_deployment_verify.sh" ENV_FILE="$TEST_ROOT/.env" # 中文提示:重启场景也需要同样的入口脚本,确保 DNS 注册逻辑一致 @@ -18,6 +19,11 @@ if [[ ! -f "$ENTRYPOINT_SCRIPT" ]]; then exit 1 fi +if [[ ! -f "$VERIFY_SCRIPT" ]]; then + echo "[ERROR] agent verification script missing at $VERIFY_SCRIPT" >&2 + exit 1 +fi + if [[ ! -f "$TMP_ROOT/agent_binary_path" ]]; then echo "[ERROR] Agent binary path missing; rerun bootstrap" >&2 exit 1 @@ -94,6 +100,7 @@ if ! docker run -d \ -v "$TEST_ROOT/private/argus/etc:/private/argus/etc" \ -v "$AGENT_BINARY:/usr/local/bin/argus-agent:ro" \ -v "$ENTRYPOINT_SCRIPT:/usr/local/bin/agent-entrypoint.sh:ro" \ + -v "$VERIFY_SCRIPT:/usr/local/bin/agent_deployment_verify.sh:ro" \ -e MASTER_ENDPOINT=http://master.argus.com:3000 \ -e REPORT_INTERVAL_SECONDS=2 \ -e ARGUS_BUILD_UID="$AGENT_UID" \ diff --git a/src/agent/tests/scripts/07_down.sh b/src/agent/tests/scripts/08_down.sh similarity index 89% rename from src/agent/tests/scripts/07_down.sh rename to src/agent/tests/scripts/08_down.sh index b9674ee..4accf14 100755 --- a/src/agent/tests/scripts/07_down.sh +++ b/src/agent/tests/scripts/08_down.sh @@ -13,7 +13,7 @@ compose() { fi } -docker container rm -f argus-agent-e2e >/dev/null 2>&1 || true +docker container rm -f argus-agent-e2e argus-agent-env-e2e >/dev/null 2>&1 || true pushd "$TEST_ROOT" >/dev/null compose down --remove-orphans diff --git a/src/agent/tests/scripts/08_verify_agent.sh b/src/agent/tests/scripts/08_verify_agent.sh deleted file mode 100755 index 8b347b0..0000000 --- a/src/agent/tests/scripts/08_verify_agent.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -TEST_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" -VERIFY_SCRIPT="$(cd "$TEST_ROOT/.." && pwd)/scripts/agent_deployment_verify.sh" - -if ! docker ps --format '{{.Names}}' | grep -q '^argus-agent-e2e$'; then - echo "[WARN] agent container not running; skip verification" - exit 0 -fi - -if docker exec -i argus-agent-e2e bash -lc 'command -v curl >/dev/null && command -v jq >/dev/null'; then - echo "[INFO] curl/jq already installed in agent container" -else - echo "[INFO] Installing curl/jq in agent container" - docker exec -i argus-agent-e2e bash -lc 'apt-get update >/dev/null 2>&1 && apt-get install -y curl jq >/dev/null 2>&1' || true -fi - -if docker exec -i argus-agent-e2e bash -lc 'command -v /usr/local/bin/agent_deployment_verify.sh >/dev/null'; then - docker exec -i argus-agent-e2e /usr/local/bin/agent_deployment_verify.sh -elif [[ -x "$VERIFY_SCRIPT" ]]; then - docker exec -i argus-agent-e2e "$VERIFY_SCRIPT" -else - echo "[WARN] agent_deployment_verify.sh not found" -fi diff --git a/src/agent/tests/test_config_metadata.py b/src/agent/tests/test_config_metadata.py new file mode 100644 index 0000000..390fd7d --- /dev/null +++ b/src/agent/tests/test_config_metadata.py @@ -0,0 +1,133 @@ +from __future__ import annotations + +import os +import unittest +from contextlib import contextmanager +from unittest.mock import patch + +from app.config import AgentConfig, load_config + + +@contextmanager +def temp_env(**overrides: str | None): + originals: dict[str, str | None] = {} + try: + for key, value in overrides.items(): + originals[key] = os.environ.get(key) + if value is None: + os.environ.pop(key, None) + else: + os.environ[key] = value + yield + finally: + for key, original in originals.items(): + if original is None: + os.environ.pop(key, None) + else: + os.environ[key] = original + + +class LoadConfigMetadataTests(unittest.TestCase): + @patch("app.config.Path.mkdir") + def test_metadata_from_environment_variables(self, mock_mkdir): + with temp_env( + MASTER_ENDPOINT="http://master.local", + AGENT_HOSTNAME="dev-user-one-pod", + AGENT_ENV="prod", + AGENT_USER="ops", + AGENT_INSTANCE="node-1", + ): + config = load_config() + + self.assertEqual(config.environment, "prod") + self.assertEqual(config.user, "ops") + self.assertEqual(config.instance, "node-1") + mock_mkdir.assert_called() + + @patch("app.config.Path.mkdir") + def test_metadata_falls_back_to_hostname(self, mock_mkdir): + with temp_env( + MASTER_ENDPOINT="http://master.local", + AGENT_HOSTNAME="qa-team-abc-pod-2", + AGENT_ENV=None, + AGENT_USER=None, + AGENT_INSTANCE=None, + ): + config = load_config() + + self.assertEqual(config.environment, "qa") + self.assertEqual(config.user, "team") + self.assertEqual(config.instance, "abc") + mock_mkdir.assert_called() + + @patch("app.config.Path.mkdir") + def test_partial_environment_variables_fallback(self, mock_mkdir): + with temp_env( + MASTER_ENDPOINT="http://master.local", + AGENT_HOSTNAME="stage-ml-001-node", + AGENT_ENV="prod", + AGENT_USER=None, + AGENT_INSTANCE=None, + ): + config = load_config() + + self.assertEqual(config.environment, "stage") + self.assertEqual(config.user, "ml") + self.assertEqual(config.instance, "001") + mock_mkdir.assert_called() + + @patch("app.config.Path.mkdir") + def test_invalid_hostname_raises_error(self, mock_mkdir): + with temp_env( + MASTER_ENDPOINT="http://master.local", + AGENT_HOSTNAME="invalidhostname", + AGENT_ENV=None, + AGENT_USER=None, + AGENT_INSTANCE=None, + ): + with self.assertRaises(ValueError): + load_config() + + mock_mkdir.assert_not_called() + + +class CollectMetadataTests(unittest.TestCase): + @patch("app.collector._detect_ip_address", return_value="127.0.0.1") + @patch("app.collector._detect_gpu_count", return_value=0) + @patch("app.collector._detect_memory_bytes", return_value=1024) + @patch("app.collector._detect_cpu_count", return_value=8) + def test_collect_metadata_uses_config_fields( + self, + mock_cpu, + mock_memory, + mock_gpu, + mock_ip, + ): + config = AgentConfig( + hostname="dev-user-001-pod", + environment="prod", + user="ops", + instance="node-1", + node_file="/tmp/node.json", + version="1.0.0", + master_endpoint="http://master.local", + report_interval_seconds=60, + health_dir="/tmp/health", + ) + + from app.collector import collect_metadata + + metadata = collect_metadata(config) + + self.assertEqual(metadata["env"], "prod") + self.assertEqual(metadata["user"], "ops") + self.assertEqual(metadata["instance"], "node-1") + self.assertEqual(metadata["hostname"], "dev-user-001-pod") + self.assertEqual(metadata["ip"], "127.0.0.1") + self.assertEqual(metadata["cpu_number"], 8) + self.assertEqual(metadata["memory_in_bytes"], 1024) + self.assertEqual(metadata["gpu_number"], 0) + + +if __name__ == "__main__": + unittest.main()