[#23] 增加环境变量指定 env/user/instance:优先读取环境变量,未提供时再从 hostname 解析
This commit is contained in:
parent
83b53dac7f
commit
22f40aaafc
@ -34,6 +34,15 @@ Agent 不再依赖配置文件;所有参数均由环境变量与主机名推
|
|||||||
| `MASTER_ENDPOINT` | 是 | N/A | Master 基础地址,可写 `http://host:3000` 或 `host:3000`(自动补全 `http://`)。 |
|
| `MASTER_ENDPOINT` | 是 | N/A | Master 基础地址,可写 `http://host:3000` 或 `host:3000`(自动补全 `http://`)。 |
|
||||||
| `REPORT_INTERVAL_SECONDS` | 否 | `60` | 状态上报间隔(秒)。必须为正整数。 |
|
| `REPORT_INTERVAL_SECONDS` | 否 | `60` | 状态上报间隔(秒)。必须为正整数。 |
|
||||||
| `AGENT_HOSTNAME` | 否 | `$(hostname)` | 覆盖容器内主机名,便于测试或特殊命名需求。 |
|
| `AGENT_HOSTNAME` | 否 | `$(hostname)` | 覆盖容器内主机名,便于测试或特殊命名需求。 |
|
||||||
|
| `AGENT_ENV` | 否 | 来源于主机名 | 运行环境标识(如 `dev`、`prod`)。与 `AGENT_USER`、`AGENT_INSTANCE` 必须同时设置。 |
|
||||||
|
| `AGENT_USER` | 否 | 来源于主机名 | 归属用户或团队标识。与 `AGENT_ENV`、`AGENT_INSTANCE` 必须同时设置。 |
|
||||||
|
| `AGENT_INSTANCE` | 否 | 来源于主机名 | 实例编号或别名。与 `AGENT_ENV`、`AGENT_USER` 必须同时设置。 |
|
||||||
|
|
||||||
|
主机名与元数据的解析优先级:
|
||||||
|
|
||||||
|
1. 若设置 `AGENT_ENV` / `AGENT_USER` / `AGENT_INSTANCE` 且全部存在,则直接使用这些值。
|
||||||
|
2. 否则(包括只设置了其中一部分变量的情况,此时 Agent 会记录一条警告并忽略已设置的值)按历史约定从主机名解析 `env-user-instance` 前缀。
|
||||||
|
3. 如果两者都无法得到完整结果,Agent 启动会失败并提示需要提供上述环境变量。
|
||||||
|
|
||||||
派生路径:
|
派生路径:
|
||||||
|
|
||||||
|
@ -18,13 +18,12 @@ _HOSTNAME_PATTERN = re.compile(r"^([^-]+)-([^-]+)-([^-]+)-.*$")
|
|||||||
def collect_metadata(config: AgentConfig) -> Dict[str, Any]:
|
def collect_metadata(config: AgentConfig) -> Dict[str, Any]:
|
||||||
"""汇总节点注册需要的静态信息。"""
|
"""汇总节点注册需要的静态信息。"""
|
||||||
hostname = config.hostname
|
hostname = config.hostname
|
||||||
env, user, instance = _parse_hostname(hostname)
|
|
||||||
meta = {
|
meta = {
|
||||||
"hostname": hostname,
|
"hostname": hostname,
|
||||||
"ip": _detect_ip_address(),
|
"ip": _detect_ip_address(),
|
||||||
"env": env,
|
"env": config.environment,
|
||||||
"user": user,
|
"user": config.user,
|
||||||
"instance": instance,
|
"instance": config.instance,
|
||||||
"cpu_number": _detect_cpu_count(),
|
"cpu_number": _detect_cpu_count(),
|
||||||
"memory_in_bytes": _detect_memory_bytes(),
|
"memory_in_bytes": _detect_memory_bytes(),
|
||||||
"gpu_number": _detect_gpu_count(),
|
"gpu_number": _detect_gpu_count(),
|
||||||
|
@ -14,6 +14,9 @@ DEFAULT_REPORT_INTERVAL_SECONDS: Final[int] = 60
|
|||||||
@dataclass(frozen=True)
|
@dataclass(frozen=True)
|
||||||
class AgentConfig:
|
class AgentConfig:
|
||||||
hostname: str
|
hostname: str
|
||||||
|
environment: str
|
||||||
|
user: str
|
||||||
|
instance: str
|
||||||
node_file: str
|
node_file: str
|
||||||
version: str
|
version: str
|
||||||
master_endpoint: str
|
master_endpoint: str
|
||||||
@ -47,10 +50,51 @@ def _resolve_hostname() -> str:
|
|||||||
return os.environ.get("AGENT_HOSTNAME") or socket.gethostname()
|
return os.environ.get("AGENT_HOSTNAME") or socket.gethostname()
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_metadata_fields(hostname: str) -> tuple[str, str, str]:
|
||||||
|
env = os.environ.get("AGENT_ENV")
|
||||||
|
user = os.environ.get("AGENT_USER")
|
||||||
|
instance = os.environ.get("AGENT_INSTANCE")
|
||||||
|
|
||||||
|
if env and user and instance:
|
||||||
|
return env, user, instance
|
||||||
|
|
||||||
|
if any([env, user, instance]):
|
||||||
|
LOGGER = None
|
||||||
|
try:
|
||||||
|
from .log import get_logger
|
||||||
|
|
||||||
|
LOGGER = get_logger("argus.agent.config")
|
||||||
|
except Exception: # pragma: no cover - defensive
|
||||||
|
LOGGER = None
|
||||||
|
if LOGGER is not None:
|
||||||
|
LOGGER.warning(
|
||||||
|
"Incomplete metadata environment variables; falling back to hostname parsing",
|
||||||
|
extra={
|
||||||
|
"has_env": bool(env),
|
||||||
|
"has_user": bool(user),
|
||||||
|
"has_instance": bool(instance),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
env = user = instance = None
|
||||||
|
|
||||||
|
from .collector import _parse_hostname # Local import to avoid circular dependency
|
||||||
|
|
||||||
|
env, user, instance = _parse_hostname(hostname)
|
||||||
|
|
||||||
|
if not all([env, user, instance]):
|
||||||
|
raise ValueError(
|
||||||
|
"Failed to determine metadata fields; set AGENT_ENV/USER/INSTANCE or use supported hostname pattern"
|
||||||
|
)
|
||||||
|
|
||||||
|
return env, user, instance
|
||||||
|
|
||||||
|
|
||||||
def load_config() -> AgentConfig:
|
def load_config() -> AgentConfig:
|
||||||
"""从环境变量推导配置,移除了外部配置文件依赖。"""
|
"""从环境变量推导配置,移除了外部配置文件依赖。"""
|
||||||
|
|
||||||
hostname = _resolve_hostname()
|
hostname = _resolve_hostname()
|
||||||
|
environment, user, instance = _resolve_metadata_fields(hostname)
|
||||||
|
|
||||||
node_file = f"/private/argus/agent/{hostname}/node.json"
|
node_file = f"/private/argus/agent/{hostname}/node.json"
|
||||||
health_dir = f"/private/argus/agent/{hostname}/health/"
|
health_dir = f"/private/argus/agent/{hostname}/health/"
|
||||||
|
|
||||||
@ -66,6 +110,9 @@ def load_config() -> AgentConfig:
|
|||||||
|
|
||||||
return AgentConfig(
|
return AgentConfig(
|
||||||
hostname=hostname,
|
hostname=hostname,
|
||||||
|
environment=environment,
|
||||||
|
user=user,
|
||||||
|
instance=instance,
|
||||||
node_file=node_file,
|
node_file=node_file,
|
||||||
version=VERSION,
|
version=VERSION,
|
||||||
master_endpoint=master_endpoint,
|
master_endpoint=master_endpoint,
|
||||||
|
BIN
src/agent/dist/argus-agent
vendored
BIN
src/agent/dist/argus-agent
vendored
Binary file not shown.
0
src/agent/tests/__init__.py
Normal file
0
src/agent/tests/__init__.py
Normal file
@ -60,6 +60,36 @@ services:
|
|||||||
ipv4_address: 172.28.0.20
|
ipv4_address: 172.28.0.20
|
||||||
restart: always
|
restart: always
|
||||||
|
|
||||||
|
agent_env:
|
||||||
|
image: ubuntu:22.04
|
||||||
|
container_name: argus-agent-env-e2e
|
||||||
|
hostname: host_abc
|
||||||
|
depends_on:
|
||||||
|
- master
|
||||||
|
- bind
|
||||||
|
environment:
|
||||||
|
- MASTER_ENDPOINT=http://master.argus.com:3000
|
||||||
|
- REPORT_INTERVAL_SECONDS=2
|
||||||
|
- AGENT_ENV=prod
|
||||||
|
- AGENT_USER=ml
|
||||||
|
- AGENT_INSTANCE=node-3
|
||||||
|
- AGENT_HOSTNAME=host_abc
|
||||||
|
- "ARGUS_BUILD_UID=${ARGUS_BUILD_UID:-2133}"
|
||||||
|
- "ARGUS_BUILD_GID=${ARGUS_BUILD_GID:-2015}"
|
||||||
|
volumes:
|
||||||
|
- ./private/argus/agent/host_abc:/private/argus/agent/host_abc
|
||||||
|
- ./private/argus/agent/host_abc/health:/private/argus/agent/host_abc/health
|
||||||
|
- ./private/argus/etc:/private/argus/etc
|
||||||
|
- ../dist/argus-agent:/usr/local/bin/argus-agent:ro
|
||||||
|
- ./scripts/agent_entrypoint.sh:/usr/local/bin/agent-entrypoint.sh:ro
|
||||||
|
- ../scripts/agent_deployment_verify.sh:/usr/local/bin/agent_deployment_verify.sh:ro
|
||||||
|
entrypoint:
|
||||||
|
- /usr/local/bin/agent-entrypoint.sh
|
||||||
|
networks:
|
||||||
|
default:
|
||||||
|
ipv4_address: 172.28.0.21
|
||||||
|
restart: always
|
||||||
|
|
||||||
networks:
|
networks:
|
||||||
default:
|
default:
|
||||||
driver: bridge
|
driver: bridge
|
||||||
|
@ -7,10 +7,10 @@ SCRIPTS=(
|
|||||||
"02_up.sh"
|
"02_up.sh"
|
||||||
"03_wait_and_assert_registration.sh"
|
"03_wait_and_assert_registration.sh"
|
||||||
"04_write_health_files.sh"
|
"04_write_health_files.sh"
|
||||||
"08_verify_agent.sh"
|
"05_verify_agent.sh"
|
||||||
"05_assert_status_on_master.sh"
|
"06_assert_status_on_master.sh"
|
||||||
"06_restart_agent_and_reregister.sh"
|
"07_restart_agent_and_reregister.sh"
|
||||||
"07_down.sh"
|
"08_down.sh"
|
||||||
)
|
)
|
||||||
|
|
||||||
for script in "${SCRIPTS[@]}"; do
|
for script in "${SCRIPTS[@]}"; do
|
||||||
|
@ -41,7 +41,7 @@ compose() {
|
|||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
docker container rm -f argus-agent-e2e argus-master-agent-e2e argus-bind-agent-e2e >/dev/null 2>&1 || true
|
docker container rm -f argus-agent-e2e argus-agent-env-e2e argus-master-agent-e2e argus-bind-agent-e2e >/dev/null 2>&1 || true
|
||||||
|
|
||||||
docker network rm tests_default >/dev/null 2>&1 || true
|
docker network rm tests_default >/dev/null 2>&1 || true
|
||||||
|
|
||||||
|
@ -6,11 +6,14 @@ TEST_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
|
|||||||
TMP_ROOT="$TEST_ROOT/tmp"
|
TMP_ROOT="$TEST_ROOT/tmp"
|
||||||
API_BASE="http://localhost:32300/api/v1/master"
|
API_BASE="http://localhost:32300/api/v1/master"
|
||||||
AGENT_HOSTNAME="dev-e2euser-e2einst-pod-0"
|
AGENT_HOSTNAME="dev-e2euser-e2einst-pod-0"
|
||||||
|
ENV_AGENT_HOSTNAME="host_abc"
|
||||||
NODE_FILE="$TEST_ROOT/private/argus/agent/$AGENT_HOSTNAME/node.json"
|
NODE_FILE="$TEST_ROOT/private/argus/agent/$AGENT_HOSTNAME/node.json"
|
||||||
|
ENV_NODE_FILE="$TEST_ROOT/private/argus/agent/$ENV_AGENT_HOSTNAME/node.json"
|
||||||
|
|
||||||
mkdir -p "$TMP_ROOT"
|
mkdir -p "$TMP_ROOT"
|
||||||
|
|
||||||
node_id=""
|
primary_node_id=""
|
||||||
|
env_node_id=""
|
||||||
for _ in {1..30}; do
|
for _ in {1..30}; do
|
||||||
sleep 2
|
sleep 2
|
||||||
response=$(curl -sS "$API_BASE/nodes" || true)
|
response=$(curl -sS "$API_BASE/nodes" || true)
|
||||||
@ -19,24 +22,49 @@ for _ in {1..30}; do
|
|||||||
fi
|
fi
|
||||||
list_file="$TMP_ROOT/nodes_list.json"
|
list_file="$TMP_ROOT/nodes_list.json"
|
||||||
echo "$response" > "$list_file"
|
echo "$response" > "$list_file"
|
||||||
node_id=$(python3 - "$list_file" <<'PY'
|
readarray -t node_ids < <(python3 - "$list_file" "$AGENT_HOSTNAME" "$ENV_AGENT_HOSTNAME" <<'PY'
|
||||||
import json, sys
|
import json, sys
|
||||||
|
|
||||||
with open(sys.argv[1]) as handle:
|
with open(sys.argv[1]) as handle:
|
||||||
nodes = json.load(handle)
|
nodes = json.load(handle)
|
||||||
print(nodes[0]["id"] if nodes else "")
|
|
||||||
|
target_primary = sys.argv[2]
|
||||||
|
target_env = sys.argv[3]
|
||||||
|
|
||||||
|
primary_id = ""
|
||||||
|
env_id = ""
|
||||||
|
|
||||||
|
for node in nodes:
|
||||||
|
if node.get("name") == target_primary:
|
||||||
|
primary_id = node.get("id", "")
|
||||||
|
if node.get("name") == target_env:
|
||||||
|
env_id = node.get("id", "")
|
||||||
|
|
||||||
|
print(primary_id)
|
||||||
|
print(env_id)
|
||||||
PY
|
PY
|
||||||
)
|
)
|
||||||
if [[ -n "$node_id" ]]; then
|
|
||||||
|
primary_node_id="${node_ids[0]}"
|
||||||
|
env_node_id="${node_ids[1]}"
|
||||||
|
|
||||||
|
if [[ -n "$primary_node_id" && -n "$env_node_id" ]]; then
|
||||||
break
|
break
|
||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
|
|
||||||
if [[ -z "$node_id" ]]; then
|
if [[ -z "$primary_node_id" ]]; then
|
||||||
echo "[ERROR] Agent did not register within timeout" >&2
|
echo "[ERROR] Primary agent did not register within timeout" >&2
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
echo "$node_id" > "$TMP_ROOT/node_id"
|
if [[ -z "$env_node_id" ]]; then
|
||||||
|
echo "[ERROR] Env-variable agent did not register within timeout" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "$primary_node_id" > "$TMP_ROOT/node_id"
|
||||||
|
echo "$env_node_id" > "$TMP_ROOT/node_id_host_abc"
|
||||||
|
|
||||||
if [[ ! -f "$NODE_FILE" ]]; then
|
if [[ ! -f "$NODE_FILE" ]]; then
|
||||||
echo "[ERROR] node.json not created at $NODE_FILE" >&2
|
echo "[ERROR] node.json not created at $NODE_FILE" >&2
|
||||||
@ -50,8 +78,20 @@ with open(sys.argv[1]) as handle:
|
|||||||
assert "id" in node and node["id"], "node.json missing id"
|
assert "id" in node and node["id"], "node.json missing id"
|
||||||
PY
|
PY
|
||||||
|
|
||||||
|
if [[ ! -f "$ENV_NODE_FILE" ]]; then
|
||||||
|
echo "[ERROR] node.json not created at $ENV_NODE_FILE" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
python3 - "$ENV_NODE_FILE" <<'PY'
|
||||||
|
import json, sys
|
||||||
|
with open(sys.argv[1]) as handle:
|
||||||
|
node = json.load(handle)
|
||||||
|
assert "id" in node and node["id"], "env agent node.json missing id"
|
||||||
|
PY
|
||||||
|
|
||||||
detail_file="$TMP_ROOT/initial_detail.json"
|
detail_file="$TMP_ROOT/initial_detail.json"
|
||||||
curl -sS "$API_BASE/nodes/$node_id" -o "$detail_file"
|
curl -sS "$API_BASE/nodes/$primary_node_id" -o "$detail_file"
|
||||||
python3 - "$detail_file" "$TMP_ROOT/initial_ip" <<'PY'
|
python3 - "$detail_file" "$TMP_ROOT/initial_ip" <<'PY'
|
||||||
import json, sys, pathlib
|
import json, sys, pathlib
|
||||||
with open(sys.argv[1]) as handle:
|
with open(sys.argv[1]) as handle:
|
||||||
@ -62,4 +102,5 @@ if not ip:
|
|||||||
pathlib.Path(sys.argv[2]).write_text(ip)
|
pathlib.Path(sys.argv[2]).write_text(ip)
|
||||||
PY
|
PY
|
||||||
|
|
||||||
echo "[INFO] Agent registered with node id $node_id"
|
echo "[INFO] Agent registered with node id $primary_node_id"
|
||||||
|
echo "[INFO] Env-variable agent registered with node id $env_node_id"
|
||||||
|
60
src/agent/tests/scripts/05_verify_agent.sh
Executable file
60
src/agent/tests/scripts/05_verify_agent.sh
Executable file
@ -0,0 +1,60 @@
|
|||||||
|
#!/usr/bin/env bash
# End-to-end step: run agent_deployment_verify.sh inside the primary agent
# container and, when it is running, inside the env-variable-driven one.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
TEST_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
REPO_ROOT="$(cd "$TEST_ROOT/.." && pwd)"
VERIFY_SCRIPT="$REPO_ROOT/scripts/agent_deployment_verify.sh"
# NOTE(review): not referenced anywhere else in this script.
ENV_NODE_ID_FILE="$TEST_ROOT/tmp/node_id_host_abc"
PRIMARY_CONTAINER="argus-agent-e2e"
ENV_CONTAINER="argus-agent-env-e2e"
PRIMARY_HOSTNAME="dev-e2euser-e2einst-pod-0"
ENV_HOSTNAME="host_abc"

# Succeeds when a container with exactly the given name ($1) is running.
# The name list is captured before matching so that, under `pipefail`, an
# early exit of grep cannot make the whole check fail; -F/-x compare the name
# literally and against the whole line.
container_running() {
  local names
  names="$(docker ps --format '{{.Names}}')" || return 1
  grep -Fxq -- "$1" <<<"$names"
}

# Makes sure curl and jq exist in container $1; $2 is the label used in logs.
ensure_tools() {
  local container="$1" label="$2"

  if docker exec -i "$container" bash -lc 'command -v curl >/dev/null && command -v jq >/dev/null'; then
    echo "[INFO] curl/jq already installed in $label"
  else
    echo "[INFO] Installing curl/jq in $label"
    # Installation failures are ignored here (`|| true`).
    docker exec -i "$container" bash -lc 'apt-get update >/dev/null 2>&1 && apt-get install -y curl jq >/dev/null 2>&1' || true
  fi
}

# Runs the mounted verification script in container $1 with
# VERIFY_HOSTNAME set to $2. A container that is not running is skipped;
# a missing script inside a running container aborts the whole step.
run_verifier() {
  local container="$1" hostname="$2"

  if ! container_running "$container"; then
    echo "[WARN] container $container not running; skip"
    return
  fi

  if ! docker exec -i "$container" bash -lc 'command -v /usr/local/bin/agent_deployment_verify.sh >/dev/null'; then
    echo "[ERROR] /usr/local/bin/agent_deployment_verify.sh missing in $container" >&2
    exit 1
  fi

  echo "[INFO] Running verification for $hostname in $container"
  docker exec -i "$container" env VERIFY_HOSTNAME="$hostname" /usr/local/bin/agent_deployment_verify.sh
}

# Without the primary agent there is nothing to verify; this is not an error.
if ! container_running "$PRIMARY_CONTAINER"; then
  echo "[WARN] agent container not running; skip verification"
  exit 0
fi

ensure_tools "$PRIMARY_CONTAINER" "agent container"

if [[ ! -f "$VERIFY_SCRIPT" ]]; then
  echo "[ERROR] Verification script missing at $VERIFY_SCRIPT" >&2
  exit 1
fi

run_verifier "$PRIMARY_CONTAINER" "$PRIMARY_HOSTNAME"

if container_running "$ENV_CONTAINER"; then
  ensure_tools "$ENV_CONTAINER" "env agent container"
  run_verifier "$ENV_CONTAINER" "$ENV_HOSTNAME"
else
  echo "[WARN] env-driven agent container not running; skip secondary verification"
fi
|
@ -6,6 +6,8 @@ TEST_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
|
|||||||
TMP_ROOT="$TEST_ROOT/tmp"
|
TMP_ROOT="$TEST_ROOT/tmp"
|
||||||
API_BASE="http://localhost:32300/api/v1/master"
|
API_BASE="http://localhost:32300/api/v1/master"
|
||||||
NODE_ID="$(cat "$TMP_ROOT/node_id")"
|
NODE_ID="$(cat "$TMP_ROOT/node_id")"
|
||||||
|
ENV_NODE_ID="$(cat "$TMP_ROOT/node_id_host_abc")"
|
||||||
|
ENV_HOSTNAME="host_abc"
|
||||||
NODES_JSON="$TEST_ROOT/private/argus/metric/prometheus/nodes.json"
|
NODES_JSON="$TEST_ROOT/private/argus/metric/prometheus/nodes.json"
|
||||||
|
|
||||||
success=false
|
success=false
|
||||||
@ -41,13 +43,36 @@ if [[ ! -f "$NODES_JSON" ]]; then
|
|||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
python3 - "$NODES_JSON" <<'PY'
|
python3 - "$NODES_JSON" "$NODE_ID" "$ENV_NODE_ID" <<'PY'
|
||||||
import json, sys
|
import json, sys
|
||||||
with open(sys.argv[1]) as handle:
|
with open(sys.argv[1]) as handle:
|
||||||
nodes = json.load(handle)
|
nodes = json.load(handle)
|
||||||
assert len(nodes) == 1, nodes
|
|
||||||
entry = nodes[0]
|
expected_primary = sys.argv[2]
|
||||||
assert entry["node_id"], entry
|
expected_env = sys.argv[3]
|
||||||
|
|
||||||
|
ids = {entry.get("node_id") for entry in nodes}
|
||||||
|
assert expected_primary in ids, nodes
|
||||||
|
assert expected_env in ids, nodes
|
||||||
|
assert len(nodes) >= 2, nodes
|
||||||
PY
|
PY
|
||||||
|
|
||||||
echo "[INFO] Master reflects agent health and nodes.json entries"
|
echo "[INFO] Master reflects agent health and nodes.json entries"
|
||||||
|
|
||||||
|
env_detail_file="$TMP_ROOT/env_agent_detail.json"
|
||||||
|
curl -sS "$API_BASE/nodes/$ENV_NODE_ID" -o "$env_detail_file"
|
||||||
|
python3 - "$env_detail_file" "$ENV_HOSTNAME" <<'PY'
|
||||||
|
import json, sys
|
||||||
|
with open(sys.argv[1]) as handle:
|
||||||
|
node = json.load(handle)
|
||||||
|
|
||||||
|
expected_name = sys.argv[2]
|
||||||
|
|
||||||
|
assert node.get("name") == expected_name, node
|
||||||
|
meta = node.get("meta_data", {})
|
||||||
|
assert meta.get("env") == "prod", meta
|
||||||
|
assert meta.get("user") == "ml", meta
|
||||||
|
assert meta.get("instance") == "node-3", meta
|
||||||
|
PY
|
||||||
|
|
||||||
|
echo "[INFO] Env-variable agent reports expected metadata"
|
@ -10,6 +10,7 @@ AGENT_HOSTNAME="dev-e2euser-e2einst-pod-0"
|
|||||||
NETWORK_NAME="tests_default"
|
NETWORK_NAME="tests_default"
|
||||||
NEW_AGENT_IP="172.28.0.200"
|
NEW_AGENT_IP="172.28.0.200"
|
||||||
ENTRYPOINT_SCRIPT="$SCRIPT_DIR/agent_entrypoint.sh"
|
ENTRYPOINT_SCRIPT="$SCRIPT_DIR/agent_entrypoint.sh"
|
||||||
|
VERIFY_SCRIPT="$TEST_ROOT/../scripts/agent_deployment_verify.sh"
|
||||||
ENV_FILE="$TEST_ROOT/.env"
|
ENV_FILE="$TEST_ROOT/.env"
|
||||||
|
|
||||||
# 中文提示:重启场景也需要同样的入口脚本,确保 DNS 注册逻辑一致
|
# 中文提示:重启场景也需要同样的入口脚本,确保 DNS 注册逻辑一致
|
||||||
@ -18,6 +19,11 @@ if [[ ! -f "$ENTRYPOINT_SCRIPT" ]]; then
|
|||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
if [[ ! -f "$VERIFY_SCRIPT" ]]; then
|
||||||
|
echo "[ERROR] agent verification script missing at $VERIFY_SCRIPT" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
if [[ ! -f "$TMP_ROOT/agent_binary_path" ]]; then
|
if [[ ! -f "$TMP_ROOT/agent_binary_path" ]]; then
|
||||||
echo "[ERROR] Agent binary path missing; rerun bootstrap" >&2
|
echo "[ERROR] Agent binary path missing; rerun bootstrap" >&2
|
||||||
exit 1
|
exit 1
|
||||||
@ -94,6 +100,7 @@ if ! docker run -d \
|
|||||||
-v "$TEST_ROOT/private/argus/etc:/private/argus/etc" \
|
-v "$TEST_ROOT/private/argus/etc:/private/argus/etc" \
|
||||||
-v "$AGENT_BINARY:/usr/local/bin/argus-agent:ro" \
|
-v "$AGENT_BINARY:/usr/local/bin/argus-agent:ro" \
|
||||||
-v "$ENTRYPOINT_SCRIPT:/usr/local/bin/agent-entrypoint.sh:ro" \
|
-v "$ENTRYPOINT_SCRIPT:/usr/local/bin/agent-entrypoint.sh:ro" \
|
||||||
|
-v "$VERIFY_SCRIPT:/usr/local/bin/agent_deployment_verify.sh:ro" \
|
||||||
-e MASTER_ENDPOINT=http://master.argus.com:3000 \
|
-e MASTER_ENDPOINT=http://master.argus.com:3000 \
|
||||||
-e REPORT_INTERVAL_SECONDS=2 \
|
-e REPORT_INTERVAL_SECONDS=2 \
|
||||||
-e ARGUS_BUILD_UID="$AGENT_UID" \
|
-e ARGUS_BUILD_UID="$AGENT_UID" \
|
@ -13,7 +13,7 @@ compose() {
|
|||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
docker container rm -f argus-agent-e2e >/dev/null 2>&1 || true
|
docker container rm -f argus-agent-e2e argus-agent-env-e2e >/dev/null 2>&1 || true
|
||||||
|
|
||||||
pushd "$TEST_ROOT" >/dev/null
|
pushd "$TEST_ROOT" >/dev/null
|
||||||
compose down --remove-orphans
|
compose down --remove-orphans
|
@ -1,26 +0,0 @@
|
|||||||
#!/usr/bin/env bash
|
|
||||||
set -euo pipefail
|
|
||||||
|
|
||||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
||||||
TEST_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
|
|
||||||
VERIFY_SCRIPT="$(cd "$TEST_ROOT/.." && pwd)/scripts/agent_deployment_verify.sh"
|
|
||||||
|
|
||||||
if ! docker ps --format '{{.Names}}' | grep -q '^argus-agent-e2e$'; then
|
|
||||||
echo "[WARN] agent container not running; skip verification"
|
|
||||||
exit 0
|
|
||||||
fi
|
|
||||||
|
|
||||||
if docker exec -i argus-agent-e2e bash -lc 'command -v curl >/dev/null && command -v jq >/dev/null'; then
|
|
||||||
echo "[INFO] curl/jq already installed in agent container"
|
|
||||||
else
|
|
||||||
echo "[INFO] Installing curl/jq in agent container"
|
|
||||||
docker exec -i argus-agent-e2e bash -lc 'apt-get update >/dev/null 2>&1 && apt-get install -y curl jq >/dev/null 2>&1' || true
|
|
||||||
fi
|
|
||||||
|
|
||||||
if docker exec -i argus-agent-e2e bash -lc 'command -v /usr/local/bin/agent_deployment_verify.sh >/dev/null'; then
|
|
||||||
docker exec -i argus-agent-e2e /usr/local/bin/agent_deployment_verify.sh
|
|
||||||
elif [[ -x "$VERIFY_SCRIPT" ]]; then
|
|
||||||
docker exec -i argus-agent-e2e "$VERIFY_SCRIPT"
|
|
||||||
else
|
|
||||||
echo "[WARN] agent_deployment_verify.sh not found"
|
|
||||||
fi
|
|
133
src/agent/tests/test_config_metadata.py
Normal file
133
src/agent/tests/test_config_metadata.py
Normal file
@ -0,0 +1,133 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
import unittest
|
||||||
|
from contextlib import contextmanager
|
||||||
|
from unittest.mock import patch
|
||||||
|
|
||||||
|
from app.config import AgentConfig, load_config
|
||||||
|
|
||||||
|
|
||||||
|
@contextmanager
|
||||||
|
def temp_env(**overrides: str | None):
|
||||||
|
originals: dict[str, str | None] = {}
|
||||||
|
try:
|
||||||
|
for key, value in overrides.items():
|
||||||
|
originals[key] = os.environ.get(key)
|
||||||
|
if value is None:
|
||||||
|
os.environ.pop(key, None)
|
||||||
|
else:
|
||||||
|
os.environ[key] = value
|
||||||
|
yield
|
||||||
|
finally:
|
||||||
|
for key, original in originals.items():
|
||||||
|
if original is None:
|
||||||
|
os.environ.pop(key, None)
|
||||||
|
else:
|
||||||
|
os.environ[key] = original
|
||||||
|
|
||||||
|
|
||||||
|
class LoadConfigMetadataTests(unittest.TestCase):
    """Check how ``load_config`` fills the environment/user/instance fields.

    ``Path.mkdir`` is patched in every test so that no directory is created;
    the tests only assert whether it was called.
    """

    @patch("app.config.Path.mkdir")
    def test_metadata_from_environment_variables(self, mkdir_mock):
        """All three AGENT_* variables set: they are used instead of the hostname."""
        env_vars = {
            "MASTER_ENDPOINT": "http://master.local",
            "AGENT_HOSTNAME": "dev-user-one-pod",
            "AGENT_ENV": "prod",
            "AGENT_USER": "ops",
            "AGENT_INSTANCE": "node-1",
        }
        with temp_env(**env_vars):
            loaded = load_config()

        self.assertEqual(loaded.environment, "prod")
        self.assertEqual(loaded.user, "ops")
        self.assertEqual(loaded.instance, "node-1")
        mkdir_mock.assert_called()

    @patch("app.config.Path.mkdir")
    def test_metadata_falls_back_to_hostname(self, mkdir_mock):
        """No AGENT_* metadata variables: the fields come from the hostname."""
        env_vars = {
            "MASTER_ENDPOINT": "http://master.local",
            "AGENT_HOSTNAME": "qa-team-abc-pod-2",
            "AGENT_ENV": None,
            "AGENT_USER": None,
            "AGENT_INSTANCE": None,
        }
        with temp_env(**env_vars):
            loaded = load_config()

        self.assertEqual(loaded.environment, "qa")
        self.assertEqual(loaded.user, "team")
        self.assertEqual(loaded.instance, "abc")
        mkdir_mock.assert_called()

    @patch("app.config.Path.mkdir")
    def test_partial_environment_variables_fallback(self, mkdir_mock):
        """Only AGENT_ENV set: it is ignored and the hostname is parsed."""
        env_vars = {
            "MASTER_ENDPOINT": "http://master.local",
            "AGENT_HOSTNAME": "stage-ml-001-node",
            "AGENT_ENV": "prod",
            "AGENT_USER": None,
            "AGENT_INSTANCE": None,
        }
        with temp_env(**env_vars):
            loaded = load_config()

        self.assertEqual(loaded.environment, "stage")
        self.assertEqual(loaded.user, "ml")
        self.assertEqual(loaded.instance, "001")
        mkdir_mock.assert_called()

    @patch("app.config.Path.mkdir")
    def test_invalid_hostname_raises_error(self, mkdir_mock):
        """Unparseable hostname and no variables: ``load_config`` raises."""
        env_vars = {
            "MASTER_ENDPOINT": "http://master.local",
            "AGENT_HOSTNAME": "invalidhostname",
            "AGENT_ENV": None,
            "AGENT_USER": None,
            "AGENT_INSTANCE": None,
        }
        with temp_env(**env_vars):
            with self.assertRaises(ValueError):
                load_config()

        # The failure must happen before any directory would be created.
        mkdir_mock.assert_not_called()
|
||||||
|
|
||||||
|
|
||||||
|
class CollectMetadataTests(unittest.TestCase):
    """Check that ``collect_metadata`` reports the fields stored on the config."""

    # Decorators apply bottom-up, so the mocks arrive in the order
    # cpu, memory, gpu, ip.
    @patch("app.collector._detect_ip_address", return_value="127.0.0.1")
    @patch("app.collector._detect_gpu_count", return_value=0)
    @patch("app.collector._detect_memory_bytes", return_value=1024)
    @patch("app.collector._detect_cpu_count", return_value=8)
    def test_collect_metadata_uses_config_fields(
        self,
        cpu_mock,
        memory_mock,
        gpu_mock,
        ip_mock,
    ):
        """env/user/instance come from the config, not from the hostname."""
        from app.collector import collect_metadata

        # The hostname would parse to dev/user/001, which differs from the
        # explicit fields below, so the assertions tell the sources apart.
        agent_config = AgentConfig(
            hostname="dev-user-001-pod",
            environment="prod",
            user="ops",
            instance="node-1",
            node_file="/tmp/node.json",
            version="1.0.0",
            master_endpoint="http://master.local",
            report_interval_seconds=60,
            health_dir="/tmp/health",
        )

        collected = collect_metadata(agent_config)

        self.assertEqual(collected["env"], "prod")
        self.assertEqual(collected["user"], "ops")
        self.assertEqual(collected["instance"], "node-1")
        self.assertEqual(collected["hostname"], "dev-user-001-pod")
        self.assertEqual(collected["ip"], "127.0.0.1")
        self.assertEqual(collected["cpu_number"], 8)
        self.assertEqual(collected["memory_in_bytes"], 1024)
        self.assertEqual(collected["gpu_number"], 0)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
unittest.main()
|
Loading…
x
Reference in New Issue
Block a user