diff --git a/src/agent/dist/argus-agent b/src/agent/dist/argus-agent index 56920df..c560696 100755 Binary files a/src/agent/dist/argus-agent and b/src/agent/dist/argus-agent differ diff --git a/src/agent/tests/docker-compose.yml b/src/agent/tests/docker-compose.yml index 8386ac2..2cd4220 100644 --- a/src/agent/tests/docker-compose.yml +++ b/src/agent/tests/docker-compose.yml @@ -1,7 +1,19 @@ services: + bind: + image: ${BIND_IMAGE_TAG:-argus-bind9:e2e} + container_name: argus-bind-agent-e2e + volumes: + - ./private:/private + restart: unless-stopped + networks: + default: + ipv4_address: 172.28.0.2 + master: image: argus-master:dev container_name: argus-master-agent-e2e + depends_on: + - bind environment: - OFFLINE_THRESHOLD_SECONDS=6 - ONLINE_THRESHOLD_SECONDS=2 @@ -11,6 +23,10 @@ services: volumes: - ./private/argus/master:/private/argus/master - ./private/argus/metric/prometheus:/private/argus/metric/prometheus + - ./private/argus/etc:/private/argus/etc + networks: + default: + ipv4_address: 172.28.0.10 agent: image: ubuntu:24.04 @@ -18,15 +34,21 @@ services: hostname: dev-e2euser-e2einst-pod-0 depends_on: - master + - bind environment: - - MASTER_ENDPOINT=http://master:3000 + - MASTER_ENDPOINT=http://master.argus.com:3000 - REPORT_INTERVAL_SECONDS=2 volumes: - ./private/argus/agent/dev-e2euser-e2einst-pod-0:/private/argus/agent/dev-e2euser-e2einst-pod-0 - ./private/argus/agent/health/dev-e2euser-e2einst-pod-0:/private/argus/agent/health/dev-e2euser-e2einst-pod-0 + - ./private/argus/etc:/private/argus/etc - ../dist/argus-agent:/usr/local/bin/argus-agent:ro + - ./scripts/agent_entrypoint.sh:/usr/local/bin/agent-entrypoint.sh:ro entrypoint: - - /usr/local/bin/argus-agent + - /usr/local/bin/agent-entrypoint.sh + networks: + default: + ipv4_address: 172.28.0.20 networks: default: diff --git a/src/agent/tests/scripts/01_bootstrap.sh b/src/agent/tests/scripts/01_bootstrap.sh index 95ca096..41a19e1 100755 --- a/src/agent/tests/scripts/01_bootstrap.sh +++ 
b/src/agent/tests/scripts/01_bootstrap.sh @@ -5,6 +5,7 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" TEST_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" AGENT_ROOT="$(cd "$TEST_ROOT/.." && pwd)" MASTER_ROOT="$(cd "$AGENT_ROOT/../master" && pwd)" +REPO_ROOT="$(cd "$AGENT_ROOT/../.." && pwd)" PRIVATE_ROOT="$TEST_ROOT/private" TMP_ROOT="$TEST_ROOT/tmp" @@ -13,15 +14,27 @@ AGENT_CONFIG_DIR="$PRIVATE_ROOT/argus/agent/$AGENT_HOSTNAME" AGENT_HEALTH_DIR="$PRIVATE_ROOT/argus/agent/health/$AGENT_HOSTNAME" MASTER_PRIVATE_DIR="$PRIVATE_ROOT/argus/master" METRIC_PRIVATE_DIR="$PRIVATE_ROOT/argus/metric/prometheus" +DNS_DIR="$PRIVATE_ROOT/argus/etc" +BIND_IMAGE_TAG="${BIND_IMAGE_TAG:-argus-bind9:e2e}" +BIND_ROOT="$(cd "$MASTER_ROOT/../bind" && pwd)" mkdir -p "$AGENT_CONFIG_DIR" mkdir -p "$AGENT_HEALTH_DIR" mkdir -p "$MASTER_PRIVATE_DIR" mkdir -p "$METRIC_PRIVATE_DIR" mkdir -p "$TMP_ROOT" +mkdir -p "$DNS_DIR" touch "$AGENT_HEALTH_DIR/.keep" +# Stage the update-dns.sh provided by the bind module into the shared DNS dir, simulating how production distributes it; a missing script only produces a warning +if [[ -f "$BIND_ROOT/build/update-dns.sh" ]]; then + cp "$BIND_ROOT/build/update-dns.sh" "$DNS_DIR/update-dns.sh" + chmod +x "$DNS_DIR/update-dns.sh" +else + echo "[WARN] bind update script missing at $BIND_ROOT/build/update-dns.sh" +fi + pushd "$MASTER_ROOT" >/dev/null ./scripts/build_images.sh --tag argus-master:dev popd >/dev/null @@ -37,6 +50,12 @@ if [[ ! -x "$AGENT_BINARY" ]]; then exit 1 fi +# Build the test-only bind9 image (from the repo root as build context) so that name resolution is available +pushd "$REPO_ROOT" >/dev/null +docker build -f src/bind/build/Dockerfile -t "$BIND_IMAGE_TAG" . +popd >/dev/null + echo "$AGENT_BINARY" > "$TMP_ROOT/agent_binary_path" +echo "$BIND_IMAGE_TAG" > "$TMP_ROOT/bind_image_tag" echo "[INFO] Agent E2E bootstrap complete" diff --git a/src/agent/tests/scripts/02_up.sh b/src/agent/tests/scripts/02_up.sh index f1a1234..fcb4b09 100755 --- a/src/agent/tests/scripts/02_up.sh +++ b/src/agent/tests/scripts/02_up.sh @@ -17,6 +17,11 @@ if [[ ! 
-x "$AGENT_BINARY" ]]; then exit 1 fi +BIND_IMAGE_TAG_VALUE="argus-bind9:e2e" +if [[ -f "$TMP_ROOT/bind_image_tag" ]]; then + BIND_IMAGE_TAG_VALUE="$(cat "$TMP_ROOT/bind_image_tag")" +fi + compose() { if docker compose version >/dev/null 2>&1; then docker compose "$@" @@ -25,13 +30,13 @@ compose() { fi } -docker container rm -f argus-agent-e2e >/dev/null 2>&1 || true +docker container rm -f argus-agent-e2e argus-master-agent-e2e argus-bind-agent-e2e >/dev/null 2>&1 || true docker network rm tests_default >/dev/null 2>&1 || true pushd "$TEST_ROOT" >/dev/null compose down --remove-orphans || true -compose up -d +BIND_IMAGE_TAG="$BIND_IMAGE_TAG_VALUE" compose up -d popd >/dev/null echo "[INFO] Master+Agent stack started" diff --git a/src/agent/tests/scripts/06_restart_agent_and_reregister.sh b/src/agent/tests/scripts/06_restart_agent_and_reregister.sh index 6a517b5..eb1ba18 100755 --- a/src/agent/tests/scripts/06_restart_agent_and_reregister.sh +++ b/src/agent/tests/scripts/06_restart_agent_and_reregister.sh @@ -9,6 +9,13 @@ NODE_ID="$(cat "$TMP_ROOT/node_id")" AGENT_HOSTNAME="dev-e2euser-e2einst-pod-0" NETWORK_NAME="tests_default" NEW_AGENT_IP="172.28.0.200" +ENTRYPOINT_SCRIPT="$SCRIPT_DIR/agent_entrypoint.sh" + +# The restart scenario needs the same entrypoint script so the DNS registration logic stays consistent; abort early if it is missing +if [[ ! -f "$ENTRYPOINT_SCRIPT" ]]; then + echo "[ERROR] agent entrypoint script missing at $ENTRYPOINT_SCRIPT" >&2 + exit 1 +fi if [[ ! -f "$TMP_ROOT/agent_binary_path" ]]; then echo "[ERROR] Agent binary path missing; rerun bootstrap" >&2 @@ -68,21 +75,17 @@ if ! 
docker run -d \ --ip "$NEW_AGENT_IP" \ -v "$AGENT_DIR:/private/argus/agent/$AGENT_HOSTNAME" \ -v "$HEALTH_DIR:/private/argus/agent/health/$AGENT_HOSTNAME" \ + -v "$TEST_ROOT/private/argus/etc:/private/argus/etc" \ -v "$AGENT_BINARY:/usr/local/bin/argus-agent:ro" \ - -e MASTER_ENDPOINT=http://master:3000 \ + -v "$ENTRYPOINT_SCRIPT:/usr/local/bin/agent-entrypoint.sh:ro" \ + -e MASTER_ENDPOINT=http://master.argus.com:3000 \ -e REPORT_INTERVAL_SECONDS=2 \ - ubuntu:24.04 \ - sleep 300 >/dev/null; then + --entrypoint /usr/local/bin/agent-entrypoint.sh \ + ubuntu:24.04 >/dev/null; then echo "[ERROR] Failed to start agent container with custom IP" >&2 exit 1 fi -# 在容器内启动真实 agent 进程 -if ! docker exec -d argus-agent-e2e /usr/local/bin/argus-agent; then - echo "[ERROR] Failed to spawn agent process inside container" >&2 - exit 1 -fi - success=false detail_file="$TMP_ROOT/post_restart.json" for _ in {1..20}; do diff --git a/src/agent/tests/scripts/agent_entrypoint.sh b/src/agent/tests/scripts/agent_entrypoint.sh new file mode 100755 index 0000000..8536da1 --- /dev/null +++ b/src/agent/tests/scripts/agent_entrypoint.sh @@ -0,0 +1,49 @@ +#!/usr/bin/env bash +set -euo pipefail + +LOG_PREFIX="[AGENT-ENTRYPOINT]" +DNS_SCRIPT="/private/argus/etc/update-dns.sh" +DNS_CONF="/private/argus/etc/dns.conf" +TARGET_DOMAIN="master.argus.com" + +log() { + echo "${LOG_PREFIX} $*" +} + +# Wait for the update-dns.sh script distributed by bind to become executable (polls up to 30 times, 1 second apart) +for _ in {1..30}; do + if [[ -x "$DNS_SCRIPT" ]]; then + break + fi + log "等待 update-dns.sh 准备就绪..." + sleep 1 +done + +if [[ -x "$DNS_SCRIPT" ]]; then + log "执行 update-dns.sh 更新容器 DNS" + if ! 
"$DNS_SCRIPT"; then + log "update-dns.sh 执行失败,继续尝试默认 DNS" + fi +else + log "未获取到 update-dns.sh,使用镜像默认 DNS" +fi + +# Log the current dns.conf contents (newlines flattened to spaces) to aid troubleshooting +if [[ -f "$DNS_CONF" ]]; then + log "dns.conf 内容: $(tr '\n' ' ' < "$DNS_CONF")" +else + log "dns.conf 暂未生成" +fi + +# Try to resolve the master domain (up to 30 attempts, 1 second apart); failure does not block startup. NOTE(review): only a successful lookup is logged - nothing is printed if every attempt fails +for _ in {1..30}; do + if getent hosts "$TARGET_DOMAIN" >/dev/null 2>&1; then + MASTER_IP=$(getent hosts "$TARGET_DOMAIN" | awk '{print $1}' | head -n 1) + log "master.argus.com 解析成功: $MASTER_IP" + break + fi + sleep 1 +done + +log "启动 argus-agent" +exec /usr/local/bin/argus-agent diff --git a/src/master/README.md b/src/master/README.md index e10f08d..316f324 100644 --- a/src/master/README.md +++ b/src/master/README.md @@ -60,6 +60,12 @@ cd src/master/tests 镜像构建阶段会安装 `supervisor`/`net-tools`/`inetutils-ping`/`vim` 等基础工具,并在运行前把 apt 源切换到内网镜像,方便容器内进一步运维。 +## 域名注册与 DNS 联动 + +- Master 容器启动时会主动执行 `/private/argus/etc/update-dns.sh`(若存在),把自身 `/etc/resolv.conf` 指向 bind 服务提供的 DNS;随后解析 `eth0` 的 IPv4 地址并写入 `/private/argus/etc/master.argus.com`。该文件会被 bind 模块的 `argus_dns_sync.sh` 监控,用于生成 `master.argus.com` → 当前容器 IP 的 A 记录。 +- 测试与生产都需要将 bind 下发的 `update-dns.sh`、`dns.conf` 等文件挂载到 `/private/argus/etc/`。在 E2E 场景中,`tests/private/argus/etc` 会由脚本自动准备。 +- 其他模块(如 agent)在启动脚本中只需执行同一份 `update-dns.sh`,即可使用域名访问 master;若域名注册异常,agent 将无法成功上报,可据此快速定位问题。 + ## REST API 详解 基础路径:`/api/v1/master`,全部返回 JSON。 diff --git a/src/master/build/start-master.sh b/src/master/build/start-master.sh index 97a2e15..ec57f3d 100755 --- a/src/master/build/start-master.sh +++ b/src/master/build/start-master.sh @@ -1,6 +1,30 @@ #!/usr/bin/env bash set -euo pipefail +# Ensure the shared DNS directory exists and define the paths of the DNS-related files used below +DNS_DIR="/private/argus/etc" +DNS_SCRIPT="${DNS_DIR}/update-dns.sh" +MASTER_DOMAIN_FILE="${DNS_DIR}/master.argus.com" + +mkdir -p "$DNS_DIR" + +if [[ -x "$DNS_SCRIPT" ]]; then + echo "[INFO] Running update-dns.sh before master starts" + # Run the script when it is present so the container uses bind as its DNS; a failure is downgraded to a warning + "$DNS_SCRIPT" || echo "[WARN] update-dns.sh execution failed" +else + 
echo "[WARN] DNS update script not found or not executable: $DNS_SCRIPT" +fi + +# Record the master's current IP in the shared domain file for the bind service to sync. NOTE(review): takes field 2 of the inet line that follows eth0 in ifconfig output - presumably the IPv4 address; confirm against the installed net-tools output format +MASTER_IP=$(ifconfig | grep -A 1 eth0 | grep inet | awk '{print $2}' || true) +if [[ -n "${MASTER_IP}" ]]; then + echo "current IP: ${MASTER_IP}" + echo "${MASTER_IP}" > "$MASTER_DOMAIN_FILE" +else + echo "[WARN] Failed to detect master IP via ifconfig" +fi + WORKERS=${GUNICORN_WORKERS:-4} BIND_ADDR=${GUNICORN_BIND:-0.0.0.0:3000} EXTRA_OPTS=${GUNICORN_EXTRA_ARGS:-} diff --git a/src/master/tests/docker-compose.yml b/src/master/tests/docker-compose.yml index 8c24661..7aaecf9 100644 --- a/src/master/tests/docker-compose.yml +++ b/src/master/tests/docker-compose.yml @@ -11,6 +11,7 @@ services: volumes: - ./private/argus/master:/private/argus/master - ./private/argus/metric/prometheus:/private/argus/metric/prometheus + - ./private/argus/etc:/private/argus/etc restart: unless-stopped networks: diff --git a/src/master/tests/scripts/01_up_master.sh b/src/master/tests/scripts/01_up_master.sh index 10f6dc2..c66698a 100755 --- a/src/master/tests/scripts/01_up_master.sh +++ b/src/master/tests/scripts/01_up_master.sh @@ -6,10 +6,14 @@ TEST_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" MODULE_ROOT="$(cd "$TEST_ROOT/.." 
&& pwd)" PRIVATE_ROOT="$TEST_ROOT/private" TMP_ROOT="$TEST_ROOT/tmp" +DNS_ROOT="$PRIVATE_ROOT/argus/etc" +BIND_UPDATE_SCRIPT_SRC="$(cd "$MODULE_ROOT/../bind" && pwd)/build/update-dns.sh" +BIND_UPDATE_SCRIPT_DEST="$DNS_ROOT/update-dns.sh" mkdir -p "$PRIVATE_ROOT/argus/master" mkdir -p "$PRIVATE_ROOT/argus/metric/prometheus" mkdir -p "$TMP_ROOT" +mkdir -p "$DNS_ROOT" # 确保上一次运行留下的容器/数据被清理 compose() { @@ -28,6 +32,15 @@ rm -rf "$TMP_ROOT" "$PRIVATE_ROOT" mkdir -p "$PRIVATE_ROOT/argus/master" mkdir -p "$PRIVATE_ROOT/argus/metric/prometheus" mkdir -p "$TMP_ROOT" +mkdir -p "$DNS_ROOT" + +# Copy the update-dns.sh shipped with the bind module into the shared directory, simulating the real environment; a missing source script only produces a warning +if [[ -f "$BIND_UPDATE_SCRIPT_SRC" ]]; then + cp "$BIND_UPDATE_SCRIPT_SRC" "$BIND_UPDATE_SCRIPT_DEST" + chmod +x "$BIND_UPDATE_SCRIPT_DEST" +else + echo "[WARN] bind update script missing at $BIND_UPDATE_SCRIPT_SRC" +fi pushd "$TEST_ROOT" >/dev/null compose down --remove-orphans || true diff --git a/src/master/tests/scripts/02_verify_ready_and_nodes_json.sh b/src/master/tests/scripts/02_verify_ready_and_nodes_json.sh index 1fa59df..65142dc 100755 --- a/src/master/tests/scripts/02_verify_ready_and_nodes_json.sh +++ b/src/master/tests/scripts/02_verify_ready_and_nodes_json.sh @@ -6,6 +6,7 @@ TEST_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" PRIVATE_ROOT="$TEST_ROOT/private" API_BASE="http://localhost:31300" NODES_JSON_PATH="$PRIVATE_ROOT/argus/metric/prometheus/nodes.json" +MASTER_DOMAIN_FILE="$PRIVATE_ROOT/argus/etc/master.argus.com" # 等待 readyz 返回 200,确保数据库初始化完成 for _ in {1..30}; do @@ -49,3 +50,11 @@ then fi echo "[INFO] nodes.json 初始状态校验通过" + +# Print the domain file written by master; a missing file only produces a warning and does not fail the test +if [[ -f "$MASTER_DOMAIN_FILE" ]]; then + MASTER_IP=$(<"$MASTER_DOMAIN_FILE") + echo "[INFO] master.argus.com 记录: $MASTER_IP" +else + echo "[WARN] 未找到 master.argus.com 记录文件,目录=$MASTER_DOMAIN_FILE" +fi