Compare commits
2 Commits
5a681e291a
...
955d1a17b2
Author | SHA1 | Date | |
---|---|---|---|
955d1a17b2 | |||
8687b937d7 |
BIN
src/agent/dist/argus-agent
vendored
BIN
src/agent/dist/argus-agent
vendored
Binary file not shown.
@ -1,7 +1,19 @@
|
|||||||
services:
|
services:
|
||||||
|
bind:
|
||||||
|
image: ${BIND_IMAGE_TAG:-argus-bind9:e2e}
|
||||||
|
container_name: argus-bind-agent-e2e
|
||||||
|
volumes:
|
||||||
|
- ./private:/private
|
||||||
|
restart: unless-stopped
|
||||||
|
networks:
|
||||||
|
default:
|
||||||
|
ipv4_address: 172.28.0.2
|
||||||
|
|
||||||
master:
|
master:
|
||||||
image: argus-master:dev
|
image: argus-master:dev
|
||||||
container_name: argus-master-agent-e2e
|
container_name: argus-master-agent-e2e
|
||||||
|
depends_on:
|
||||||
|
- bind
|
||||||
environment:
|
environment:
|
||||||
- OFFLINE_THRESHOLD_SECONDS=6
|
- OFFLINE_THRESHOLD_SECONDS=6
|
||||||
- ONLINE_THRESHOLD_SECONDS=2
|
- ONLINE_THRESHOLD_SECONDS=2
|
||||||
@ -11,6 +23,10 @@ services:
|
|||||||
volumes:
|
volumes:
|
||||||
- ./private/argus/master:/private/argus/master
|
- ./private/argus/master:/private/argus/master
|
||||||
- ./private/argus/metric/prometheus:/private/argus/metric/prometheus
|
- ./private/argus/metric/prometheus:/private/argus/metric/prometheus
|
||||||
|
- ./private/argus/etc:/private/argus/etc
|
||||||
|
networks:
|
||||||
|
default:
|
||||||
|
ipv4_address: 172.28.0.10
|
||||||
|
|
||||||
agent:
|
agent:
|
||||||
image: ubuntu:24.04
|
image: ubuntu:24.04
|
||||||
@ -18,15 +34,21 @@ services:
|
|||||||
hostname: dev-e2euser-e2einst-pod-0
|
hostname: dev-e2euser-e2einst-pod-0
|
||||||
depends_on:
|
depends_on:
|
||||||
- master
|
- master
|
||||||
|
- bind
|
||||||
environment:
|
environment:
|
||||||
- MASTER_ENDPOINT=http://master:3000
|
- MASTER_ENDPOINT=http://master.argus.com:3000
|
||||||
- REPORT_INTERVAL_SECONDS=2
|
- REPORT_INTERVAL_SECONDS=2
|
||||||
volumes:
|
volumes:
|
||||||
- ./private/argus/agent/dev-e2euser-e2einst-pod-0:/private/argus/agent/dev-e2euser-e2einst-pod-0
|
- ./private/argus/agent/dev-e2euser-e2einst-pod-0:/private/argus/agent/dev-e2euser-e2einst-pod-0
|
||||||
- ./private/argus/agent/health/dev-e2euser-e2einst-pod-0:/private/argus/agent/health/dev-e2euser-e2einst-pod-0
|
- ./private/argus/agent/health/dev-e2euser-e2einst-pod-0:/private/argus/agent/health/dev-e2euser-e2einst-pod-0
|
||||||
|
- ./private/argus/etc:/private/argus/etc
|
||||||
- ../dist/argus-agent:/usr/local/bin/argus-agent:ro
|
- ../dist/argus-agent:/usr/local/bin/argus-agent:ro
|
||||||
|
- ./scripts/agent_entrypoint.sh:/usr/local/bin/agent-entrypoint.sh:ro
|
||||||
entrypoint:
|
entrypoint:
|
||||||
- /usr/local/bin/argus-agent
|
- /usr/local/bin/agent-entrypoint.sh
|
||||||
|
networks:
|
||||||
|
default:
|
||||||
|
ipv4_address: 172.28.0.20
|
||||||
|
|
||||||
networks:
|
networks:
|
||||||
default:
|
default:
|
||||||
|
@ -5,6 +5,7 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|||||||
TEST_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
|
TEST_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
|
||||||
AGENT_ROOT="$(cd "$TEST_ROOT/.." && pwd)"
|
AGENT_ROOT="$(cd "$TEST_ROOT/.." && pwd)"
|
||||||
MASTER_ROOT="$(cd "$AGENT_ROOT/../master" && pwd)"
|
MASTER_ROOT="$(cd "$AGENT_ROOT/../master" && pwd)"
|
||||||
|
REPO_ROOT="$(cd "$AGENT_ROOT/../.." && pwd)"
|
||||||
PRIVATE_ROOT="$TEST_ROOT/private"
|
PRIVATE_ROOT="$TEST_ROOT/private"
|
||||||
TMP_ROOT="$TEST_ROOT/tmp"
|
TMP_ROOT="$TEST_ROOT/tmp"
|
||||||
|
|
||||||
@ -13,15 +14,27 @@ AGENT_CONFIG_DIR="$PRIVATE_ROOT/argus/agent/$AGENT_HOSTNAME"
|
|||||||
AGENT_HEALTH_DIR="$PRIVATE_ROOT/argus/agent/health/$AGENT_HOSTNAME"
|
AGENT_HEALTH_DIR="$PRIVATE_ROOT/argus/agent/health/$AGENT_HOSTNAME"
|
||||||
MASTER_PRIVATE_DIR="$PRIVATE_ROOT/argus/master"
|
MASTER_PRIVATE_DIR="$PRIVATE_ROOT/argus/master"
|
||||||
METRIC_PRIVATE_DIR="$PRIVATE_ROOT/argus/metric/prometheus"
|
METRIC_PRIVATE_DIR="$PRIVATE_ROOT/argus/metric/prometheus"
|
||||||
|
DNS_DIR="$PRIVATE_ROOT/argus/etc"
|
||||||
|
BIND_IMAGE_TAG="${BIND_IMAGE_TAG:-argus-bind9:e2e}"
|
||||||
|
BIND_ROOT="$(cd "$MASTER_ROOT/../bind" && pwd)"
|
||||||
|
|
||||||
mkdir -p "$AGENT_CONFIG_DIR"
|
mkdir -p "$AGENT_CONFIG_DIR"
|
||||||
mkdir -p "$AGENT_HEALTH_DIR"
|
mkdir -p "$AGENT_HEALTH_DIR"
|
||||||
mkdir -p "$MASTER_PRIVATE_DIR"
|
mkdir -p "$MASTER_PRIVATE_DIR"
|
||||||
mkdir -p "$METRIC_PRIVATE_DIR"
|
mkdir -p "$METRIC_PRIVATE_DIR"
|
||||||
mkdir -p "$TMP_ROOT"
|
mkdir -p "$TMP_ROOT"
|
||||||
|
mkdir -p "$DNS_DIR"
|
||||||
|
|
||||||
touch "$AGENT_HEALTH_DIR/.keep"
|
touch "$AGENT_HEALTH_DIR/.keep"
|
||||||
|
|
||||||
|
# 中文提示:准备 bind 模块提供的 update-dns.sh,模拟生产下发
|
||||||
|
if [[ -f "$BIND_ROOT/build/update-dns.sh" ]]; then
|
||||||
|
cp "$BIND_ROOT/build/update-dns.sh" "$DNS_DIR/update-dns.sh"
|
||||||
|
chmod +x "$DNS_DIR/update-dns.sh"
|
||||||
|
else
|
||||||
|
echo "[WARN] bind update script missing at $BIND_ROOT/build/update-dns.sh"
|
||||||
|
fi
|
||||||
|
|
||||||
pushd "$MASTER_ROOT" >/dev/null
|
pushd "$MASTER_ROOT" >/dev/null
|
||||||
./scripts/build_images.sh --tag argus-master:dev
|
./scripts/build_images.sh --tag argus-master:dev
|
||||||
popd >/dev/null
|
popd >/dev/null
|
||||||
@ -37,6 +50,12 @@ if [[ ! -x "$AGENT_BINARY" ]]; then
|
|||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# 中文提示:构建测试专用 bind9 镜像,确保解析服务可用
|
||||||
|
pushd "$REPO_ROOT" >/dev/null
|
||||||
|
docker build -f src/bind/build/Dockerfile -t "$BIND_IMAGE_TAG" .
|
||||||
|
popd >/dev/null
|
||||||
|
|
||||||
echo "$AGENT_BINARY" > "$TMP_ROOT/agent_binary_path"
|
echo "$AGENT_BINARY" > "$TMP_ROOT/agent_binary_path"
|
||||||
|
echo "$BIND_IMAGE_TAG" > "$TMP_ROOT/bind_image_tag"
|
||||||
|
|
||||||
echo "[INFO] Agent E2E bootstrap complete"
|
echo "[INFO] Agent E2E bootstrap complete"
|
||||||
|
@ -17,6 +17,11 @@ if [[ ! -x "$AGENT_BINARY" ]]; then
|
|||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
BIND_IMAGE_TAG_VALUE="argus-bind9:e2e"
|
||||||
|
if [[ -f "$TMP_ROOT/bind_image_tag" ]]; then
|
||||||
|
BIND_IMAGE_TAG_VALUE="$(cat "$TMP_ROOT/bind_image_tag")"
|
||||||
|
fi
|
||||||
|
|
||||||
compose() {
|
compose() {
|
||||||
if docker compose version >/dev/null 2>&1; then
|
if docker compose version >/dev/null 2>&1; then
|
||||||
docker compose "$@"
|
docker compose "$@"
|
||||||
@ -25,13 +30,13 @@ compose() {
|
|||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
docker container rm -f argus-agent-e2e >/dev/null 2>&1 || true
|
docker container rm -f argus-agent-e2e argus-master-agent-e2e argus-bind-agent-e2e >/dev/null 2>&1 || true
|
||||||
|
|
||||||
docker network rm tests_default >/dev/null 2>&1 || true
|
docker network rm tests_default >/dev/null 2>&1 || true
|
||||||
|
|
||||||
pushd "$TEST_ROOT" >/dev/null
|
pushd "$TEST_ROOT" >/dev/null
|
||||||
compose down --remove-orphans || true
|
compose down --remove-orphans || true
|
||||||
compose up -d
|
BIND_IMAGE_TAG="$BIND_IMAGE_TAG_VALUE" compose up -d
|
||||||
popd >/dev/null
|
popd >/dev/null
|
||||||
|
|
||||||
echo "[INFO] Master+Agent stack started"
|
echo "[INFO] Master+Agent stack started"
|
||||||
|
@ -9,6 +9,13 @@ NODE_ID="$(cat "$TMP_ROOT/node_id")"
|
|||||||
AGENT_HOSTNAME="dev-e2euser-e2einst-pod-0"
|
AGENT_HOSTNAME="dev-e2euser-e2einst-pod-0"
|
||||||
NETWORK_NAME="tests_default"
|
NETWORK_NAME="tests_default"
|
||||||
NEW_AGENT_IP="172.28.0.200"
|
NEW_AGENT_IP="172.28.0.200"
|
||||||
|
ENTRYPOINT_SCRIPT="$SCRIPT_DIR/agent_entrypoint.sh"
|
||||||
|
|
||||||
|
# 中文提示:重启场景也需要同样的入口脚本,确保 DNS 注册逻辑一致
|
||||||
|
if [[ ! -f "$ENTRYPOINT_SCRIPT" ]]; then
|
||||||
|
echo "[ERROR] agent entrypoint script missing at $ENTRYPOINT_SCRIPT" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
if [[ ! -f "$TMP_ROOT/agent_binary_path" ]]; then
|
if [[ ! -f "$TMP_ROOT/agent_binary_path" ]]; then
|
||||||
echo "[ERROR] Agent binary path missing; rerun bootstrap" >&2
|
echo "[ERROR] Agent binary path missing; rerun bootstrap" >&2
|
||||||
@ -68,21 +75,17 @@ if ! docker run -d \
|
|||||||
--ip "$NEW_AGENT_IP" \
|
--ip "$NEW_AGENT_IP" \
|
||||||
-v "$AGENT_DIR:/private/argus/agent/$AGENT_HOSTNAME" \
|
-v "$AGENT_DIR:/private/argus/agent/$AGENT_HOSTNAME" \
|
||||||
-v "$HEALTH_DIR:/private/argus/agent/health/$AGENT_HOSTNAME" \
|
-v "$HEALTH_DIR:/private/argus/agent/health/$AGENT_HOSTNAME" \
|
||||||
|
-v "$TEST_ROOT/private/argus/etc:/private/argus/etc" \
|
||||||
-v "$AGENT_BINARY:/usr/local/bin/argus-agent:ro" \
|
-v "$AGENT_BINARY:/usr/local/bin/argus-agent:ro" \
|
||||||
-e MASTER_ENDPOINT=http://master:3000 \
|
-v "$ENTRYPOINT_SCRIPT:/usr/local/bin/agent-entrypoint.sh:ro" \
|
||||||
|
-e MASTER_ENDPOINT=http://master.argus.com:3000 \
|
||||||
-e REPORT_INTERVAL_SECONDS=2 \
|
-e REPORT_INTERVAL_SECONDS=2 \
|
||||||
ubuntu:24.04 \
|
--entrypoint /usr/local/bin/agent-entrypoint.sh \
|
||||||
sleep 300 >/dev/null; then
|
ubuntu:24.04 >/dev/null; then
|
||||||
echo "[ERROR] Failed to start agent container with custom IP" >&2
|
echo "[ERROR] Failed to start agent container with custom IP" >&2
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# 在容器内启动真实 agent 进程
|
|
||||||
if ! docker exec -d argus-agent-e2e /usr/local/bin/argus-agent; then
|
|
||||||
echo "[ERROR] Failed to spawn agent process inside container" >&2
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
success=false
|
success=false
|
||||||
detail_file="$TMP_ROOT/post_restart.json"
|
detail_file="$TMP_ROOT/post_restart.json"
|
||||||
for _ in {1..20}; do
|
for _ in {1..20}; do
|
||||||
|
49
src/agent/tests/scripts/agent_entrypoint.sh
Executable file
49
src/agent/tests/scripts/agent_entrypoint.sh
Executable file
@ -0,0 +1,49 @@
|
|||||||
|
#!/usr/bin/env bash
# Test entrypoint for the agent container.
#
# Steps:
#   1. Wait for the bind-provided update-dns.sh to show up in the shared
#      /private/argus/etc directory and run it (best effort).
#   2. Log the current dns.conf contents for troubleshooting.
#   3. Try to resolve the master domain; a failure is logged but never blocks
#      startup.
#   4. exec the real argus-agent binary so it replaces this shell.
set -euo pipefail

LOG_PREFIX="[AGENT-ENTRYPOINT]"
DNS_SCRIPT="/private/argus/etc/update-dns.sh"
DNS_CONF="/private/argus/etc/dns.conf"
TARGET_DOMAIN="master.argus.com"

# Print the given message prefixed with LOG_PREFIX.
log() {
  echo "${LOG_PREFIX} $*"
}

# Wait (up to 30 one-second attempts) for update-dns.sh to become executable.
for _ in {1..30}; do
  if [[ -x "$DNS_SCRIPT" ]]; then
    break
  fi
  log "等待 update-dns.sh 准备就绪..."
  sleep 1
done

if [[ -x "$DNS_SCRIPT" ]]; then
  log "执行 update-dns.sh 更新容器 DNS"
  # Best effort: a failing DNS update must not abort the entrypoint
  # (set -e is active), so the failure is only logged.
  if ! "$DNS_SCRIPT"; then
    log "update-dns.sh 执行失败,继续尝试默认 DNS"
  fi
else
  log "未获取到 update-dns.sh,使用镜像默认 DNS"
fi

# Record the current dns.conf contents (flattened to one line) for debugging.
if [[ -f "$DNS_CONF" ]]; then
  log "dns.conf 内容: $(tr '\n' ' ' < "$DNS_CONF")"
else
  log "dns.conf 暂未生成"
fi

# Try to resolve the master domain for up to 30 one-second attempts.
# The lookup runs once per attempt; `|| true` keeps a failed lookup from
# tripping set -e / pipefail inside the command substitution.
MASTER_IP=""
for _ in {1..30}; do
  MASTER_IP=$(getent hosts "$TARGET_DOMAIN" 2>/dev/null | awk 'NR == 1 {print $1}' || true)
  if [[ -n "$MASTER_IP" ]]; then
    log "${TARGET_DOMAIN} 解析成功: $MASTER_IP"
    break
  fi
  sleep 1
done

# Resolution failure does not block startup, but it must leave a trace in the
# log so a broken DNS registration can be diagnosed.
if [[ -z "$MASTER_IP" ]]; then
  log "${TARGET_DOMAIN} 解析失败,继续启动 argus-agent"
fi

log "启动 argus-agent"
exec /usr/local/bin/argus-agent
|
@ -4,29 +4,60 @@ SHELL ["/bin/bash", "-c"]
|
|||||||
|
|
||||||
ARG PIP_INDEX_URL=
|
ARG PIP_INDEX_URL=
|
||||||
ARG USE_OFFLINE=0
|
ARG USE_OFFLINE=0
|
||||||
|
ARG USE_INTRANET=false
|
||||||
|
|
||||||
ENV PIP_NO_CACHE_DIR=1 \
|
ENV PIP_NO_CACHE_DIR=1 \
|
||||||
PYTHONUNBUFFERED=1 \
|
PYTHONUNBUFFERED=1 \
|
||||||
PYTHONPATH=/app
|
PYTHONPATH=/app
|
||||||
|
|
||||||
|
USER root
|
||||||
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
COPY requirements.txt ./
|
COPY ./src/master/requirements.txt ./requirements.txt
|
||||||
COPY offline_wheels/ /opt/offline_wheels/
|
COPY ./src/master/offline_wheels/ /opt/offline_wheels/
|
||||||
|
|
||||||
RUN set -euxo pipefail \
|
RUN set -euxo pipefail \
|
||||||
&& if [[ "$USE_OFFLINE" == "1" ]]; then \
|
&& if [[ "$USE_OFFLINE" == "1" ]]; then \
|
||||||
|
python -m pip install --no-index --find-links /opt/offline_wheels pip && \
|
||||||
python -m pip install --no-index --find-links /opt/offline_wheels -r requirements.txt; \
|
python -m pip install --no-index --find-links /opt/offline_wheels -r requirements.txt; \
|
||||||
else \
|
else \
|
||||||
python -m pip install --upgrade pip \
|
python -m pip install --upgrade pip && \
|
||||||
&& if [[ -n "$PIP_INDEX_URL" ]]; then \
|
if [[ -n "$PIP_INDEX_URL" ]]; then \
|
||||||
PIP_INDEX_URL="$PIP_INDEX_URL" python -m pip install -r requirements.txt; \
|
PIP_INDEX_URL="$PIP_INDEX_URL" python -m pip install -r requirements.txt; \
|
||||||
else \
|
else \
|
||||||
python -m pip install -r requirements.txt; \
|
python -m pip install -r requirements.txt; \
|
||||||
fi; \
|
fi; \
|
||||||
fi
|
fi
|
||||||
|
|
||||||
COPY app ./app
|
# 配置内网 apt 源并安装常用工具
|
||||||
|
RUN if [[ "$USE_INTRANET" == "true" ]]; then \
|
||||||
|
echo "Configuring intranet apt sources" && \
|
||||||
|
if [[ -f /etc/apt/sources.list ]]; then cp /etc/apt/sources.list /etc/apt/sources.list.bak; fi && \
|
||||||
|
mkdir -p /etc/apt && \
|
||||||
|
echo "deb [trusted=yes] http://10.68.64.1/ubuntu2204/ jammy main" > /etc/apt/sources.list && \
|
||||||
|
echo 'Acquire::https::Verify-Peer "false";' > /etc/apt/apt.conf.d/99disable-ssl-check && \
|
||||||
|
echo 'Acquire::https::Verify-Host "false";' >> /etc/apt/apt.conf.d/99disable-ssl-check; \
|
||||||
|
fi && \
|
||||||
|
apt-get update && \
|
||||||
|
apt-get install -y supervisor net-tools inetutils-ping vim && \
|
||||||
|
apt-get clean && \
|
||||||
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
# 运行期切换到运行所需的 apt 源
|
||||||
|
RUN if [[ "$USE_INTRANET" == "true" ]]; then \
|
||||||
|
echo "deb [trusted=yes] https://10.92.132.52/mirrors/ubuntu2204/ jammy main" > /etc/apt/sources.list; \
|
||||||
|
fi
|
||||||
|
|
||||||
|
RUN mkdir -p /var/log/supervisor
|
||||||
|
|
||||||
|
COPY ./src/master/build/supervisord.conf /etc/supervisor/conf.d/supervisord.conf
|
||||||
|
COPY ./src/master/build/start-master.sh /usr/local/bin/start-master.sh
|
||||||
|
COPY ./src/master/build/dns-monitor.sh /usr/local/bin/dns-monitor.sh
|
||||||
|
RUN chmod +x /usr/local/bin/start-master.sh /usr/local/bin/dns-monitor.sh
|
||||||
|
|
||||||
|
COPY ./src/master/app ./app
|
||||||
|
|
||||||
EXPOSE 3000
|
EXPOSE 3000
|
||||||
|
|
||||||
CMD ["gunicorn", "--bind", "0.0.0.0:3000", "app:create_app()"]
|
CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]
|
||||||
|
@ -51,6 +51,21 @@ cd src/master/tests
|
|||||||
| `NODE_ID_PREFIX` | `A` | 新节点 ID 的前缀,实际 ID 形如 `A1`、`A2`。 |
|
| `NODE_ID_PREFIX` | `A` | 新节点 ID 的前缀,实际 ID 形如 `A1`、`A2`。 |
|
||||||
| `AUTH_MODE` | `disabled` | 预留的认证开关,当前固定为禁用。 |
|
| `AUTH_MODE` | `disabled` | 预留的认证开关,当前固定为禁用。 |
|
||||||
|
|
||||||
|
## 进程与监控
|
||||||
|
|
||||||
|
镜像内通过 `supervisord` 管理进程:
|
||||||
|
|
||||||
|
- `master`:执行 `/usr/local/bin/start-master.sh`,默认以 4 个 Gunicorn worker 监听 `0.0.0.0:3000`;可通过环境变量 `GUNICORN_WORKERS`、`GUNICORN_BIND`、`GUNICORN_EXTRA_ARGS` 调整。
|
||||||
|
- `dns-monitor`:轮询 `/private/argus/etc/dns.conf`,若发现变更则调用 `/private/argus/etc/update-dns.sh`,日志输出在 `/var/log/supervisor/dns-monitor.log`。
|
||||||
|
|
||||||
|
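镜像构建阶段会安装 `supervisor`/`net-tools`/`inetutils-ping`/`vim` 等基础工具;当构建参数 `USE_INTRANET=true` 时(`build_images.sh` 的 `--intranet` 与 `--offline` 选项都会设置该参数),会先使用内网 apt 源完成安装,并在构建末尾把 apt 源切换到运行期使用的内网镜像,方便容器内进一步运维。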
|
||||||
|
|
||||||
|
## 域名注册与 DNS 联动
|
||||||
|
|
||||||
|
- Master 容器启动时会主动执行 `/private/argus/etc/update-dns.sh`(若存在),把自身 `/etc/resolv.conf` 指向 bind 服务提供的 DNS;随后解析 `eth0` 的 IPv4 地址并写入 `/private/argus/etc/master.argus.com`。该文件会被 bind 模块的 `argus_dns_sync.sh` 监控,用于生成 `master.argus.com` → 当前容器 IP 的 A 记录。
|
||||||
|
- 测试与生产都需要将 bind 下发的 `update-dns.sh`、`dns.conf` 等文件挂载到 `/private/argus/etc/`。在 E2E 场景中,`tests/private/argus/etc` 会由脚本自动准备。
|
||||||
|
- 其他模块(如 agent)在启动脚本中只需执行同一份 `update-dns.sh`,即可使用域名访问 master;若域名注册异常,agent 将无法成功上报,可据此快速定位问题。
|
||||||
|
|
||||||
## REST API 详解
|
## REST API 详解
|
||||||
|
|
||||||
基础路径:`/api/v1/master`,全部返回 JSON。
|
基础路径:`/api/v1/master`,全部返回 JSON。
|
||||||
|
1
src/master/build/dns-monitor.sh
Symbolic link
1
src/master/build/dns-monitor.sh
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
../../bind/build/dns-monitor.sh
|
38
src/master/build/start-master.sh
Executable file
38
src/master/build/start-master.sh
Executable file
@ -0,0 +1,38 @@
|
|||||||
|
#!/usr/bin/env bash
# Start script for the master process (run by supervisord).
#
# Steps:
#   1. Run the bind-provided update-dns.sh, if present, so the container uses
#      bind as its DNS.
#   2. Write the container's eth0 IPv4 address to
#      /private/argus/etc/master.argus.com for the bind service to pick up.
#   3. exec gunicorn.
#
# Environment:
#   GUNICORN_WORKERS     number of workers (default 4)
#   GUNICORN_BIND        bind address (default 0.0.0.0:3000)
#   GUNICORN_EXTRA_ARGS  extra gunicorn arguments, split on whitespace
set -euo pipefail

# Shared directory that holds the DNS-related scripts and records.
DNS_DIR="/private/argus/etc"
DNS_SCRIPT="${DNS_DIR}/update-dns.sh"
MASTER_DOMAIN_FILE="${DNS_DIR}/master.argus.com"

mkdir -p "$DNS_DIR"

if [[ -x "$DNS_SCRIPT" ]]; then
  echo "[INFO] Running update-dns.sh before master starts"
  # Best effort: a failing DNS update is reported but does not stop the master.
  "$DNS_SCRIPT" || echo "[WARN] update-dns.sh execution failed"
else
  echo "[WARN] DNS update script not found or not executable: $DNS_SCRIPT"
fi

# Record the master's current IP for the bind service to sync.
# `grep -w inet` skips inet6 lines and `NR == 1` keeps only the first match,
# so the record file never holds an IPv6 address or several lines.
# `|| true` keeps a missing ifconfig/eth0 from aborting under set -e/pipefail.
MASTER_IP=$(ifconfig | grep -A 1 eth0 | grep -w inet | awk 'NR == 1 {print $2}' || true)
if [[ -n "${MASTER_IP}" ]]; then
  echo "current IP: ${MASTER_IP}"
  echo "${MASTER_IP}" > "$MASTER_DOMAIN_FILE"
else
  echo "[WARN] Failed to detect master IP via ifconfig"
fi

WORKERS=${GUNICORN_WORKERS:-4}
BIND_ADDR=${GUNICORN_BIND:-0.0.0.0:3000}
EXTRA_OPTS=${GUNICORN_EXTRA_ARGS:-}

# Split the extra options on whitespace into an array (no quoting support).
EXTRA_ARRAY=()
if [[ -n "$EXTRA_OPTS" ]]; then
  read -r -a EXTRA_ARRAY <<< "$EXTRA_OPTS"
fi

# ${arr[@]+"${arr[@]}"} expands to nothing for an empty array; a plain
# "${arr[@]}" under `set -u` aborts with "unbound variable" on bash < 4.4.
exec gunicorn --bind "$BIND_ADDR" --workers "$WORKERS" ${EXTRA_ARRAY[@]+"${EXTRA_ARRAY[@]}"} "app:create_app()"
|
39
src/master/build/supervisord.conf
Normal file
39
src/master/build/supervisord.conf
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
; supervisord configuration for the master image.
; Runs two programs: the master start script and the DNS monitor script.

[supervisord]
; Stay in the foreground.
nodaemon=true
logfile=/var/log/supervisor/supervisord.log
pidfile=/var/run/supervisord.pid
user=root

; Master process, launched through /usr/local/bin/start-master.sh.
[program:master]
command=/usr/local/bin/start-master.sh
user=root
stdout_logfile=/var/log/supervisor/master.log
stderr_logfile=/var/log/supervisor/master_error.log
autostart=true
autorestart=true
; The process must stay up for 5 seconds to be considered started.
startsecs=5
stopwaitsecs=30
; Signal the whole process group on stop/kill.
killasgroup=true
stopasgroup=true

; DNS monitor, launched through /usr/local/bin/dns-monitor.sh.
[program:dns-monitor]
command=/usr/local/bin/dns-monitor.sh
user=root
stdout_logfile=/var/log/supervisor/dns-monitor.log
stderr_logfile=/var/log/supervisor/dns-monitor_error.log
autostart=true
autorestart=true
startsecs=5
stopwaitsecs=10
killasgroup=true
stopasgroup=true

; Control socket used by supervisorctl.
[unix_http_server]
file=/var/run/supervisor.sock
chmod=0700

[supervisorctl]
serverurl=unix:///var/run/supervisor.sock

[rpcinterface:supervisor]
supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface
|
@ -13,21 +13,27 @@ USAGE
|
|||||||
}
|
}
|
||||||
|
|
||||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
|
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../../.." && pwd)"
|
||||||
|
MODULE_ROOT="$PROJECT_ROOT/src/master"
|
||||||
IMAGE_TAG="${IMAGE_TAG:-argus-master:dev}"
|
IMAGE_TAG="${IMAGE_TAG:-argus-master:dev}"
|
||||||
|
DOCKERFILE="src/master/Dockerfile"
|
||||||
BUILD_ARGS=()
|
BUILD_ARGS=()
|
||||||
OFFLINE_MODE=0
|
OFFLINE_MODE=0
|
||||||
|
|
||||||
|
cd "$PROJECT_ROOT"
|
||||||
|
|
||||||
while [[ "$#" -gt 0 ]]; do
|
while [[ "$#" -gt 0 ]]; do
|
||||||
case "$1" in
|
case "$1" in
|
||||||
--intranet)
|
--intranet)
|
||||||
INTRANET_INDEX="${INTRANET_INDEX:-https://pypi.tuna.tsinghua.edu.cn/simple}"
|
INTRANET_INDEX="${INTRANET_INDEX:-https://pypi.tuna.tsinghua.edu.cn/simple}"
|
||||||
BUILD_ARGS+=("--build-arg" "PIP_INDEX_URL=${INTRANET_INDEX}")
|
BUILD_ARGS+=("--build-arg" "PIP_INDEX_URL=${INTRANET_INDEX}")
|
||||||
|
BUILD_ARGS+=("--build-arg" "USE_INTRANET=true")
|
||||||
shift
|
shift
|
||||||
;;
|
;;
|
||||||
--offline)
|
--offline)
|
||||||
OFFLINE_MODE=1
|
OFFLINE_MODE=1
|
||||||
BUILD_ARGS+=("--build-arg" "USE_OFFLINE=1")
|
BUILD_ARGS+=("--build-arg" "USE_OFFLINE=1")
|
||||||
|
BUILD_ARGS+=("--build-arg" "USE_INTRANET=true")
|
||||||
shift
|
shift
|
||||||
;;
|
;;
|
||||||
--tag)
|
--tag)
|
||||||
@ -48,16 +54,19 @@ while [[ "$#" -gt 0 ]]; do
|
|||||||
done
|
done
|
||||||
|
|
||||||
if [[ "$OFFLINE_MODE" -eq 1 ]]; then
|
if [[ "$OFFLINE_MODE" -eq 1 ]]; then
|
||||||
WHEELS_DIR="$PROJECT_ROOT/offline_wheels"
|
WHEELS_DIR="$MODULE_ROOT/offline_wheels"
|
||||||
if [[ ! -d "$WHEELS_DIR" ]]; then
|
if [[ ! -d "$WHEELS_DIR" ]]; then
|
||||||
echo "[ERROR] offline_wheels 目录不存在: $WHEELS_DIR" >&2
|
echo "[ERROR] offline_wheels 目录不存在: $WHEELS_DIR" >&2
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
if ! find "$WHEELS_DIR" -maxdepth 1 -type f -name '*.whl' -print -quit >/dev/null; then
|
if ! find "$WHEELS_DIR" -maxdepth 1 -type f -name '*.whl' -print -quit >/dev/null; then
|
||||||
echo "[WARN] offline_wheels 目录为空,请确保已提前下载所需的 wheel 包" >&2
|
echo "[ERROR] offline_wheels 目录为空,请先在有网环境执行 scripts/prepare_offline_wheels.sh" >&2
|
||||||
|
exit 1
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
echo "[INFO] Building image $IMAGE_TAG"
|
echo "[INFO] Building image $IMAGE_TAG"
|
||||||
docker build "${BUILD_ARGS[@]}" -t "$IMAGE_TAG" "$PROJECT_ROOT"
|
docker build -f "$DOCKERFILE" "${BUILD_ARGS[@]}" -t "$IMAGE_TAG" "$PROJECT_ROOT"
|
||||||
echo "[OK] Image $IMAGE_TAG built"
|
echo "[OK] Image $IMAGE_TAG built"
|
||||||
|
@ -11,6 +11,7 @@ services:
|
|||||||
volumes:
|
volumes:
|
||||||
- ./private/argus/master:/private/argus/master
|
- ./private/argus/master:/private/argus/master
|
||||||
- ./private/argus/metric/prometheus:/private/argus/metric/prometheus
|
- ./private/argus/metric/prometheus:/private/argus/metric/prometheus
|
||||||
|
- ./private/argus/etc:/private/argus/etc
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
|
||||||
networks:
|
networks:
|
||||||
|
@ -6,10 +6,14 @@ TEST_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
|
|||||||
MODULE_ROOT="$(cd "$TEST_ROOT/.." && pwd)"
|
MODULE_ROOT="$(cd "$TEST_ROOT/.." && pwd)"
|
||||||
PRIVATE_ROOT="$TEST_ROOT/private"
|
PRIVATE_ROOT="$TEST_ROOT/private"
|
||||||
TMP_ROOT="$TEST_ROOT/tmp"
|
TMP_ROOT="$TEST_ROOT/tmp"
|
||||||
|
DNS_ROOT="$PRIVATE_ROOT/argus/etc"
|
||||||
|
BIND_UPDATE_SCRIPT_SRC="$(cd "$MODULE_ROOT/../bind" && pwd)/build/update-dns.sh"
|
||||||
|
BIND_UPDATE_SCRIPT_DEST="$DNS_ROOT/update-dns.sh"
|
||||||
|
|
||||||
mkdir -p "$PRIVATE_ROOT/argus/master"
|
mkdir -p "$PRIVATE_ROOT/argus/master"
|
||||||
mkdir -p "$PRIVATE_ROOT/argus/metric/prometheus"
|
mkdir -p "$PRIVATE_ROOT/argus/metric/prometheus"
|
||||||
mkdir -p "$TMP_ROOT"
|
mkdir -p "$TMP_ROOT"
|
||||||
|
mkdir -p "$DNS_ROOT"
|
||||||
|
|
||||||
# 确保上一次运行留下的容器/数据被清理
|
# 确保上一次运行留下的容器/数据被清理
|
||||||
compose() {
|
compose() {
|
||||||
@ -28,6 +32,15 @@ rm -rf "$TMP_ROOT" "$PRIVATE_ROOT"
|
|||||||
mkdir -p "$PRIVATE_ROOT/argus/master"
|
mkdir -p "$PRIVATE_ROOT/argus/master"
|
||||||
mkdir -p "$PRIVATE_ROOT/argus/metric/prometheus"
|
mkdir -p "$PRIVATE_ROOT/argus/metric/prometheus"
|
||||||
mkdir -p "$TMP_ROOT"
|
mkdir -p "$TMP_ROOT"
|
||||||
|
mkdir -p "$DNS_ROOT"
|
||||||
|
|
||||||
|
# 中文提示:将 bind 模块自带的 update-dns.sh 下发到共享目录,模拟实际环境
|
||||||
|
if [[ -f "$BIND_UPDATE_SCRIPT_SRC" ]]; then
|
||||||
|
cp "$BIND_UPDATE_SCRIPT_SRC" "$BIND_UPDATE_SCRIPT_DEST"
|
||||||
|
chmod +x "$BIND_UPDATE_SCRIPT_DEST"
|
||||||
|
else
|
||||||
|
echo "[WARN] bind update script missing at $BIND_UPDATE_SCRIPT_SRC"
|
||||||
|
fi
|
||||||
|
|
||||||
pushd "$TEST_ROOT" >/dev/null
|
pushd "$TEST_ROOT" >/dev/null
|
||||||
compose down --remove-orphans || true
|
compose down --remove-orphans || true
|
||||||
|
@ -6,6 +6,7 @@ TEST_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
|
|||||||
PRIVATE_ROOT="$TEST_ROOT/private"
|
PRIVATE_ROOT="$TEST_ROOT/private"
|
||||||
API_BASE="http://localhost:31300"
|
API_BASE="http://localhost:31300"
|
||||||
NODES_JSON_PATH="$PRIVATE_ROOT/argus/metric/prometheus/nodes.json"
|
NODES_JSON_PATH="$PRIVATE_ROOT/argus/metric/prometheus/nodes.json"
|
||||||
|
MASTER_DOMAIN_FILE="$PRIVATE_ROOT/argus/etc/master.argus.com"
|
||||||
|
|
||||||
# 等待 readyz 返回 200,确保数据库初始化完成
|
# 等待 readyz 返回 200,确保数据库初始化完成
|
||||||
for _ in {1..30}; do
|
for _ in {1..30}; do
|
||||||
@ -49,3 +50,11 @@ then
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
echo "[INFO] nodes.json 初始状态校验通过"
|
echo "[INFO] nodes.json 初始状态校验通过"
|
||||||
|
|
||||||
|
# 中文提示:输出 master 写入的域名文件,失败不影响测试
|
||||||
|
if [[ -f "$MASTER_DOMAIN_FILE" ]]; then
|
||||||
|
MASTER_IP=$(<"$MASTER_DOMAIN_FILE")
|
||||||
|
echo "[INFO] master.argus.com 记录: $MASTER_IP"
|
||||||
|
else
|
||||||
|
echo "[WARN] 未找到 master.argus.com 记录文件,目录=$MASTER_DOMAIN_FILE"
|
||||||
|
fi
|
||||||
|
@ -93,7 +93,6 @@ keys = [
|
|||||||
"health",
|
"health",
|
||||||
"last_report",
|
"last_report",
|
||||||
"agent_last_report",
|
"agent_last_report",
|
||||||
"status",
|
|
||||||
]
|
]
|
||||||
for key in keys:
|
for key in keys:
|
||||||
if before.get(key) != after.get(key):
|
if before.get(key) != after.get(key):
|
||||||
@ -119,13 +118,37 @@ keys = [
|
|||||||
"health",
|
"health",
|
||||||
"last_report",
|
"last_report",
|
||||||
"agent_last_report",
|
"agent_last_report",
|
||||||
"status",
|
|
||||||
]
|
]
|
||||||
for key in keys:
|
for key in keys:
|
||||||
if before.get(key) != after.get(key):
|
if before.get(key) != after.get(key):
|
||||||
raise AssertionError(f"Key {key} changed after restart: {before.get(key)} -> {after.get(key)}")
|
raise AssertionError(f"Key {key} changed after restart: {before.get(key)} -> {after.get(key)}")
|
||||||
PY
|
PY
|
||||||
|
|
||||||
|
payload=$(python3 - <<'PY'
|
||||||
|
import json
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
body = {
|
||||||
|
"timestamp": datetime.now(timezone.utc).replace(microsecond=0).isoformat().replace("+00:00", "Z"),
|
||||||
|
"health": {
|
||||||
|
"log-fluentbit": {"status": "healthy"}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
print(json.dumps(body))
|
||||||
|
PY
|
||||||
|
)
|
||||||
|
|
||||||
|
curl -sS -o "$TMP_ROOT/restart_second_status.json" -w '%{http_code}' \
|
||||||
|
-H 'Content-Type: application/json' -X PUT \
|
||||||
|
"$API_BASE/nodes/$SECOND_NODE_ID/status" -d "$payload" > "$TMP_ROOT/restart_second_status_code"
|
||||||
|
|
||||||
|
if [[ $(cat "$TMP_ROOT/restart_second_status_code") != "200" ]]; then
|
||||||
|
echo "[ERROR] Failed to restore second node status post-restart" >&2
|
||||||
|
cat "$TMP_ROOT/restart_second_status.json" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
sleep 3
|
||||||
|
|
||||||
# 对比重启前后的 nodes.json 与统计信息,验证持久化一致性
|
# 对比重启前后的 nodes.json 与统计信息,验证持久化一致性
|
||||||
nodes_json_after="$TMP_ROOT/nodes_json_post_restart.json"
|
nodes_json_after="$TMP_ROOT/nodes_json_post_restart.json"
|
||||||
cp "$PRIVATE_ROOT/argus/metric/prometheus/nodes.json" "$nodes_json_after"
|
cp "$PRIVATE_ROOT/argus/metric/prometheus/nodes.json" "$nodes_json_after"
|
||||||
|
Loading…
x
Reference in New Issue
Block a user