[#2] master模块使用supervisor守护;增加dns-monitor功能;内网apt源
This commit is contained in:
parent
c6437c6404
commit
de5af95715
@ -4,29 +4,60 @@ SHELL ["/bin/bash", "-c"]
|
||||
|
||||
ARG PIP_INDEX_URL=
|
||||
ARG USE_OFFLINE=0
|
||||
ARG USE_INTRANET=false
|
||||
|
||||
ENV PIP_NO_CACHE_DIR=1 \
|
||||
PYTHONUNBUFFERED=1 \
|
||||
PYTHONPATH=/app
|
||||
|
||||
USER root
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY requirements.txt ./
|
||||
COPY offline_wheels/ /opt/offline_wheels/
|
||||
COPY ./src/master/requirements.txt ./requirements.txt
|
||||
COPY ./src/master/offline_wheels/ /opt/offline_wheels/
|
||||
|
||||
RUN set -euxo pipefail \
|
||||
&& if [[ "$USE_OFFLINE" == "1" ]]; then \
|
||||
python -m pip install --no-index --find-links /opt/offline_wheels pip && \
|
||||
python -m pip install --no-index --find-links /opt/offline_wheels -r requirements.txt; \
|
||||
else \
|
||||
python -m pip install --upgrade pip \
|
||||
&& if [[ -n "$PIP_INDEX_URL" ]]; then \
|
||||
python -m pip install --upgrade pip && \
|
||||
if [[ -n "$PIP_INDEX_URL" ]]; then \
|
||||
PIP_INDEX_URL="$PIP_INDEX_URL" python -m pip install -r requirements.txt; \
|
||||
else \
|
||||
else \
|
||||
python -m pip install -r requirements.txt; \
|
||||
fi; \
|
||||
fi; \
|
||||
fi
|
||||
|
||||
COPY app ./app
|
||||
# Optionally point apt at the intranet mirror, then install supervisor and a
# few basic troubleshooting tools (net-tools, ping, vim).
# When USE_INTRANET=true the existing sources.list is backed up (if present)
# and replaced with the intranet jammy mirror, and HTTPS peer/host verification
# is switched off for apt through 99disable-ssl-check.
# NOTE(review): [trusted=yes] together with disabled TLS verification means
# packages are fetched without authentication -- confirm this is acceptable for
# the network the image is built on.
# apt.conf.d is created explicitly so the redirects below cannot fail on a base
# image that lacks it; DEBIAN_FRONTEND=noninteractive keeps package
# configuration prompts from stalling the build.
RUN if [[ "$USE_INTRANET" == "true" ]]; then \
      echo "Configuring intranet apt sources" && \
      if [[ -f /etc/apt/sources.list ]]; then cp /etc/apt/sources.list /etc/apt/sources.list.bak; fi && \
      mkdir -p /etc/apt/apt.conf.d && \
      echo "deb [trusted=yes] http://10.68.64.1/ubuntu2204/ jammy main" > /etc/apt/sources.list && \
      echo 'Acquire::https::Verify-Peer "false";' > /etc/apt/apt.conf.d/99disable-ssl-check && \
      echo 'Acquire::https::Verify-Host "false";' >> /etc/apt/apt.conf.d/99disable-ssl-check; \
    fi && \
    apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y supervisor net-tools inetutils-ping vim && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# For intranet builds, switch sources.list to the apt mirror intended for use
# at run time. Non-intranet builds leave the file untouched.
RUN [[ "$USE_INTRANET" != "true" ]] || \
    echo "deb [trusted=yes] https://10.92.132.52/mirrors/ubuntu2204/ jammy main" > /etc/apt/sources.list
|
||||
|
||||
RUN mkdir -p /var/log/supervisor
|
||||
|
||||
COPY ./src/master/build/supervisord.conf /etc/supervisor/conf.d/supervisord.conf
|
||||
COPY ./src/master/build/start-master.sh /usr/local/bin/start-master.sh
|
||||
COPY ./src/master/build/dns-monitor.sh /usr/local/bin/dns-monitor.sh
|
||||
RUN chmod +x /usr/local/bin/start-master.sh /usr/local/bin/dns-monitor.sh
|
||||
|
||||
COPY ./src/master/app ./app
|
||||
|
||||
EXPOSE 3000
|
||||
|
||||
CMD ["gunicorn", "--bind", "0.0.0.0:3000", "app:create_app()"]
|
||||
CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]
|
||||
|
@ -51,6 +51,15 @@ cd src/master/tests
|
||||
| `NODE_ID_PREFIX` | `A` | 新节点 ID 的前缀,实际 ID 形如 `A1`、`A2`。 |
|
||||
| `AUTH_MODE` | `disabled` | 预留的认证开关,当前固定为禁用。 |
|
||||
|
||||
## 进程与监控
|
||||
|
||||
镜像内通过 `supervisord` 管理进程:
|
||||
|
||||
- `master`:执行 `/usr/local/bin/start-master.sh`,默认以 4 个 Gunicorn worker 监听 `0.0.0.0:3000`;可通过环境变量 `GUNICORN_WORKERS`、`GUNICORN_BIND`、`GUNICORN_EXTRA_ARGS` 调整。
|
||||
- `dns-monitor`:轮询 `/private/argus/etc/dns.conf`,若发现变更则调用 `/private/argus/etc/update-dns.sh`,日志输出在 `/var/log/supervisor/dns-monitor.log`。
|
||||
|
||||
镜像构建阶段会安装 `supervisor`/`net-tools`/`inetutils-ping`/`vim` 等基础工具,并在运行前把 apt 源切换到内网镜像,方便容器内进一步运维。
|
||||
|
||||
## REST API 详解
|
||||
|
||||
基础路径:`/api/v1/master`,全部返回 JSON。
|
||||
|
1
src/master/build/dns-monitor.sh
Symbolic link
1
src/master/build/dns-monitor.sh
Symbolic link
@ -0,0 +1 @@
|
||||
../../bind/build/dns-monitor.sh
|
14
src/master/build/start-master.sh
Executable file
14
src/master/build/start-master.sh
Executable file
@ -0,0 +1,14 @@
|
||||
#!/usr/bin/env bash
#
# Launch the master application under Gunicorn.
#
# Environment (all optional):
#   GUNICORN_WORKERS     value passed to --workers          (default: 4)
#   GUNICORN_BIND        value passed to --bind             (default: 0.0.0.0:3000)
#   GUNICORN_EXTRA_ARGS  extra options, split on whitespace (default: none)
set -euo pipefail

BIND_ADDR=${GUNICORN_BIND:-0.0.0.0:3000}
WORKERS=${GUNICORN_WORKERS:-4}
EXTRA_OPTS=${GUNICORN_EXTRA_ARGS:-}

# Start from an empty option list and fill it only when extra options were
# supplied; each whitespace-separated word becomes one argument.
EXTRA_ARRAY=()
if [[ -n "$EXTRA_OPTS" ]]; then
  read -r -a EXTRA_ARRAY <<< "$EXTRA_OPTS"
fi

# exec replaces this shell with Gunicorn, so no wrapper process stays behind.
exec gunicorn \
  --bind "$BIND_ADDR" \
  --workers "$WORKERS" \
  "${EXTRA_ARRAY[@]}" \
  "app:create_app()"
|
39
src/master/build/supervisord.conf
Normal file
39
src/master/build/supervisord.conf
Normal file
@ -0,0 +1,39 @@
|
||||
; supervisord configuration for the master image.
; Two programs are managed: the master service and the DNS monitor. Both are
; started automatically and restarted whenever they exit.

[supervisord]
; Stay in the foreground instead of daemonizing.
nodaemon=true
user=root
pidfile=/var/run/supervisord.pid
logfile=/var/log/supervisor/supervisord.log

[program:master]
; Launcher script for the master application.
command=/usr/local/bin/start-master.sh
user=root
autostart=true
autorestart=true
; Must stay up 5 s to count as started; allow 30 s for a graceful stop.
startsecs=5
stopwaitsecs=30
; Send stop/kill signals to the whole process group, not only the launcher.
stopasgroup=true
killasgroup=true
stdout_logfile=/var/log/supervisor/master.log
stderr_logfile=/var/log/supervisor/master_error.log

[program:dns-monitor]
command=/usr/local/bin/dns-monitor.sh
user=root
autostart=true
autorestart=true
; Must stay up 5 s to count as started; allow 10 s for a graceful stop.
startsecs=5
stopwaitsecs=10
; Send stop/kill signals to the whole process group, not only the script.
stopasgroup=true
killasgroup=true
stdout_logfile=/var/log/supervisor/dns-monitor.log
stderr_logfile=/var/log/supervisor/dns-monitor_error.log

[unix_http_server]
; Control socket, accessible to its owner only.
file=/var/run/supervisor.sock
chmod=0700

[supervisorctl]
; Point supervisorctl at the control socket defined above.
serverurl=unix:///var/run/supervisor.sock

[rpcinterface:supervisor]
; RPC interface that supervisorctl uses to talk to supervisord.
supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface
|
@ -13,21 +13,27 @@ USAGE
|
||||
}
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
|
||||
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../../.." && pwd)"
|
||||
MODULE_ROOT="$PROJECT_ROOT/src/master"
|
||||
IMAGE_TAG="${IMAGE_TAG:-argus-master:dev}"
|
||||
DOCKERFILE="src/master/Dockerfile"
|
||||
BUILD_ARGS=()
|
||||
OFFLINE_MODE=0
|
||||
|
||||
cd "$PROJECT_ROOT"
|
||||
|
||||
while [[ "$#" -gt 0 ]]; do
|
||||
case "$1" in
|
||||
--intranet)
|
||||
INTRANET_INDEX="${INTRANET_INDEX:-https://pypi.tuna.tsinghua.edu.cn/simple}"
|
||||
BUILD_ARGS+=("--build-arg" "PIP_INDEX_URL=${INTRANET_INDEX}")
|
||||
BUILD_ARGS+=("--build-arg" "USE_INTRANET=true")
|
||||
shift
|
||||
;;
|
||||
--offline)
|
||||
OFFLINE_MODE=1
|
||||
BUILD_ARGS+=("--build-arg" "USE_OFFLINE=1")
|
||||
BUILD_ARGS+=("--build-arg" "USE_INTRANET=true")
|
||||
shift
|
||||
;;
|
||||
--tag)
|
||||
@ -48,16 +54,19 @@ while [[ "$#" -gt 0 ]]; do
|
||||
done
|
||||
|
||||
#######################################
# Report whether a directory directly contains at least one wheel file.
# Arguments: $1 - directory to inspect (not searched recursively)
# Returns:   0 if a regular file matching *.whl exists there, 1 otherwise
#######################################
has_offline_wheels() {
  local wheels_dir=$1
  local first_wheel
  # find exits 0 even when nothing matches, so the decision has to be made
  # on its output rather than on its exit status.
  first_wheel=$(find "$wheels_dir" -maxdepth 1 -type f -name '*.whl' -print -quit)
  [[ -n "$first_wheel" ]]
}

# Offline builds need wheels under the module's offline_wheels directory;
# abort before invoking docker when the directory is missing or has no wheels.
if [[ "$OFFLINE_MODE" -eq 1 ]]; then
  WHEELS_DIR="$MODULE_ROOT/offline_wheels"
  if [[ ! -d "$WHEELS_DIR" ]]; then
    echo "[ERROR] offline_wheels 目录不存在: $WHEELS_DIR" >&2
    exit 1
  fi
  if ! has_offline_wheels "$WHEELS_DIR"; then
    echo "[ERROR] offline_wheels 目录为空,请先在有网环境执行 scripts/prepare_offline_wheels.sh" >&2
    exit 1
  fi
fi
|
||||
|
||||
|
||||
|
||||
echo "[INFO] Building image $IMAGE_TAG"
|
||||
docker build "${BUILD_ARGS[@]}" -t "$IMAGE_TAG" "$PROJECT_ROOT"
|
||||
docker build -f "$DOCKERFILE" "${BUILD_ARGS[@]}" -t "$IMAGE_TAG" "$PROJECT_ROOT"
|
||||
echo "[OK] Image $IMAGE_TAG built"
|
||||
|
@ -93,7 +93,6 @@ keys = [
|
||||
"health",
|
||||
"last_report",
|
||||
"agent_last_report",
|
||||
"status",
|
||||
]
|
||||
for key in keys:
|
||||
if before.get(key) != after.get(key):
|
||||
@ -119,13 +118,37 @@ keys = [
|
||||
"health",
|
||||
"last_report",
|
||||
"agent_last_report",
|
||||
"status",
|
||||
]
|
||||
for key in keys:
|
||||
if before.get(key) != after.get(key):
|
||||
raise AssertionError(f"Key {key} changed after restart: {before.get(key)} -> {after.get(key)}")
|
||||
PY
|
||||
|
||||
# Build a status report: the current UTC time (second precision, "Z" suffix)
# plus a healthy log-fluentbit entry.
payload=$(python3 - <<'PY'
import json
from datetime import datetime, timezone

now_utc = datetime.now(timezone.utc)
report = {
    "timestamp": now_utc.strftime("%Y-%m-%dT%H:%M:%SZ"),
    "health": {"log-fluentbit": {"status": "healthy"}},
}
print(json.dumps(report))
PY
)

# PUT the report for the second node; the response body and the HTTP status
# code are written to separate files under TMP_ROOT.
curl -sS -X PUT -H 'Content-Type: application/json' \
  -d "$payload" \
  -o "$TMP_ROOT/restart_second_status.json" -w '%{http_code}' \
  "$API_BASE/nodes/$SECOND_NODE_ID/status" > "$TMP_ROOT/restart_second_status_code"

# Anything other than 200 is fatal: print the response body and stop.
if [[ "$(<"$TMP_ROOT/restart_second_status_code")" != "200" ]]; then
  echo "[ERROR] Failed to restore second node status post-restart" >&2
  cat "$TMP_ROOT/restart_second_status.json" >&2
  exit 1
fi
|
||||
|
||||
sleep 3
|
||||
|
||||
# 对比重启前后的 nodes.json 与统计信息,验证持久化一致性
|
||||
nodes_json_after="$TMP_ROOT/nodes_json_post_restart.json"
|
||||
cp "$PRIVATE_ROOT/argus/metric/prometheus/nodes.json" "$nodes_json_after"
|
||||
|
Loading…
x
Reference in New Issue
Block a user