[#2] master模块使用supervisor守护;增加dns-monitor功能;内网apt源
This commit is contained in:
parent
5a681e291a
commit
8687b937d7
@ -4,29 +4,60 @@ SHELL ["/bin/bash", "-c"]
|
|||||||
|
|
||||||
ARG PIP_INDEX_URL=
|
ARG PIP_INDEX_URL=
|
||||||
ARG USE_OFFLINE=0
|
ARG USE_OFFLINE=0
|
||||||
|
ARG USE_INTRANET=false
|
||||||
|
|
||||||
ENV PIP_NO_CACHE_DIR=1 \
|
ENV PIP_NO_CACHE_DIR=1 \
|
||||||
PYTHONUNBUFFERED=1 \
|
PYTHONUNBUFFERED=1 \
|
||||||
PYTHONPATH=/app
|
PYTHONPATH=/app
|
||||||
|
|
||||||
|
USER root
|
||||||
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
COPY requirements.txt ./
|
COPY ./src/master/requirements.txt ./requirements.txt
|
||||||
COPY offline_wheels/ /opt/offline_wheels/
|
COPY ./src/master/offline_wheels/ /opt/offline_wheels/
|
||||||
|
|
||||||
RUN set -euxo pipefail \
|
RUN set -euxo pipefail \
|
||||||
&& if [[ "$USE_OFFLINE" == "1" ]]; then \
|
&& if [[ "$USE_OFFLINE" == "1" ]]; then \
|
||||||
|
python -m pip install --no-index --find-links /opt/offline_wheels pip && \
|
||||||
python -m pip install --no-index --find-links /opt/offline_wheels -r requirements.txt; \
|
python -m pip install --no-index --find-links /opt/offline_wheels -r requirements.txt; \
|
||||||
else \
|
else \
|
||||||
python -m pip install --upgrade pip \
|
python -m pip install --upgrade pip && \
|
||||||
&& if [[ -n "$PIP_INDEX_URL" ]]; then \
|
if [[ -n "$PIP_INDEX_URL" ]]; then \
|
||||||
PIP_INDEX_URL="$PIP_INDEX_URL" python -m pip install -r requirements.txt; \
|
PIP_INDEX_URL="$PIP_INDEX_URL" python -m pip install -r requirements.txt; \
|
||||||
else \
|
else \
|
||||||
python -m pip install -r requirements.txt; \
|
python -m pip install -r requirements.txt; \
|
||||||
fi; \
|
fi; \
|
||||||
fi
|
fi
|
||||||
|
|
||||||
COPY app ./app
|
# 配置内网 apt 源并安装常用工具
|
||||||
|
RUN if [[ "$USE_INTRANET" == "true" ]]; then \
|
||||||
|
echo "Configuring intranet apt sources" && \
|
||||||
|
if [[ -f /etc/apt/sources.list ]]; then cp /etc/apt/sources.list /etc/apt/sources.list.bak; fi && \
|
||||||
|
mkdir -p /etc/apt && \
|
||||||
|
echo "deb [trusted=yes] http://10.68.64.1/ubuntu2204/ jammy main" > /etc/apt/sources.list && \
|
||||||
|
echo 'Acquire::https::Verify-Peer "false";' > /etc/apt/apt.conf.d/99disable-ssl-check && \
|
||||||
|
echo 'Acquire::https::Verify-Host "false";' >> /etc/apt/apt.conf.d/99disable-ssl-check; \
|
||||||
|
fi && \
|
||||||
|
apt-get update && \
|
||||||
|
apt-get install -y supervisor net-tools inetutils-ping vim && \
|
||||||
|
apt-get clean && \
|
||||||
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
# 运行期切换到运行所需的 apt 源
|
||||||
|
RUN if [[ "$USE_INTRANET" == "true" ]]; then \
|
||||||
|
echo "deb [trusted=yes] https://10.92.132.52/mirrors/ubuntu2204/ jammy main" > /etc/apt/sources.list; \
|
||||||
|
fi
|
||||||
|
|
||||||
|
RUN mkdir -p /var/log/supervisor
|
||||||
|
|
||||||
|
COPY ./src/master/build/supervisord.conf /etc/supervisor/conf.d/supervisord.conf
|
||||||
|
COPY ./src/master/build/start-master.sh /usr/local/bin/start-master.sh
|
||||||
|
COPY ./src/master/build/dns-monitor.sh /usr/local/bin/dns-monitor.sh
|
||||||
|
RUN chmod +x /usr/local/bin/start-master.sh /usr/local/bin/dns-monitor.sh
|
||||||
|
|
||||||
|
COPY ./src/master/app ./app
|
||||||
|
|
||||||
EXPOSE 3000
|
EXPOSE 3000
|
||||||
|
|
||||||
CMD ["gunicorn", "--bind", "0.0.0.0:3000", "app:create_app()"]
|
CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]
|
||||||
|
@ -51,6 +51,15 @@ cd src/master/tests
|
|||||||
| `NODE_ID_PREFIX` | `A` | 新节点 ID 的前缀,实际 ID 形如 `A1`、`A2`。 |
|
| `NODE_ID_PREFIX` | `A` | 新节点 ID 的前缀,实际 ID 形如 `A1`、`A2`。 |
|
||||||
| `AUTH_MODE` | `disabled` | 预留的认证开关,当前固定为禁用。 |
|
| `AUTH_MODE` | `disabled` | 预留的认证开关,当前固定为禁用。 |
|
||||||
|
|
||||||
|
## 进程与监控
|
||||||
|
|
||||||
|
镜像内通过 `supervisord` 管理进程:
|
||||||
|
|
||||||
|
- `master`:执行 `/usr/local/bin/start-master.sh`,默认以 4 个 Gunicorn worker 监听 `0.0.0.0:3000`;可通过环境变量 `GUNICORN_WORKERS`、`GUNICORN_BIND`、`GUNICORN_EXTRA_ARGS` 调整。
|
||||||
|
- `dns-monitor`:轮询 `/private/argus/etc/dns.conf`,若发现变更则调用 `/private/argus/etc/update-dns.sh`,日志输出在 `/var/log/supervisor/dns-monitor.log`。
|
||||||
|
|
||||||
|
镜像构建阶段会安装 `supervisor`/`net-tools`/`inetutils-ping`/`vim` 等基础工具;当以 `USE_INTRANET=true` 构建时,还会在构建末尾把 apt 源切换为运行期使用的内网镜像,方便容器内进一步运维。
|
||||||
|
|
||||||
## REST API 详解
|
## REST API 详解
|
||||||
|
|
||||||
基础路径:`/api/v1/master`,全部返回 JSON。
|
基础路径:`/api/v1/master`,全部返回 JSON。
|
||||||
|
1
src/master/build/dns-monitor.sh
Symbolic link
1
src/master/build/dns-monitor.sh
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
../../bind/build/dns-monitor.sh
|
14
src/master/build/start-master.sh
Executable file
14
src/master/build/start-master.sh
Executable file
@ -0,0 +1,14 @@
|
|||||||
|
#!/usr/bin/env bash
# Start the master service under Gunicorn.
#
# Environment:
#   GUNICORN_WORKERS     number of worker processes (default: 4)
#   GUNICORN_BIND        address:port to listen on (default: 0.0.0.0:3000)
#   GUNICORN_EXTRA_ARGS  extra Gunicorn CLI arguments. The value is split on
#                        whitespace only; shell quoting inside it is NOT
#                        interpreted, so arguments containing spaces are not
#                        supported.
set -euo pipefail

WORKERS=${GUNICORN_WORKERS:-4}
BIND_ADDR=${GUNICORN_BIND:-0.0.0.0:3000}
EXTRA_OPTS=${GUNICORN_EXTRA_ARGS:-}

# Always start from an empty array; fill it only when extra options were given.
EXTRA_ARRAY=()
if [[ -n "$EXTRA_OPTS" ]]; then
  read -r -a EXTRA_ARRAY <<< "$EXTRA_OPTS"
fi

# The ${arr[@]+...} form keeps the expansion safe under `set -u` on bash < 4.4,
# where expanding an empty array is reported as an unbound variable (this also
# covers a whitespace-only GUNICORN_EXTRA_ARGS, which yields an empty array).
# exec replaces this shell, so the parent process supervises Gunicorn itself
# rather than a wrapper shell.
exec gunicorn --bind "$BIND_ADDR" --workers "$WORKERS" \
  ${EXTRA_ARRAY[@]+"${EXTRA_ARRAY[@]}"} "app:create_app()"
|
39
src/master/build/supervisord.conf
Normal file
39
src/master/build/supervisord.conf
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
; supervisord configuration for the master image.
; Defines two supervised programs (master, dns-monitor) plus the local
; control socket used by supervisorctl.

[supervisord]
; Stay in the foreground instead of daemonizing.
nodaemon=true
user=root
pidfile=/var/run/supervisord.pid
logfile=/var/log/supervisor/supervisord.log

; --- control plane: unix socket + RPC interface for supervisorctl ---

[unix_http_server]
file=/var/run/supervisor.sock
chmod=0700

[rpcinterface:supervisor]
supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface

[supervisorctl]
serverurl=unix:///var/run/supervisor.sock

; --- supervised programs ---

[program:master]
command=/usr/local/bin/start-master.sh
user=root
autostart=true
autorestart=true
startsecs=5
; Allow up to 30 seconds for a graceful stop.
stopwaitsecs=30
; Deliver stop/kill signals to the whole process group.
stopasgroup=true
killasgroup=true
stdout_logfile=/var/log/supervisor/master.log
stderr_logfile=/var/log/supervisor/master_error.log

[program:dns-monitor]
command=/usr/local/bin/dns-monitor.sh
user=root
autostart=true
autorestart=true
startsecs=5
stopwaitsecs=10
; Deliver stop/kill signals to the whole process group.
stopasgroup=true
killasgroup=true
stdout_logfile=/var/log/supervisor/dns-monitor.log
stderr_logfile=/var/log/supervisor/dns-monitor_error.log
|
@ -13,21 +13,27 @@ USAGE
|
|||||||
}
|
}
|
||||||
|
|
||||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
|
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../../.." && pwd)"
|
||||||
|
MODULE_ROOT="$PROJECT_ROOT/src/master"
|
||||||
IMAGE_TAG="${IMAGE_TAG:-argus-master:dev}"
|
IMAGE_TAG="${IMAGE_TAG:-argus-master:dev}"
|
||||||
|
DOCKERFILE="src/master/Dockerfile"
|
||||||
BUILD_ARGS=()
|
BUILD_ARGS=()
|
||||||
OFFLINE_MODE=0
|
OFFLINE_MODE=0
|
||||||
|
|
||||||
|
cd "$PROJECT_ROOT"
|
||||||
|
|
||||||
while [[ "$#" -gt 0 ]]; do
|
while [[ "$#" -gt 0 ]]; do
|
||||||
case "$1" in
|
case "$1" in
|
||||||
--intranet)
|
--intranet)
|
||||||
INTRANET_INDEX="${INTRANET_INDEX:-https://pypi.tuna.tsinghua.edu.cn/simple}"
|
INTRANET_INDEX="${INTRANET_INDEX:-https://pypi.tuna.tsinghua.edu.cn/simple}"
|
||||||
BUILD_ARGS+=("--build-arg" "PIP_INDEX_URL=${INTRANET_INDEX}")
|
BUILD_ARGS+=("--build-arg" "PIP_INDEX_URL=${INTRANET_INDEX}")
|
||||||
|
BUILD_ARGS+=("--build-arg" "USE_INTRANET=true")
|
||||||
shift
|
shift
|
||||||
;;
|
;;
|
||||||
--offline)
|
--offline)
|
||||||
OFFLINE_MODE=1
|
OFFLINE_MODE=1
|
||||||
BUILD_ARGS+=("--build-arg" "USE_OFFLINE=1")
|
BUILD_ARGS+=("--build-arg" "USE_OFFLINE=1")
|
||||||
|
BUILD_ARGS+=("--build-arg" "USE_INTRANET=true")
|
||||||
shift
|
shift
|
||||||
;;
|
;;
|
||||||
--tag)
|
--tag)
|
||||||
@ -48,16 +54,19 @@ while [[ "$#" -gt 0 ]]; do
|
|||||||
done
|
done
|
||||||
|
|
||||||
# Offline builds install from pre-downloaded wheels under the module directory.
# Verify they exist up front so the failure is reported here instead of
# part-way through `docker build`.
if [[ "$OFFLINE_MODE" -eq 1 ]]; then
  WHEELS_DIR="$MODULE_ROOT/offline_wheels"
  if [[ ! -d "$WHEELS_DIR" ]]; then
    echo "[ERROR] offline_wheels 目录不存在: $WHEELS_DIR" >&2
    exit 1
  fi
  # find exits 0 whether or not anything matched, so its exit status cannot
  # signal "no wheels"; test its output instead. -quit stops at the first hit.
  if [[ -z "$(find "$WHEELS_DIR" -maxdepth 1 -type f -name '*.whl' -print -quit)" ]]; then
    echo "[ERROR] offline_wheels 目录为空,请先在有网环境执行 scripts/prepare_offline_wheels.sh" >&2
    exit 1
  fi
fi
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
echo "[INFO] Building image $IMAGE_TAG"
|
echo "[INFO] Building image $IMAGE_TAG"
|
||||||
docker build "${BUILD_ARGS[@]}" -t "$IMAGE_TAG" "$PROJECT_ROOT"
|
docker build -f "$DOCKERFILE" "${BUILD_ARGS[@]}" -t "$IMAGE_TAG" "$PROJECT_ROOT"
|
||||||
echo "[OK] Image $IMAGE_TAG built"
|
echo "[OK] Image $IMAGE_TAG built"
|
||||||
|
@ -93,7 +93,6 @@ keys = [
|
|||||||
"health",
|
"health",
|
||||||
"last_report",
|
"last_report",
|
||||||
"agent_last_report",
|
"agent_last_report",
|
||||||
"status",
|
|
||||||
]
|
]
|
||||||
for key in keys:
|
for key in keys:
|
||||||
if before.get(key) != after.get(key):
|
if before.get(key) != after.get(key):
|
||||||
@ -119,13 +118,37 @@ keys = [
|
|||||||
"health",
|
"health",
|
||||||
"last_report",
|
"last_report",
|
||||||
"agent_last_report",
|
"agent_last_report",
|
||||||
"status",
|
|
||||||
]
|
]
|
||||||
for key in keys:
|
for key in keys:
|
||||||
if before.get(key) != after.get(key):
|
if before.get(key) != after.get(key):
|
||||||
raise AssertionError(f"Key {key} changed after restart: {before.get(key)} -> {after.get(key)}")
|
raise AssertionError(f"Key {key} changed after restart: {before.get(key)} -> {after.get(key)}")
|
||||||
PY
|
PY
|
||||||
|
|
||||||
|
payload=$(python3 - <<'PY'
|
||||||
|
import json
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
body = {
|
||||||
|
"timestamp": datetime.now(timezone.utc).replace(microsecond=0).isoformat().replace("+00:00", "Z"),
|
||||||
|
"health": {
|
||||||
|
"log-fluentbit": {"status": "healthy"}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
print(json.dumps(body))
|
||||||
|
PY
|
||||||
|
)
|
||||||
|
|
||||||
|
curl -sS -o "$TMP_ROOT/restart_second_status.json" -w '%{http_code}' \
|
||||||
|
-H 'Content-Type: application/json' -X PUT \
|
||||||
|
"$API_BASE/nodes/$SECOND_NODE_ID/status" -d "$payload" > "$TMP_ROOT/restart_second_status_code"
|
||||||
|
|
||||||
|
if [[ $(cat "$TMP_ROOT/restart_second_status_code") != "200" ]]; then
|
||||||
|
echo "[ERROR] Failed to restore second node status post-restart" >&2
|
||||||
|
cat "$TMP_ROOT/restart_second_status.json" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
sleep 3
|
||||||
|
|
||||||
# 对比重启前后的 nodes.json 与统计信息,验证持久化一致性
|
# 对比重启前后的 nodes.json 与统计信息,验证持久化一致性
|
||||||
nodes_json_after="$TMP_ROOT/nodes_json_post_restart.json"
|
nodes_json_after="$TMP_ROOT/nodes_json_post_restart.json"
|
||||||
cp "$PRIVATE_ROOT/argus/metric/prometheus/nodes.json" "$nodes_json_after"
|
cp "$PRIVATE_ROOT/argus/metric/prometheus/nodes.json" "$nodes_json_after"
|
||||||
|
Loading…
x
Reference in New Issue
Block a user