diff --git a/build/build_images_for_arm.sh b/build/build_images_for_arm.sh index 61144f5..d6f6a98 100755 --- a/build/build_images_for_arm.sh +++ b/build/build_images_for_arm.sh @@ -855,7 +855,7 @@ if [[ "$build_web" == true || "$build_alert" == true ]]; then ) for build_spec in "${alert_builds[@]}"; do IFS='|' read -r image_label dockerfile_path image_tag build_context <<< "$build_spec" - if build_image "$image_label" "$dockerfile_path" "$image_tag" "$build_context"; then + if build_image "$image_label" "$dockerfile_path" "$image_tag" "$build_context" --build-arg ALERTMANAGER_ARCH=arm64; then images_built+=("$image_tag") else build_failed=true diff --git a/deployment_new/templates/client_arm/scripts/config.sh b/deployment_new/templates/client_arm/scripts/config.sh index 517043e..5d2355e 100644 --- a/deployment_new/templates/client_arm/scripts/config.sh +++ b/deployment_new/templates/client_arm/scripts/config.sh @@ -39,39 +39,29 @@ check_disk "/var/lib/docker" || true # 导入 cluster-info.env(默认取当前包根,也可用 CLUSTER_INFO 指定路径) CI_IN="${CLUSTER_INFO:-$PKG_ROOT/cluster-info.env}" -info "读取 cluster-info.env: $CI_IN" -[[ -f "$CI_IN" ]] || { err "找不到 cluster-info.env(默认当前包根,或设置环境变量 CLUSTER_INFO 指定绝对路径)"; exit 1; } -set -a; source "$CI_IN"; set +a -[[ -n "${SWARM_MANAGER_ADDR:-}" && -n "${SWARM_JOIN_TOKEN_WORKER:-}" ]] || { err "cluster-info.env 缺少 SWARM 信息(SWARM_MANAGER_ADDR/SWARM_JOIN_TOKEN_WORKER)"; exit 1; } - -# 加入 Swarm(幂等) -info "加入 Swarm(幂等):$SWARM_MANAGER_ADDR" -docker swarm join --token "$SWARM_JOIN_TOKEN_WORKER" "$SWARM_MANAGER_ADDR":2377 >/dev/null 2>&1 || true - -# 导入 busybox 并做 overlay 预热与连通性 NET_NAME="${ARGUS_OVERLAY_NET:-argus-sys-net}" -if ! docker image inspect busybox:latest >/dev/null 2>&1; then - if [[ -f "$PKG_ROOT/images/busybox.tar" ]]; then - info "加载 busybox.tar 以预热 overlay" - docker load -i "$PKG_ROOT/images/busybox.tar" >/dev/null - else - err "缺少 busybox 镜像(包内 images/busybox.tar 或本地 busybox:latest),无法预热 overlay $NET_NAME"; exit 1 + +SWARM_AVAILABLE=true + +if [[ -f "$CI_IN" ]]; then + info "读取 cluster-info.env: $CI_IN" + set -a; source "$CI_IN"; set +a + + # 尝试加入 Swarm(幂等),允许 join-token 为空 + if [[ -n "${SWARM_MANAGER_ADDR:-}" && -n "${SWARM_JOIN_TOKEN_WORKER:-}" ]]; then + info "尝试加入 Swarm(幂等):$SWARM_MANAGER_ADDR" + docker swarm join --token "$SWARM_JOIN_TOKEN_WORKER" "$SWARM_MANAGER_ADDR":2377 >/dev/null 2>&1 || true fi -fi -# 预热容器(worker 侧加入 overlay 以便本地可见) -docker rm -f argus-net-warmup >/dev/null 2>&1 || true -info "启动 warmup 容器加入 overlay: $NET_NAME" -docker run -d --rm --name argus-net-warmup --network "$NET_NAME" busybox:latest sleep 600 >/dev/null 2>&1 || true -for i in {1..60}; do docker network inspect "$NET_NAME" >/dev/null 2>&1 && { info "overlay 可见 (t=${i}s)"; break; }; sleep 1; done -docker network inspect "$NET_NAME" >/dev/null 2>&1 || { err "预热后仍未看到 overlay: $NET_NAME;请确认 server 侧 overlay 已创建且可达"; exit 1; } - -# 通过 warmup 容器测试实际数据通路(alias → master) -if ! docker exec argus-net-warmup sh -lc "ping -c 1 -W 2 master.argus.com >/dev/null 2>&1"; then - err "warmup 容器内无法通过别名访问 master.argus.com;请确认 server compose 已启动并加入 overlay $NET_NAME" - exit 1 + # 检查当前节点 Swarm 状态 + if ! docker info 2>/dev/null | grep -q "Swarm: active"; then + info "检测到当前节点 Swarm 未启用,client_arm 将退回本地 network 模式" + SWARM_AVAILABLE=false + fi +else + info "未找到 cluster-info.env($CI_IN),client_arm 将以本地 network 模式运行" + SWARM_AVAILABLE=false fi -info "warmup 容器内可达 master.argus.com(Docker DNS + alias 正常)" # 生成/更新 .env(保留人工填写项,不覆盖已有键) if [[ ! -f "$ENV_OUT" ]]; then @@ -80,14 +70,59 @@ fi set_kv(){ local k="$1" v="$2"; if grep -q "^${k}=" "$ENV_OUT"; then sed -i -E "s#^${k}=.*#${k}=${v}#" "$ENV_OUT"; else echo "${k}=${v}" >> "$ENV_OUT"; fi } -# 写入与 Swarm 相关的字段,便于后续诊断 -set_kv SWARM_MANAGER_ADDR "${SWARM_MANAGER_ADDR:-}" -set_kv SWARM_JOIN_TOKEN_WORKER "${SWARM_JOIN_TOKEN_WORKER:-}" -set_kv SWARM_JOIN_TOKEN_MANAGER "${SWARM_JOIN_TOKEN_MANAGER:-}" +if [[ "$SWARM_AVAILABLE" == true ]]; then + # ===== Swarm 模式:强依赖 overlay 网络和 DNS alias ===== + # 导入 busybox 并做 overlay 预热与连通性 + if ! docker image inspect busybox:latest >/dev/null 2>&1; then + if [[ -f "$PKG_ROOT/images/busybox.tar" ]]; then + info "加载 busybox.tar 以预热 overlay" + docker load -i "$PKG_ROOT/images/busybox.tar" >/dev/null + else + err "缺少 busybox 镜像(包内 images/busybox.tar 或本地 busybox:latest),无法预热 overlay $NET_NAME"; exit 1 + fi + fi -# 若未显式设置 MASTER_ENDPOINT,则默认走 overlay 别名 -if ! grep -q '^MASTER_ENDPOINT=' "$ENV_OUT"; then - echo "MASTER_ENDPOINT=http://master.argus.com:3000" >> "$ENV_OUT" + docker rm -f argus-net-warmup >/dev/null 2>&1 || true + info "启动 warmup 容器加入 overlay: $NET_NAME" + docker run -d --rm --name argus-net-warmup --network "$NET_NAME" busybox:latest sleep 600 >/dev/null 2>&1 || true + for i in {1..60}; do docker network inspect "$NET_NAME" >/dev/null 2>&1 && { info "overlay 可见 (t=${i}s)"; break; }; sleep 1; done + docker network inspect "$NET_NAME" >/dev/null 2>&1 || { err "预热后仍未看到 overlay: $NET_NAME;请确认 server 侧 overlay 已创建且可达"; exit 1; } + + if ! docker exec argus-net-warmup sh -lc "ping -c 1 -W 2 master.argus.com >/dev/null 2>&1"; then + err "warmup 容器内无法通过别名访问 master.argus.com;请确认 server compose 已启动并加入 overlay $NET_NAME" + exit 1 + fi + info "warmup 容器内可达 master.argus.com(Docker DNS + alias 正常)" + + # 写入与 Swarm 相关的字段,便于后续诊断 + set_kv SWARM_MANAGER_ADDR "${SWARM_MANAGER_ADDR:-}" + set_kv SWARM_JOIN_TOKEN_WORKER "${SWARM_JOIN_TOKEN_WORKER:-}" + set_kv SWARM_JOIN_TOKEN_MANAGER "${SWARM_JOIN_TOKEN_MANAGER:-}" + + # 若未显式设置 MASTER_ENDPOINT,则默认走 overlay 别名 + if ! grep -q '^MASTER_ENDPOINT=' "$ENV_OUT"; then + echo "MASTER_ENDPOINT=http://master.argus.com:3000" >> "$ENV_OUT" + fi +else + # ===== 本地 network 退化模式:不依赖 Swarm 和 overlay ===== + if ! docker network inspect "$NET_NAME" >/dev/null 2>&1; then + info "创建本地 bridge 网络: $NET_NAME(client_arm 退化模式)" + docker network create "$NET_NAME" >/dev/null + else + info "本地网络已存在: $NET_NAME" + fi + + # 若未显式设置 MASTER_ENDPOINT,则提示用户后续手动配置 + if ! grep -q '^MASTER_ENDPOINT=' "$ENV_OUT"; then + echo "MASTER_ENDPOINT=" >> "$ENV_OUT" + fi + + ME=$(grep -E '^MASTER_ENDPOINT=' "$ENV_OUT" | head -1 | cut -d= -f2-) + if [[ -z "$ME" ]]; then + err "本地 network 模式下必须配置 MASTER_ENDPOINT(示例:http://:),请编辑 compose/.env 后重试" + else + info "当前为本地 network 模式,metric-cpu-node 将通过 MASTER_ENDPOINT=${ME} 访问 master" + fi fi # 检查用户必须填写的字段 diff --git a/deployment_new/templates/client_arm/scripts_for_x86/config.sh b/deployment_new/templates/client_arm/scripts_for_x86/config.sh index 4bd661f..2f91fc2 100644 --- a/deployment_new/templates/client_arm/scripts_for_x86/config.sh +++ b/deployment_new/templates/client_arm/scripts_for_x86/config.sh @@ -33,39 +33,28 @@ check_disk(){ local p="$1"; local need=5120; local free check_disk "$PKG_ROOT" || true check_disk "/var/lib/docker" || true -# 导入 cluster-info.env CI_IN="${CLUSTER_INFO:-$PKG_ROOT/cluster-info.env}" -info "读取 cluster-info.env: $CI_IN" -[[ -f "$CI_IN" ]] || { err "找不到 cluster-info.env(默认当前包根,或设置环境变量 CLUSTER_INFO 指定绝对路径)"; exit 1; } -set -a; source "$CI_IN"; set +a -[[ -n "${SWARM_MANAGER_ADDR:-}" && -n "${SWARM_JOIN_TOKEN_WORKER:-}" ]] || { err "cluster-info.env 缺少 SWARM 信息(SWARM_MANAGER_ADDR/SWARM_JOIN_TOKEN_WORKER)"; exit 1; } - -# 加入 Swarm(幂等) -info "加入 Swarm(幂等):$SWARM_MANAGER_ADDR" -docker swarm join --token "$SWARM_JOIN_TOKEN_WORKER" "$SWARM_MANAGER_ADDR":2377 >/dev/null 2>&1 || true - -# 导入 busybox 并做 overlay 预热与连通性 NET_NAME="${ARGUS_OVERLAY_NET:-argus-sys-net}" -if ! docker image inspect busybox:latest >/dev/null 2>&1; then - if [[ -f "$PKG_ROOT/images/busybox.tar" ]]; then - info "加载 busybox.tar 以预热 overlay" - docker load -i "$PKG_ROOT/images/busybox.tar" >/dev/null - else - err "缺少 busybox 镜像(包内 images/busybox.tar 或本地 busybox:latest),无法预热 overlay $NET_NAME"; exit 1 + +SWARM_AVAILABLE=true + +if [[ -f "$CI_IN" ]]; then + info "读取 cluster-info.env: $CI_IN" + set -a; source "$CI_IN"; set +a + + if [[ -n "${SWARM_MANAGER_ADDR:-}" && -n "${SWARM_JOIN_TOKEN_WORKER:-}" ]]; then + info "加入 Swarm(幂等):$SWARM_MANAGER_ADDR" + docker swarm join --token "$SWARM_JOIN_TOKEN_WORKER" "$SWARM_MANAGER_ADDR":2377 >/dev/null 2>&1 || true fi -fi -docker rm -f argus-net-warmup >/dev/null 2>&1 || true -info "启动 warmup 容器加入 overlay: $NET_NAME" -docker run -d --rm --name argus-net-warmup --network "$NET_NAME" busybox:latest sleep 600 >/dev/null 2>&1 || true -for i in {1..60}; do docker network inspect "$NET_NAME" >/dev/null 2>&1 && { info "overlay 可见 (t=${i}s)"; break; }; sleep 1; done -docker network inspect "$NET_NAME" >/dev/null 2>&1 || { err "预热后仍未看到 overlay: $NET_NAME;请确认 server 侧 overlay 已创建且可达"; exit 1; } - -if ! docker exec argus-net-warmup sh -lc "ping -c 1 -W 2 master.argus.com >/dev/null 2>&1"; then - err "warmup 容器内无法通过别名访问 master.argus.com;请确认 server compose 已启动并加入 overlay $NET_NAME" - exit 1 + if ! docker info 2>/dev/null | grep -q "Swarm: active"; then + info "检测到当前节点 Swarm 未启用,client_arm 将退回本地 network 模式" + SWARM_AVAILABLE=false + fi +else + info "未找到 cluster-info.env($CI_IN),client_arm 将以本地 network 模式运行" + SWARM_AVAILABLE=false fi -info "warmup 容器内可达 master.argus.com(Docker DNS + alias 正常)" if [[ ! -f "$ENV_OUT" ]]; then cp "$ENV_EX" "$ENV_OUT" @@ -73,12 +62,53 @@ fi set_kv(){ local k="$1" v="$2"; if grep -q "^${k}=" "$ENV_OUT"; then sed -i -E "s#^${k}=.*#${k}=${v}#" "$ENV_OUT"; else echo "${k}=${v}" >> "$ENV_OUT"; fi } -set_kv SWARM_MANAGER_ADDR "${SWARM_MANAGER_ADDR:-}" -set_kv SWARM_JOIN_TOKEN_WORKER "${SWARM_JOIN_TOKEN_WORKER:-}" -set_kv SWARM_JOIN_TOKEN_MANAGER "${SWARM_JOIN_TOKEN_MANAGER:-}" +if [[ "$SWARM_AVAILABLE" == true ]]; then + if ! docker image inspect busybox:latest >/dev/null 2>&1; then + if [[ -f "$PKG_ROOT/images/busybox.tar" ]]; then + info "加载 busybox.tar 以预热 overlay" + docker load -i "$PKG_ROOT/images/busybox.tar" >/dev/null + else + err "缺少 busybox 镜像(包内 images/busybox.tar 或本地 busybox:latest),无法预热 overlay $NET_NAME"; exit 1 + fi + fi -if ! grep -q '^MASTER_ENDPOINT=' "$ENV_OUT"; then - echo "MASTER_ENDPOINT=http://master.argus.com:3000" >> "$ENV_OUT" + docker rm -f argus-net-warmup >/dev/null 2>&1 || true + info "启动 warmup 容器加入 overlay: $NET_NAME" + docker run -d --rm --name argus-net-warmup --network "$NET_NAME" busybox:latest sleep 600 >/dev/null 2>&1 || true + for i in {1..60}; do docker network inspect "$NET_NAME" >/dev/null 2>&1 && { info "overlay 可见 (t=${i}s)"; break; }; sleep 1; done + docker network inspect "$NET_NAME" >/dev/null 2>&1 || { err "预热后仍未看到 overlay: $NET_NAME;请确认 server 侧 overlay 已创建且可达"; exit 1; } + + if ! docker exec argus-net-warmup sh -lc "ping -c 1 -W 2 master.argus.com >/dev/null 2>&1"; then + err "warmup 容器内无法通过别名访问 master.argus.com;请确认 server compose 已启动并加入 overlay $NET_NAME" + exit 1 + fi + info "warmup 容器内可达 master.argus.com(Docker DNS + alias 正常)" + + set_kv SWARM_MANAGER_ADDR "${SWARM_MANAGER_ADDR:-}" + set_kv SWARM_JOIN_TOKEN_WORKER "${SWARM_JOIN_TOKEN_WORKER:-}" + set_kv SWARM_JOIN_TOKEN_MANAGER "${SWARM_JOIN_TOKEN_MANAGER:-}" + + if ! grep -q '^MASTER_ENDPOINT=' "$ENV_OUT"; then + echo "MASTER_ENDPOINT=http://master.argus.com:3000" >> "$ENV_OUT" + fi +else + if ! docker network inspect "$NET_NAME" >/dev/null 2>&1; then + info "创建本地 bridge 网络: $NET_NAME(client_arm 退化模式)" + docker network create "$NET_NAME" >/dev/null + else + info "本地网络已存在: $NET_NAME" + fi + + if ! grep -q '^MASTER_ENDPOINT=' "$ENV_OUT"; then + echo "MASTER_ENDPOINT=" >> "$ENV_OUT" + fi + + ME=$(grep -E '^MASTER_ENDPOINT=' "$ENV_OUT" | head -1 | cut -d= -f2-) + if [[ -z "$ME" ]]; then + err "本地 network 模式下必须配置 MASTER_ENDPOINT(示例:http://:),请编辑 compose/.env 后重试" + else + info "当前为本地 network 模式,metric-cpu-node 将通过 MASTER_ENDPOINT=${ME} 访问 master" + fi fi REQ_VARS=(AGENT_ENV AGENT_USER AGENT_INSTANCE CPU_NODE_HOSTNAME) @@ -93,4 +123,3 @@ if [[ ${#missing[@]} -gt 0 ]]; then fi info "已生成 compose/.env;可执行 scripts_for_x86/install.sh 启动 ARM Client-CPU" - diff --git a/deployment_new/templates/server_arm/compose/docker-compose.yml b/deployment_new/templates/server_arm/compose/docker-compose.yml index 820bc57..1d332b8 100644 --- a/deployment_new/templates/server_arm/compose/docker-compose.yml +++ b/deployment_new/templates/server_arm/compose/docker-compose.yml @@ -9,6 +9,8 @@ services: image: ${MASTER_IMAGE_TAG:-argus-master-arm64:${PKG_VERSION}} platform: linux/arm64 container_name: argus-master-sys + security_opt: + - seccomp:unconfined environment: - OFFLINE_THRESHOLD_SECONDS=180 - ONLINE_THRESHOLD_SECONDS=120 @@ -150,4 +152,3 @@ services: - "${WEB_PROXY_PORT_8084:-8084}:8084" - "${WEB_PROXY_PORT_8085:-8085}:8085" restart: unless-stopped - diff --git a/deployment_new/templates/server_arm/scripts/config.sh b/deployment_new/templates/server_arm/scripts/config.sh index 60caf08..2997219 100644 --- a/deployment_new/templates/server_arm/scripts/config.sh +++ b/deployment_new/templates/server_arm/scripts/config.sh @@ -39,14 +39,10 @@ cp "$ENV_EX" "$ENV_OUT" # 读取/生成 SWARM_MANAGER_ADDR(ARM 下同样引导 Swarm + cluster-info) SWARM_MANAGER_ADDR=${SWARM_MANAGER_ADDR:-} if [[ -z "${SWARM_MANAGER_ADDR}" ]]; then - read -rp "请输入本机管理地址 SWARM_MANAGER_ADDR: " SWARM_MANAGER_ADDR + read -rp "请输入本机管理地址 SWARM_MANAGER_ADDR(可填当前主机会被其他节点访问到的 IP 或主机名): " SWARM_MANAGER_ADDR fi info "SWARM_MANAGER_ADDR=$SWARM_MANAGER_ADDR" -# 校验 IP 属于本机网卡 -if ! ip -o addr | awk '{print $4}' | cut -d'/' -f1 | grep -qx "$SWARM_MANAGER_ADDR"; then - err "SWARM_MANAGER_ADDR 非本机地址: $SWARM_MANAGER_ADDR"; exit 1; fi - info "开始分配服务端口(起始=20000,避免系统占用与相互冲突)" is_port_used(){ local p="$1"; ss -tulnH 2>/dev/null | awk '{print $5}' | sed 's/.*://g' | grep -qx "$p"; } declare -A PRESENT=() CHOSEN=() USED=() diff --git a/deployment_new/templates/server_arm/scripts/install.sh b/deployment_new/templates/server_arm/scripts/install.sh index 641924a..cea8c77 100644 --- a/deployment_new/templates/server_arm/scripts/install.sh +++ b/deployment_new/templates/server_arm/scripts/install.sh @@ -32,33 +32,62 @@ if [[ -z "$SMADDR" && -f "$CI_FILE" ]]; then fi SWARM_MANAGER_ADDR="$SMADDR" -# Swarm init & overlay(ARM 版也按生产环境方式启用 Swarm) NET_NAME="${ARGUS_OVERLAY_NET:-argus-sys-net}" +SWARM_AVAILABLE=true + +# Swarm init & overlay(ARM 版优先按生产方式启用 Swarm,但在无法启用时退回本地 bridge) if ! docker info 2>/dev/null | grep -q "Swarm: active"; then - [[ -n "${SWARM_MANAGER_ADDR:-}" ]] || { err "SWARM_MANAGER_ADDR 未设置,请在 scripts/config.sh 中配置"; exit 1; } - info "初始化 Swarm (--advertise-addr $SWARM_MANAGER_ADDR)" - docker swarm init --advertise-addr "$SWARM_MANAGER_ADDR" >/dev/null 2>&1 || true + if [[ -n "${SWARM_MANAGER_ADDR:-}" ]]; then + info "Swarm 未激活,尝试初始化 Swarm (--advertise-addr $SWARM_MANAGER_ADDR)" + if ! docker swarm init --advertise-addr "$SWARM_MANAGER_ADDR" >/dev/null 2>&1; then + err "Swarm 初始化失败,本次部署将退回本地 bridge 网络: $NET_NAME(仅支持单机测试,不支持 swarm client)" + SWARM_AVAILABLE=false + else + info "Swarm 初始化成功" + fi + else + info "SWARM_MANAGER_ADDR 未设置且 Swarm 未激活,将使用本地 bridge 网络: $NET_NAME(仅支持单机测试)" + SWARM_AVAILABLE=false + fi else info "Swarm 已激活" fi -if ! docker network inspect "$NET_NAME" >/dev/null 2>&1; then - info "创建 overlay 网络: $NET_NAME" - docker network create -d overlay --attachable "$NET_NAME" >/dev/null -else - info "overlay 网络已存在: $NET_NAME" -fi +if [[ "$SWARM_AVAILABLE" == true ]]; then + if ! docker network inspect "$NET_NAME" >/dev/null 2>&1; then + info "创建 overlay 网络: $NET_NAME" + docker network create -d overlay --attachable "$NET_NAME" >/dev/null + else + info "overlay 网络已存在: $NET_NAME" + fi -# 将 Swarm join token 写入 cluster-info.env,供 ARM client 使用 -TOKEN_WORKER=$(docker swarm join-token -q worker 2>/dev/null || echo "") -TOKEN_MANAGER=$(docker swarm join-token -q manager 2>/dev/null || echo "") -CI_OUT="$PKG_ROOT/cluster-info.env" -info "写入 cluster-info.env (manager/token)" -{ - echo "SWARM_MANAGER_ADDR=${SWARM_MANAGER_ADDR:-}" - echo "SWARM_JOIN_TOKEN_WORKER=${TOKEN_WORKER:-}" - echo "SWARM_JOIN_TOKEN_MANAGER=${TOKEN_MANAGER:-}" -} > "$CI_OUT" + # 将 Swarm join token 写入 cluster-info.env,供 ARM client 使用 + TOKEN_WORKER=$(docker swarm join-token -q worker 2>/dev/null || echo "") + TOKEN_MANAGER=$(docker swarm join-token -q manager 2>/dev/null || echo "") + CI_OUT="$PKG_ROOT/cluster-info.env" + info "写入 cluster-info.env (manager/token)" + { + echo "SWARM_MANAGER_ADDR=${SWARM_MANAGER_ADDR:-}" + echo "SWARM_JOIN_TOKEN_WORKER=${TOKEN_WORKER:-}" + echo "SWARM_JOIN_TOKEN_MANAGER=${TOKEN_MANAGER:-}" + } > "$CI_OUT" +else + # 本机无法启用 Swarm,只维护 SWARM_MANAGER_ADDR,join token 留空 + CI_OUT="$PKG_ROOT/cluster-info.env" + info "写入 cluster-info.env(Swarm 未启用,仅保存 SWARM_MANAGER_ADDR)" + { + echo "SWARM_MANAGER_ADDR=${SWARM_MANAGER_ADDR:-}" + echo "SWARM_JOIN_TOKEN_WORKER=" + echo "SWARM_JOIN_TOKEN_MANAGER=" + } > "$CI_OUT" + + if ! docker network inspect "$NET_NAME" >/dev/null 2>&1; then + info "创建本地 bridge 网络: $NET_NAME" + docker network create "$NET_NAME" >/dev/null + else + info "本地网络已存在: $NET_NAME" + fi +fi # 导入镜像 IMAGES_DIR="$PKG_ROOT/images" diff --git a/src/alert/alertmanager/build/alertmanager-0.28.1.linux-arm64.tar.gz b/src/alert/alertmanager/build/alertmanager-0.28.1.linux-arm64.tar.gz new file mode 100644 index 0000000..a816461 Binary files /dev/null and b/src/alert/alertmanager/build/alertmanager-0.28.1.linux-arm64.tar.gz differ diff --git a/src/alert/alertmanager/build/fetch-dist.sh b/src/alert/alertmanager/build/fetch-dist.sh old mode 100644 new mode 100755 diff --git a/src/web/build_tools/proxy/Dockerfile.arm64 b/src/web/build_tools/proxy/Dockerfile.arm64 new file mode 100644 index 0000000..aff32c7 --- /dev/null +++ b/src/web/build_tools/proxy/Dockerfile.arm64 @@ -0,0 +1,79 @@ +FROM ubuntu:24.04 + +USER root + +# 安装 nginx 和 supervisor +RUN apt-get update && \ + apt-get install -y nginx supervisor curl vim net-tools inetutils-ping ca-certificates passwd && \ + apt-get clean && rm -rf /var/lib/apt/lists/* + +ENV FRONTEND_BASE_PATH=/private/argus/web/proxy +ARG ARGUS_BUILD_UID=2133 +ARG ARGUS_BUILD_GID=2015 +ENV ARGUS_BUILD_UID=${ARGUS_BUILD_UID} +ENV ARGUS_BUILD_GID=${ARGUS_BUILD_GID} + +RUN mkdir -p ${FRONTEND_BASE_PATH} && \ + mkdir -p /private/argus/etc + +# 创建 proxy 用户(可自定义 UID/GID) +RUN set -eux; \ + if ! getent group "${ARGUS_BUILD_GID}" >/dev/null; then \ + groupadd -g "${ARGUS_BUILD_GID}" web_proxy || true; \ + fi; \ + if id web_proxy >/dev/null 2>&1; then \ + current_uid="$(id -u web_proxy)"; \ + if [ "$current_uid" != "${ARGUS_BUILD_UID}" ] && ! getent passwd "${ARGUS_BUILD_UID}" >/dev/null; then \ + usermod -u "${ARGUS_BUILD_UID}" web_proxy; \ + fi; \ + usermod -g "${ARGUS_BUILD_GID}" web_proxy || true; \ + else \ + if ! getent passwd "${ARGUS_BUILD_UID}" >/dev/null; then \ + useradd -M -s /usr/sbin/nologin -u "${ARGUS_BUILD_UID}" -g "${ARGUS_BUILD_GID}" web_proxy; \ + else \ + echo "UID ${ARGUS_BUILD_UID} already exists; skip creating user 'web_proxy'"; \ + fi; \ + fi; \ + chown -R "${ARGUS_BUILD_UID}:${ARGUS_BUILD_GID}" ${FRONTEND_BASE_PATH} /private/argus/etc /usr/local/bin || true + +# 配置内网 apt 源 (如果指定了内网选项) +RUN if [ "$USE_INTRANET" = "true" ]; then \ + echo "Configuring intranet apt sources..." && \ + cp /etc/apt/sources.list /etc/apt/sources.list.bak && \ + echo "deb [trusted=yes] http://10.68.64.1/ubuntu2204/ jammy main" > /etc/apt/sources.list && \ + echo 'Acquire::https::Verify-Peer "false";' > /etc/apt/apt.conf.d/99disable-ssl-check && \ + echo 'Acquire::https::Verify-Host "false";' >> /etc/apt/apt.conf.d/99disable-ssl-check; \ + fi + +RUN if [ "$USE_INTRANET" = "true" ]; then \ + echo "deb [trusted=yes] https://10.92.132.52/mirrors/ubuntu2204/ jammy main" > /etc/apt/sources.list; \ + fi + +# 复制 nginx 配置模板 +COPY src/web/build_tools/proxy/nginx.conf.template /etc/nginx/nginx.conf.template + +# ARM minimal profile:使用不包含 Kibana/ES 的端口配置 +COPY src/web/build_tools/proxy/conf.d/ports.arm.conf /etc/nginx/conf.d/ports.conf + +# 复制 supervisor 配置 +COPY src/web/build_tools/proxy/supervisord.conf /etc/supervisor/conf.d/supervisord.conf + +# 创建 supervisor 日志目录 +RUN mkdir -p /var/log/supervisor + +# 复制 ARM 专用启动脚本 +COPY src/web/build_tools/proxy/start-proxy-supervised.arm.sh /usr/local/bin/start-proxy-supervised.sh +RUN chmod +x /usr/local/bin/start-proxy-supervised.sh +COPY src/web/build_tools/proxy/start-proxy-retry.sh /usr/local/bin/start-proxy-retry.sh +RUN chmod +x /usr/local/bin/start-proxy-retry.sh + +# 复制 DNS 监控脚本(复用 bind 模块脚本) +COPY src/bind/build/dns-monitor.sh /usr/local/bin/dns-monitor.sh +RUN chmod +x /usr/local/bin/dns-monitor.sh + +EXPOSE 80 8080 8081 8082 8083 8084 8085 + +USER root + +CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"] + diff --git a/src/web/build_tools/proxy/conf.d/ports.arm.conf b/src/web/build_tools/proxy/conf.d/ports.arm.conf new file mode 100644 index 0000000..6b10c48 --- /dev/null +++ b/src/web/build_tools/proxy/conf.d/ports.arm.conf @@ -0,0 +1,84 @@ +map $http_upgrade $connection_upgrade { default upgrade; "" close; } + +# 允许的跨域来源(仅用于 8084/8085) +# 放开为任意来源:将来端口/域名变更均无需调整。 +# 注意:若前端需要携带凭证(cookies/Authorization),这种“回显 Origin”的方式比 "*" 更通用。 +map $http_origin $cors_allow { + default $http_origin; +} + +# 8080 - Portal +server { + listen 8080; + server_name _; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection $connection_upgrade; + proxy_http_version 1.1; + location / { proxy_pass http://web.argus.com:8080/; } +} + +# 8081 - Grafana +server { + listen 8081; + server_name _; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection $connection_upgrade; + proxy_http_version 1.1; + location / { proxy_pass http://grafana.metric.argus.com:3000/; } +} + +# 8082 - Prometheus +server { + listen 8082; + server_name _; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection $connection_upgrade; + proxy_http_version 1.1; + location / { proxy_pass http://prom.metric.argus.com:9090/; } +} + +# 8084 - Alertmanager(含 CORS) +server { + listen 8084; + server_name _; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_hide_header Access-Control-Allow-Origin; + add_header 'Access-Control-Allow-Origin' $cors_allow always; + add_header 'Access-Control-Allow-Methods' 'GET, POST, PUT, DELETE, OPTIONS' always; + add_header 'Access-Control-Allow-Headers' 'Origin, Content-Type, Accept, Authorization' always; + if ($request_method = OPTIONS) { return 204; } + proxy_http_version 1.1; + location / { proxy_pass http://alertmanager.alert.argus.com:9093/; } +} + +# 8085 - Master(含 CORS) +server { + listen 8085; + server_name _; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + add_header 'Access-Control-Allow-Origin' $cors_allow always; + add_header 'Access-Control-Allow-Methods' 'GET, POST, PUT, DELETE, OPTIONS' always; + add_header 'Access-Control-Allow-Headers' 'Origin, Content-Type, Accept, Authorization' always; + if ($request_method = OPTIONS) { return 204; } + proxy_http_version 1.1; + location / { proxy_pass http://master.argus.com:3000/; } +} + diff --git a/src/web/build_tools/proxy/start-proxy-supervised.arm.sh b/src/web/build_tools/proxy/start-proxy-supervised.arm.sh new file mode 100644 index 0000000..a99ad1d --- /dev/null +++ b/src/web/build_tools/proxy/start-proxy-supervised.arm.sh @@ -0,0 +1,102 @@ +#!/bin/bash +set -euo pipefail + +echo "[INFO] Starting proxy under supervisor (ARM minimal profile)..." + +TEMPLATE="/etc/nginx/nginx.conf.template" +TARGET="/etc/nginx/nginx.conf" +DNS_CONF_PRIVATE="/private/argus/etc/dns.conf" +DNS_CONF_SYSTEM="/etc/resolv.conf" +DNS_DIR="/private/argus/etc" +DNS_SCRIPT="${DNS_DIR}/update-dns.sh" +RUNTIME_UID="${ARGUS_BUILD_UID:-2133}" +RUNTIME_GID="${ARGUS_BUILD_GID:-2015}" + +mkdir -p "$DNS_DIR" +chown -R "$RUNTIME_UID:$RUNTIME_GID" "$DNS_DIR" 2>/dev/null || true + +if [[ -x "$DNS_SCRIPT" ]]; then + echo "[INFO] Running update-dns.sh before proxy starts" + "$DNS_SCRIPT" || echo "[WARN] update-dns.sh execution failed" +else + echo "[WARN] DNS update script not found or not executable: $DNS_SCRIPT" +fi + +# ========== 读取 DNS ========== +RESOLVERS="" +for i in $(seq 1 10); do + if [ -f "$DNS_CONF_PRIVATE" ]; then + RESOLVERS=$(awk '/^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$/{print $1}' "$DNS_CONF_PRIVATE" | tr '\n' ' ') + fi + [ -n "$RESOLVERS" ] && break + sleep 1 +done + +if [ -z "$RESOLVERS" ]; then + echo "未在 $DNS_CONF_PRIVATE 中找到有效 DNS,使用系统 /etc/resolv.conf" + RESOLVERS=$(awk '/^nameserver/ {print $2}' "$DNS_CONF_SYSTEM" | tr '\n' ' ') +fi + +if [ -z "$RESOLVERS" ]; then + echo "警告: 未找到任何 DNS,使用默认 8.8.8.8" + RESOLVERS="8.8.8.8" +fi + +echo "检测到 DNS 服务器列表: $RESOLVERS" + +# ========== 生成 nginx.conf ========== +if [ -f "$TEMPLATE" ]; then + echo "从模板生成 nginx.conf ..." + if ! echo " $RESOLVERS " | grep -q " 127.0.0.11 "; then + RESOLVERS="${RESOLVERS} 127.0.0.11" + fi + sed "s|__RESOLVERS__|$RESOLVERS|" "$TEMPLATE" > "$TARGET" +else + echo "错误: 找不到 nginx.conf.template ($TEMPLATE)" + exit 1 +fi + +grep resolver "$TARGET" || true + +# ========== 等待上游域名准备(ARM 最小服务集:无 Kibana/ES) ========== +UPSTREAM_DOMAINS=( + web.argus.com + grafana.metric.argus.com + prom.metric.argus.com + alertmanager.alert.argus.com + master.argus.com +) +WAIT_MAX=15 +WAITED=0 +MISSING=() +while :; do + MISSING=() + for d in "${UPSTREAM_DOMAINS[@]}"; do + if [ ! -s "/private/argus/etc/${d}" ]; then + MISSING+=("$d") + fi + done + if [ ${#MISSING[@]} -eq 0 ] || [ "$WAITED" -ge "$WAIT_MAX" ]; then + break + fi + echo "[INFO] 等待上游域名记录生成(${WAITED}/${WAIT_MAX}) 缺失: ${MISSING[*]}" + sleep 1 + WAITED=$((WAITED+1)) +done + +# Quick upstream reachability snapshot(ARM 下仅检查存在的服务) +declare -a _UPSTREAMS=( + "http://web.argus.com:8080/" + "http://grafana.metric.argus.com:3000/api/health" + "http://prom.metric.argus.com:9090/-/ready" + "http://alertmanager.alert.argus.com:9093/api/v2/status" + "http://master.argus.com:3000/readyz" +) +for u in "${_UPSTREAMS[@]}"; do + code=$(curl -4 -s -o /dev/null -w "%{http_code}" "$u" || echo 000) + echo "[INFO] upstream check: $u -> $code" +done + +echo "[INFO] Launching nginx..." +exec /usr/sbin/nginx -g "daemon off;" +