#!/usr/bin/env bash
#
# Installer for the Argus ARM server package.
#
# The script prepares the Docker network (Swarm overlay, or a local bridge as
# a fallback), loads the bundled image archives, starts the compose stack,
# polls the services for readiness and writes a short installation report.
#
# It expects compose/.env to exist below the package root.
set -euo pipefail

# The package root is the parent of the directory that contains this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
PKG_ROOT="$ROOT_DIR"
ENV_FILE="$PKG_ROOT/compose/.env"
COMPOSE_FILE="$PKG_ROOT/compose/docker-compose.yml"

# Print an informational message on stdout, prefixed with a blue "[INSTALL]".
# Arguments: the message words (joined with single spaces).
# The message is passed through %s so that backslashes it may contain (for
# example in paths) are printed literally rather than expanded as escapes.
info() {
  printf '\033[34m[INSTALL]\033[0m %s\n' "$*"
}
# Print an error message on stderr, prefixed with a red "[ERROR]".
# Arguments: the message words (joined with single spaces).
# The message is passed through %s so that backslashes it may contain are
# printed literally rather than expanded as escape sequences.
err() {
  printf '\033[31m[ERROR]\033[0m %s\n' "$*" >&2
}

# Check that every named command can be resolved by the shell (command -v).
# Arguments: command names to look up.
# Outputs:   one error line (through err) for each command that is missing.
# Returns:   0 when every command was found, 1 otherwise.
require() {
  # Both variables are local so the caller's globals are never overwritten.
  local cmd missing=0
  for cmd in "$@"; do
    if ! command -v "$cmd" >/dev/null 2>&1; then
      err "缺少依赖: $cmd"
      missing=1
    fi
  done
  [[ $missing -eq 0 ]]
}

# Ensure a Docker Compose implementation is usable: either the v2 plugin
# (`docker compose`) or the standalone v1 binary (`docker-compose`).
# Returns: 0 as soon as one of them answers `version` successfully.
# Exits:   terminates the script with status 1, after logging through err,
#          when neither is available.
require_compose() {
  if docker compose version >/dev/null 2>&1; then
    return 0
  fi
  if command -v docker-compose >/dev/null 2>&1 \
    && docker-compose version >/dev/null 2>&1; then
    return 0
  fi
  err "未检测到 Docker Compose,请安装 docker compose v2 或 docker-compose v1"
  exit 1
}

# Stop early when a required tool or Docker Compose is missing.
require docker curl jq awk sed tar gzip
require_compose

# The environment file must exist before anything else is attempted.
if [[ ! -f "$ENV_FILE" ]]; then
  err "缺少 compose/.env,请先运行 scripts/config.sh"
  exit 1
fi
info "使用环境文件: $ENV_FILE"

# `set -a` exports every variable assigned while the env file is sourced, so
# the values are also visible to child processes started later on.
set -a
# shellcheck disable=SC1090  # the path is only known at runtime
source "$ENV_FILE"
set +a

# Print the value of the first SWARM_MANAGER_ADDR= line of an env-style file.
# Arguments: $1 - path of the file to read
# Outputs:   the value on stdout; nothing when the key is absent
read_swarm_manager_addr() {
  # The empty pattern in s/// reuses the address regex, stripping the key;
  # `q` stops at the first match, so no `| head -n1` stage (which would be
  # subject to pipefail) is needed.
  sed -n '/^SWARM_MANAGER_ADDR=/{s///p;q;}' "$1"
}

# Compatibility: when .env does not provide SWARM_MANAGER_ADDR, fall back to
# the value stored in an already existing cluster-info.env so that the file is
# not rewritten later with an empty address.
SMADDR="${SWARM_MANAGER_ADDR:-}"
CI_FILE="$PKG_ROOT/cluster-info.env"
if [[ -z "$SMADDR" && -f "$CI_FILE" ]]; then
  SMADDR=$(read_swarm_manager_addr "$CI_FILE")
fi
SWARM_MANAGER_ADDR="$SMADDR"

NET_NAME="${ARGUS_OVERLAY_NET:-argus-sys-net}"
SWARM_AVAILABLE=true

# Report whether this Docker node is currently part of an active Swarm.
# Returns: 0 when `docker info` reports the local node state "active",
#          non-zero otherwise (including when `docker info` itself fails).
swarm_is_active() {
  local state
  # Ask for the single field instead of grepping the full `docker info` text:
  # with pipefail, `docker info | grep -q` can fail through SIGPIPE after grep
  # has already matched, which would make an active Swarm look inactive.
  state=$(docker info --format '{{.Swarm.LocalNodeState}}' 2>/dev/null) || state=""
  [[ "$state" == "active" ]]
}

# Swarm init & overlay: the ARM build prefers the production-style Swarm
# setup, but falls back to a local bridge network when Swarm cannot be enabled.
if swarm_is_active; then
  info "Swarm 已激活"
elif [[ -n "${SWARM_MANAGER_ADDR:-}" ]]; then
  info "Swarm 未激活,尝试初始化 Swarm (--advertise-addr $SWARM_MANAGER_ADDR)"
  if docker swarm init --advertise-addr "$SWARM_MANAGER_ADDR" >/dev/null 2>&1; then
    info "Swarm 初始化成功"
  else
    err "Swarm 初始化失败,本次部署将退回本地 bridge 网络: ${NET_NAME}(仅支持单机测试,不支持 swarm client)"
    SWARM_AVAILABLE=false
  fi
else
  info "SWARM_MANAGER_ADDR 未设置且 Swarm 未激活,将使用本地 bridge 网络: ${NET_NAME}(仅支持单机测试)"
  SWARM_AVAILABLE=false
fi

CI_OUT="$PKG_ROOT/cluster-info.env"

# Write cluster-info.env ($CI_OUT) with the manager address and join tokens.
# Globals:   SWARM_MANAGER_ADDR (read), CI_OUT (read)
# Arguments: $1 - worker join token (may be empty)
#            $2 - manager join token (may be empty)
write_cluster_info() {
  local worker_token=${1:-} manager_token=${2:-}
  {
    printf 'SWARM_MANAGER_ADDR=%s\n' "${SWARM_MANAGER_ADDR:-}"
    printf 'SWARM_JOIN_TOKEN_WORKER=%s\n' "$worker_token"
    printf 'SWARM_JOIN_TOKEN_MANAGER=%s\n' "$manager_token"
  } > "$CI_OUT"
}

if [[ "$SWARM_AVAILABLE" == true ]]; then
  if docker network inspect "$NET_NAME" >/dev/null 2>&1; then
    info "overlay 网络已存在: $NET_NAME"
  else
    info "创建 overlay 网络: $NET_NAME"
    docker network create -d overlay --attachable "$NET_NAME" >/dev/null
  fi

  # Store the Swarm join tokens in cluster-info.env for the ARM client.
  # A token that cannot be fetched is recorded as an empty value.
  TOKEN_WORKER=$(docker swarm join-token -q worker 2>/dev/null || true)
  TOKEN_MANAGER=$(docker swarm join-token -q manager 2>/dev/null || true)
  info "写入 cluster-info.env (manager/token)"
  write_cluster_info "$TOKEN_WORKER" "$TOKEN_MANAGER"
else
  # Swarm could not be enabled on this host: keep only SWARM_MANAGER_ADDR and
  # leave both join tokens empty.
  info "写入 cluster-info.env(Swarm 未启用,仅保存 SWARM_MANAGER_ADDR)"
  write_cluster_info "" ""

  if docker network inspect "$NET_NAME" >/dev/null 2>&1; then
    info "本地网络已存在: $NET_NAME"
  else
    info "创建本地 bridge 网络: $NET_NAME"
    docker network create "$NET_NAME" >/dev/null
  fi
fi

# Import the bundled images: every *.tar.gz below images/ is loaded into Docker.
IMAGES_DIR="$PKG_ROOT/images"

# nullglob makes an unmatched pattern expand to nothing, so an empty directory
# yields an empty array instead of the literal pattern.
shopt -s nullglob
tars=("$IMAGES_DIR"/*.tar.gz)
shopt -u nullglob

if [[ ${#tars[@]} -eq 0 ]]; then
  err "images 目录为空,缺少镜像 tar.gz"
  exit 1
fi

total=${#tars[@]}
idx=0
for tgz in "${tars[@]}"; do
  idx=$((idx + 1))
  info "导入镜像 ($idx/$total): $(basename "$tgz")"
  # Stream the archive straight into `docker load` (it reads stdin when no -i
  # is given). No temporary file is created, so nothing is left behind when a
  # step fails and no uncompressed copy has to fit into the temp directory.
  gunzip -c "$tgz" | docker load >/dev/null
done

# Start the service stack.
PROJECT="${COMPOSE_PROJECT_NAME:-argus-server-arm}"

# Use the v2 plugin when it works and the standalone v1 binary otherwise, so
# that hosts which only have `docker-compose` (accepted by the earlier compose
# check) can start the stack as well.
if docker compose version >/dev/null 2>&1; then
  compose_cmd=(docker compose)
else
  compose_cmd=(docker-compose)
fi
compose_args=(-p "$PROJECT" --env-file "$ENV_FILE" -f "$COMPOSE_FILE")

info "启动服务栈 (${compose_cmd[*]} -p $PROJECT up -d)"
"${compose_cmd[@]}" "${compose_args[@]}" up -d
"${compose_cmd[@]}" "${compose_args[@]}" ps

# Simple readiness probes (best-effort; they never block the installation).

# Print the HTTP status code obtained for a URL, forcing IPv4.
# Arguments: $1 - URL to probe
# Outputs:   the status code on stdout, or "000" when the request failed
code() {
  local status
  # When the transfer fails curl still prints "000" through -w before exiting
  # non-zero, so the fallback has to replace the captured output; appending a
  # second "000" would produce "000000".
  status=$(curl -4 -s -o /dev/null -w "%{http_code}" "$1") || status=000
  printf '%s\n' "${status:-000}"
}
# Report whether Grafana's /api/health response marks its database as ok.
# Globals: GRAFANA_PORT (read; 3000 when unset or empty)
# Returns: 0 when the response body contains "database": "ok" (any amount of
#          whitespace around the colon), non-zero otherwise.
gf_ok() {
  local body
  # A failed request simply leaves the body empty, which does not match.
  body=$(curl -s "http://127.0.0.1:${GRAFANA_PORT:-3000}/api/health" || true)
  # [[:space:]] is the POSIX spelling of the GNU-only \s; the here-string
  # avoids an `echo | grep -q` pipeline, whose status is subject to pipefail.
  grep -q '"database"[[:space:]]*:[[:space:]]*"ok"' <<<"$body"
}

# Poll the four base services until all of them answer, giving up after
# RETRIES attempts spaced SLEEP seconds apart (both can be overridden through
# the environment). A timeout is reported but does not abort the install.
RETRIES=${RETRIES:-60}
SLEEP=${SLEEP:-5}
ok=0
info "等待基础服务就绪 (<= $((RETRIES*SLEEP))s)"
# Arithmetic loop instead of `seq`, which is not among the checked dependencies.
for ((i = 1; i <= RETRIES; i++)); do
  e1=$(code "http://127.0.0.1:${MASTER_PORT:-32300}/readyz")
  e2=$(code "http://127.0.0.1:${PROMETHEUS_PORT:-9090}/-/ready")
  e3=$(code "http://127.0.0.1:${ALERTMANAGER_PORT:-9093}/api/v2/status")
  # Grafana counts as ready only when its health body reports the database ok.
  e4=000
  if gf_ok; then e4=200; fi
  info "[ready] t=$((i*SLEEP))s master=$e1 prom=$e2 graf=$e4 alert=$e3"

  # Count how many of the four probes succeeded in this round.
  ok=0
  for probe_status in "$e1" "$e2" "$e3" "$e4"; do
    if [[ "$probe_status" == 200 ]]; then ok=$((ok + 1)); fi
  done
  if [[ $ok -ge 4 ]]; then break; fi

  # Reset so that a timed-out loop leaves ok below the threshold, and do not
  # sleep after the final attempt.
  ok=0
  if ((i < RETRIES)); then sleep "$SLEEP"; fi
done
[[ $ok -ge 4 ]] || err "部分服务未就绪(可稍后执行 scripts/selfcheck.sh 进行复查)"

# Installation report (trimmed-down ARM edition), written as Markdown into the
# package root with a timestamped file name.
ts=$(date +%Y%m%d-%H%M%S)
RPT="$PKG_ROOT/安装报告_ARM_${ts}.md"
{
  echo "# Argus ARM Server 安装报告 (${ts})"
  echo
  echo "## 端口映射"
  # Every port expansion carries a default so that a variable missing from
  # .env cannot abort the script under `set -u` at this late stage. The four
  # service ports fall back to the same values the readiness probes use.
  echo "- MASTER_PORT=${MASTER_PORT:-32300}"
  echo "- PROMETHEUS_PORT=${PROMETHEUS_PORT:-9090}"
  echo "- GRAFANA_PORT=${GRAFANA_PORT:-3000}"
  echo "- ALERTMANAGER_PORT=${ALERTMANAGER_PORT:-9093}"
  echo "- WEB_PROXY_PORT_8080=${WEB_PROXY_PORT_8080:-} ... 8085=${WEB_PROXY_PORT_8085:-}"
  echo
  echo "## 网络"
  echo "- NET=${NET_NAME}"
  echo "- ARGUS_OVERLAY_NET=${ARGUS_OVERLAY_NET:-argus-sys-net}"
  echo
  echo "## 健康检查(简要)"
  echo "- master/readyz=$(code "http://127.0.0.1:${MASTER_PORT:-32300}/readyz")"
  echo "- prometheus/ready=$(code "http://127.0.0.1:${PROMETHEUS_PORT:-9090}/-/ready")"
  echo "- grafana/api/health=$(code "http://127.0.0.1:${GRAFANA_PORT:-3000}/api/health")"
  echo "- alertmanager/api/v2/status=$(code "http://127.0.0.1:${ALERTMANAGER_PORT:-9093}/api/v2/status")"
} > "$RPT"
info "已生成报告: $RPT"

# Refresh the nginx configuration inside the web-proxy container
# (best-effort): reload only when the configuration test passes, and ignore
# any failure, e.g. when the container is not running.
if docker exec argus-web-proxy nginx -t >/dev/null 2>&1; then
  docker exec argus-web-proxy nginx -s reload >/dev/null 2>&1 || true
fi

info "安装完成。可通过 scripts/status.sh 查看当前服务状态。"