#!/usr/bin/env bash
#
# Argus server installer.
#
# Steps performed by this script, in order: check required tools, load
# compose/.env, make sure Docker Swarm and the overlay network exist, load the
# bundled image archives, start the compose stack, wait (best-effort) for the
# services to answer, then write cluster-info.env, an install report and the
# domain -> overlay IP files.
set -euo pipefail

# The package root is the parent of the directory that holds this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
PKG_ROOT="$ROOT_DIR"
ENV_FILE="$PKG_ROOT/compose/.env"
COMPOSE_FILE="$PKG_ROOT/compose/docker-compose.yml"

# Print an informational message on stdout with a blue "[INSTALL]" prefix.
# Arguments: the message words (joined with single spaces).
# printf '%s' keeps backslashes in the message literal; only the prefix
# carries escape sequences.
info() { printf '\033[34m[INSTALL]\033[0m %s\n' "$*"; }
# Print an error message on stderr with a red "[ERROR]" prefix.
# Arguments: the message words (joined with single spaces).
# printf '%s' keeps backslashes in the message literal; only the prefix
# carries escape sequences.
err() { printf '\033[31m[ERROR]\033[0m %s\n' "$*" >&2; }
# Check that every named command is available on PATH.
# Arguments: command names to look up.
# Outputs:   one error line (via err) per missing command.
# Returns:   0 when all commands were found (or none were given), 1 otherwise.
require() {
  # Both variables are local so the caller's globals are left untouched.
  local cmd missing=0
  for cmd in "$@"; do
    if ! command -v "$cmd" >/dev/null 2>&1; then
      err "缺少依赖: $cmd"
      missing=1
    fi
  done
  [[ $missing -eq 0 ]]
}
# Stop right away when one of the external tools is missing.
require docker curl jq awk sed tar gzip

# The compose env file must already exist; the error message points the user
# at scripts/config.sh.
[[ -f "$ENV_FILE" ]] || { err "缺少 compose/.env,请先运行 scripts/config.sh"; exit 1; }
info "使用环境文件: $ENV_FILE"

# Source the env file with auto-export switched on, so every variable it
# defines is exported to child processes.
set -a
# shellcheck disable=SC1090
source "$ENV_FILE"
set +a

# Compatibility: when .env does not define SWARM_MANAGER_ADDR, fall back to the
# value stored in an existing cluster-info.env so that an empty address is not
# written back later.

# Print the value of the first SWARM_MANAGER_ADDR=... line in file $1
# (prints nothing when the file has no such line). A single sed that quits at
# the first match is used, so no pipeline is involved.
read_manager_addr() {
  sed -n '/^SWARM_MANAGER_ADDR=/{s///p;q;}' "$1"
}

SMADDR="${SWARM_MANAGER_ADDR:-}"
CI_FILE="$PKG_ROOT/cluster-info.env"
if [[ -z "$SMADDR" && -f "$CI_FILE" ]]; then
  SMADDR="$(read_manager_addr "$CI_FILE")"
fi
SWARM_MANAGER_ADDR="$SMADDR"

# Swarm init & overlay
# Read the swarm state through a format template instead of grepping the
# human-readable `docker info` text: under `pipefail`, `grep -q` closing the
# pipe early can make the pipeline fail and an active swarm look inactive.
swarm_state="$(docker info --format '{{.Swarm.LocalNodeState}}' 2>/dev/null || true)"
if [[ "$swarm_state" != "active" ]]; then
  [[ -n "${SWARM_MANAGER_ADDR:-}" ]] || { err "SWARM_MANAGER_ADDR 未设置,请在 scripts/config.sh 中配置"; exit 1; }
  info "初始化 Swarm (--advertise-addr $SWARM_MANAGER_ADDR)"
  # Still best-effort (a failed init does not abort the install), but the
  # failure is now reported instead of being swallowed.
  if ! docker swarm init --advertise-addr "$SWARM_MANAGER_ADDR" >/dev/null 2>&1; then
    err "docker swarm init 失败,继续执行"
  fi
else
  info "Swarm 已激活"
fi

# Overlay network name; ARGUS_OVERLAY_NET overrides the default.
NET_NAME="${ARGUS_OVERLAY_NET:-argus-sys-net}"
# Create the attachable overlay network only when it does not exist yet.
if docker network inspect "$NET_NAME" >/dev/null 2>&1; then
  info "overlay 网络已存在: $NET_NAME"
else
  info "创建 overlay 网络: $NET_NAME"
  docker network create -d overlay --attachable "$NET_NAME" >/dev/null
fi

# Load images
IMAGES_DIR="$PKG_ROOT/images"
# nullglob is only needed while the archive list is built: with it, a
# directory without *.tar.gz yields an empty array instead of the pattern.
shopt -s nullglob
tars=("$IMAGES_DIR"/*.tar.gz)
shopt -u nullglob
if [[ ${#tars[@]} -eq 0 ]]; then err "images 目录为空,缺少镜像 tar.gz"; exit 1; fi
total=${#tars[@]}; idx=0
for tgz in "${tars[@]}"; do
  idx=$((idx+1))
  info "导入镜像 ($idx/$total): $(basename "$tgz")"
  # Stream the decompressed archive straight into docker: no temp file is
  # left behind when a step fails, and `pipefail` aborts on either failure.
  gunzip -c "$tgz" | docker load >/dev/null
done

# Compose up
info "启动服务栈 (docker compose up -d)"
# Shared argument list for both compose invocations.
compose_cmd=(docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE")
"${compose_cmd[@]}" up -d
"${compose_cmd[@]}" ps

# Wait readiness (best-effort)

# Print the HTTP status code of a request to URL $1, or 000 when curl fails.
# Arguments: $1 - URL to request.
# Globals:   CURL_MAX_TIME (read) - per-request time limit in seconds, default 10.
# Returns:   always 0.
# curl itself writes "000" through -w when it cannot connect, so the fallback
# replaces the captured value instead of being appended to it.
code() {
  local status
  status="$(curl -4 -s --max-time "${CURL_MAX_TIME:-10}" -o /dev/null -w '%{http_code}' "$1")" || status=000
  printf '%s\n' "${status:-000}"
}
# Return 0 when a TCP connection to 127.0.0.1:${PROMETHEUS_PORT:-9090} can be
# opened, 1 otherwise. The probe runs inside a subshell, so descriptor 3 is
# gone as soon as the check is over.
prom_ok() {
  if (exec 3<>"/dev/tcp/127.0.0.1/${PROMETHEUS_PORT:-9090}") >/dev/null 2>&1; then
    return 0
  fi
  return 1
}
# Return 0 when the Kibana status endpoint on 127.0.0.1:${KIBANA_PORT:-5601}
# answers with a body containing "level": "available", non-zero otherwise.
# Globals: CURL_MAX_TIME (read) - per-request time limit in seconds, default 10.
# The body is matched with a bash regex rather than `echo | grep -q`: under
# `pipefail` a large body could hit SIGPIPE once grep exits on its first
# match, turning a ready service into a false negative.
kb_ok() {
  local body
  local pattern='"level"[[:space:]]*:[[:space:]]*"available"'
  body="$(curl -s --max-time "${CURL_MAX_TIME:-10}" "http://127.0.0.1:${KIBANA_PORT:-5601}/api/status" || true)"
  [[ "$body" =~ $pattern ]]
}
# Poll the six services until all of them report ready or the attempts run
# out. RETRIES (default 60) and SLEEP (default 5 seconds) may be preset by the
# caller. After the loop, ok holds the ready count of the last attempt.
RETRIES=${RETRIES:-60}; SLEEP=${SLEEP:-5}; ok=0
info "等待基础服务就绪 (<= $((RETRIES*SLEEP))s)"
for ((i = 1; i <= RETRIES; i++)); do
  e1=$(code "http://127.0.0.1:${MASTER_PORT:-32300}/readyz")
  e2=$(code "http://127.0.0.1:${ES_HTTP_PORT:-9200}/_cluster/health")
  e3=000; if prom_ok; then e3=200; fi
  e4=$(code "http://127.0.0.1:${GRAFANA_PORT:-3000}/api/health")
  e5=$(code "http://127.0.0.1:${ALERTMANAGER_PORT:-9093}/api/v2/status")
  e6=000; if kb_ok; then e6=200; fi
  info "[ready] t=$((i*SLEEP))s master=$e1 es=$e2 prom=$e3 graf=$e4 alert=$e5 kibana=$e6"

  # Count how many of the six probes came back as 200 in this attempt.
  ok=0
  for probe in "$e1" "$e2" "$e3" "$e4" "$e5" "$e6"; do
    if [[ "$probe" == 200 ]]; then ok=$((ok+1)); fi
  done
  if [[ $ok -ge 6 ]]; then break; fi

  # No point in sleeping once the last attempt has been made.
  if (( i < RETRIES )); then sleep "$SLEEP"; fi
done
# Best-effort: an unready service is reported but does not abort the install.
[[ $ok -ge 6 ]] || err "部分服务未就绪(可稍后重试 selfcheck)"

# Resolve overlay IPs

# Print the IP address of container $1 on network $NET_NAME.
# Prints nothing when `docker inspect` fails; always returns 0.
# The network name is embedded inside one double-quoted template string, so a
# name containing whitespace cannot split the argument.
overlay_ip() {
  docker inspect -f "{{ (index .NetworkSettings.Networks \"${NET_NAME}\").IPAddress }}" "$1" 2>/dev/null || true
}

bind_c=argus-bind-sys; ftp_c=argus-ftp
BINDIP="$(overlay_ip "$bind_c")"
FTPIP="$(overlay_ip "$ftp_c")"
info "解析 overlay IP: BINDIP=${BINDIP:-<empty>} FTPIP=${FTPIP:-<empty>}"

# Swarm join tokens
# Each token is left empty when docker cannot provide it.
TOKEN_WORKER="$(docker swarm join-token -q worker 2>/dev/null || true)"
TOKEN_MANAGER="$(docker swarm join-token -q manager 2>/dev/null || true)"

# cluster-info.env
# Records the manager address, the two overlay IPs and both swarm join tokens
# as KEY=value lines; the file is overwritten on every run.
CI="$PKG_ROOT/cluster-info.env"
info "写入 cluster-info.env (manager/token/IP)"
{
  printf 'SWARM_MANAGER_ADDR=%s\n' "${SWARM_MANAGER_ADDR:-}"
  printf 'BINDIP=%s\n' "${BINDIP:-}"
  printf 'FTPIP=%s\n' "${FTPIP:-}"
  printf 'SWARM_JOIN_TOKEN_WORKER=%s\n' "${TOKEN_WORKER:-}"
  printf 'SWARM_JOIN_TOKEN_MANAGER=%s\n' "${TOKEN_MANAGER:-}"
} > "$CI"
info "已输出 $CI"

# Install report
# Written as a timestamped Markdown file in the package root. Every variable
# is expanded with a ":-" fallback: under `set -u` a port that .env does not
# define would otherwise abort the script here, leaving a truncated report and
# skipping the steps that follow.
ts=$(date +%Y%m%d-%H%M%S)
RPT="$PKG_ROOT/安装报告_${ts}.md"
{
  echo "# Argus Server 安装报告 (${ts})"
  echo
  echo "## 端口映射"
  echo "- MASTER_PORT=${MASTER_PORT:-}"
  echo "- ES_HTTP_PORT=${ES_HTTP_PORT:-}"
  echo "- KIBANA_PORT=${KIBANA_PORT:-}"
  echo "- PROMETHEUS_PORT=${PROMETHEUS_PORT:-}"
  echo "- GRAFANA_PORT=${GRAFANA_PORT:-}"
  echo "- ALERTMANAGER_PORT=${ALERTMANAGER_PORT:-}"
  echo "- WEB_PROXY_PORT_8080=${WEB_PROXY_PORT_8080:-} ... 8085=${WEB_PROXY_PORT_8085:-}"
  echo
  echo "## Swarm/Overlay"
  echo "- SWARM_MANAGER_ADDR=${SWARM_MANAGER_ADDR:-}"
  echo "- NET=${NET_NAME:-}"
  echo "- JOIN_TOKEN_WORKER=${TOKEN_WORKER:-}"
  echo "- JOIN_TOKEN_MANAGER=${TOKEN_MANAGER:-}"
  echo
  echo "## Overlay IPs"
  echo "- BINDIP=${BINDIP:-}"
  echo "- FTPIP=${FTPIP:-}"
  echo
  echo "## 健康检查(简要)"
  # The probes are repeated here so the report reflects the state at the
  # moment it is written.
  echo "- master/readyz=$(code "http://127.0.0.1:${MASTER_PORT:-32300}/readyz")"
  echo "- es/_cluster/health=$(code "http://127.0.0.1:${ES_HTTP_PORT:-9200}/_cluster/health")"
  echo "- grafana/api/health=$(code "http://127.0.0.1:${GRAFANA_PORT:-3000}/api/health")"
  echo "- prometheus/tcp=$(if prom_ok; then echo 200; else echo 000; fi)"
  echo "- alertmanager/api/v2/status=$(code "http://127.0.0.1:${ALERTMANAGER_PORT:-9093}/api/v2/status")"
  echo "- kibana/api/status=$(if kb_ok; then echo available; else echo not-ready; fi)"
} > "$RPT"
info "已生成报告: $RPT"

info "安装完成。可将 cluster-info.env 分发给 Client-GPU 安装方。"

# Write domain -> overlay IP files and hot-reload Bind/Nginx
ETC_DIR="$PKG_ROOT/private/argus/etc"; mkdir -p "$ETC_DIR"

# Container name -> domain name. One file per domain is kept in ETC_DIR, and
# its content is the container's IP on the overlay network.
declare -A MAP=(
  [web-frontend]=web.argus.com
  [argus-grafana]=grafana.metric.argus.com
  [argus-prometheus]=prom.metric.argus.com
  [argus-kibana-sys]=kibana.log.argus.com
  [argus-alertmanager]=alertmanager.alert.argus.com
  [argus-master-sys]=master.argus.com
)

# The network name sits inside one double-quoted template string, so it can
# never be split into several arguments.
ip_template="{{ (index .NetworkSettings.Networks \"${NET_NAME}\").IPAddress }}"

changed=0
for cname in "${!MAP[@]}"; do
  domain="${MAP[$cname]}"; fpath="$ETC_DIR/$domain"
  ip="$(docker inspect -f "$ip_template" "$cname" 2>/dev/null || true)"
  if [[ -z "$ip" ]]; then
    echo "[DNS-FIX][WARN] $domain: container $cname no overlay IP yet"
    continue
  fi
  # A missing file counts as an empty previous value.
  cur="$(cat "$fpath" 2>/dev/null || true)"
  if [[ "$cur" == "$ip" ]]; then
    echo "[DNS-FIX][OK] $domain already $ip"
  else
    echo "$ip" > "$fpath"
    echo "[DNS-FIX][SET] $domain = $ip (was: ${cur:-<empty>})"
    changed=1
  fi
done

# Reload Bind only when at least one file changed: try the reload script in
# the container first, then fall back to rndc. Failures are ignored.
if [[ $changed -eq 1 ]]; then
  docker exec argus-bind-sys /usr/local/bin/reload-bind9.sh >/dev/null 2>&1 \
    || docker exec argus-bind-sys rndc reload >/dev/null 2>&1 \
    || true
  sleep 1
fi

# Reload Nginx only when its configuration test passes. Failures are ignored.
if docker exec argus-web-proxy nginx -t >/dev/null 2>&1; then
  docker exec argus-web-proxy nginx -s reload >/dev/null 2>&1 || true
fi