#!/usr/bin/env bash
#
# Server-side installer for the argus-sys compose stack.
#
# Steps performed by main(): prepare compose/.env from its template, create
# the data directories, load the bundled image archive, start the server
# services through docker compose on a Swarm overlay network, bootstrap DNS
# for the containers, and finally run the self-check script.
#
# Optional environment:
#   OVERLAY_NET_NAME  name of the attachable overlay network
#                     (default: argus-sys-net)

set -euo pipefail

# Absolute directory containing this script. CDPATH is cleared for the cd so
# that a relative dirname (e.g. "scripts") can neither be resolved against an
# inherited CDPATH entry nor make cd echo the directory into the substitution.
SCRIPT_DIR="$(CDPATH='' cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
PKG_ROOT="$(CDPATH='' cd -- "$SCRIPT_DIR/.." && pwd)" # version root

# Compose project name passed via `-p` when starting the services.
PROJECT_NAME="argus-sys"

# Print an informational message to stdout, prefixed with a blue "[INSTALL]"
# tag.
# Arguments: $* - message words (joined with the first character of IFS).
# The message goes through %s so backslashes in paths or patterns are printed
# literally; only the colour escapes in the format string are interpreted.
log() { printf '\033[0;34m[INSTALL]\033[0m %s\n' "$*"; }
# Print an error message to stderr, prefixed with a red "[ERROR ]" tag.
# Arguments: $* - message words (joined with the first character of IFS).
# The message goes through %s so backslashes in paths or patterns are printed
# literally; only the colour escapes in the format string are interpreted.
err() { printf '\033[0;31m[ERROR ]\033[0m %s\n' "$*" >&2; }

# Abort the script unless the given command can be resolved by the shell.
# Arguments: $1 - command name to look up.
# Exits with status 1 (after reporting through err) when it is missing.
require() {
  if ! command -v "$1" >/dev/null 2>&1; then
    err "missing command: $1"
    exit 1
  fi
}

require docker

# Pick the compose front-end: use the `docker compose` subcommand when it
# responds, otherwise require the standalone `docker-compose` binary.
# COMPOSE is an array so it can be expanded as a command with "${COMPOSE[@]}".
if docker compose version >/dev/null 2>&1; then
  COMPOSE=(docker compose)
else
  require docker-compose
  COMPOSE=(docker-compose)
fi

# Compose environment file and the template it is copied from.
ENV_FILE="$PKG_ROOT/compose/.env"
ENV_TEMPLATE="${ENV_FILE}.example"

#######################################
# Print a TCP port that has no listening socket according to `ss -ltn`.
# Arguments:
#   $1 - preferred port; printed as-is when nothing listens on it
#   $2 - first port of the fallback scan range (default 20000)
#   $3 - last port of the fallback scan range (default 65000)
# Outputs:  the chosen port on stdout
# Returns:  0 when a port was printed, 1 when every candidate is in use
# Notes:    when `ss` is unavailable or fails, no port is considered in use,
#           so the preferred port is returned.
#######################################
find_free_port() {
  local prefer="$1"
  local start="${2:-20000}"
  local max="${3:-65000}"
  local used p

  # Take one snapshot of the listening ports instead of forking ss+awk for
  # every candidate. Column 4 is "Local Address:Port"; the port is whatever
  # follows the last colon (covers IPv4, "[::]:N" and "*:N" forms).
  used="$(ss -ltnH 2>/dev/null \
    | awk '{ n = split($4, part, ":"); print part[n] }' || true)"
  # Space-pad the list so each port can be matched as a whole word.
  used=" ${used//$'\n'/ } "

  if [[ "$used" != *" $prefer "* ]]; then
    echo "$prefer"
    return 0
  fi

  for ((p = start; p <= max; p++)); do
    if [[ "$used" != *" $p "* ]]; then
      echo "$p"
      return 0
    fi
  done
  return 1
}

#######################################
# Make sure the compose .env file exists.
# An existing file is left untouched; otherwise it is copied from the
# template. Exits with status 1 when the template is missing.
# Globals: ENV_FILE (read), ENV_TEMPLATE (read)
#######################################
prepare_env() {
  if [[ -f "$ENV_FILE" ]]; then
    log ".env exists, keep as-is"
    return
  fi

  if [[ ! -f "$ENV_TEMPLATE" ]]; then
    err "missing $ENV_TEMPLATE"
    exit 1
  fi

  cp "$ENV_TEMPLATE" "$ENV_FILE"
  # In overlay mode the template ports are kept as-is (no automatic
  # rewriting), so different services are never assigned the same new port.
}

#######################################
# Create the data directory tree under $PKG_ROOT/private/argus when the
# script runs as a non-root user, then open its permissions to everyone.
# Globals: PKG_ROOT (read), SCRIPT_DIR (read, only in the warning text)
# Outputs: two [WARN] lines on stdout in the non-root case
#######################################
prepare_data_dirs() {
  # As root this function does nothing.
  # NOTE(review): presumably server-prepare-dirs.sh covers the root case
  # (it is what the warning below recommends) - confirm.
  if [[ $EUID -eq 0 ]]; then
    return 0
  fi

  echo -e "\033[1;33m[WARN]\033[0m running as non-root: will not chown data dirs."
  echo -e "\033[1;33m[WARN]\033[0m If you hit Permission denied, run: sudo $SCRIPT_DIR/server-prepare-dirs.sh"

  # still ensure basic directories exist (no chown)
  local data_root="$PKG_ROOT/private/argus"
  local -a relative_dirs=(
    etc
    metric/prometheus
    metric/prometheus/data
    metric/prometheus/rules
    metric/grafana
    metric/grafana/data
    metric/grafana/logs
    metric/grafana/plugins
    metric/grafana/provisioning/datasources
    metric/grafana/provisioning/dashboards
    metric/grafana/data/sessions
    metric/grafana/data/dashboards
    alert/alertmanager
    metric/ftp/share
  )
  local -a wanted=()
  local rel
  for rel in "${relative_dirs[@]}"; do
    wanted+=("$data_root/$rel")
  done
  mkdir -p "${wanted[@]}"

  # non-root: relax permissions so a container UID mismatch does not block
  # writes; failures here are deliberately ignored (best effort).
  chmod -R a+rwx "$data_root" 2>/dev/null || true
}

#######################################
# Verify Docker Swarm is active on this host and make sure the attachable
# overlay network exists, creating it when `docker network inspect` fails.
# Globals: OVERLAY_NET_NAME (read, optional; default "argus-sys-net")
# Exits with status 1 when the local swarm node state is not "active".
#######################################
ensure_swarm_and_overlay() {
  local overlay="${OVERLAY_NET_NAME:-argus-sys-net}"
  local swarm_state

  # An unreachable daemon yields an empty state, handled as "not active".
  swarm_state="$(docker info --format '{{.Swarm.LocalNodeState}}' 2>/dev/null || true)"
  if [[ "$swarm_state" != "active" ]]; then
    err "Docker Swarm is not active. On this host run:"
    err " docker swarm init --advertise-addr <this_host_ip>"
    exit 1
  fi

  # Nothing more to do when the network is already there.
  if docker network inspect "$overlay" >/dev/null 2>&1; then
    return 0
  fi

  log "creating attachable overlay network: $overlay"
  docker network create --driver overlay --attachable "$overlay" >/dev/null
}

#######################################
# Seed $PKG_ROOT/private/argus/etc/dns.conf with this host's primary IP when
# the file is missing or empty. An existing non-empty file is left untouched.
# The IP is taken from the "src" field of `ip route get 1.1.1.1`, falling
# back to the first address printed by `hostname -I`.
# Globals: PKG_ROOT (read)
# Returns: 0 even when no IP could be determined (an error is reported and
#          the operator is asked to edit the file manually).
#######################################
bootstrap_dns_conf() {
  local etc_dir="$PKG_ROOT/private/argus/etc"
  local dns_file="$etc_dir/dns.conf"
  local host_ip=""

  mkdir -p "$etc_dir"
  if [[ -s "$dns_file" ]]; then
    return 0
  fi

  # detect host primary IP
  # `|| true`: under `set -e -o pipefail` a failing or missing `ip` would
  # otherwise abort the whole script before the fallback below can run.
  # The address is located by the "src" keyword rather than a fixed column,
  # because the column shifts when the route has no "via" hop.
  host_ip="$(ip route get 1.1.1.1 2>/dev/null \
    | awk '{ for (i = 1; i < NF; i++) if ($i == "src") { print $(i + 1); exit } }' \
    || true)"

  if [[ -z "$host_ip" ]]; then
    host_ip="$(hostname -I 2>/dev/null | awk '{ print $1; exit }' || true)"
  fi

  if [[ -n "$host_ip" ]]; then
    echo "$host_ip" > "$dns_file"
    log "wrote initial dns.conf with host IP: $host_ip"
  else
    err "failed to determine host IP for dns.conf; please edit $dns_file manually"
  fi
}

#######################################
# Load the bundled image archive into the local Docker daemon.
# Globals: PKG_ROOT (read)
# Exits with status 1 when images/all-images.tar.gz is missing.
#######################################
load_images() {
  local archive="$PKG_ROOT/images/all-images.tar.gz"

  if [[ ! -f "$archive" ]]; then
    err "missing images tar: $archive"
    exit 1
  fi

  log "loading images from ${archive##*/} (may take minutes)"
  # Decompress on the fly and stream into docker; its listing is discarded.
  gunzip -c "$archive" | docker load >/dev/null
}

#######################################
# Start the server-side services with docker compose.
# Ensures the swarm overlay network and an initial dns.conf exist, generates
# the OS-compat compose override file when it is absent (an existing file is
# never overwritten), then runs `up -d` for the server services only.
# Globals: PKG_ROOT (read), PROJECT_NAME (read), COMPOSE (read, array)
#######################################
bring_up() {
  log "starting services via compose"
  ensure_swarm_and_overlay
  bootstrap_dns_conf

  local ov="$PKG_ROOT/compose/docker-compose.os-compat.override.yml"
  local ov_name
  ov_name="$(basename "$ov")"

  if [[ ! -f "$ov" ]]; then
    # Quoted delimiter: the YAML below is written verbatim, no expansion.
    cat > "$ov" <<'YAML'
services:
  bind:
    security_opt: ["label=disable"]
    userns_mode: "host"
    tmpfs:
      - /run/named
  master:
    security_opt: ["label=disable"]
    userns_mode: "host"
  es:
    security_opt: ["label=disable"]
    userns_mode: "host"
  kibana:
    security_opt: ["label=disable"]
    userns_mode: "host"
  ftp:
    security_opt: ["label=disable"]
    userns_mode: "host"
  prometheus:
    security_opt: ["label=disable"]
    userns_mode: "host"
  grafana:
    security_opt: ["label=disable"]
    userns_mode: "host"
  alertmanager:
    security_opt: ["label=disable"]
    userns_mode: "host"
    # ensure runtime path matches container expectation
    volumes:
      - ../private/argus/etc:/private/argus/etc
      - ../private/argus/alert/alertmanager:/alertmanager
  web-frontend:
    security_opt: ["label=disable"]
    userns_mode: "host"
  web-proxy:
    security_opt: ["label=disable"]
    userns_mode: "host"
YAML
    log "generated OS-compat override: $ov_name"
  fi

  # Start only the server-side components, so the test nodes
  # (node-a/node-b/test-node/test-gpu-node) are not started by accident.
  local services=(bind master es kibana ftp prometheus grafana alertmanager web-frontend web-proxy)
  log "services: ${services[*]}"

  # Run from the compose directory in a subshell so the relative -f paths
  # resolve there and the caller's working directory is left unchanged.
  (
    cd "$PKG_ROOT/compose" \
      && "${COMPOSE[@]}" -p "$PROJECT_NAME" \
        -f docker-compose.yml -f "$ov_name" \
        up -d "${services[@]}"
  )
}

# Succeeds when a running container has exactly the given name.
# The name list is captured first and matched afterwards, so an early-exiting
# grep cannot make `docker ps` fail with SIGPIPE under `set -o pipefail`.
_argus_container_running() {
  local names
  names="$(docker ps --format '{{.Names}}')" || return 1
  grep -qxF -- "$1" <<<"$names"
}

#######################################
# Best-effort DNS bootstrap after the services were started.
#   1) make sure the shared dns.conf exists (fallback: bind IP 172.31.0.2)
#   2) wait up to ~10s for update-dns.sh to appear in the shared etc dir
#   3) run update-dns.sh inside each running service container
#   4) wait up to ~15s for the per-service record hint files
#   5) reload the bind zone
#   6) restart web-proxy once
# Failures of the docker exec/restart calls are deliberately ignored.
# Globals: PKG_ROOT (read)
#######################################
dns_bootstrap() {
  log "DNS bootstrap: initializing shared dns.conf and container resolv.conf"

  local etc_dir="$PKG_ROOT/private/argus/etc"
  local dns_file="$etc_dir/dns.conf"
  local updater="$etc_dir/update-dns.sh"
  mkdir -p "$etc_dir"

  # 1) ensure dns.conf exists (fallback to bind IP 172.31.0.2)
  if [[ ! -s "$dns_file" ]]; then
    # Group the redirect so a "Permission denied" from opening the file is
    # silenced as well.
    if { echo "172.31.0.2" > "$dns_file"; } 2>/dev/null; then
      log "wrote fallback dns.conf with 172.31.0.2"
    elif _argus_container_running argus-bind-sys; then
      # host-side write denied (ownership 1000:1000); write via bind
      # container instead
      if docker exec argus-bind-sys sh -lc 'echo 172.31.0.2 > /private/argus/etc/dns.conf && chmod 644 /private/argus/etc/dns.conf'; then
        log "fallback dns.conf written via bind container"
      else
        err "failed to write fallback dns.conf via bind container"
      fi
    else
      log "bind not ready; skip writing fallback dns.conf"
    fi
  fi

  # 2) wait briefly for bind to copy update-dns.sh into shared etc
  #    (per the original note, bind's startup.sh does this)
  # Counters use i=$((i + 1)): a bare ((i++)) returns status 1 when i is 0,
  # which would terminate the script under `set -e`.
  local i=0
  while [[ ! -x "$updater" ]] && (( i < 20 )); do
    sleep 0.5
    i=$((i + 1))
  done
  if [[ ! -x "$updater" ]]; then
    log "update-dns.sh not present yet; continuing with existing resolv.conf"
  fi

  # 3) run update-dns.sh inside key containers so /etc/resolv.conf points
  #    to bind
  local c
  for c in argus-master-sys argus-es-sys argus-kibana-sys argus-grafana argus-prometheus argus-ftp argus-web-frontend argus-web-proxy argus-alertmanager; do
    if _argus_container_running "$c"; then
      docker exec "$c" sh -lc 'test -x /private/argus/etc/update-dns.sh && /private/argus/etc/update-dns.sh || true' >/dev/null 2>&1 || true
    fi
  done

  # 4) wait for service A-record hint files generated by services
  #    (best-effort: continue after the timeout even if some are missing)
  local need=(es.log.argus.com kibana.log.argus.com master.argus.com grafana.metric.argus.com prom.metric.argus.com alertmanager.alert.argus.com)
  local waited=0
  local missing=1
  local f
  while (( waited < 15 )); do
    missing=0
    for f in "${need[@]}"; do
      if [[ ! -s "$etc_dir/$f" ]]; then
        missing=1
        break
      fi
    done
    if (( missing == 0 )); then
      break
    fi
    sleep 1
    waited=$((waited + 1))
  done

  # 5) reload bind zone (per the original note, the script uses supervisor
  #    to restart bind9)
  if _argus_container_running argus-bind-sys; then
    docker exec argus-bind-sys sh -lc '/usr/local/bin/reload-bind9.sh' >/dev/null 2>&1 || true
  fi

  # 6) restart web-proxy once to re-render nginx resolver with latest
  #    dns.conf
  if _argus_container_running argus-web-proxy; then
    docker restart argus-web-proxy >/dev/null 2>&1 || true
  fi
}

#######################################
# Run scripts/server-selfcheck.sh with bash.
# Globals: PKG_ROOT (read)
# Exits with status 1 when the self-check script returns non-zero.
#######################################
selfcheck() {
  log "running selfcheck"
  if ! bash "$PKG_ROOT/scripts/server-selfcheck.sh"; then
    err "selfcheck failed"
    exit 1
  fi
}

#######################################
# Installer entry point: create the logs directory, then run every install
# step in order. Under `set -e` a failing step stops the run.
# Globals:   PKG_ROOT (read)
# Arguments: accepted but not used.
#######################################
main() {
  mkdir -p "$PKG_ROOT/logs"

  local step
  for step in \
    prepare_env \
    prepare_data_dirs \
    load_images \
    bring_up \
    dns_bootstrap \
    selfcheck; do
    "$step"
  done

  log "install completed. See logs in $PKG_ROOT/logs/"
}

main "$@"