argus/deployment/build/templates/scripts/server-install.sh

195 lines
7.4 KiB
Bash
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env bash
# Server-side installer: prepares the compose .env, data directories and
# images, starts the server services and runs the self-check.

# Strict mode: abort on errors, on unset variables and on failing pipelines.
set -o errexit -o nounset -o pipefail

# Absolute directory that holds this script.
SCRIPT_DIR="$(dirname "${BASH_SOURCE[0]}")"
SCRIPT_DIR="$(cd "$SCRIPT_DIR" && pwd)"

# Version root of the package: the parent of the scripts directory.
PKG_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"

# Project name handed to compose via -p.
PROJECT_NAME='argus-sys'
# Print an informational message on stdout, prefixed with a blue [INSTALL] tag.
# Arguments: the message words; they are joined with spaces.
log() {
  local tag="\033[0;34m[INSTALL]\033[0m"
  echo -e "${tag} $*"
}
# Print an error message on stderr, prefixed with a red [ERROR ] tag.
# Arguments: the message words; they are joined with spaces.
err() {
  local tag="\033[0;31m[ERROR ]\033[0m"
  echo -e "${tag} $*" >&2
}
# Ensure a command is available on PATH.
# Arguments: $1 - command name to look up.
# Exits the script with status 1 (after reporting via err) when it is missing.
require() {
  if ! command -v "$1" >/dev/null 2>&1; then
    err "missing command: $1"
    exit 1
  fi
}
# docker itself is mandatory.
require docker

# Compose command as an array: default to the "docker compose" plugin and
# switch to the standalone docker-compose binary when the plugin does not answer.
COMPOSE=(docker compose)
if ! docker compose version >/dev/null 2>&1; then
  require docker-compose
  COMPOSE=(docker-compose)
fi

# Runtime env file and the template it is created from.
ENV_FILE="${PKG_ROOT}/compose/.env"
ENV_TEMPLATE="${PKG_ROOT}/compose/.env.example"
#######################################
# Pick a TCP port that nothing is currently listening on.
# Arguments:
#   $1 - preferred port; echoed back unchanged when it is not in use
#   $2 - first port of the fallback scan range (default 20000)
#   $3 - last port of the fallback scan range (default 65000)
# Outputs:
#   the chosen port on stdout
# Returns:
#   0 when a port was printed, 1 when the preferred port is busy and the
#   whole fallback range is busy too (nothing is printed then)
#######################################
find_free_port() {
  local prefer="$1"
  local start="${2:-20000}"
  local max="${3:-65000}"
  local busy p

  # Take ONE snapshot of the listening TCP ports instead of forking ss+awk for
  # every candidate. The port is whatever follows the last ':' of the local
  # address column. If ss is unavailable the list stays empty, so every port
  # is treated as free (same outcome as probing port by port).
  busy=" $(ss -ltnH 2>/dev/null \
    | awk '{ n = split($4, part, ":"); printf "%s ", part[n] }' || true)"

  if [[ "$busy" != *" ${prefer} "* ]]; then
    printf '%s\n' "$prefer"
    return 0
  fi

  # p is local so the scan does not clobber a caller's variable.
  for (( p = start; p <= max; p++ )); do
    if [[ "$busy" != *" ${p} "* ]]; then
      printf '%s\n' "$p"
      return 0
    fi
  done
  return 1
}
#######################################
# Create the compose .env from its template and move busy ports elsewhere.
# Globals:
#   ENV_FILE (read, file created/edited), ENV_TEMPLATE (read)
# Behavior:
#   - an existing .env is never touched
#   - a missing template is fatal (exit 1)
#   - for each known port key whose value is a plain number, the value is
#     kept when find_free_port reports it free, otherwise it is replaced
#   - keys that are absent from the file or hold a non-numeric value are
#     skipped instead of aborting the script
#   - every port kept or handed out in this run is remembered, so two keys
#     never end up with the same replacement port
#######################################
prepare_env() {
  if [[ -f "$ENV_FILE" ]]; then
    log ".env exists, keep as-is"
    return
  fi
  [[ -f "$ENV_TEMPLATE" ]] || { err "missing $ENV_TEMPLATE"; exit 1; }
  cp "$ENV_TEMPLATE" "$ENV_FILE"

  local -a port_keys=(
    MASTER_PORT ES_HTTP_PORT KIBANA_PORT NODE_A_PORT NODE_B_PORT
    PROMETHEUS_PORT GRAFANA_PORT ALERTMANAGER_PORT
    WEB_PROXY_PORT_8080 WEB_PROXY_PORT_8081 WEB_PROXY_PORT_8082
    WEB_PROXY_PORT_8083 WEB_PROXY_PORT_8084 WEB_PROXY_PORT_8085
    FTP_PORT FTP_DATA_PORT
  )
  local key val new prefer
  local scan_from=20000 # where the next fallback scan starts
  local claimed=" "     # space-delimited ports already kept or handed out

  # auto-assign ports if busy
  for key in "${port_keys[@]}"; do
    # sed (unlike grep) exits 0 on "no match", so a missing key cannot trip
    # errexit/pipefail; the last definition in the file wins.
    val="$(sed -n "s/^${key}=//p" "$ENV_FILE" | tail -n 1)"
    if [[ ! "$val" =~ ^[0-9]+$ ]]; then
      continue
    fi

    # Ask for a port until we get one that no earlier key already owns.
    # Ports handed out in this run are not listening yet, so find_free_port
    # alone cannot know they are taken.
    prefer="$val"
    while :; do
      new="$(find_free_port "$prefer" "$scan_from")" || new=""
      if [[ -z "$new" || "$claimed" != *" ${new} "* ]]; then
        break
      fi
      if (( 10#$new >= scan_from )); then
        scan_from=$(( 10#$new + 1 ))
      fi
      prefer="$scan_from"
    done

    if [[ -z "$new" ]]; then
      # No free port could be determined: leave the template value untouched.
      claimed+="${val} "
      continue
    fi

    claimed+="${new} "
    if [[ "$new" != "$val" ]]; then
      sed -i "s/^${key}=.*/${key}=${new}/" "$ENV_FILE"
      log "port ${key} busy -> ${new}"
      if (( 10#$new >= scan_from )); then
        scan_from=$(( 10#$new + 1 ))
      fi
    fi
  done
}
#######################################
# When running as a non-root user: warn that ownership will not be adjusted
# and create the runtime directory tree under $PKG_ROOT/private/argus.
# Globals:
#   EUID, PKG_ROOT, SCRIPT_DIR (all read)
# NOTE(review): when run as root this function does nothing at all;
# presumably server-prepare-dirs.sh covers that case - confirm.
#######################################
prepare_data_dirs() {
  [[ $EUID -ne 0 ]] || return 0

  local warn="\033[1;33m[WARN]\033[0m"
  echo -e "${warn} running as non-root: will not chown data dirs."
  echo -e "${warn} If you hit Permission denied, run: sudo $SCRIPT_DIR/server-prepare-dirs.sh"

  # still ensure basic directories exist (no chown)
  local base="$PKG_ROOT/private/argus"
  local rel
  local -a wanted=()
  for rel in \
    etc \
    metric/prometheus \
    metric/prometheus/data \
    metric/prometheus/rules \
    metric/grafana \
    metric/grafana/data \
    metric/grafana/logs \
    metric/grafana/plugins \
    metric/grafana/provisioning/datasources \
    metric/grafana/provisioning/dashboards \
    metric/grafana/data/sessions \
    metric/grafana/data/dashboards \
    alert/alertmanager \
    metric/ftp/share; do
    wanted+=("$base/$rel")
  done
  # One mkdir call for the whole list.
  mkdir -p "${wanted[@]}"
}
#######################################
# Load the bundled container images into docker.
# Globals:
#   PKG_ROOT (read)
# Exits with status 1 when images/all-images.tar.gz is missing.
# The output of `docker load` is discarded.
#######################################
load_images() {
  local archive="${PKG_ROOT}/images/all-images.tar.gz"
  if [[ ! -f "$archive" ]]; then
    err "missing images tar: $archive"
    exit 1
  fi
  log "loading images from ${archive##*/} (may take minutes)"
  # Decompress on the fly and stream the tarball into docker.
  gunzip -c "$archive" | docker load >/dev/null
}
#######################################
# Start the server-side services with compose.
# Globals:
#   PKG_ROOT, PROJECT_NAME, COMPOSE (all read)
# Behavior:
#   - writes compose/docker-compose.os-compat.override.yml when it does not
#     exist yet; an existing override file is left untouched
#   - runs "compose up -d" from the compose directory with the base file plus
#     the override, limited to the server-side service list
#######################################
bring_up() {
  log "starting services via compose"
  local ov_name="docker-compose.os-compat.override.yml"
  local ov="$PKG_ROOT/compose/$ov_name"

  if [[ ! -f "$ov" ]]; then
    # The heredoc body is literal (quoted delimiter) and must keep its YAML
    # nesting: every service sits under the top-level "services:" key.
    cat > "$ov" <<'YAML'
services:
  bind:
    security_opt: ["label=disable"]
    userns_mode: "host"
    tmpfs:
      - /run/named
  master:
    security_opt: ["label=disable"]
    userns_mode: "host"
  es:
    security_opt: ["label=disable"]
    userns_mode: "host"
  kibana:
    security_opt: ["label=disable"]
    userns_mode: "host"
  ftp:
    security_opt: ["label=disable"]
    userns_mode: "host"
  prometheus:
    security_opt: ["label=disable"]
    userns_mode: "host"
  grafana:
    security_opt: ["label=disable"]
    userns_mode: "host"
  alertmanager:
    security_opt: ["label=disable"]
    userns_mode: "host"
    # ensure runtime path matches container expectation
    volumes:
      - ../private/argus/etc:/private/argus/etc
      - ../private/argus/alert/alertmanager:/alertmanager
  web-frontend:
    security_opt: ["label=disable"]
    userns_mode: "host"
  web-proxy:
    security_opt: ["label=disable"]
    userns_mode: "host"
YAML
    log "generated OS-compat override: $ov_name"
  fi

  # Start only the server-side components, so the test nodes
  # (node-a/node-b/test-node/test-gpu-node) are not brought up by accident.
  local services=(bind master es kibana ftp prometheus grafana alertmanager web-frontend web-proxy)
  log "services: ${services[*]}"

  # Subshell keeps the cd local; the -f paths are relative to compose/.
  (
    cd "$PKG_ROOT/compose" \
      && "${COMPOSE[@]}" -p "$PROJECT_NAME" \
        -f docker-compose.yml -f "$ov_name" \
        up -d "${services[@]}"
  )
}
# Succeeds when a running container is named exactly $1.
# The name list is captured before matching so that an early-exiting grep can
# never make `docker ps` fail the pipeline under pipefail.
_dns_container_running() {
  local names
  names="$(docker ps --format '{{.Names}}')" || return 1
  grep -Fqx -- "$1" <<<"$names"
}

#######################################
# Best-effort DNS wiring after the services are up.
# Globals:
#   PKG_ROOT (read)
# Steps:
#   1) make sure the shared dns.conf exists (fallback content 172.31.0.2)
#   2) wait up to ~10s for update-dns.sh to show up in the shared etc dir
#   3) run update-dns.sh inside each running service container
#   4) wait up to 15s for the per-service hint files
#   5) reload bind
#   6) restart web-proxy once
# Every docker call is best-effort: a failure is tolerated and never aborts
# the installer.
#######################################
dns_bootstrap() {
  log "DNS bootstrap: initializing shared dns.conf and container resolv.conf"
  local etc_dir="$PKG_ROOT/private/argus/etc"
  mkdir -p "$etc_dir"

  # 1) ensure dns.conf exists (fallback to bind IP 172.31.0.2)
  if [[ ! -s "$etc_dir/dns.conf" ]]; then
    # Group the write so 2>/dev/null also hides a failure to open the file.
    if { echo "172.31.0.2" > "$etc_dir/dns.conf"; } 2>/dev/null; then
      log "wrote fallback dns.conf with 172.31.0.2"
    elif _dns_container_running argus-bind-sys; then
      # host-side write denied (ownership 1000:1000); write via bind container instead
      if docker exec argus-bind-sys sh -lc 'echo 172.31.0.2 > /private/argus/etc/dns.conf && chmod 644 /private/argus/etc/dns.conf'; then
        log "fallback dns.conf written via bind container"
      else
        log "could not write fallback dns.conf via bind container; continuing"
      fi
    else
      log "bind not ready; skip writing fallback dns.conf"
    fi
  fi

  # 2) wait briefly for bind to copy update-dns.sh into shared etc (bind startup.sh does this)
  # Counters use x=$((x+1)): a bare ((x++)) returns status 1 when x is 0 and
  # would terminate the script under `set -e`.
  local tries=0
  while [[ ! -x "$etc_dir/update-dns.sh" ]] && (( tries < 20 )); do
    sleep 0.5
    tries=$(( tries + 1 ))
  done
  if [[ ! -x "$etc_dir/update-dns.sh" ]]; then
    log "update-dns.sh not present yet; continuing with existing resolv.conf"
  fi

  # 3) run update-dns.sh inside key containers so /etc/resolv.conf points to bind
  local c
  for c in argus-master-sys argus-es-sys argus-kibana-sys argus-grafana argus-prometheus argus-ftp argus-web-frontend argus-web-proxy argus-alertmanager; do
    if _dns_container_running "$c"; then
      docker exec "$c" sh -lc 'test -x /private/argus/etc/update-dns.sh && /private/argus/etc/update-dns.sh || true' >/dev/null 2>&1 || true
    fi
  done

  # 4) wait for service A-record hint files generated by services (best-effort)
  local need=(es.log.argus.com kibana.log.argus.com master.argus.com grafana.metric.argus.com prom.metric.argus.com alertmanager.alert.argus.com)
  local waited=0 missing f
  while (( waited < 15 )); do
    missing=0
    for f in "${need[@]}"; do
      if [[ ! -s "$etc_dir/$f" ]]; then
        missing=1
        break
      fi
    done
    if (( missing == 0 )); then
      break
    fi
    sleep 1
    waited=$(( waited + 1 ))
  done

  # 5) reload bind zone (script uses supervisor to restart bind9)
  if _dns_container_running argus-bind-sys; then
    docker exec argus-bind-sys sh -lc '/usr/local/bin/reload-bind9.sh' >/dev/null 2>&1 || true
  fi

  # 6) restart web-proxy once to re-render nginx resolver with latest dns.conf
  if _dns_container_running argus-web-proxy; then
    docker restart argus-web-proxy >/dev/null 2>&1 || true
  fi
}
#######################################
# Run the packaged self-check script with bash.
# Globals:
#   PKG_ROOT (read)
# Exits the installer with status 1 when the self-check reports failure.
#######################################
selfcheck() {
  log "running selfcheck"
  local checker="${PKG_ROOT}/scripts/server-selfcheck.sh"
  if ! bash "$checker"; then
    err "selfcheck failed"
    exit 1
  fi
}
#######################################
# Installer entry point: runs every installation stage in order, then
# reports completion.
# Globals:
#   PKG_ROOT (read)
#######################################
main() {
  local stage
  for stage in \
    prepare_env \
    prepare_data_dirs \
    load_images \
    bring_up \
    dns_bootstrap \
    selfcheck; do
    "$stage"
  done
  log "install completed. See logs in $PKG_ROOT/logs/"
}
# Run the installer, forwarding the script's command-line arguments to main.
main "$@"