#!/usr/bin/env bash
#
# Installer script: the functions below prepare the compose .env file and data
# directories, load the bundled Docker images, start the services through
# Docker Compose, bootstrap DNS and finally run a self-check.
#
# Strict mode: abort on unhandled errors, on unset variables and on failures
# anywhere inside a pipeline.
set -euo pipefail

# Absolute path of the directory that contains this script.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Parent directory of SCRIPT_DIR; every package path below is derived from it.
PKG_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" # version root

# Project name handed to compose via `-p`.
PROJECT_NAME="argus-sys"

# Print an informational message on stdout, prefixed with a blue "[INSTALL]" tag.
# All arguments are joined into one message. The message is passed through %s so
# that backslashes inside it (for example in paths) are printed literally
# instead of being interpreted as escape sequences.
log() { printf '\033[0;34m[INSTALL]\033[0m %s\n' "$*"; }
# Print an error message on stderr, prefixed with a red "[ERROR ]" tag.
# All arguments are joined into one message. The message is passed through %s so
# that backslashes inside it are printed literally instead of being interpreted
# as escape sequences.
err() { printf '\033[0;31m[ERROR ]\033[0m %s\n' "$*" >&2; }

# Abort the script with exit status 1 (after reporting through err) unless the
# command named by $1 can be resolved by the shell.
require() {
  if ! command -v "$1" >/dev/null 2>&1; then
    err "missing command: $1"
    exit 1
  fi
}
# Docker itself is mandatory; stop right away when it is not installed.
require docker

# Pick the compose front end: use the "docker compose" subcommand when it
# responds to `version`, otherwise require the standalone "docker-compose"
# binary. COMPOSE is an array so it can be expanded as "${COMPOSE[@]}".
if docker compose version >/dev/null 2>&1; then
  COMPOSE=(docker compose)
else
  require docker-compose
  COMPOSE=(docker-compose)
fi

# Environment file inside the compose directory, and the template it is
# created from when it does not exist yet.
ENV_FILE="$PKG_ROOT/compose/.env"
ENV_TEMPLATE="$PKG_ROOT/compose/.env.example"

#######################################
# Pick a TCP port that currently has no listener.
# Arguments:
#   $1 - preferred port; printed as-is when nothing listens on it
#   $2 - first port of the fallback range (default 20000)
#   $3 - last port of the fallback range, inclusive (default 65000)
# Outputs:
#   the chosen port on stdout
# Returns:
#   0 when a port was printed, 1 when the preferred port and the whole
#   fallback range are in use
#######################################
find_free_port() {
  local prefer="$1"
  local start="${2:-20000}"
  local max="${3:-65000}"
  local listening port

  # Take ONE snapshot of the listening TCP ports instead of spawning ss/awk for
  # every candidate. Field 4 of `ss -ltnH` is "address:port"; the text after
  # the last ':' is the port. The result is a space-delimited list such as
  # " 22 8080 " so membership is a plain substring test.
  # If ss is unavailable the list stays empty and every port counts as free.
  listening="$(ss -ltnH 2>/dev/null \
    | awk 'NF >= 4 { n = split($4, part, ":"); printf " %s", part[n] }')" || true
  listening="${listening} "

  if [[ "$listening" != *" ${prefer} "* ]]; then
    echo "$prefer"
    return 0
  fi

  for ((port = start; port <= max; port++)); do
    if [[ "$listening" != *" ${port} "* ]]; then
      echo "$port"
      return 0
    fi
  done
  return 1
}

#######################################
# Create the compose .env file from its template (first run only) and move any
# configured port that is already in use to a free one.
# Globals:
#   ENV_FILE (read; the file is created and edited), ENV_TEMPLATE (read)
# Behaviour:
#   - An existing ENV_FILE is never touched.
#   - A missing ENV_TEMPLATE is fatal (exit 1).
#   - Keys that are absent from the file, or whose value is not a plain
#     number, are skipped.
#   - Each fallback search starts just above the previously assigned fallback
#     port. A replacement port is not bound yet, so without this every busy
#     key would be given the same "first free" port.
#######################################
prepare_env() {
  local key val new
  local next_start=20000

  if [[ -f "$ENV_FILE" ]]; then
    log ".env exists, keep as-is"
    return
  fi
  [[ -f "$ENV_TEMPLATE" ]] || { err "missing $ENV_TEMPLATE"; exit 1; }
  cp "$ENV_TEMPLATE" "$ENV_FILE"

  # auto-assign ports if busy
  for key in MASTER_PORT ES_HTTP_PORT KIBANA_PORT NODE_A_PORT NODE_B_PORT PROMETHEUS_PORT GRAFANA_PORT ALERTMANAGER_PORT \
    WEB_PROXY_PORT_8080 WEB_PROXY_PORT_8081 WEB_PROXY_PORT_8082 WEB_PROXY_PORT_8083 WEB_PROXY_PORT_8084 WEB_PROXY_PORT_8085 \
    FTP_PORT FTP_DATA_PORT; do
    # `|| true`: grep exits 1 when the key is absent, which under pipefail +
    # errexit would otherwise terminate the whole installer without a message.
    val="$(grep -E "^${key}=" "$ENV_FILE" | tail -1 | cut -d= -f2)" || true
    [[ "$val" =~ ^[0-9]+$ ]] || continue

    # Best effort: when no free port exists the configured value is kept.
    new="$(find_free_port "$val" "$next_start")" || true
    if [[ -n "${new:-}" && "$new" != "$val" ]]; then
      sed -i "s/^${key}=.*/${key}=${new}/" "$ENV_FILE"
      log "port ${key} busy -> ${new}"
      if [[ "$new" =~ ^[0-9]+$ ]]; then
        next_start=$((new + 1))
      fi
    fi
  done
}

#######################################
# Make sure the runtime data directories exist when the installer is NOT run
# as root. Ownership is left untouched; two warnings on stdout point at
# server-prepare-dirs.sh for fixing permissions. When the effective UID is 0
# this function does nothing.
# Globals:
#   EUID, PKG_ROOT, SCRIPT_DIR (all read)
#######################################
prepare_data_dirs() {
  [[ $EUID -ne 0 ]] || return 0

  echo -e "\033[1;33m[WARN]\033[0m running as non-root: will not chown data dirs."
  echo -e "\033[1;33m[WARN]\033[0m If you hit Permission denied, run: sudo $SCRIPT_DIR/server-prepare-dirs.sh"

  # still ensure basic directories exist (no chown)
  local base="$PKG_ROOT/private/argus"
  local -a dirs=(
    "$base/etc"
    "$base/metric/prometheus"
    "$base/metric/prometheus/data"
    "$base/metric/prometheus/rules"
    "$base/metric/grafana"
    "$base/metric/grafana/data"
    "$base/metric/grafana/logs"
    "$base/metric/grafana/plugins"
    "$base/metric/grafana/provisioning/datasources"
    "$base/metric/grafana/provisioning/dashboards"
    "$base/metric/grafana/data/sessions"
    "$base/metric/grafana/data/dashboards"
    "$base/alert/alertmanager"
    "$base/metric/ftp/share"
  )
  mkdir -p "${dirs[@]}"
}

#######################################
# Load the bundled image archive into the local Docker daemon.
# Globals:
#   PKG_ROOT (read)
# Exits with status 1 when $PKG_ROOT/images/all-images.tar.gz is missing.
# The archive is decompressed with gunzip and streamed into `docker load`,
# whose stdout is discarded.
#######################################
load_images() {
  local archive="$PKG_ROOT/images/all-images.tar.gz"

  if [[ ! -f "$archive" ]]; then
    err "missing images tar: $archive"
    exit 1
  fi

  log "loading images from $(basename "$archive") (may take minutes)"
  gunzip -c "$archive" | docker load >/dev/null
}

#######################################
# Start the server-side services with compose.
# Globals:
#   PKG_ROOT, PROJECT_NAME, COMPOSE (all read)
# Side effects:
#   Writes compose/docker-compose.os-compat.override.yml when it does not
#   exist yet (an existing file is reused unchanged), then runs
#   `compose up -d` from the compose directory with both compose files.
# The override turns off SELinux label separation (label=disable) and uses the
# host user namespace for every service; bind additionally gets a tmpfs at
# /run/named and alertmanager gets two extra bind mounts.
#######################################
bring_up() {
  log "starting services via compose"
  local ov="$PKG_ROOT/compose/docker-compose.os-compat.override.yml"
  if [[ ! -f "$ov" ]]; then
    cat > "$ov" <<'YAML'
services:
  bind:
    security_opt: ["label=disable"]
    userns_mode: "host"
    tmpfs:
      - /run/named
  master:
    security_opt: ["label=disable"]
    userns_mode: "host"
  es:
    security_opt: ["label=disable"]
    userns_mode: "host"
  kibana:
    security_opt: ["label=disable"]
    userns_mode: "host"
  ftp:
    security_opt: ["label=disable"]
    userns_mode: "host"
  prometheus:
    security_opt: ["label=disable"]
    userns_mode: "host"
  grafana:
    security_opt: ["label=disable"]
    userns_mode: "host"
  alertmanager:
    security_opt: ["label=disable"]
    userns_mode: "host"
    # ensure runtime path matches container expectation
    volumes:
      - ../private/argus/etc:/private/argus/etc
      - ../private/argus/alert/alertmanager:/alertmanager
  web-frontend:
    security_opt: ["label=disable"]
    userns_mode: "host"
  web-proxy:
    security_opt: ["label=disable"]
    userns_mode: "host"
YAML
    log "generated OS-compat override: $(basename "$ov")"
  fi

  # Only start the server-side components, so that the test nodes
  # (node-a/node-b/test-node/test-gpu-node) are not started by accident.
  local services=(bind master es kibana ftp prometheus grafana alertmanager web-frontend web-proxy)
  log "services: ${services[*]}"

  # Run in a subshell so the caller's working directory is untouched; the file
  # names are relative because compose is executed inside the compose directory.
  (
    cd "$PKG_ROOT/compose" \
      && "${COMPOSE[@]}" -p "$PROJECT_NAME" \
        -f docker-compose.yml -f "$(basename "$ov")" \
        up -d "${services[@]}"
  )
}

# Return 0 when a running container is named exactly $1.
# grep is run without -q so it consumes all of `docker ps` output; an early
# grep exit could otherwise SIGPIPE docker and fail the pipeline under pipefail.
_container_running() {
  docker ps --format '{{.Names}}' | grep -Fx -- "$1" >/dev/null
}

#######################################
# Best-effort DNS bootstrap after the services have been started.
# Globals:
#   PKG_ROOT (read)
# Steps:
#   1) ensure the shared dns.conf exists (fallback content: 172.31.0.2)
#   2) wait up to ~10s (20 x 0.5s) for update-dns.sh to appear in the shared
#      etc directory
#   3) run update-dns.sh inside every running service container
#   4) wait up to 15s for the per-service hint files
#   5) ask the bind container to reload
#   6) restart web-proxy
# Failures of the docker exec/restart calls in steps 3, 5 and 6 are ignored on
# purpose.
# Counters are advanced with x=$((x + 1)): the original ((x++)) form returns
# status 1 when x is 0, which terminates the script under `set -e`.
#######################################
dns_bootstrap() {
  log "DNS bootstrap: initializing shared dns.conf and container resolv.conf"
  local etc_dir="$PKG_ROOT/private/argus/etc"
  mkdir -p "$etc_dir"

  # 1) ensure dns.conf exists (fallback to bind IP 172.31.0.2)
  if [[ ! -s "$etc_dir/dns.conf" ]]; then
    # The braces make 2>/dev/null cover a failing output redirection as well.
    if { echo "172.31.0.2" > "$etc_dir/dns.conf"; } 2>/dev/null; then
      log "wrote fallback dns.conf with 172.31.0.2"
    elif _container_running argus-bind-sys; then
      # host-side write denied (ownership 1000:1000); write via bind container instead
      if docker exec argus-bind-sys sh -lc 'echo 172.31.0.2 > /private/argus/etc/dns.conf && chmod 644 /private/argus/etc/dns.conf'; then
        log "fallback dns.conf written via bind container"
      else
        log "could not write fallback dns.conf via bind container"
      fi
    else
      log "bind not ready; skip writing fallback dns.conf"
    fi
  fi

  # 2) wait briefly for bind to copy update-dns.sh into shared etc (bind startup.sh does this)
  local i=0
  while [[ ! -x "$etc_dir/update-dns.sh" && $i -lt 20 ]]; do
    sleep 0.5
    i=$((i + 1))
  done
  if [[ ! -x "$etc_dir/update-dns.sh" ]]; then
    log "update-dns.sh not present yet; continuing with existing resolv.conf"
  fi

  # 3) run update-dns.sh inside key containers so /etc/resolv.conf points to bind
  local c
  for c in argus-master-sys argus-es-sys argus-kibana-sys argus-grafana argus-prometheus argus-ftp argus-web-frontend argus-web-proxy argus-alertmanager; do
    if _container_running "$c"; then
      docker exec "$c" sh -lc 'test -x /private/argus/etc/update-dns.sh && /private/argus/etc/update-dns.sh || true' >/dev/null 2>&1 || true
    fi
  done

  # 4) wait for service A-record hint files generated by services (best-effort)
  local need=( es.log.argus.com kibana.log.argus.com master.argus.com grafana.metric.argus.com prom.metric.argus.com alertmanager.alert.argus.com )
  local waited=0
  local missing=1
  local f
  while (( waited < 15 )); do
    missing=0
    for f in "${need[@]}"; do
      if [[ ! -s "$etc_dir/$f" ]]; then
        missing=1
        break
      fi
    done
    if (( missing == 0 )); then
      break
    fi
    sleep 1
    waited=$((waited + 1))
  done

  # 5) reload bind zone (script uses supervisor to restart bind9)
  if _container_running argus-bind-sys; then
    docker exec argus-bind-sys sh -lc '/usr/local/bin/reload-bind9.sh' >/dev/null 2>&1 || true
  fi

  # 6) restart web-proxy once to re-render nginx resolver with latest dns.conf
  if _container_running argus-web-proxy; then
    docker restart argus-web-proxy >/dev/null 2>&1 || true
  fi
}

#######################################
# Run $PKG_ROOT/scripts/server-selfcheck.sh with bash.
# Globals:
#   PKG_ROOT (read)
# Exits with status 1 (after reporting through err) when the script returns a
# non-zero status.
#######################################
selfcheck() {
  log "running selfcheck"
  if ! bash "$PKG_ROOT/scripts/server-selfcheck.sh"; then
    err "selfcheck failed"
    exit 1
  fi
}

#######################################
# Run the installation steps in order: prepare_env, prepare_data_dirs,
# load_images, bring_up, dns_bootstrap, selfcheck, then log a completion
# message. Arguments are accepted but not used.
# Globals:
#   PKG_ROOT (read, for the final message)
#######################################
main() {
  local step
  for step in \
    prepare_env \
    prepare_data_dirs \
    load_images \
    bring_up \
    dns_bootstrap \
    selfcheck; do
    "$step"
  done
  log "install completed. See logs in $PKG_ROOT/logs/"
}

# Script entry point: run main with the command-line arguments passed through.
main "$@"