argus/deployment/build/templates/scripts/server-selfcheck.sh
2025-11-05 09:57:08 +08:00

105 lines
4.4 KiB
Bash
Executable File
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env bash
# Server self-check: probes the deployed services and records the results
# as JSON under "$ROOT/logs".
#
# Strict mode: abort on unhandled errors, on unset variables, and when any
# stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Absolute directory containing this script.
SCRIPT_DIR="$(
  cd "$(dirname "${BASH_SOURCE[0]}")" && pwd
)"
# Parent directory of SCRIPT_DIR; compose/.env and logs/ are resolved from it.
ROOT="$(
  cd "$SCRIPT_DIR/.." && pwd
)"
# Print a progress message to stdout, prefixed with a blue "[CHECK]" tag.
# Arguments: $* - message text (backslash escapes are interpreted).
log() {
  printf '%b\n' "\033[0;34m[CHECK]\033[0m $*"
}
# Print an error message to stderr, prefixed with a red "[ERROR]" tag.
# Arguments: $* - message text (backslash escapes are interpreted).
err() {
  printf '%b\n' "\033[0;31m[ERROR]\033[0m $*" >&2
}
# Load optional overrides (ports, network name, ...) from the compose .env
# file, exporting every variable it defines.
ENV_FILE="$ROOT/compose/.env"
if [[ -f "$ENV_FILE" ]]; then
  set -a
  # A failing .env must not abort the self-check, and it must not leave
  # allexport switched on for the rest of the script either.
  # shellcheck disable=SC1090
  source "$ENV_FILE" || err "failed to source $ENV_FILE (continuing with defaults)"
  set +a
fi
# Poll an HTTP endpoint until curl reports success.
# Arguments:
#   $1 - URL to probe
#   $2 - maximum number of attempts (default 120)
# Outputs: one "[..] waiting ..." progress line on stdout per failed attempt
# Returns: 0 as soon as a probe succeeds, 1 once all attempts are used up
wait_http() {
  local url="$1"
  local attempts=${2:-120}
  local n
  for ((n = 1; n <= attempts; n++)); do
    if curl -fsS "$url" >/dev/null 2>&1; then
      return 0
    fi
    echo "[..] waiting $url ($n/$attempts)"
    sleep 5
  done
  return 1
}
# Print the HTTP status code returned for a URL.
# Arguments: $1 - URL to request
# Outputs:   the three-digit status code on stdout; exactly "000" when curl
#            itself fails (connection refused, DNS error, ...)
# Returns:   0 always
code_for() {
  local code
  # On a transport failure curl's -w already emits "000"; capture the output
  # so the fallback replaces it instead of being appended ("000000").
  code=$(curl -s -o /dev/null -w '%{http_code}' "$1") || code=000
  printf '%s' "${code:-000}"
}
# Print the value of the Access-Control-Allow-Origin response header.
# Arguments: "$@" - passed verbatim to curl (extra -H options and the URL)
# Outputs:   the header value on stdout; nothing if the header is absent
# Returns:   the pipeline status (non-zero when curl fails under pipefail)
header_val() {
  # Header names are case-insensitive and the space after the colon is
  # optional, so split on the first ':' and compare the lower-cased name.
  # tolower() works in every POSIX awk; IGNORECASE is honoured by gawk only.
  curl -s -D - -o /dev/null "$@" |
    awk '
      {
        sub(/\r$/, "")
        sep = index($0, ":")
        if (sep == 0) next
        if (tolower(substr($0, 1, sep - 1)) != "access-control-allow-origin") next
        value = substr($0, sep + 1)
        gsub(/^[ \t]+|[ \t]+$/, "", value)
        print value
      }'
}
# ---------------------------------------------------------------------------
# Self-check body.
# Runs every probe, stores each result as a JSON boolean (true/false) in its
# own *_ok variable, and clears `ok` on any failure. The results are written
# to $LOG_DIR/selfcheck.json (or to /tmp when the logs dir is not writable).
# Exit status: 0 when every check passed, 1 otherwise.
# ---------------------------------------------------------------------------
LOG_DIR="$ROOT/logs"
mkdir -p "$LOG_DIR" || true # best effort; an unwritable dir is handled below
OUT_JSON="$LOG_DIR/selfcheck.json"
tmp=$(mktemp)
# The scratch file survives when the final mv fails; remove it on every exit.
trap 'rm -f -- "$tmp"' EXIT
ok=1

log "checking overlay network"
net_ok=false
# Ask docker for the driver directly rather than grepping the JSON dump.
net_driver=$(docker network inspect --format '{{.Driver}}' "${OVERLAY_NET_NAME:-argus-sys-net}" 2>/dev/null) || net_driver=""
if [[ "$net_driver" == "overlay" ]]; then net_ok=true; else ok=0; fi

log "checking Elasticsearch (via domain inside web-proxy)"
es_ok=false
# The URL is single-quoted for the inner shell: an unquoted '&' would put curl
# in the background and make this probe succeed unconditionally.
if docker exec argus-web-proxy sh -lc "curl -fsS 'http://es.log.argus.com:9200/_cluster/health?wait_for_status=yellow&timeout=1s'" >/dev/null 2>&1; then
  es_ok=true
else
  ok=0
fi

log "checking Kibana (via domain inside web-proxy)"
kb_code=$(docker exec argus-web-proxy sh -lc "curl -s -o /dev/null -w '%{http_code}' http://kibana.log.argus.com:5601/api/status") || kb_code=000
kb_ok=false
if [[ "$kb_code" == "200" ]]; then
  # A failed fetch must count as a failed check, not abort the script
  # (set -e) before the report is written.
  body=$(curl -sS "http://localhost:${KIBANA_PORT:-5601}/api/status") || body=""
  if grep -q '"level":"available"' <<<"$body"; then kb_ok=true; fi
fi
[[ "$kb_ok" == true ]] || ok=0

log "checking Master (via domain inside web-proxy)"
master_ok=false
if docker exec argus-web-proxy sh -lc "curl -fsS http://master.argus.com:3000/readyz" >/dev/null 2>&1; then
  master_ok=true
else
  ok=0
fi

log "checking FTP"
ftp_ok=false
# Capture the container list first: piping into `grep -q` under pipefail can
# fail spuriously when grep exits before the producer has finished writing.
containers=$(docker ps --format '{{.Names}}') || containers=""
if grep -qxF 'argus-ftp' <<<"$containers" \
  && docker exec argus-ftp sh -lc 'test -w /private/argus/ftp/share'; then
  ftp_ok=true
else
  ok=0
fi

log "checking Prometheus"
prom_ok=true
wait_http "http://localhost:${PROMETHEUS_PORT:-9090}/-/ready" 60 || { prom_ok=false; ok=0; }

log "checking Grafana"
gf_code=$(code_for "http://localhost:${GRAFANA_PORT:-3000}/api/health")
gf_ok=false
if [[ "$gf_code" == "200" ]]; then
  body=$(curl -sS "http://localhost:${GRAFANA_PORT:-3000}/api/health") || body=""
  if grep -q '"database"[[:space:]]*:[[:space:]]*"ok"' <<<"$body"; then gf_ok=true; fi
fi
[[ "$gf_ok" == true ]] || ok=0

log "checking Alertmanager"
am_ok=true
wait_http "http://localhost:${ALERTMANAGER_PORT:-9093}/api/v2/status" 60 || { am_ok=false; ok=0; }

log "checking Web-Proxy"
p8080=$(code_for "http://localhost:${WEB_PROXY_PORT_8080:-8080}/")
p8083=$(code_for "http://localhost:${WEB_PROXY_PORT_8083:-8083}/")
cors8084=$(header_val -H "Origin: http://localhost:${WEB_PROXY_PORT_8080:-8080}" "http://localhost:${WEB_PROXY_PORT_8084:-8084}/api/v2/status" || true)
cors8085=$(header_val -H "Origin: http://localhost:${WEB_PROXY_PORT_8080:-8080}" "http://localhost:${WEB_PROXY_PORT_8085:-8085}/api/v1/master/nodes" || true)
wp_ok=true
# In some environments the home page may answer 403; accept 200/403 here.
[[ "$p8080" == 200 || "$p8080" == 403 ]] || wp_ok=false
[[ "$p8083" == 200 || "$p8083" == 302 || "$p8083" == 403 ]] || wp_ok=false
# Both API ports must answer with an Access-Control-Allow-Origin header.
[[ -n "$cors8084" && -n "$cors8085" ]] || wp_ok=false
[[ "$wp_ok" == true ]] || ok=0

# Every *_ok variable holds the bare word true or false, so it can be
# substituted into the JSON document unquoted.
cat > "$tmp" <<JSON
{
"es": $es_ok,
"kibana": $kb_ok,
"master_readyz": $master_ok,
"ftp_share_writable": $ftp_ok,
"prometheus": $prom_ok,
"grafana": $gf_ok,
"alertmanager": $am_ok,
"web_proxy": $wp_ok,
"overlay_net": $net_ok,
"timestamp": "$(date -u +%Y-%m-%dT%H:%M:%SZ)"
}
JSON

if ! mv "$tmp" "$OUT_JSON" 2>/dev/null; then
  # fallback when logs dir not writable (no sudo allowed)
  OUT_JSON="/tmp/selfcheck_$(date -u +%Y%m%d-%H%M%SZ).json"
  cp "$tmp" "$OUT_JSON"
  log "selfcheck.json written to $OUT_JSON (logs dir not writable)"
fi

if [[ "$ok" == 1 ]]; then
  log "selfcheck OK"
  exit 0
else
  err "selfcheck FAILED (see $OUT_JSON)"
  # If diagnose script exists, run it to collect more details
  if [[ -x "$SCRIPT_DIR/server-diagnose.sh" ]]; then
    # run diagnose; it will print the actual timestamped file paths and update 'latest' symlinks
    "$SCRIPT_DIR/server-diagnose.sh" || true
  fi
  exit 1
fi