#!/usr/bin/env bash
#
# Self-check for the Argus service stack.
#
# Probes each service, tracks the overall result in `ok` (1 = all checks
# passed so far, 0 = at least one failed) and keeps a per-service boolean
# (`net_ok`, `es_ok`, `kb_ok`, ...) for the report written to
# $LOG_DIR/selfcheck.json.
#
# Optional environment (loaded from compose/.env when that file exists):
#   OVERLAY_NET_NAME  overlay network name (default: argus-sys-net)
#   KIBANA_PORT       host port used for the Kibana status body (default: 5601)
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"

# Print an informational message on stdout with a blue [CHECK] prefix.
log() { printf '\033[0;34m[CHECK]\033[0m %s\n' "$*"; }

# Print an error message on stderr with a red [ERROR] prefix.
err() { printf '\033[0;31m[ERROR]\033[0m %s\n' "$*" >&2; }

# Export every variable assigned by the env file. A failing `source` is
# reported but not fatal, and allexport is always switched off again.
ENV_FILE="$ROOT/compose/.env"
if [[ -f "$ENV_FILE" ]]; then
  set -a
  # shellcheck disable=SC1090
  source "$ENV_FILE" || err "failed to load $ENV_FILE"
  set +a
fi

#######################################
# Poll a URL until curl succeeds (`curl -f`, so HTTP errors count as failure).
# Arguments:
#   $1 - URL to probe
#   $2 - maximum number of attempts (default: 120)
#   $3 - seconds to sleep between attempts (default: 5)
# Outputs:
#   one progress line on stdout per failed attempt
# Returns:
#   0 as soon as a request succeeds, 1 when all attempts are exhausted
#######################################
wait_http() {
  local url=$1
  local attempts=${2:-120}
  local interval=${3:-5}
  local i
  for ((i = 1; i <= attempts; i++)); do
    if curl -fsS "$url" >/dev/null 2>&1; then
      return 0
    fi
    echo "[..] waiting $url ($i/$attempts)"
    # No point sleeping after the last attempt; we are about to give up.
    if ((i < attempts)); then
      sleep "$interval"
    fi
  done
  return 1
}

#######################################
# Print the HTTP status code returned for a URL.
# Arguments:
#   $1 - URL to request
# Outputs:
#   the status code on stdout; 000 when the request failed
#######################################
code_for() {
  local status
  # curl's -w already prints 000 on a failed transfer, so overwrite the value
  # instead of appending a second 000 to it.
  status=$(curl -s -o /dev/null -w '%{http_code}' "$1") || status=000
  printf '%s\n' "$status"
}

#######################################
# Print the value of the Access-Control-Allow-Origin response header.
# Arguments:
#   "$@" - passed straight to curl (extra -H options and the URL)
# Outputs:
#   the header value on stdout; nothing when the header is absent
# Returns:
#   the pipeline status (non-zero when curl fails, because of pipefail)
#######################################
header_val() {
  # The header name is compared via tolower(), which works on every awk
  # rather than only on gawk (IGNORECASE).
  curl -s -D - -o /dev/null "$@" | awk '
    {
      sub(/\r$/, "")
      sep = index($0, ":")
      if (sep && tolower(substr($0, 1, sep - 1)) == "access-control-allow-origin") {
        val = substr($0, sep + 1)
        sub(/^[ \t]+/, "", val)
        print val
      }
    }'
}

LOG_DIR="$ROOT/logs"
mkdir -p "$LOG_DIR" || true # best effort: the directory may not be creatable
OUT_JSON="$LOG_DIR/selfcheck.json"
tmp=$(mktemp)
# Remove the scratch file on every exit path.
trap 'rm -f -- "$tmp"' EXIT

ok=1

log "checking overlay network"
net_name="${OVERLAY_NET_NAME:-argus-sys-net}"
net_ok=false
# Ask docker for the driver directly instead of grepping the JSON dump.
net_driver=$(docker network inspect --format '{{.Driver}}' "$net_name" 2>/dev/null) || net_driver=""
if [[ "$net_driver" == "overlay" ]]; then
  net_ok=true
fi
[[ "$net_ok" == true ]] || ok=0

log "checking Elasticsearch (via domain inside web-proxy)"
# The URL must be quoted for the inner shell: an unquoted `&` would put curl
# in the background and the check would succeed no matter what curl returned.
es_url='http://es.log.argus.com:9200/_cluster/health?wait_for_status=yellow&timeout=1s'
if docker exec argus-web-proxy sh -lc "curl -fsS '$es_url'" >/dev/null 2>&1; then
  es_ok=true
else
  es_ok=false
  ok=0
fi

log "checking Kibana (via domain inside web-proxy)"
kb_code=$(docker exec argus-web-proxy sh -lc "curl -s -o /dev/null -w '%{http_code}' http://kibana.log.argus.com:5601/api/status") || kb_code=000
kb_ok=false
if [[ "$kb_code" == "200" ]]; then
  # A failed fetch only fails this check; it must not abort the whole script.
  kb_body=$(curl -sS "http://localhost:${KIBANA_PORT:-5601}/api/status") || kb_body=""
  if grep -q '"level":"available"' <<<"$kb_body"; then
    kb_ok=true
  fi
fi
[[ "$kb_ok" == true ]] || ok=0

log "checking Master (via domain inside web-proxy)"
if ! docker exec argus-web-proxy sh -lc "curl -fsS http://master.argus.com:3000/readyz" >/dev/null 2>&1; then
  ok=0
fi

log "checking FTP"
# --- FTP: the container must be running and its share directory writable ---
ftp_ok=false
# Capture the container list first: piping `docker ps` straight into
# `grep -q` can report a spurious failure under pipefail when grep exits early.
running_names=$(docker ps --format '{{.Names}}' 2>/dev/null) || running_names=""
if grep -Fxq 'argus-ftp' <<<"$running_names"; then
  if docker exec argus-ftp sh -lc 'test -w /private/argus/ftp/share'; then
    ftp_ok=true
  fi
fi
[[ "$ftp_ok" == true ]] || ok=0

log "checking Prometheus"
prom_ok=true
wait_http "http://localhost:${PROMETHEUS_PORT:-9090}/-/ready" 60 || { prom_ok=false; ok=0; }

log "checking Grafana"
gf_url="http://localhost:${GRAFANA_PORT:-3000}/api/health"
gf_ok=false
# curl's -w already prints 000 on a failed transfer; overwrite instead of
# appending a second 000.
gf_code=$(curl -s -o /dev/null -w "%{http_code}" "$gf_url") || gf_code=000
if [[ "$gf_code" == "200" ]]; then
  # A failed fetch only fails this check; it must not abort the whole script.
  gf_body=$(curl -sS "$gf_url") || gf_body=""
  if grep -Eq '"database"[[:space:]]*:[[:space:]]*"ok"' <<<"$gf_body"; then
    gf_ok=true
  fi
fi
[[ "$gf_ok" == true ]] || ok=0

log "checking Alertmanager"
am_ok=true
wait_http "http://localhost:${ALERTMANAGER_PORT:-9093}/api/v2/status" 60 || { am_ok=false; ok=0; }

log "checking Web-Proxy"
p8080=$(code_for "http://localhost:${WEB_PROXY_PORT_8080:-8080}/")
p8083=$(code_for "http://localhost:${WEB_PROXY_PORT_8083:-8083}/")
# header_val fails when curl fails (pipefail); an empty value is handled below.
cors8084=$(header_val -H "Origin: http://localhost:${WEB_PROXY_PORT_8080:-8080}" "http://localhost:${WEB_PROXY_PORT_8084:-8084}/api/v2/status" || true)
cors8085=$(header_val -H "Origin: http://localhost:${WEB_PROXY_PORT_8080:-8080}" "http://localhost:${WEB_PROXY_PORT_8085:-8085}/api/v1/master/nodes" || true)
wp_ok=true
# Some environments answer the home page with 403, so accept 200/403 here.
[[ "$p8080" == 200 || "$p8080" == 403 ]] || wp_ok=false
[[ "$p8083" == 200 || "$p8083" == 302 || "$p8083" == 403 ]] || wp_ok=false
# Both API ports must send an Access-Control-Allow-Origin header.
[[ -n "$cors8084" && -n "$cors8085" ]] || wp_ok=false
[[ "$wp_ok" == true ]] || ok=0

# --- Report ---
# NOTE(review): the original heredoc body and the condition guarding the
# fallback below were lost (only `cat > "$tmp" <` ... `/dev/null; then`
# survived). The field names are a reconstruction from the result variables
# set by the checks - confirm against any consumer of selfcheck.json.
overall=false
if [[ "$ok" == 1 ]]; then
  overall=true
fi
cat > "$tmp" <<JSON
{
  "ok": $overall,
  "overlay_net": $net_ok,
  "es": $es_ok,
  "kibana": $kb_ok,
  "ftp_share_writable": $ftp_ok,
  "prometheus": $prom_ok,
  "grafana": $gf_ok,
  "alertmanager": $am_ok,
  "web_proxy": $wp_ok,
  "timestamp": "$(date -u +%Y-%m-%dT%H:%M:%SZ)"
}
JSON

if ! cp "$tmp" "$OUT_JSON" 2>/dev/null; then
  # fallback when logs dir not writable (no sudo allowed)
  OUT_JSON="/tmp/selfcheck_$(date -u +%Y%m%d-%H%M%SZ).json"
  cp "$tmp" "$OUT_JSON"
  log "selfcheck.json written to $OUT_JSON (logs dir not writable)"
fi
rm -f -- "$tmp"

if [[ "$ok" == 1 ]]; then
  log "selfcheck OK"
  exit 0
fi

err "selfcheck FAILED (see $OUT_JSON)"
# If the diagnose script exists, run it to collect more details. It prints the
# actual timestamped file paths and updates the 'latest' symlinks; its own
# failure must not mask the self-check result.
if [[ -x "$SCRIPT_DIR/server-diagnose.sh" ]]; then
  "$SCRIPT_DIR/server-diagnose.sh" || true
fi
exit 1