105 lines
4.4 KiB
Bash
Executable File
105 lines
4.4 KiB
Bash
Executable File
#!/usr/bin/env bash
|
||
# Strict mode: abort on unhandled errors, on unset variables, and on a failure
# in any stage of a pipeline.
set -euo pipefail

# Absolute path of the directory that contains this script. "--" keeps a path
# that starts with "-" from being parsed as an option.
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
# Absolute path of the parent of the script directory; the env file and the
# logs directory are resolved relative to it.
ROOT="$(cd -- "$SCRIPT_DIR/.." && pwd)"
|
||
|
||
# Print a progress message to stdout, prefixed with a blue "[CHECK]" tag.
# Arguments: the message words (joined with single spaces). Backslash escapes
# in the message are interpreted, because the line is emitted with `echo -e`.
log() {
  local tag="\033[0;34m[CHECK]\033[0m"
  echo -e "${tag} $*"
}
|
||
# Print an error message to stderr, prefixed with a red "[ERROR]" tag.
# Arguments: the message words (joined with single spaces). Backslash escapes
# in the message are interpreted, because the line is emitted with `echo -e`.
err() {
  local tag="\033[0;31m[ERROR]\033[0m"
  echo -e "${tag} $*" >&2
}
|
||
|
||
# Optional environment file. When it exists, every variable it assigns is
# exported (allexport is switched on only while the file is being sourced).
ENV_FILE="$ROOT/compose/.env"
if [[ -f "$ENV_FILE" ]]; then
  set -a
  # Best effort, as before: a non-zero status from the env file does not stop
  # the self-check. Unlike the former one-line `&&` chain, allexport is now
  # always switched off again afterwards.
  # shellcheck disable=SC1090
  source "$ENV_FILE" || true
  set +a
fi
|
||
|
||
# Poll a URL until curl succeeds (curl -f: an HTTP error status counts as a
# failure).
# Arguments: $1 - URL to probe
#            $2 - maximum number of attempts (default 120)
# Outputs:   one "[..] waiting ..." progress line on stdout per failed attempt
# Returns:   0 as soon as a probe succeeds, 1 once every attempt has failed.
#            Each failed attempt is followed by a 5 second pause.
wait_http() {
  local url="$1"
  local attempts=${2:-120}
  local n
  for ((n = 1; n <= attempts; n++)); do
    if curl -fsS "$url" >/dev/null 2>&1; then
      return 0
    fi
    printf '%s\n' "[..] waiting $url ($n/$attempts)"
    sleep 5
  done
  return 1
}
|
||
# Print the HTTP status code returned for a URL.
# Arguments: $1 - URL to request
# Outputs:   the three-digit status code on stdout, or 000 when curl fails
# Returns:   always 0
code_for() {
  local code
  # When curl cannot complete the request it has already written "000" through
  # -w before exiting non-zero. Capturing the output and replacing it on
  # failure keeps the result at exactly three digits; appending a fallback
  # with `|| echo 000` produced "000000".
  code=$(curl -s -o /dev/null -w '%{http_code}' "$1") || code=000
  printf '%s\n' "$code"
}
|
||
# Print the value of the Access-Control-Allow-Origin response header.
# Arguments: passed through unchanged to curl (extra options plus the URL)
# Outputs:   the header value on stdout, one line per occurrence; nothing when
#            the header is absent
# Returns:   the pipeline status (non-zero when curl fails and pipefail is on)
header_val() {
  # The header name is compared through tolower() so the match is
  # case-insensitive on every awk; the IGNORECASE variable only has that
  # effect in gawk. The line is split at the first ':' so a value containing
  # ": " is kept whole, and the trailing CR of the HTTP line is removed.
  curl -s -D - -o /dev/null "$@" \
    | awk '
        {
          sep = index($0, ":")
          if (sep == 0) next
          if (tolower(substr($0, 1, sep - 1)) != "access-control-allow-origin") next
          value = substr($0, sep + 1)
          gsub(/\r/, "", value)
          sub(/^[ \t]+/, "", value)
          sub(/[ \t]+$/, "", value)
          print value
        }'
}
|
||
|
||
# Directory and file that receive the self-check report.
LOG_DIR="$ROOT/logs"
# Best effort: a logs directory that cannot be created is tolerated here
# because the report falls back to /tmp when the final move fails.
mkdir -p "$LOG_DIR" || true
OUT_JSON="$LOG_DIR/selfcheck.json"

# Scratch file in which the report is assembled before being published.
tmp=$(mktemp)
# Remove the scratch file on every exit path. This is a no-op once the file
# has been moved into place, and it does not alter the script's exit status.
trap 'rm -f -- "$tmp"' EXIT

# Overall verdict: stays 1 until any individual check fails.
ok=1
|
||
|
||
# ---------------------------------------------------------------------------
# Service checks. Each check records its own true/false flag for the JSON
# report and clears the overall `ok` flag when it fails.
# ---------------------------------------------------------------------------

log "checking overlay network"
net_ok=false
# Ask docker for the driver field directly rather than grepping the full JSON:
# with pipefail, `docker ... | grep -q` can report failure when grep exits
# early and the writer receives SIGPIPE.
net_driver=$(docker network inspect -f '{{.Driver}}' "${OVERLAY_NET_NAME:-argus-sys-net}" 2>/dev/null) || net_driver=""
if [[ "$net_driver" == "overlay" ]]; then
  net_ok=true
fi
[[ "$net_ok" == true ]] || ok=0

log "checking Elasticsearch (via domain inside web-proxy)"
# The URL is single-quoted for the inner shell. Unquoted, its '&' put curl in
# the background and turned "timeout=1s" into a variable assignment, so the
# inner command line always exited 0 and this check could never fail.
if docker exec argus-web-proxy sh -lc "curl -fsS 'http://es.log.argus.com:9200/_cluster/health?wait_for_status=yellow&timeout=1s'" >/dev/null 2>&1; then
  es_ok=true
else
  es_ok=false
  ok=0
fi

log "checking Kibana (via domain inside web-proxy)"
# Replace (not append to) the captured value on failure: curl already prints
# "000" itself when it cannot complete the request.
kb_code=$(docker exec argus-web-proxy sh -lc "curl -s -o /dev/null -w '%{http_code}' http://kibana.log.argus.com:5601/api/status") || kb_code=000
kb_ok=false
if [[ "$kb_code" == "200" ]]; then
  # A failed fetch must count as a failed check, not abort the script under
  # `set -e` before the report is written.
  body=$(curl -sS "http://localhost:${KIBANA_PORT:-5601}/api/status") || body=""
  # In-shell substring match: no pipeline, so pipefail/SIGPIPE cannot turn a
  # match on a large status document into a failure.
  if [[ "$body" == *'"level":"available"'* ]]; then
    kb_ok=true
  fi
fi
[[ "$kb_ok" == true ]] || ok=0

log "checking Master (via domain inside web-proxy)"
if docker exec argus-web-proxy sh -lc "curl -fsS http://master.argus.com:3000/readyz" >/dev/null 2>&1; then
  master_ok=true
else
  master_ok=false
  ok=0
fi

log "checking FTP"
ftp_ok=false
# The container must be running and its share directory must be writable.
container_names=$(docker ps --format '{{.Names}}') || container_names=""
if grep -qx 'argus-ftp' <<<"$container_names"; then
  if docker exec argus-ftp sh -lc 'test -w /private/argus/ftp/share'; then
    ftp_ok=true
  fi
fi
[[ "$ftp_ok" == true ]] || ok=0

log "checking Prometheus"
if wait_http "http://localhost:${PROMETHEUS_PORT:-9090}/-/ready" 60; then
  prom_ok=true
else
  prom_ok=false
  ok=0
fi

log "checking Grafana"
gf_code=$(curl -s -o /dev/null -w "%{http_code}" "http://localhost:${GRAFANA_PORT:-3000}/api/health") || gf_code=000
gf_ok=false
if [[ "$gf_code" == "200" ]]; then
  body=$(curl -sS "http://localhost:${GRAFANA_PORT:-3000}/api/health") || body=""
  # POSIX character class instead of the GNU-only \s.
  gf_db_re='"database"[[:space:]]*:[[:space:]]*"ok"'
  if [[ "$body" =~ $gf_db_re ]]; then
    gf_ok=true
  fi
fi
[[ "$gf_ok" == true ]] || ok=0

log "checking Alertmanager"
if wait_http "http://localhost:${ALERTMANAGER_PORT:-9093}/api/v2/status" 60; then
  am_ok=true
else
  am_ok=false
  ok=0
fi

log "checking Web-Proxy"
p8080=$(code_for "http://localhost:${WEB_PROXY_PORT_8080:-8080}/")
p8083=$(code_for "http://localhost:${WEB_PROXY_PORT_8083:-8083}/")
# A missing header or an unreachable port leaves the value empty; that is
# evaluated below, so the lookup itself must not abort the script.
cors8084=$(header_val -H "Origin: http://localhost:${WEB_PROXY_PORT_8080:-8080}" "http://localhost:${WEB_PROXY_PORT_8084:-8084}/api/v2/status" || true)
cors8085=$(header_val -H "Origin: http://localhost:${WEB_PROXY_PORT_8080:-8080}" "http://localhost:${WEB_PROXY_PORT_8085:-8085}/api/v1/master/nodes" || true)
wp_ok=true
# Some environments may return 403 for the home page, so 200/403 is accepted.
[[ "$p8080" == 200 || "$p8080" == 403 ]] || wp_ok=false
[[ "$p8083" == 200 || "$p8083" == 302 || "$p8083" == 403 ]] || wp_ok=false
# Both CORS probes must return a non-empty Access-Control-Allow-Origin value.
[[ -n "$cors8084" && -n "$cors8085" ]] || wp_ok=false
[[ "$wp_ok" == true ]] || ok=0

# Assemble the report. Every field reflects the outcome of its check; the
# master, prometheus and alertmanager fields used to be hard-coded to true.
cat > "$tmp" <<JSON
{
"es": $es_ok,
"kibana": $kb_ok,
"master_readyz": $master_ok,
"ftp_share_writable": $ftp_ok,
"prometheus": $prom_ok,
"grafana": $gf_ok,
"alertmanager": $am_ok,
"web_proxy": $wp_ok,
"overlay_net": $net_ok,
"timestamp": "$(date -u +%Y-%m-%dT%H:%M:%SZ)"
}
JSON
|
||
|
||
# Publish the report. When the logs directory is not writable (no sudo
# allowed), fall back to a timestamped file under /tmp.
if ! mv "$tmp" "$OUT_JSON" 2>/dev/null; then
  OUT_JSON="/tmp/selfcheck_$(date -u +%Y%m%d-%H%M%SZ).json"
  cp "$tmp" "$OUT_JSON"
  # The scratch copy is no longer needed once the fallback file exists.
  rm -f -- "$tmp"
  log "selfcheck.json written to $OUT_JSON (logs dir not writable)"
fi

# Exit status: 0 when every check passed, 1 otherwise.
if [[ "$ok" == 1 ]]; then
  log "selfcheck OK"
  exit 0
else
  err "selfcheck FAILED (see $OUT_JSON)"
  # If the diagnose script exists, run it to collect more details. According
  # to the original note it prints the timestamped file paths it writes and
  # updates the 'latest' symlinks. Its own failure must not change the exit
  # status of the self-check.
  if [[ -x "$SCRIPT_DIR/server-diagnose.sh" ]]; then
    "$SCRIPT_DIR/server-diagnose.sh" || true
  fi
  exit 1
fi
|