argus/deployment/build/templates/scripts/server-selfcheck.sh
2025-10-30 11:21:05 +08:00

76 lines
3.2 KiB
Bash

#!/usr/bin/env bash
# Server self-check: probes the locally exposed services, writes the results
# to logs/selfcheck.json under the deployment root, and exits 0 only when every
# probe passed (1 otherwise).

# Strict mode: stop on unhandled command failures, on use of unset variables,
# and when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Absolute path of the directory that contains this script.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Parent directory of SCRIPT_DIR; every other path is resolved against it.
ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
# Print an informational message to stdout with a blue "[CHECK]" prefix.
# Arguments: the message words, joined with single spaces.
# The message goes through %s so backslashes in it (e.g. in a path or URL)
# are printed literally instead of being interpreted as escape sequences.
log() { printf '\033[0;34m[CHECK]\033[0m %s\n' "$*"; }
# Print an error message to stderr with a red "[ERROR]" prefix.
# Arguments: the message words, joined with single spaces.
# The message goes through %s so backslashes in it (e.g. in a path) are
# printed literally instead of being interpreted as escape sequences.
err() { printf '\033[0;31m[ERROR]\033[0m %s\n' "$*" >&2; }
# Optional environment file. When it exists it is sourced with allexport
# switched on, so every variable it assigns (such as the *_PORT values read by
# the checks) is also exported.
ENV_FILE="$ROOT/compose/.env"
if [[ -f "$ENV_FILE" ]]; then
  set -a
  # A non-zero status from the file is reported but must not skip `set +a`;
  # otherwise every variable assigned later in this script would be exported.
  # shellcheck disable=SC1090
  source "$ENV_FILE" || err "failed to load $ENV_FILE"
  set +a
fi
#######################################
# Poll a URL until curl fetches it successfully (curl -f: HTTP status < 400).
# Arguments:
#   $1 - URL to probe
#   $2 - maximum number of attempts (default 120)
#   $3 - seconds to sleep between attempts (default 5)
# Outputs:
#   one "[..] waiting <url> (i/attempts)" line on stdout per failed attempt
# Returns:
#   0 as soon as one probe succeeds, 1 once all attempts are used up
#######################################
wait_http() {
  local url="$1"
  local attempts="${2:-120}"
  local interval="${3:-5}"
  local i
  for ((i = 1; i <= attempts; i++)); do
    # --max-time bounds each probe so a connection that is accepted but never
    # answered cannot stall the loop beyond the attempt budget.
    if curl -fsS --max-time 10 "$url" >/dev/null 2>&1; then
      return 0
    fi
    echo "[..] waiting $url ($i/$attempts)"
    # Sleeping after the last attempt would only delay the failure report.
    if ((i < attempts)); then
      sleep "$interval"
    fi
  done
  return 1
}
#######################################
# Print the HTTP status code returned for a URL.
# Arguments:
#   $1 - URL to request
# Outputs:
#   the 3-digit status code on stdout, or 000 when no response was obtained
# Returns:
#   always 0, so it is safe inside $(...) under `set -e`
#######################################
code_for() {
  local code
  # curl already prints 000 via -w when the transfer fails, so appending a
  # second fallback would yield "000000"; capture the output and normalise it.
  code="$(curl -s -o /dev/null -w '%{http_code}' "$1")" || true
  if [[ ! "$code" =~ ^[0-9]{3}$ ]]; then
    code=000
  fi
  printf '%s' "$code"
}
#######################################
# Print the value of the Access-Control-Allow-Origin response header.
# Arguments:
#   all arguments are passed straight to curl (options and the URL)
# Outputs:
#   the header value on stdout, with carriage returns removed; nothing when the
#   header is absent
# Returns:
#   the pipeline status (non-zero under pipefail when curl fails)
#######################################
header_val() {
  # Header names are case-insensitive. tolower() keeps the match portable;
  # the IGNORECASE variable is honoured by gawk only.
  curl -s -D - -o /dev/null "$@" \
    | awk -F': ' 'tolower($1) == "access-control-allow-origin" { gsub("\r", "", $2); print $2 }'
}
# ---------------------------------------------------------------------------
# Run every service probe, keep one true/false result per service, write the
# report to $OUT_JSON and exit 0 only when all probes passed (1 otherwise).
# `ok` is the overall verdict: it starts at 1 and any failing probe clears it.
# ---------------------------------------------------------------------------
mkdir -p "$ROOT/logs"
OUT_JSON="$ROOT/logs/selfcheck.json"
tmp=$(mktemp)
# Remove the scratch file if the script stops before the final mv.
trap 'rm -f -- "$tmp"' EXIT
ok=1

log "checking Elasticsearch"
es_ok=false
if curl -fsS "http://localhost:${ES_HTTP_PORT:-9200}/_cluster/health?wait_for_status=yellow&timeout=1s" >/dev/null 2>&1; then
  es_ok=true
else
  ok=0
fi

log "checking Kibana"
kb_ok=false
kb_url="http://localhost:${KIBANA_PORT:-5601}/api/status"
kb_code=$(code_for "$kb_url")
if [[ "$kb_code" == "200" ]]; then
  # A failed fetch must count as a failed check, not abort the script under
  # `set -e` before the report is written.
  body=$(curl -sS "$kb_url") || body=""
  # Matched in-shell instead of `echo | grep -q`: with pipefail, grep -q
  # exiting at the first match can kill echo with SIGPIPE on a large body and
  # turn a successful match into a failure.
  if [[ "$body" == *'"level":"available"'* ]]; then
    kb_ok=true
  fi
fi
[[ "$kb_ok" == true ]] || ok=0

log "checking Master"
master_ok=true
if ! wait_http "http://localhost:${MASTER_PORT:-32300}/readyz" 60; then
  master_ok=false
  ok=0
fi

log "checking FTP"
# The FTP check only verifies that the shared directory exists and is writable.
ftp_root="$ROOT/private/argus/metric/ftp/share"
if [[ -d "$ftp_root" && -w "$ftp_root" ]]; then
  ftp_ok=true
else
  ftp_ok=false
  ok=0
fi

log "checking Prometheus"
prom_ok=true
if ! wait_http "http://localhost:${PROMETHEUS_PORT:-9090}/-/ready" 60; then
  prom_ok=false
  ok=0
fi

log "checking Grafana"
gf_ok=false
gf_url="http://localhost:${GRAFANA_PORT:-3000}/api/health"
gf_code=$(code_for "$gf_url")
if [[ "$gf_code" == "200" ]]; then
  body=$(curl -sS "$gf_url") || body=""
  # POSIX character class instead of the GNU-only \s; the pattern lives in a
  # variable so that [[ =~ ]] treats it as a regex.
  gf_re='"database"[[:space:]]*:[[:space:]]*"ok"'
  if [[ "$body" =~ $gf_re ]]; then
    gf_ok=true
  fi
fi
[[ "$gf_ok" == true ]] || ok=0

log "checking Alertmanager"
am_ok=true
if ! wait_http "http://localhost:${ALERTMANAGER_PORT:-9093}/api/v2/status" 60; then
  am_ok=false
  ok=0
fi

log "checking Web-Proxy"
p8080=$(code_for "http://localhost:${WEB_PROXY_PORT_8080:-8080}/")
p8083=$(code_for "http://localhost:${WEB_PROXY_PORT_8083:-8083}/")
# The CORS probes send an Origin header and only require that some
# Access-Control-Allow-Origin value comes back; `|| true` keeps a failed curl
# from aborting the script (the empty result fails the check below).
cors8084=$(header_val -H "Origin: http://localhost:${WEB_PROXY_PORT_8080:-8080}" "http://localhost:${WEB_PROXY_PORT_8084:-8084}/api/v2/status" || true)
cors8085=$(header_val -H "Origin: http://localhost:${WEB_PROXY_PORT_8080:-8080}" "http://localhost:${WEB_PROXY_PORT_8085:-8085}/api/v1/master/nodes" || true)
wp_ok=true
[[ "$p8080" == 200 ]] || wp_ok=false
[[ "$p8083" == 200 || "$p8083" == 302 ]] || wp_ok=false
[[ -n "$cors8084" && -n "$cors8085" ]] || wp_ok=false
[[ "$wp_ok" == true ]] || ok=0

# Every field reflects the probe's real outcome, including master, prometheus
# and alertmanager.
cat > "$tmp" <<JSON
{
"es": $es_ok,
"kibana": $kb_ok,
"master_readyz": $master_ok,
"ftp_share_writable": $ftp_ok,
"prometheus": $prom_ok,
"grafana": $gf_ok,
"alertmanager": $am_ok,
"web_proxy": $wp_ok,
"timestamp": "$(date -u +%Y-%m-%dT%H:%M:%SZ)"
}
JSON
mv "$tmp" "$OUT_JSON"

if [[ "$ok" == 1 ]]; then
  log "selfcheck OK"
  exit 0
fi
err "selfcheck FAILED (see $OUT_JSON)"
exit 1