#!/usr/bin/env bash
#
# End-to-end log ingestion check.
#
# Appends demo log lines inside the node-a and node-b containers, then polls
# Elasticsearch until the combined document count of the train-* and infer-*
# indices has grown and reached a minimal threshold. Finally checks the ES
# cluster health and probes the Kibana status endpoint.
#
# Environment (optionally loaded from "$TEST_ROOT/.env"):
#   ES_HTTP_PORT  Elasticsearch HTTP port on localhost (default 9200)
#   KIBANA_PORT   Kibana HTTP port on localhost (default 5601)
#
# Requires: docker, curl, jq.
# Exit status: 0 on success, 1 on any failed assertion.
set -euo pipefail

echo "[INFO] Sending logs via node-a/node-b and asserting ES counts..."

# Load port variables from the optional .env file one directory above this
# script; `set -a` exports everything the file assigns.
TEST_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")"/.. && pwd)"
if [[ -f "$TEST_ROOT/.env" ]]; then
  # shellcheck disable=SC1091
  set -a; source "$TEST_ROOT/.env"; set +a
fi

es_url="http://localhost:${ES_HTTP_PORT:-9200}"

#######################################
# Print an error message to stderr and exit with status 1.
# Arguments: $* - message text (printed after the "[ERR] " prefix)
#######################################
die() {
  echo "[ERR] $*" >&2
  exit 1
}

#######################################
# Robust count helper: tolerates 404/503 and non-JSON responses.
# Globals:   es_url (read)
# Arguments: $1 - index name or pattern, e.g. "train-*"
# Outputs:   a non-negative integer on stdout (0 on any failure)
#######################################
get_count() {
  local idx="$1"
  local tmp code val=0
  tmp=$(mktemp)
  # curl failures (e.g. connection refused) are tolerated; the HTTP code
  # check below then falls through to the default of 0.
  code=$(curl -s -o "$tmp" -w "%{http_code}" \
    "${es_url}/${idx}/_count?ignore_unavailable=true&allow_no_indices=true" || true)
  if [[ "$code" == "200" ]]; then
    val=$(jq -r '(.count // 0) | tonumber? // 0' "$tmp" 2>/dev/null || echo 0)
  fi
  rm -f -- "$tmp"
  # jq can succeed while printing nothing (empty body); never hand the caller
  # anything but a plain non-negative integer.
  [[ "$val" =~ ^[0-9]+$ ]] || val=0
  echo "$val"
}

#######################################
# Ask Elasticsearch to refresh the train-* and infer-* indices so freshly
# indexed documents become visible to _count. Errors are ignored on purpose:
# the indices may not exist yet.
# Globals:   es_url (read)
#######################################
refresh_indices() {
  local pattern
  for pattern in "train-*" "infer-*"; do
    curl -s -X POST "${es_url}/${pattern}/_refresh" >/dev/null 2>&1 || true
  done
}

#######################################
# Append one UTC-timestamped line to a file inside a container.
# The text and the path are passed as positional arguments to the inner
# shell instead of being spliced into the command string.
# Arguments: $1 - container name
#            $2 - file path inside the container
#            $3 - line text (written after the timestamp)
#######################################
append_log_line() {
  local cname="$1" file="$2" text="$3"
  docker exec "$cname" sh -lc \
    'ts=$(date -u +%Y-%m-%dT%H:%M:%SZ); printf "%s %s\n" "$ts" "$1" >> "$2"' \
    sh "$text" "$file"
}

#######################################
# Write the demo log lines (2 train + 1 infer) inside a container.
# Arguments: $1 - container name
#            $2 - host tag embedded in each log line
#######################################
send_logs() {
  local cname="$1"
  local hosttag="$2"
  docker exec "$cname" sh -lc 'mkdir -p /logs/train /logs/infer'
  append_log_line "$cname" /logs/train/train-demo.log \
    "INFO [${hosttag}] training step=1 loss=1.23 model=bert"
  append_log_line "$cname" /logs/train/train-demo.log \
    "INFO [${hosttag}] training step=2 loss=1.10 model=bert"
  append_log_line "$cname" /logs/infer/infer-demo.log \
    "WARN [${hosttag}] inference slow on batch=2 latency=1.9s"
}

#######################################
# Print the name of the first running container matching a regex.
# Arguments: $1 - extended regex matched against `docker ps` container names
# Outputs:   the container name on stdout
# Returns:   exits with status 1 (and a message) if nothing matches
#######################################
find_container() {
  local pattern="$1"
  local name
  # With pipefail a non-matching grep would abort the script silently;
  # swallow the status here and report the empty result explicitly instead.
  name=$(docker ps --format '{{.Names}}' | grep -E -- "$pattern" | head -n1 || true)
  [[ -n "$name" ]] || die "no running container matches: ${pattern}"
  echo "$name"
}

main() {
  local train0 infer0 base
  train0=$(get_count "train-*")
  infer0=$(get_count "infer-*")
  base=$((train0 + infer0))
  echo "[INFO] initial counts: train=${train0} infer=${infer0} total=${base}"

  # Determine container names
  local node_a node_b
  node_a=$(find_container '^argus-node-a$|argus-sys-node-a-1')
  node_b=$(find_container '^argus-node-b$|argus-sys-node-b-1')

  send_logs "$node_a" "host01"
  send_logs "$node_b" "host02"

  echo "[INFO] Waiting for ES to ingest..."
  # Proactively refresh indices (errors ignored if not created yet)
  refresh_indices

  # Poll up to max_attempts times, 2s apart (about 120s), until the total has
  # both increased and reached the threshold.
  local final=0 train1=0 infer1=0 attempt
  local -r threshold=4 max_attempts=60
  for ((attempt = 1; attempt <= max_attempts; attempt++)); do
    train1=$(get_count "train-*")
    infer1=$(get_count "infer-*")
    final=$((train1 + infer1))
    if (( final > base && final >= threshold )); then
      break
    fi
    echo "[..] waiting ES counts increase to >=${threshold} (${attempt}/${max_attempts}) current=${final} base=${base}"
    # refresh indices again to speed up visibility
    refresh_indices
    sleep 2
  done

  echo "[INFO] final counts: train=${train1} infer=${infer1} total=${final}"
  if (( final <= base )); then
    die "ES total did not increase (${base} -> ${final})"
  fi

  # Minimal threshold to be tolerant: each node writes 2 train + 1 infer
  # lines, but only ${threshold} documents in total are required.
  if (( final < threshold )); then
    die "ES total below expected threshold: ${final} < ${threshold}"
  fi

  # Health endpoints. The pipeline is guarded so that an unreachable ES or a
  # non-JSON reply yields an empty status and the explicit error below,
  # rather than a silent abort from pipefail.
  local es_health
  es_health=$(curl -s "${es_url}/_cluster/health" \
    | jq -r '.status // empty' 2>/dev/null || true)
  if [[ "$es_health" != "green" && "$es_health" != "yellow" ]]; then
    die "ES health not green/yellow: $es_health"
  fi

  # Kibana being unavailable is only a warning, not a failure.
  if ! curl -fs "http://localhost:${KIBANA_PORT:-5601}/api/status" >/dev/null 2>&1; then
    echo "[WARN] Kibana status endpoint not available"
  fi

  echo "[OK] ES counts increased and services healthy"
}

main "$@"