#!/usr/bin/env bash
#
# Smoke test for the log pipeline: appends sample log lines inside the
# node-a and node-b containers, then asserts that the Elasticsearch document
# counts for train-* and infer-* increased, that cluster health is
# green/yellow, and warns if the Kibana status endpoint is unreachable.
#
# The helpers log, compose, ensure_env_file and ensure_paths_defined are used
# but not defined in this file; presumably they come from common.sh.
set -euo pipefail

# Load shared helpers from the directory this script lives in, regardless of
# the caller's working directory.
# shellcheck source=common.sh
source "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/common.sh"

# Pre-flight checks (implemented outside this file).
ensure_env_file
ensure_paths_defined

log "Sending logs and asserting ES counts"

#######################################
# Print the number of documents matching an Elasticsearch index pattern.
# Arguments: $1 - index name or pattern (e.g. "train-*")
# Outputs:   the count as a bare integer on stdout; 0 when the response
#            carries no "count" field (a warning goes to stderr if the
#            response was non-empty but unparseable)
# Returns:   0 on success, 1 if the HTTP request itself failed
#######################################
get_count() {
  local idx="$1"
  local url="http://localhost:9200/${idx}/_count?ignore_unavailable=true&allow_no_indices=true"
  # Kept in a variable so the quotes are part of the regex, not shell syntax.
  local count_re='"count":([0-9]+)'
  local body

  # Declaration and assignment are split so curl's exit status is not masked.
  if ! body="$(curl -s "$url")"; then
    echo "[ERR] failed to query ${url}" >&2
    return 1
  fi

  if [[ "$body" =~ $count_re ]]; then
    printf '%s\n' "${BASH_REMATCH[1]}"
    return 0
  fi

  # Never echo the raw body: callers feed this value into $(( )) arithmetic.
  if [[ -n "$body" ]]; then
    echo "[WARN] no count in response for ${idx}; assuming 0" >&2
  fi
  printf '0\n'
}

# Snapshot the document counts before any new log lines are written, so the
# later check can compare against this baseline.
train0="$(get_count "train-*")"
infer0="$(get_count "infer-*")"
base=$(( train0 + infer0 ))
log "initial counts: train=${train0} infer=${infer0} total=${base}"

#######################################
# Print the container ID(s) reported by `compose ps -q` for a service.
# `compose` is not defined in this file (presumably a wrapper from common.sh).
# Arguments: $1 - compose service name
# Outputs:   whatever `compose ps -q` prints
# Returns:   exit status of `compose ps -q`
#######################################
service_id() {
  local service="$1"
  compose ps -q "${service}"
}

#######################################
# Append three timestamped sample log lines inside a container: two INFO
# training lines to /logs/train/train-demo.log and one WARN inference line
# to /logs/infer/infer-demo.log. The log directories are created first.
# Arguments: $1 - container ID to `docker exec` into
#            $2 - host tag embedded in each line as "[tag]"
# Returns:   non-zero if the last `docker exec` fails (under `set -e` any
#            failing exec aborts the script)
#######################################
send_logs() {
  local sid="$1"
  local hosttag="$2"
  # Runs inside the container: $1 is the message, $2 the target file. The
  # message is passed as an argument rather than spliced into the command
  # string, so quotes or `$` in the host tag cannot alter the command.
  # shellcheck disable=SC2016 # expanded by the container's sh, not here
  local append_line='printf "%s %s\n" "$(date "+%F %T")" "$1" >> "$2"'

  docker exec "$sid" sh -lc 'mkdir -p /logs/train /logs/infer'

  # The word after the snippet ("sh") becomes $0 in the container shell.
  docker exec "$sid" sh -lc "$append_line" sh \
    "INFO [${hosttag}] training step=1 loss=1.23 model=bert" \
    /logs/train/train-demo.log
  docker exec "$sid" sh -lc "$append_line" sh \
    "INFO [${hosttag}] training step=2 loss=1.10 model=bert" \
    /logs/train/train-demo.log
  docker exec "$sid" sh -lc "$append_line" sh \
    "WARN [${hosttag}] inference slow on batch=2 latency=1.9s" \
    /logs/infer/infer-demo.log
}

# Resolve the two node containers; both must exist before anything is sent.
CID_A="$(service_id node-a)"
CID_B="$(service_id node-b)"

if [[ -z "$CID_A" || -z "$CID_B" ]]; then
  echo "[ERR] node containers not found" >&2
  exit 1
fi

# Emit the sample lines from each node under a distinct host tag.
send_logs "$CID_A" "host01"
send_logs "$CID_B" "host02"

# Fixed pause before re-counting documents.
log "Waiting for ES to ingest"
sleep 10

# Re-read the document counts after the wait and compare with the baseline
# total held in $base.
train1=$(get_count "train-*")
infer1=$(get_count "infer-*")
final=$((train1 + infer1))
log "final counts: train=${train1} infer=${infer1} total=${final}"

# The total must have grown relative to the baseline.
if (( final <= base )); then
  echo "[ERR] ES total did not increase (${base} -> ${final})" >&2
  exit 1
fi

# Absolute floor on the total, independent of the baseline.
if (( final < 4 )); then
  echo "[ERR] ES total below expected threshold: ${final} < 4" >&2
  exit 1
fi

# Cluster health must be green or yellow. Under `set -e -o pipefail` a failed
# curl or a grep with no match would otherwise abort the script right here,
# before the error message below could be printed; fall back to "unknown" so
# the failure is reported explicitly.
es_health="$(
  curl -s "http://localhost:9200/_cluster/health" \
    | grep -o '"status":"[^"]*"' \
    | cut -d'"' -f4
)" || es_health="unknown"
if [[ "$es_health" != "green" && "$es_health" != "yellow" ]]; then
  echo "[ERR] ES health not green/yellow: $es_health" >&2
  exit 1
fi

# Kibana is checked best-effort: an unreachable endpoint only warns.
if ! curl -fs "http://localhost:5601/api/status" >/dev/null 2>&1; then
  echo "[WARN] Kibana status endpoint not available"
fi

log "ES counts increased and services healthy"