argus/src/sys/debug/scripts/06_write_health_and_assert.sh
yuyr 31ccb0b1b8 增加sys/debug 部署测试;agent dev/user/instance元信息提取优化;sys/tests 优化 (#26)
Reviewed-on: #26
Reviewed-by: xuxt <xuxt@zgclab.edu.cn>
Reviewed-by: huhy <husteryezi@163.com>
Reviewed-by: sundapeng <sundp@mail.zgclab.edu.cn>
2025-10-16 17:16:07 +08:00

79 lines
2.1 KiB
Bash
Executable File

#!/usr/bin/env bash
# Writes health files for two nodes' agents, then asserts that the master API
# reports the matching health keys for both nodes and that nodes.json lists
# exactly two nodes.
# Strict mode: abort on command failure, unset variables, and failed pipeline stages.
set -euo pipefail
# Load shared helpers from common.sh, located next to this script regardless of
# the caller's working directory.
# shellcheck source=common.sh
source "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/common.sh"
# NOTE(review): ensure_env_file / ensure_paths_defined are not defined in this
# file; presumably they come from common.sh and set up the SYS_DEBUG_PRIVATE_*,
# HOST_* and TMP_DIR variables used below — confirm.
ensure_env_file
ensure_paths_defined
# Base URL of the master API polled by check_health.
API_BASE="http://localhost:32300/api/v1/master"
# Per-node agent health directories that write_health populates.
HEALTH_A="$SYS_DEBUG_PRIVATE_NODEA/argus/agent/$HOST_A/health"
HEALTH_B="$SYS_DEBUG_PRIVATE_NODEB/argus/agent/$HOST_B/health"
#######################################
# Create a health directory and fill it with the two module health files.
# Both files carry the same fixed "healthy" record.
# Arguments: $1 - health directory (created with parents if missing)
# Outputs:   writes log-fluentbit.json and metric-node-exporter.json in $1
#######################################
write_health() {
  local target_dir="$1"
  local payload='{ "status": "healthy", "timestamp": "2025-10-13T12:05:00Z" }'
  local module

  mkdir -p "$target_dir"
  for module in log-fluentbit metric-node-exporter; do
    printf '%s\n' "$payload" > "$target_dir/${module}.json"
  done
}
# Populate the agent health directory of each node.
log "Writing health files for both nodes"
write_health "$HEALTH_A"
write_health "$HEALTH_B"

# The node ids are read from files in TMP_DIR; bail out if either is absent.
# NOTE(review): presumably an earlier script in this sequence writes them — confirm.
ID_A="$TMP_DIR/node_id_a"
ID_B="$TMP_DIR/node_id_b"
if [[ ! -f "$ID_A" || ! -f "$ID_B" ]]; then
  echo "[ERR] node id files missing in $TMP_DIR" >&2
  exit 1
fi
ID_A_VAL="$(<"$ID_A")"
ID_B_VAL="$(<"$ID_B")"
#######################################
# Poll the master API until a node reports both expected health keys.
#
# Every attempt sleeps first, then GETs "$API_BASE/nodes/<id>". A non-empty
# body is saved to "$TMP_DIR/node_<id>_detail.json" and inspected: the call
# succeeds once the node's "health" entry contains both "log-fluentbit" and
# "metric-node-exporter".
#
# Globals:   API_BASE (read), TMP_DIR (read)
# Arguments: $1 - node id
#            $2 - maximum number of attempts (optional, default 40)
#            $3 - seconds to sleep before each attempt (optional, default 2)
# Outputs:   overwrites the detail file with each non-empty response
# Returns:   0 when both keys are reported, 1 when attempts are exhausted
#######################################
check_health() {
  local id="$1"
  local tries="${2:-40}"
  local interval="${3:-2}"
  local detail="$TMP_DIR/node_${id}_detail.json"
  local resp attempt

  for ((attempt = 0; attempt < tries; attempt++)); do
    sleep "$interval"
    # Best effort on purpose: an unreachable API or an HTTP error just means
    # "not ready yet", so the failure is swallowed and the attempt is retried.
    resp=$(curl -fsS "$API_BASE/nodes/$id" 2>/dev/null || true)
    [[ -n "$resp" ]] || continue
    printf '%s\n' "$resp" > "$detail"
    # The heredoc body and its terminator must stay at column 0: the text is
    # passed to Python verbatim, and Python rejects unexpected leading indent.
    if python3 - "$detail" <<'PY'
import json
import sys

try:
    with open(sys.argv[1]) as fh:
        node = json.load(fh)
    health = node.get("health", {})
    ok = "log-fluentbit" in health and "metric-node-exporter" in health
except (ValueError, AttributeError, TypeError):
    # Invalid JSON or an unexpected shape counts as "not reported yet".
    ok = False
sys.exit(0 if ok else 1)
PY
    then
      return 0
    fi
  done
  return 1
}
# Abort as soon as either node fails to report both health keys in time.
if ! check_health "$ID_A_VAL"; then
  echo "[ERR] health keys not reported for node A" >&2
  exit 1
fi
if ! check_health "$ID_B_VAL"; then
  echo "[ERR] health keys not reported for node B" >&2
  exit 1
fi
# Final assertion: nodes.json under the core private directory must exist,
# hold a JSON list, and contain exactly two entries.
NODES_JSON="$SYS_DEBUG_PRIVATE_CORE/argus/metric/prometheus/nodes.json"
if [[ ! -f "$NODES_JSON" ]]; then
  echo "[ERR] nodes.json missing at $NODES_JSON" >&2
  exit 1
fi

# SystemExit with a string prints it to stderr and exits with status 1, which
# aborts this script under `set -e`. The heredoc body stays at column 0 with
# Python's own block indentation so the interpreter accepts it.
python3 - "$NODES_JSON" <<'PY'
import json,sys
with open(sys.argv[1]) as h:
    nodes=json.load(h)
if not isinstance(nodes, list):
    raise SystemExit("nodes.json expected list")
if len(nodes) != 2:
    raise SystemExit(f"expected 2 nodes online, got {len(nodes)}")
PY

log "Health reported and nodes.json has 2 online nodes"