#!/usr/bin/env bash
#
# Checks that the master service keeps its persisted data across a container
# restart.
#
# Flow:
#   1. Read the two node IDs recorded under tmp/ (node_id, second_node_id)
#      and make sure the master SQLite database file exists.
#   2. Snapshot both node detail documents, the Prometheus nodes.json file and
#      the node statistics endpoint.
#   3. Restart the "master" compose service and wait for /readyz to answer 200.
#   4. Re-fetch the node details and compare a fixed set of keys.
#   5. PUT a healthy status report for the second node, then compare nodes.json
#      and the statistics document against the pre-restart snapshots.
#   6. Make sure the database file is non-empty.
#
# Any failed step terminates the script with a non-zero exit status.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
TEST_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
PRIVATE_ROOT="$TEST_ROOT/private"
TMP_ROOT="$TEST_ROOT/tmp"
API_BASE="http://localhost:31300/api/v1/master"
ROOT_BASE="http://localhost:31300"
DB_PATH="$PRIVATE_ROOT/argus/master/db.sqlite3"
NODES_JSON_PATH="$PRIVATE_ROOT/argus/metric/prometheus/nodes.json"

# Runs a Compose command, preferring the "docker compose" plugin and falling
# back to the standalone "docker-compose" binary when the plugin is missing.
# Arguments: passed through unchanged to the compose CLI.
compose() {
  if docker compose version >/dev/null 2>&1; then
    docker compose "$@"
  else
    docker-compose "$@"
  fi
}

# Downloads a URL into a file.
# Arguments: $1 - URL to GET, $2 - destination file.
# -f makes curl exit non-zero on HTTP >= 400, so an error response is never
# stored as a snapshot (which could make the later comparisons pass vacuously).
fetch_json() {
  curl -fsS "$1" -o "$2"
}

# Compares the persisted fields of one node before and after the restart.
# Arguments: $1 - pre-restart JSON file, $2 - post-restart JSON file.
# Returns:   non-zero (Python AssertionError) when any listed key differs.
assert_node_unchanged() {
  python3 - "$1" "$2" <<'PY'
import json, sys

before_path, after_path = sys.argv[1:3]
with open(before_path, 'r', encoding='utf-8') as handle:
    before = json.load(handle)
with open(after_path, 'r', encoding='utf-8') as handle:
    after = json.load(handle)

keys = [
    "id",
    "name",
    "type",
    "version",
    "register_time",
    "meta_data",
    "config",
    "label",
    "health",
    "last_report",
    "agent_last_report",
]

for key in keys:
    if before.get(key) != after.get(key):
        raise AssertionError(f"Key {key} changed after restart: {before.get(key)} -> {after.get(key)}")
PY
}

# Asserts that two JSON documents are equal as parsed values.
# Arguments: $1 - pre-restart file, $2 - post-restart file,
#            $3 - label used as the prefix of the failure message.
# Returns:   non-zero (Python AssertionError) when the documents differ.
assert_json_equal() {
  python3 - "$1" "$2" "$3" <<'PY'
import json, sys

before_path, after_path, label = sys.argv[1:4]
with open(before_path, 'r', encoding='utf-8') as handle:
    before = json.load(handle)
with open(after_path, 'r', encoding='utf-8') as handle:
    after = json.load(handle)

if before != after:
    raise AssertionError(f"{label} changed after restart: {before} -> {after}")
PY
}

# --- Preconditions ----------------------------------------------------------

if [[ ! -f "$TMP_ROOT/node_id" ]]; then
  echo "[ERROR] 主节点 ID 缺失,请先执行前置用例" >&2
  exit 1
fi

if [[ ! -f "$TMP_ROOT/second_node_id" ]]; then
  echo "[ERROR] 第二个节点 ID 缺失,请先执行多节点场景脚本" >&2
  exit 1
fi

if [[ ! -f "$DB_PATH" ]]; then
  echo "[ERROR] 持久化数据库缺失: $DB_PATH" >&2
  exit 1
fi

NODE_ID="$(cat "$TMP_ROOT/node_id")"
SECOND_NODE_ID="$(cat "$TMP_ROOT/second_node_id")"

# --- Pre-restart baseline ---------------------------------------------------
# Capture node details, the nodes.json file and the statistics before the
# restart so they can serve as the comparison baseline.

first_before="$TMP_ROOT/${NODE_ID}_pre_restart.json"
second_before="$TMP_ROOT/${SECOND_NODE_ID}_pre_restart.json"
fetch_json "$API_BASE/nodes/$NODE_ID" "$first_before"
fetch_json "$API_BASE/nodes/$SECOND_NODE_ID" "$second_before"

nodes_json_before="$TMP_ROOT/nodes_json_pre_restart.json"
cp "$NODES_JSON_PATH" "$nodes_json_before"

stats_before="$TMP_ROOT/stats_pre_restart.json"
fetch_json "$API_BASE/nodes/statistics" "$stats_before"

# --- Restart ----------------------------------------------------------------
# Restart the master container to exercise persistence across a service
# restart. The subshell keeps the directory change local to this command.
(
  cd "$TEST_ROOT"
  compose restart master
)

# Wait (up to 30 attempts, one second apart) for /readyz to return 200 again.
status=""
for _ in {1..30}; do
  # "|| true": a refused connection during startup must not trip set -e.
  status=$(curl -s -o /dev/null -w '%{http_code}' "$ROOT_BASE/readyz" || true)
  if [[ "$status" == "200" ]]; then
    break
  fi
  sleep 1
done

if [[ "${status:-}" != "200" ]]; then
  echo "[ERROR] master 容器重启后未恢复健康状态,readyz=$status" >&2
  exit 1
fi

sleep 2

# --- Post-restart node comparison -------------------------------------------
# Compare the key node fields from before and after the restart to make sure
# nothing was lost.

first_after="$TMP_ROOT/${NODE_ID}_post_restart.json"
second_after="$TMP_ROOT/${SECOND_NODE_ID}_post_restart.json"
fetch_json "$API_BASE/nodes/$NODE_ID" "$first_after"
fetch_json "$API_BASE/nodes/$SECOND_NODE_ID" "$second_after"

assert_node_unchanged "$first_before" "$first_after"
assert_node_unchanged "$second_before" "$second_after"

# --- Re-report status for the second node -----------------------------------
# NOTE(review): presumably this brings the second node back to the state it
# had when the baseline was taken, so nodes.json and the statistics can match;
# confirm against the multi-node scenario script.

payload=$(python3 - <<'PY'
import json
from datetime import datetime, timezone

body = {
    "timestamp": datetime.now(timezone.utc).replace(microsecond=0).isoformat().replace("+00:00", "Z"),
    "health": {
        "log-fluentbit": {"status": "healthy"}
    }
}
print(json.dumps(body))
PY
)

# No -f here on purpose: the response body is kept so it can be printed when
# the status code is not 200.
curl -sS -o "$TMP_ROOT/restart_second_status.json" -w '%{http_code}' \
  -H 'Content-Type: application/json' -X PUT \
  "$API_BASE/nodes/$SECOND_NODE_ID/status" -d "$payload" \
  > "$TMP_ROOT/restart_second_status_code"

if [[ "$(<"$TMP_ROOT/restart_second_status_code")" != "200" ]]; then
  echo "[ERROR] Failed to restore second node status post-restart" >&2
  cat "$TMP_ROOT/restart_second_status.json" >&2
  exit 1
fi

sleep 3

# --- nodes.json and statistics comparison -----------------------------------
# Compare nodes.json and the statistics from before and after the restart to
# verify that the persisted data is consistent.

nodes_json_after="$TMP_ROOT/nodes_json_post_restart.json"
cp "$NODES_JSON_PATH" "$nodes_json_after"

stats_after="$TMP_ROOT/stats_after_restart.json"
fetch_json "$API_BASE/nodes/statistics" "$stats_after"

assert_json_equal "$nodes_json_before" "$nodes_json_after" "nodes.json"
assert_json_equal "$stats_before" "$stats_after" "Statistics"

# --- Database sanity check --------------------------------------------------

if [[ ! -s "$DB_PATH" ]]; then
  echo "[ERROR] 数据库文件为空,疑似未持久化" >&2
  exit 1
fi

echo "[INFO] Master 重启后持久化数据校验通过"