#!/usr/bin/env bash
#
# Restart-persistence check for the master service.
#
# Captures node details, the Prometheus nodes.json file and the node
# statistics, restarts the "master" compose service, waits for /readyz, and
# verifies that the same data is present afterwards.
#
# Requires $TMP_ROOT/node_id and $TMP_ROOT/second_node_id, which must have been
# written by scripts run earlier.
set -euo pipefail

# Directory holding this script, and the test root one level above it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
TEST_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"

# Working directories under the test root.
PRIVATE_ROOT="$TEST_ROOT/private"
TMP_ROOT="$TEST_ROOT/tmp"

# Master HTTP endpoints: versioned API base and server root (used for /readyz).
API_BASE="http://localhost:31300/api/v1/master"
ROOT_BASE="http://localhost:31300"

# SQLite database file whose presence and non-emptiness are checked below.
DB_PATH="$PRIVATE_ROOT/argus/master/db.sqlite3"

#######################################
# Runs Docker Compose with the given arguments, using the `docker compose`
# subcommand when `docker compose version` succeeds and the standalone
# `docker-compose` command otherwise.
# Arguments: all arguments are passed through unchanged.
# Returns:   exit status of the compose command that was run.
#######################################
compose() {
  if docker compose version >/dev/null 2>&1; then
    docker compose "$@"
  else
    docker-compose "$@"
  fi
}

# Preconditions: both node ID files and the persisted database must exist
# before the restart scenario can run.
if [[ ! -f "$TMP_ROOT/node_id" ]]; then
  echo "[ERROR] 主节点 ID 缺失,请先执行前置用例" >&2
  exit 1
fi

if [[ ! -f "$TMP_ROOT/second_node_id" ]]; then
  echo "[ERROR] 第二个节点 ID 缺失,请先执行多节点场景脚本" >&2
  exit 1
fi

if [[ ! -f "$DB_PATH" ]]; then
  echo "[ERROR] 持久化数据库缺失: $DB_PATH" >&2
  exit 1
fi

NODE_ID="$(<"$TMP_ROOT/node_id")"
SECOND_NODE_ID="$(<"$TMP_ROOT/second_node_id")"

# An empty ID would turn the per-node URLs used later into ".../nodes/",
# so reject it here instead of comparing unrelated responses.
if [[ -z "$NODE_ID" || -z "$SECOND_NODE_ID" ]]; then
  echo "[ERROR] node ID file is empty: $TMP_ROOT/node_id or $TMP_ROOT/second_node_id" >&2
  exit 1
fi

# Before the restart, capture the node details, the nodes.json file and the
# statistics as the comparison baseline.
# curl runs with -f so an HTTP error status aborts the script (via set -e)
# instead of storing an error body as the baseline.
first_before="$TMP_ROOT/${NODE_ID}_pre_restart.json"
second_before="$TMP_ROOT/${SECOND_NODE_ID}_pre_restart.json"
curl -fsS "$API_BASE/nodes/$NODE_ID" -o "$first_before"
curl -fsS "$API_BASE/nodes/$SECOND_NODE_ID" -o "$second_before"

nodes_json_before="$TMP_ROOT/nodes_json_pre_restart.json"
cp "$PRIVATE_ROOT/argus/metric/prometheus/nodes.json" "$nodes_json_before"

stats_before="$TMP_ROOT/stats_pre_restart.json"
curl -fsS "$API_BASE/nodes/statistics" -o "$stats_before"

# Restart the master container to simulate a service restart with persisted
# data.
pushd "$TEST_ROOT" >/dev/null
compose restart master
popd >/dev/null

# Wait for /readyz to return 200 again: up to 30 probes, one second apart.
# --max-time keeps a single hung probe from stalling the loop; `|| true`
# tolerates connection failures while the service is still coming up.
status=""
for _ in {1..30}; do
  status=$(curl -s --max-time 5 -o /dev/null -w '%{http_code}' "$ROOT_BASE/readyz" || true)
  if [[ "$status" == "200" ]]; then
    break
  fi
  sleep 1
done

if [[ "${status:-}" != "200" ]]; then
  echo "[ERROR] master 容器重启后未恢复健康状态,readyz=$status" >&2
  exit 1
fi

# Short pause after readiness before querying the API.
# NOTE(review): presumably lets the service finish loading state — confirm.
sleep 2

#######################################
# Compares selected fields of one node's JSON detail before and after the
# restart.
# Arguments: $1 - path to the pre-restart JSON file
#            $2 - path to the post-restart JSON file
# Returns:   non-zero (uncaught Python AssertionError) if any compared key
#            differs between the two files.
#######################################
assert_node_unchanged() {
  python3 - "$1" "$2" <<'PY'
import json, sys

before_path, after_path = sys.argv[1:3]
with open(before_path, 'r', encoding='utf-8') as handle:
    before = json.load(handle)
with open(after_path, 'r', encoding='utf-8') as handle:
    after = json.load(handle)

keys = [
    "id",
    "name",
    "type",
    "version",
    "register_time",
    "meta_data",
    "config",
    "label",
    "health",
    "last_report",
    "agent_last_report",
]
for key in keys:
    if before.get(key) != after.get(key):
        raise AssertionError(f"Key {key} changed after restart: {before.get(key)} -> {after.get(key)}")
PY
}

# Fetch both node details again after the restart; -f makes an HTTP error
# status abort the script instead of being compared as if it were node data.
first_after="$TMP_ROOT/${NODE_ID}_post_restart.json"
second_after="$TMP_ROOT/${SECOND_NODE_ID}_post_restart.json"
curl -fsS "$API_BASE/nodes/$NODE_ID" -o "$first_after"
curl -fsS "$API_BASE/nodes/$SECOND_NODE_ID" -o "$second_after"

# Compare the key node fields before and after the restart to make sure
# nothing was lost.
assert_node_unchanged "$first_before" "$first_after"
assert_node_unchanged "$second_before" "$second_after"

# Build a status report body: current UTC time (second precision, "Z" suffix)
# and a healthy "log-fluentbit" entry.
payload=$(python3 - <<'PY'
import json
from datetime import datetime, timezone

body = {
    "timestamp": datetime.now(timezone.utc).replace(microsecond=0).isoformat().replace("+00:00", "Z"),
    "health": {
        "log-fluentbit": {"status": "healthy"}
    }
}
print(json.dumps(body))
PY
)

# PUT the status for the second node. The response body and the HTTP status
# code are written to separate files under $TMP_ROOT so the code can be checked
# and the body shown on failure.
curl -sS -o "$TMP_ROOT/restart_second_status.json" -w '%{http_code}' \
  -H 'Content-Type: application/json' -X PUT \
  "$API_BASE/nodes/$SECOND_NODE_ID/status" -d "$payload" > "$TMP_ROOT/restart_second_status_code"

if [[ "$(<"$TMP_ROOT/restart_second_status_code")" != "200" ]]; then
  echo "[ERROR] Failed to restore second node status post-restart" >&2
  cat "$TMP_ROOT/restart_second_status.json" >&2
  exit 1
fi

# Pause before reading nodes.json and the statistics.
# NOTE(review): presumably gives the master time to apply the status update
# and rewrite nodes.json — confirm.
sleep 3

#######################################
# Asserts that two JSON files decode to equal values.
# Arguments: $1 - label used at the start of the failure message
#            $2 - path to the pre-restart JSON file
#            $3 - path to the post-restart JSON file
# Returns:   non-zero (uncaught Python AssertionError) if the decoded
#            documents differ.
#######################################
assert_json_unchanged() {
  python3 - "$1" "$2" "$3" <<'PY'
import json, sys

label = sys.argv[1]
with open(sys.argv[2], 'r', encoding='utf-8') as handle:
    before = json.load(handle)
with open(sys.argv[3], 'r', encoding='utf-8') as handle:
    after = json.load(handle)
if before != after:
    raise AssertionError(f"{label} changed after restart: {before} -> {after}")
PY
}

# Compare nodes.json and the statistics before and after the restart to
# verify that the persisted data is consistent.
nodes_json_after="$TMP_ROOT/nodes_json_post_restart.json"
cp "$PRIVATE_ROOT/argus/metric/prometheus/nodes.json" "$nodes_json_after"

# -f makes an HTTP error status abort the script instead of being compared as
# if it were statistics.
stats_after="$TMP_ROOT/stats_after_restart.json"
curl -fsS "$API_BASE/nodes/statistics" -o "$stats_after"

assert_json_unchanged "nodes.json" "$nodes_json_before" "$nodes_json_after"
assert_json_unchanged "Statistics" "$stats_before" "$stats_after"

# The database file must still exist and be non-empty after the restart.
if [[ ! -s "$DB_PATH" ]]; then
  echo "[ERROR] 数据库文件为空,疑似未持久化" >&2
  exit 1
fi

echo "[INFO] Master 重启后持久化数据校验通过"