#!/usr/bin/env bash
set -euo pipefail

LOG_PREFIX="[AGENT-VERIFY]"
MASTER_ENDPOINT_DEFAULT=""
AGENT_DATA_ROOT_DEFAULT="/private/argus/agent"
AGENT_ETC_ROOT_DEFAULT="/private/argus/etc"
REPORT_INTERVAL_DEFAULT="2"

ALLOW_CONFIG_TOUCH="false"
KEEP_TEST_HEALTH="false"

log_info() {
  echo "${LOG_PREFIX} INFO $*"
}

log_warn() {
  echo "${LOG_PREFIX} WARN $*" >&2
}

log_error() {
  echo "${LOG_PREFIX} ERROR $*" >&2
}

usage() {
  cat <<'USAGE'
Usage: agent_deployment_verify.sh [options]

Options:
  --allow-config-touch   Enable optional config PUT dry-run check.
  --keep-test-health     Keep the temporary verify health file after checks.
  -h, --help             Show this help message.

Environment variables:
  MASTER_ENDPOINT          (required) Master API base endpoint, e.g. http://master:3000
  AGENT_DATA_ROOT          (default: /private/argus/agent)
  AGENT_ETC_ROOT           (default: /private/argus/etc)
  VERIFY_HOSTNAME          (default: output of hostname)
  REPORT_INTERVAL_SECONDS  (default: 2) Agent report interval in seconds
USAGE
}

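# Example invocation (assuming the master API is reachable at http://master:3000):
#   MASTER_ENDPOINT=http://master:3000 ./agent_deployment_verify.sh --keep-test-health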
while [[ $# -gt 0 ]]; do
  case "$1" in
    --allow-config-touch)
      ALLOW_CONFIG_TOUCH="true"
      shift
      ;;
    --keep-test-health)
      KEEP_TEST_HEALTH="true"
      shift
      ;;
    -h|--help)
      usage
      exit 0
      ;;
    *)
      log_error "Unknown option: $1"
      usage >&2
      exit 2
      ;;
  esac
done

MASTER_ENDPOINT="${MASTER_ENDPOINT:-$MASTER_ENDPOINT_DEFAULT}"
AGENT_DATA_ROOT="${AGENT_DATA_ROOT:-$AGENT_DATA_ROOT_DEFAULT}"
AGENT_ETC_ROOT="${AGENT_ETC_ROOT:-$AGENT_ETC_ROOT_DEFAULT}"
VERIFY_HOSTNAME="${VERIFY_HOSTNAME:-$(hostname)}"
REPORT_INTERVAL_SECONDS="${REPORT_INTERVAL_SECONDS:-$REPORT_INTERVAL_DEFAULT}"

if [[ -z "$MASTER_ENDPOINT" ]]; then
  log_error "MASTER_ENDPOINT is required"
  exit 2
fi

if ! [[ "$REPORT_INTERVAL_SECONDS" =~ ^[0-9]+$ ]] || [[ "$REPORT_INTERVAL_SECONDS" -le 0 ]]; then
  log_warn "Invalid REPORT_INTERVAL_SECONDS='$REPORT_INTERVAL_SECONDS', fallback to $REPORT_INTERVAL_DEFAULT"
  REPORT_INTERVAL_SECONDS="$REPORT_INTERVAL_DEFAULT"
fi

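# normalize_endpoint prepends http:// when no scheme is given and strips a
# trailing slash, e.g. "master:3000/" becomes "http://master:3000".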
normalize_endpoint() {
  local endpoint="$1"
  if [[ "$endpoint" != http://* && "$endpoint" != https://* ]]; then
    endpoint="http://$endpoint"
  fi
  endpoint="${endpoint%/}"
  echo "$endpoint"
}

MASTER_BASE="$(normalize_endpoint "$MASTER_ENDPOINT")"

NODE_DIR="$AGENT_DATA_ROOT/$VERIFY_HOSTNAME"
NODE_JSON="$NODE_DIR/node.json"
HEALTH_DIR="$NODE_DIR/health"
DNS_CONF="$AGENT_ETC_ROOT/dns.conf"
UPDATE_SCRIPT="$AGENT_ETC_ROOT/update-dns.sh"

declare -a RESULTS_PASS=()
declare -a RESULTS_WARN=()
declare -a RESULTS_FAIL=()

add_result() {
  local level="$1" message="$2"
  case "$level" in
    PASS)
      RESULTS_PASS+=("$message")
      log_info "$message"
      ;;
    WARN)
      RESULTS_WARN+=("$message")
      log_warn "$message"
      ;;
    FAIL)
      RESULTS_FAIL+=("$message")
      log_error "$message"
      ;;
  esac
}

HAS_JQ="0"
if command -v jq >/dev/null 2>&1; then
  HAS_JQ="1"
fi

if ! command -v curl >/dev/null 2>&1; then
  log_error "curl command not found; please install curl (e.g. apt-get install -y curl)"
  exit 2
fi

if [[ "$HAS_JQ" == "0" ]] && ! command -v python3 >/dev/null 2>&1; then
  log_error "Neither jq nor python3 is available for JSON processing"
  exit 2
fi

CURL_OPTS=(--fail --show-error --silent --max-time 10)

curl_json() {
  local url="$1"
  if ! curl "${CURL_OPTS[@]}" "$url"; then
    return 1
  fi
}

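# JSON helpers: prefer jq when available, otherwise fall back to python3.
# Each helper takes the JSON document, a jq expression, and an equivalent
# Python expression evaluated against a local variable named "data".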
json_query() {
  local json="$1" jq_expr="$2" py_expr="$3"
  if [[ "$HAS_JQ" == "1" ]]; then
    if ! output=$(printf '%s' "$json" | jq -e -r "$jq_expr" 2>/dev/null); then
      return 1
    fi
    printf '%s' "$output"
    return 0
  fi

  # python3 fallback: the heredoc is consumed as the program on stdin, so the
  # JSON document is passed as a second argument instead of via stdin.
  python3 - "$py_expr" "$json" <<'PY'
import json
import sys

expr = sys.argv[1]
try:
    data = json.loads(sys.argv[2])
    value = eval(expr, {}, {"data": data})
except Exception:
    sys.exit(1)
if value is None:
    sys.exit(1)
if isinstance(value, (dict, list)):
    print(json.dumps(value))
else:
    print(value)
PY
}

json_length() {
  local json="$1" jq_expr="$2" py_expr="$3"
  if [[ "$HAS_JQ" == "1" ]]; then
    if ! output=$(printf '%s' "$json" | jq -e "$jq_expr" 2>/dev/null); then
      return 1
    fi
    printf '%s' "$output"
    return 0
  fi

  python3 - "$py_expr" "$json" <<'PY'
import json
import sys

expr = sys.argv[1]
try:
    data = json.loads(sys.argv[2])
    value = eval(expr, {}, {"data": data})
except Exception:
    sys.exit(1)
try:
    print(len(value))
except Exception:
    sys.exit(1)
PY
}

json_has_key() {
  local json="$1" jq_expr="$2" py_expr="$3"
  if [[ "$HAS_JQ" == "1" ]]; then
    if printf '%s' "$json" | jq -e "$jq_expr" >/dev/null 2>&1; then
      return 0
    fi
    return 1
  fi

  python3 - "$py_expr" "$json" <<'PY'
import json
import sys

expr = sys.argv[1]
try:
    data = json.loads(sys.argv[2])
    value = eval(expr, {}, {"data": data})
except Exception:
    sys.exit(1)
if value:
    sys.exit(0)
sys.exit(1)
PY
}

iso_to_epoch() {
  local value="$1"
  if command -v date >/dev/null 2>&1; then
    date -d "$value" +%s 2>/dev/null && return 0
  fi
  if command -v python3 >/dev/null 2>&1; then
    python3 - "$value" <<'PY'
import sys
from datetime import datetime

value = sys.argv[1]
if value is None or value == "":
    sys.exit(1)
if value.endswith('Z'):
    value = value[:-1] + '+00:00'
try:
    dt = datetime.fromisoformat(value)
except ValueError:
    sys.exit(1)
print(int(dt.timestamp()))
PY
    return $?
  fi
  return 1
}

validate_json_file() {
  local path="$1"
  if [[ "$HAS_JQ" == "1" ]]; then
    jq empty "$path" >/dev/null 2>&1 && return 0
    return 1
  fi
  if command -v python3 >/dev/null 2>&1; then
    python3 - "$path" <<'PY'
import json
import sys
path = sys.argv[1]
with open(path, 'r', encoding='utf-8') as handle:
    json.load(handle)
PY
    return $?
  fi
  return 0
}

ensure_directory() {
  local dir="$1"
  if [[ ! -d "$dir" ]]; then
    log_warn "Creating missing directory $dir"
    mkdir -p "$dir"
  fi
}

TEST_HEALTH_FILE=""
TEST_HEALTH_BACKUP=""
TEST_HEALTH_EXISTED="false"

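# cleanup restores a pre-existing verify health file from its backup, keeps the
# temporary file when --keep-test-health was given, and removes it otherwise.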
cleanup() {
  if [[ -n "$TEST_HEALTH_FILE" ]]; then
    if [[ "$TEST_HEALTH_EXISTED" == "true" ]]; then
      printf '%s' "$TEST_HEALTH_BACKUP" > "$TEST_HEALTH_FILE"
    elif [[ "$KEEP_TEST_HEALTH" == "true" ]]; then
      :
    else
      rm -f "$TEST_HEALTH_FILE"
    fi
  fi
}

trap cleanup EXIT

log_info "Starting agent deployment verification for hostname '$VERIFY_HOSTNAME'"

# 4.2 Master health checks
health_resp=""
if ! health_resp=$(curl "${CURL_OPTS[@]}" -w '\n%{http_code} %{time_total}' "$MASTER_BASE/healthz" 2>/tmp/agent_verify_healthz.err); then
  error_detail=$(cat /tmp/agent_verify_healthz.err || true)
  add_result FAIL "GET /healthz failed: $error_detail"
else
  http_meta=$(tail -n1 <<<"$health_resp")
  payload=$(head -n -1 <<<"$health_resp" || true)
  status_code=${http_meta%% *}
  elapsed=${http_meta##* }
  add_result PASS "GET /healthz status=$status_code elapsed=${elapsed}s payload=$payload"
fi
rm -f /tmp/agent_verify_healthz.err

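# /readyz is probed the same way as /healthz: curl -w appends a final
# "<http_code> <time_total>" line, which tail -n1 captures while head -n -1
# recovers the response body.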
if ! readyz_resp=$(curl "${CURL_OPTS[@]}" -w '\n%{http_code} %{time_total}' "$MASTER_BASE/readyz" 2>/tmp/agent_verify_readyz.err); then
  error_detail=$(cat /tmp/agent_verify_readyz.err || true)
  add_result FAIL "GET /readyz failed: $error_detail"
  readyz_payload=""
else
  readyz_meta=$(tail -n1 <<<"$readyz_resp")
  readyz_payload=$(head -n -1 <<<"$readyz_resp" || true)
  readyz_status=${readyz_meta%% *}
  readyz_elapsed=${readyz_meta##* }
  add_result PASS "GET /readyz status=$readyz_status elapsed=${readyz_elapsed}s"
fi
rm -f /tmp/agent_verify_readyz.err

# 4.3 Nodes list and detail
if ! nodes_json=$(curl_json "$MASTER_BASE/api/v1/master/nodes" 2>/tmp/agent_verify_nodes.err); then
  error_detail=$(cat /tmp/agent_verify_nodes.err || true)
  add_result FAIL "GET /api/v1/master/nodes failed: $error_detail"
  nodes_json=""
fi
rm -f /tmp/agent_verify_nodes.err

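# Locate this node's entry in the list by matching the registered name against VERIFY_HOSTNAME.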
NODE_ENTRY=""
NODE_ID=""
NODE_IP=""
if [[ -n "$nodes_json" ]]; then
  if [[ "$HAS_JQ" == "1" ]]; then
    NODE_ENTRY=$(printf '%s' "$nodes_json" | jq -e --arg name "$VERIFY_HOSTNAME" '.[] | select(.name == $name)') || NODE_ENTRY=""
  else
    # python3 fallback: the heredoc occupies stdin, so the nodes list is passed as a second argument.
    NODE_ENTRY=$(python3 - "$VERIFY_HOSTNAME" "$nodes_json" <<'PY'
import json
import sys

hostname = sys.argv[1]
nodes = json.loads(sys.argv[2])
for node in nodes:
    if node.get("name") == hostname:
        print(json.dumps(node))
        sys.exit(0)
sys.exit(1)
PY
    ) || NODE_ENTRY=""
  fi

  if [[ -z "$NODE_ENTRY" ]]; then
    add_result FAIL "Current node '$VERIFY_HOSTNAME' not found in master nodes list"
  else
    if NODE_ID=$(json_query "$NODE_ENTRY" '.id' 'data["id"]'); then
      add_result PASS "Discovered node id '$NODE_ID' for hostname '$VERIFY_HOSTNAME'"
    else
      add_result FAIL "Failed to extract node id from master response"
    fi
  fi

  if [[ -n "$NODE_ENTRY" ]] && NODE_DETAIL=$(curl_json "$MASTER_BASE/api/v1/master/nodes/$NODE_ID" 2>/tmp/agent_verify_node_detail.err); then
    NODE_DETAIL_JSON="$NODE_DETAIL"
    add_result PASS "Fetched node detail for $NODE_ID"
    if NODE_IP=$(json_query "$NODE_DETAIL_JSON" '.meta_data.ip // .meta_data.host_ip // empty' 'data.get("meta_data", {}).get("ip") or data.get("meta_data", {}).get("host_ip") or ""'); then
      if [[ -n "$NODE_IP" ]]; then
        add_result PASS "Registered node IP=$NODE_IP"
      else
        log_info "Node detail does not expose IP fields"
      fi
    fi
  else
    error_detail=$(cat /tmp/agent_verify_node_detail.err 2>/dev/null || true)
    add_result FAIL "Failed to fetch node detail for $NODE_ID: $error_detail"
    NODE_DETAIL_JSON=""
  fi
  rm -f /tmp/agent_verify_node_detail.err

  if stats_json=$(curl_json "$MASTER_BASE/api/v1/master/nodes/statistics" 2>/tmp/agent_verify_stats.err); then
    if total_nodes=$(json_query "$stats_json" '.total // .total_nodes' 'data.get("total") or data.get("total_nodes")'); then
      if [[ "$total_nodes" =~ ^[0-9]+$ ]] && [[ "$total_nodes" -ge 1 ]]; then
        add_result PASS "Statistics total=$total_nodes"
      else
        add_result WARN "Statistics total field not numeric: $total_nodes"
      fi
    else
      add_result WARN "Unable to read total field from statistics"
    fi

    active_nodes=""
    if [[ "$HAS_JQ" == "1" ]]; then
      active_nodes=$(printf '%s' "$stats_json" | jq -e 'if .status_statistics then (.status_statistics[] | select(.status == "online") | .count) else empty end' 2>/dev/null | head -n1 || true)
    elif command -v python3 >/dev/null 2>&1; then
      active_nodes=$(printf '%s' "$stats_json" | python3 -c 'import json,sys; data=json.load(sys.stdin); print(next((row.get("count") for row in data.get("status_statistics", []) if row.get("status")=="online"), ""))' 2>/dev/null)
    fi
    if [[ -n "$active_nodes" ]]; then
      add_result PASS "Online nodes reported by master: $active_nodes"
    fi

    if [[ "$HAS_JQ" == "1" ]]; then
      node_count=$(printf '%s' "$nodes_json" | jq 'length')
    else
      node_count=$(json_length "$nodes_json" 'length' 'len(data)')
    fi
    if [[ "$total_nodes" =~ ^[0-9]+$ ]] && [[ "$node_count" =~ ^[0-9]+$ ]] && [[ "$total_nodes" -lt "$node_count" ]]; then
      add_result WARN "Statistics total=$total_nodes less than nodes list count=$node_count"
    fi
  else
    error_detail=$(cat /tmp/agent_verify_stats.err 2>/dev/null || true)
    add_result FAIL "Failed to fetch node statistics: $error_detail"
  fi
  rm -f /tmp/agent_verify_stats.err
else
  NODE_DETAIL_JSON=""
fi

# 4.4 Agent persistence checks
if [[ -f "$NODE_JSON" ]]; then
  node_file_content="$(cat "$NODE_JSON")"
  if node_id_local=$(json_query "$node_file_content" '.id' 'data["id"]'); then
    if [[ "$NODE_ID" != "" && "$node_id_local" == "$NODE_ID" ]]; then
      add_result PASS "node.json id matches master ($NODE_ID)"
    else
      add_result FAIL "node.json id '$node_id_local' differs from master id '$NODE_ID'"
    fi
  else
    add_result FAIL "Unable to extract id from node.json"
  fi
  if node_name_local=$(json_query "$node_file_content" '.name' 'data["name"]'); then
    if [[ "$node_name_local" == "$VERIFY_HOSTNAME" ]]; then
      add_result PASS "node.json name matches $VERIFY_HOSTNAME"
    else
      add_result FAIL "node.json name '$node_name_local' differs from hostname '$VERIFY_HOSTNAME'"
    fi
  else
    add_result FAIL "Unable to extract name from node.json"
  fi

  if register_time=$(json_query "$node_file_content" '.register_time' 'data.get("register_time")'); then
    if iso_to_epoch "$register_time" >/dev/null 2>&1; then
      add_result PASS "node.json register_time valid ISO timestamp"
    else
      add_result WARN "node.json register_time invalid: $register_time"
    fi
  else
    add_result WARN "node.json missing register_time"
  fi

  if last_updated=$(json_query "$node_file_content" '.last_updated' 'data.get("last_updated")'); then
    if iso_to_epoch "$last_updated" >/dev/null 2>&1; then
      add_result PASS "node.json last_updated valid ISO timestamp"
    else
      add_result WARN "node.json last_updated invalid: $last_updated"
    fi
  else
    add_result WARN "node.json missing last_updated"
  fi
else
  add_result FAIL "node.json not found at $NODE_JSON"
  node_file_content=""
fi

ensure_directory "$HEALTH_DIR"

if [[ -d "$HEALTH_DIR" ]]; then
  shopt -s nullglob
  health_files=("$HEALTH_DIR"/*.json)
  shopt -u nullglob
  if [[ ${#health_files[@]} -eq 0 ]]; then
    add_result WARN "Health directory $HEALTH_DIR is empty"
  else
    for hf in "${health_files[@]}"; do
      base=$(basename "$hf")
      if [[ "$base" != *-* ]]; then
        add_result WARN "Health file $base does not follow <module>-*.json"
        continue
      fi
      if ! validate_json_file "$hf" >/dev/null 2>&1; then
        add_result WARN "Health file $base is not valid JSON"
      fi
    done
  fi
else
  add_result WARN "Health directory $HEALTH_DIR missing"
fi

if getent hosts master.argus.com >/dev/null 2>&1; then
  resolved_ips=$(getent hosts master.argus.com | awk '{print $1}' | xargs)
  add_result PASS "master.argus.com resolves to $resolved_ips"
else
  add_result FAIL "Failed to resolve master.argus.com"
fi

# 4.5 Master-Node status consistency
sleep_interval=$((REPORT_INTERVAL_SECONDS + 2))
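# Sampling margin: waiting REPORT_INTERVAL_SECONDS plus a 2s buffer between the
# two node-detail reads should allow at least one agent report to land in between.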
if [[ -n "$NODE_DETAIL_JSON" ]]; then
  detail_pre="$NODE_DETAIL_JSON"
else
  detail_pre=""
fi

if [[ -z "$detail_pre" && -n "$NODE_ID" ]]; then
  if detail_pre=$(curl_json "$MASTER_BASE/api/v1/master/nodes/$NODE_ID" 2>/tmp/agent_verify_detail_pre.err); then
    add_result PASS "Fetched node detail pre-check"
  else
    error_detail=$(cat /tmp/agent_verify_detail_pre.err 2>/dev/null || true)
    add_result FAIL "Unable to fetch node detail for status check: $error_detail"
  fi
  rm -f /tmp/agent_verify_detail_pre.err
fi

server_ts_pre=""
agent_ts_pre=""
server_ts_post=""
agent_ts_post=""

if [[ -n "$detail_pre" ]]; then
  server_ts_pre=$(json_query "$detail_pre" '.last_report' 'data.get("last_report")' || echo "")
  agent_ts_pre=$(json_query "$detail_pre" '.agent_last_report' 'data.get("agent_last_report")' || echo "")
  log_info "Captured initial last_report timestamps server='$server_ts_pre' agent='$agent_ts_pre'"

  sleep "$sleep_interval"

  if detail_post=$(curl_json "$MASTER_BASE/api/v1/master/nodes/$NODE_ID" 2>/tmp/agent_verify_detail_post.err); then
    server_ts_post=$(json_query "$detail_post" '.last_report' 'data.get("last_report")' || echo "")
    agent_ts_post=$(json_query "$detail_post" '.agent_last_report' 'data.get("agent_last_report")' || echo "")
    if [[ "$server_ts_post" != "$server_ts_pre" ]]; then
      add_result PASS "last_report.server_timestamp advanced (pre=$server_ts_pre post=$server_ts_post)"
    else
      add_result FAIL "last_report.server_timestamp did not change after ${sleep_interval}s"
    fi
    if [[ "$agent_ts_post" != "$agent_ts_pre" ]]; then
      add_result PASS "last_report.agent_timestamp advanced"
    else
      add_result FAIL "last_report.agent_timestamp did not change"
    fi

    if [[ -n "$node_file_content" ]]; then
      if node_last_updated=$(json_query "$node_file_content" '.last_updated' 'data.get("last_updated")'); then
        if epoch_post=$(iso_to_epoch "$server_ts_post" 2>/dev/null); then
          if node_epoch=$(iso_to_epoch "$node_last_updated" 2>/dev/null); then
            diff=$((epoch_post - node_epoch))
            [[ $diff -lt 0 ]] && diff=$((-diff))
            tolerance=$((REPORT_INTERVAL_SECONDS * 2))
            if [[ $diff -le $tolerance ]]; then
              add_result PASS "last_report.server_timestamp and node.json last_updated within tolerance ($diff s)"
            else
              add_result WARN "Timestamp gap between master ($server_ts_post) and node.json ($node_last_updated) is ${diff}s"
            fi
          fi
        fi
      fi
    fi

    NODE_DETAIL_JSON="$detail_post"
  else
    error_detail=$(cat /tmp/agent_verify_detail_post.err 2>/dev/null || true)
    add_result FAIL "Failed to fetch node detail post-check: $error_detail"
  fi
  rm -f /tmp/agent_verify_detail_post.err
fi

# 4.6 Health simulation
TEST_HEALTH_FILE="$HEALTH_DIR/verify-master.json"
ensure_directory "$HEALTH_DIR"

if [[ -f "$TEST_HEALTH_FILE" ]]; then
  TEST_HEALTH_EXISTED="true"
  TEST_HEALTH_BACKUP="$(cat "$TEST_HEALTH_FILE")"
else
  TEST_HEALTH_EXISTED="false"
fi

create_health_file() {
  local message="$1"
  cat > "$TEST_HEALTH_FILE" <<HEALTHJSON
{"status":"ok","message":"$message"}
HEALTHJSON
}

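# create_health_file drops a payload such as {"status":"ok","message":"verify <epoch>"}
# into the node's health directory; the agent is expected to report it so that it
# shows up under health["verify-master"] in the master's node detail.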
validate_health_in_master() {
  local expected_message="$1"
  local detail_json="$2"
  local message
  if message=$(json_query "$detail_json" '.health["verify-master"].message' 'data.get("health", {}).get("verify-master", {}).get("message")'); then
    if [[ "$message" == "$expected_message" ]]; then
      return 0
    fi
  fi
  return 1
}

remove_health_from_master() {
  local detail_json="$1"
  if json_has_key "$detail_json" '(.health | has("verify-master"))' '"verify-master" in data.get("health", {})'; then
    return 1
  fi
  return 0
}

health_message_one="verify $(date +%s)"
create_health_file "$health_message_one"
add_result PASS "Created test health file $TEST_HEALTH_FILE"

sleep "$sleep_interval"
if detail_health_one=$(curl_json "$MASTER_BASE/api/v1/master/nodes/$NODE_ID" 2>/tmp/agent_verify_health1.err); then
  if validate_health_in_master "$health_message_one" "$detail_health_one"; then
    add_result PASS "Master reflects verify-master health message"
  else
    add_result FAIL "Master health payload does not match test message"
  fi
else
  error_detail=$(cat /tmp/agent_verify_health1.err 2>/dev/null || true)
  add_result FAIL "Failed to fetch node detail during health validation: $error_detail"
  detail_health_one=""
fi
rm -f /tmp/agent_verify_health1.err

health_message_two="verify $(date +%s)-update"
create_health_file "$health_message_two"
sleep "$sleep_interval"
if detail_health_two=$(curl_json "$MASTER_BASE/api/v1/master/nodes/$NODE_ID" 2>/tmp/agent_verify_health2.err); then
  if validate_health_in_master "$health_message_two" "$detail_health_two"; then
    add_result PASS "Master health updated to new message"
  else
    add_result FAIL "Master health message did not update"
  fi
else
  error_detail=$(cat /tmp/agent_verify_health2.err 2>/dev/null || true)
  add_result FAIL "Failed to fetch node detail after health update: $error_detail"
  detail_health_two=""
fi
rm -f /tmp/agent_verify_health2.err

rm -f "$TEST_HEALTH_FILE"
sleep "$sleep_interval"
if detail_health_three=$(curl_json "$MASTER_BASE/api/v1/master/nodes/$NODE_ID" 2>/tmp/agent_verify_health3.err); then
  if remove_health_from_master "$detail_health_three"; then
    add_result PASS "Master health no longer lists verify-master after removal"
  else
    add_result FAIL "Master health still contains verify-master after file deletion"
  fi
else
  error_detail=$(cat /tmp/agent_verify_health3.err 2>/dev/null || true)
  add_result FAIL "Failed to fetch node detail after health removal: $error_detail"
fi
rm -f /tmp/agent_verify_health3.err

if [[ "$TEST_HEALTH_EXISTED" == "true" ]]; then
  printf '%s' "$TEST_HEALTH_BACKUP" > "$TEST_HEALTH_FILE"
fi

# Optional config touch
if [[ "$ALLOW_CONFIG_TOUCH" == "true" ]]; then
  if [[ -n "$NODE_ID" ]]; then
    payload='{"label": {"verify": "true"}}'
    if curl "${CURL_OPTS[@]}" -X PUT -H 'Content-Type: application/json' -d "$payload" "$MASTER_BASE/api/v1/master/nodes/$NODE_ID/config" >/tmp/agent_verify_config.log 2>&1; then
      add_result PASS "Config PUT dry-run succeeded"
    else
      add_result WARN "Config PUT dry-run failed: $(cat /tmp/agent_verify_config.log)"
    fi
    rm -f /tmp/agent_verify_config.log
  fi
else
  add_result WARN "Config PUT dry-run skipped (enable with --allow-config-touch)"
fi

# Result summary
echo
echo "==== Verification Summary ===="
for entry in "${RESULTS_PASS[@]}"; do
  printf 'PASS: %s\n' "$entry"
done
for entry in "${RESULTS_WARN[@]}"; do
  printf 'WARN: %s\n' "$entry"
done
for entry in "${RESULTS_FAIL[@]}"; do
  printf 'FAIL: %s\n' "$entry"
done

if [[ ${#RESULTS_FAIL[@]} -gt 0 ]]; then
  exit 1
fi

exit 0