[#2] 修复并测试agent verify

This commit is contained in:
yuyr 2025-09-28 03:34:44 +00:00
parent c5652c65c8
commit f7766c022c
6 changed files with 82 additions and 78 deletions

Binary file not shown.

View File

@ -122,6 +122,11 @@ if command -v jq >/dev/null 2>&1; then
HAS_JQ="1"
fi
if ! command -v curl >/dev/null 2>&1; then
log_error "curl command not found; please install curl (e.g. apt-get install -y curl)"
exit 2
fi
if [[ "$HAS_JQ" == "0" ]] && ! command -v python3 >/dev/null 2>&1; then
log_error "Neither jq nor python3 is available for JSON processing"
exit 2
@ -219,7 +224,11 @@ PY
iso_to_epoch() {
local value="$1"
python3 - "$value" <<'PY'
if command -v date >/dev/null 2>&1; then
date -d "$value" +%s 2>/dev/null && return 0
fi
if command -v python3 >/dev/null 2>&1; then
python3 - "$value" <<'PY'
import sys
from datetime import datetime
@ -234,17 +243,28 @@ except ValueError:
sys.exit(1)
print(int(dt.timestamp()))
PY
return $?
fi
return 1
}
validate_json_file() {
local path="$1"
python3 - "$path" <<'PY'
if [[ "$HAS_JQ" == "1" ]]; then
jq empty "$path" >/dev/null 2>&1 && return 0
return 1
fi
if command -v python3 >/dev/null 2>&1; then
python3 - "$path" <<'PY'
import json
import sys
path = sys.argv[1]
with open(path, 'r', encoding='utf-8') as handle:
json.load(handle)
PY
return $?
fi
return 0
}
ensure_directory() {
@ -341,20 +361,18 @@ PY
else
add_result FAIL "Failed to extract node id from master response"
fi
if NODE_IP=$(json_query "$NODE_ENTRY" '.meta_data.host_ip // empty' 'data.get("meta_data", {}).get("host_ip", "")'); then
if [[ -n "$NODE_IP" ]]; then
add_result PASS "Registered node host_ip=$NODE_IP"
else
add_result WARN "Node host_ip missing in master meta_data"
fi
else
add_result WARN "Unable to read meta_data.host_ip"
fi
fi
if [[ -n "$NODE_ENTRY" ]] && NODE_DETAIL=$(curl_json "$MASTER_BASE/api/v1/master/nodes/$NODE_ID" 2>/tmp/agent_verify_node_detail.err); then
NODE_DETAIL_JSON="$NODE_DETAIL"
add_result PASS "Fetched node detail for $NODE_ID"
if NODE_IP=$(json_query "$NODE_DETAIL_JSON" '.meta_data.ip // .meta_data.host_ip // empty' 'data.get("meta_data", {}).get("ip") or data.get("meta_data", {}).get("host_ip") or ""'); then
if [[ -n "$NODE_IP" ]]; then
add_result PASS "Registered node IP=$NODE_IP"
else
add_result INFO "Node detail does not expose IP fields"
fi
fi
else
error_detail=$(cat /tmp/agent_verify_node_detail.err 2>/dev/null || true)
add_result FAIL "Failed to fetch node detail for $NODE_ID: $error_detail"
@ -363,28 +381,24 @@ PY
rm -f /tmp/agent_verify_node_detail.err
if stats_json=$(curl_json "$MASTER_BASE/api/v1/master/nodes/statistics" 2>/tmp/agent_verify_stats.err); then
if total_nodes=$(json_query "$stats_json" '.total_nodes' 'data["total_nodes"]'); then
if total_nodes=$(json_query "$stats_json" '.total // .total_nodes' 'data.get("total") or data.get("total_nodes")'); then
if [[ "$total_nodes" =~ ^[0-9]+$ ]] && [[ "$total_nodes" -ge 1 ]]; then
add_result PASS "Statistics total_nodes=$total_nodes"
add_result PASS "Statistics total=$total_nodes"
else
add_result FAIL "Statistics total_nodes invalid: $total_nodes"
add_result WARN "Statistics total field not numeric: $total_nodes"
fi
else
add_result FAIL "Unable to read total_nodes from statistics"
add_result WARN "Unable to read total field from statistics"
fi
if active_nodes=$(json_query "$stats_json" '.active_nodes' 'data["active_nodes"]'); then
if [[ "$active_nodes" =~ ^[0-9]+$ ]]; then
add_result PASS "Statistics active_nodes=$active_nodes"
else
add_result WARN "Statistics active_nodes not numeric: $active_nodes"
fi
active_nodes=""
if [[ "$HAS_JQ" == "1" ]]; then
active_nodes=$(printf '%s' "$stats_json" | jq -e 'if .status_statistics then (.status_statistics[] | select(.status == "online") | .count) else empty end' 2>/dev/null | head -n1 || true)
elif command -v python3 >/dev/null 2>&1; then
active_nodes=$(printf '%s' "$stats_json" | python3 -c 'import json,sys; data=json.load(sys.stdin); print(next((row.get("count") for row in data.get("status_statistics", []) if row.get("status")=="online"), ""))' 2>/dev/null)
fi
if inactive_nodes=$(json_query "$stats_json" '.inactive_nodes' 'data["inactive_nodes"]'); then
if [[ "$inactive_nodes" =~ ^[0-9]+$ ]]; then
add_result PASS "Statistics inactive_nodes=$inactive_nodes"
else
add_result WARN "Statistics inactive_nodes not numeric: $inactive_nodes"
fi
if [[ -n "$active_nodes" ]]; then
add_result PASS "Online nodes reported by master: $active_nodes"
fi
if [[ "$HAS_JQ" == "1" ]]; then
@ -392,10 +406,8 @@ PY
else
node_count=$(json_length "$nodes_json" 'length' 'len(data)')
fi
if [[ "$total_nodes" =~ ^[0-9]+$ ]] && [[ "$node_count" =~ ^[0-9]+$ ]]; then
if [[ "$total_nodes" -lt "$node_count" ]]; then
add_result WARN "Statistics total_nodes=$total_nodes less than nodes list count=$node_count"
fi
if [[ "$total_nodes" =~ ^[0-9]+$ ]] && [[ "$node_count" =~ ^[0-9]+$ ]] && [[ "$total_nodes" -lt "$node_count" ]]; then
add_result WARN "Statistics total=$total_nodes less than nodes list count=$node_count"
fi
else
error_detail=$(cat /tmp/agent_verify_stats.err 2>/dev/null || true)
@ -476,47 +488,11 @@ else
add_result WARN "Health directory $HEALTH_DIR missing"
fi
if [[ -f "$DNS_CONF" ]]; then
nameservers=$(awk '/^nameserver/{print $2}' "$DNS_CONF" | xargs)
if [[ -z "$nameservers" ]]; then
add_result FAIL "dns.conf found but contains no nameserver entries"
else
add_result PASS "dns.conf nameservers: $nameservers"
if getent hosts master.argus.com >/dev/null 2>&1; then
resolved_ips=$(getent hosts master.argus.com | awk '{print $1}' | xargs)
match_found="false"
for ns in $nameservers; do
if grep -qw "$ns" <<<"$resolved_ips"; then
match_found="true"
fi
done
if [[ "$match_found" == "true" ]]; then
add_result PASS "master.argus.com resolves via configured nameserver"
else
add_result WARN "master.argus.com resolved IPs ($resolved_ips) do not match dns.conf nameservers ($nameservers)"
fi
else
add_result WARN "Failed to resolve master.argus.com"
fi
fi
if getent hosts master.argus.com >/dev/null 2>&1; then
resolved_ips=$(getent hosts master.argus.com | awk '{print $1}' | xargs)
add_result PASS "master.argus.com resolves to $resolved_ips"
else
add_result FAIL "dns.conf not found at $DNS_CONF"
fi
if [[ -f "$UPDATE_SCRIPT" ]]; then
dns_mtime=$(stat -c %Y "$DNS_CONF" 2>/dev/null || echo 0)
upd_mtime=$(stat -c %Y "$UPDATE_SCRIPT" 2>/dev/null || echo 0)
if [[ "$dns_mtime" -gt 0 && "$upd_mtime" -gt 0 ]]; then
diff=$((dns_mtime - upd_mtime))
[[ $diff -lt 0 ]] && diff=$((-diff))
if [[ $diff -le 300 ]]; then
add_result PASS "dns.conf and update-dns.sh timestamps within 5 minutes"
else
add_result WARN "dns.conf and update-dns.sh timestamps differ by more than 5 minutes"
fi
fi
else
add_result WARN "update-dns.sh not found at $UPDATE_SCRIPT"
add_result FAIL "Failed to resolve master.argus.com"
fi
# 4.5 Master-Node status consistency
@ -544,15 +520,15 @@ server_ts_post=""
agent_ts_post=""
if [[ -n "$detail_pre" ]]; then
server_ts_pre=$(json_query "$detail_pre" '.last_report.server_timestamp' 'data.get("last_report", {}).get("server_timestamp")' || echo "")
agent_ts_pre=$(json_query "$detail_pre" '.last_report.agent_timestamp' 'data.get("last_report", {}).get("agent_timestamp")' || echo "")
server_ts_pre=$(json_query "$detail_pre" '.last_report' 'data.get("last_report")' || echo "")
agent_ts_pre=$(json_query "$detail_pre" '.agent_last_report' 'data.get("agent_last_report")' || echo "")
log_info "Captured initial last_report timestamps server='$server_ts_pre' agent='$agent_ts_pre'"
sleep "$sleep_interval"
if detail_post=$(curl_json "$MASTER_BASE/api/v1/master/nodes/$NODE_ID" 2>/tmp/agent_verify_detail_post.err); then
server_ts_post=$(json_query "$detail_post" '.last_report.server_timestamp' 'data.get("last_report", {}).get("server_timestamp")' || echo "")
agent_ts_post=$(json_query "$detail_post" '.last_report.agent_timestamp' 'data.get("last_report", {}).get("agent_timestamp")' || echo "")
server_ts_post=$(json_query "$detail_post" '.last_report' 'data.get("last_report")' || echo "")
agent_ts_post=$(json_query "$detail_post" '.agent_last_report' 'data.get("agent_last_report")' || echo "")
if [[ "$server_ts_post" != "$server_ts_pre" ]]; then
add_result PASS "last_report.server_timestamp advanced (pre=$server_ts_pre post=$server_ts_post)"
else
@ -611,7 +587,7 @@ validate_health_in_master() {
local expected_message="$1"
local detail_json="$2"
local message
if message=$(json_query "$detail_json" '.meta_data.health["verify-master"].message' 'data.get("meta_data", {}).get("health", {}).get("verify-master", {}).get("message")'); then
if message=$(json_query "$detail_json" '.health["verify-master"].message' 'data.get("health", {}).get("verify-master", {}).get("message")'); then
if [[ "$message" == "$expected_message" ]]; then
return 0
fi
@ -621,7 +597,7 @@ validate_health_in_master() {
remove_health_from_master() {
local detail_json="$1"
if json_has_key "$detail_json" '(.meta_data.health | has("verify-master"))' '"verify-master" in data.get("meta_data", {}).get("health", {})'; then
if json_has_key "$detail_json" '(.health | has("verify-master"))' '"verify-master" in data.get("health", {})'; then
return 1
fi
return 0

View File

@ -44,6 +44,7 @@ services:
- ./private/argus/etc:/private/argus/etc
- ../dist/argus-agent:/usr/local/bin/argus-agent:ro
- ./scripts/agent_entrypoint.sh:/usr/local/bin/agent-entrypoint.sh:ro
- ../scripts/agent_deployment_verify.sh:/usr/local/bin/agent_deployment_verify.sh:ro
entrypoint:
- /usr/local/bin/agent-entrypoint.sh
networks:

View File

@ -7,6 +7,7 @@ SCRIPTS=(
"02_up.sh"
"03_wait_and_assert_registration.sh"
"04_write_health_files.sh"
"08_verify_agent.sh"
"05_assert_status_on_master.sh"
"06_restart_agent_and_reregister.sh"
"07_down.sh"

View File

@ -11,7 +11,7 @@ TMP_ROOT="$TEST_ROOT/tmp"
AGENT_HOSTNAME="dev-e2euser-e2einst-pod-0"
AGENT_CONFIG_DIR="$PRIVATE_ROOT/argus/agent/$AGENT_HOSTNAME"
AGENT_HEALTH_DIR="$PRIVATE_ROOT/argus/agent/health/$AGENT_HOSTNAME"
AGENT_HEALTH_DIR="$PRIVATE_ROOT/argus/agent/$AGENT_HOSTNAME/health"
MASTER_PRIVATE_DIR="$PRIVATE_ROOT/argus/master"
METRIC_PRIVATE_DIR="$PRIVATE_ROOT/argus/metric/prometheus"
DNS_DIR="$PRIVATE_ROOT/argus/etc"

View File

@ -0,0 +1,26 @@
#!/usr/bin/env bash
# E2E step: run agent_deployment_verify.sh inside the agent container.
#
# Behaviour:
#   - If the agent container is not running, print a warning and exit 0
#     (the step is skipped, not failed).
#   - Make sure curl and jq exist in the container, installing them with
#     apt-get on a best-effort basis.
#   - Run the verify script that is mounted into the container; if it is not
#     mounted, copy the repository copy into the container and run that.
#   - The exit status of the verify script becomes the status of this step.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
TEST_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
# Host-side path of the verify script (repository copy).
VERIFY_SCRIPT="$(cd "$TEST_ROOT/.." && pwd)/scripts/agent_deployment_verify.sh"

AGENT_CONTAINER="argus-agent-e2e"
# Path at which the verify script is expected to be mounted in the container.
CONTAINER_VERIFY_SCRIPT="/usr/local/bin/agent_deployment_verify.sh"
# Destination used when the host copy has to be pushed into the container.
CONTAINER_FALLBACK_SCRIPT="/tmp/agent_deployment_verify.sh"

# Capture the container list before matching instead of piping into
# `grep -q`: under pipefail, grep exiting on the first match can make
# `docker ps` die on SIGPIPE and the running container would look absent.
# A failing `docker ps` is deliberately treated as "not running".
running_containers="$(docker ps --format '{{.Names}}' || true)"
if ! grep -Fqx -- "$AGENT_CONTAINER" <<<"$running_containers"; then
  echo "[WARN] agent container not running; skip verification"
  exit 0
fi

if docker exec -i "$AGENT_CONTAINER" bash -lc 'command -v curl >/dev/null && command -v jq >/dev/null'; then
  echo "[INFO] curl/jq already installed in agent container"
else
  echo "[INFO] Installing curl/jq in agent container"
  # Installation stays best-effort, but a failure is reported instead of
  # being silently discarded.
  if ! docker exec -i "$AGENT_CONTAINER" bash -lc 'apt-get update >/dev/null 2>&1 && apt-get install -y curl jq >/dev/null 2>&1'; then
    echo "[WARN] Failed to install curl/jq in agent container; verification may fail"
  fi
fi

if docker exec -i "$AGENT_CONTAINER" bash -lc 'command -v "$1" >/dev/null' _ "$CONTAINER_VERIFY_SCRIPT"; then
  docker exec -i "$AGENT_CONTAINER" "$CONTAINER_VERIFY_SCRIPT"
elif [[ -f "$VERIFY_SCRIPT" ]]; then
  # VERIFY_SCRIPT is a path on the host; it cannot be executed by name inside
  # the container, so copy it in first and run the copy with bash.
  docker cp "$VERIFY_SCRIPT" "$AGENT_CONTAINER:$CONTAINER_FALLBACK_SCRIPT"
  docker exec -i "$AGENT_CONTAINER" bash "$CONTAINER_FALLBACK_SCRIPT"
else
  echo "[WARN] agent_deployment_verify.sh not found"
fi