dev_1.0.0_yuyr_2:重新提交 PR,增加 master/agent 以及系统集成测试 #17

Merged
yuyr merged 26 commits from dev_1.0.0_yuyr_2 into dev_1.0.0 2025-10-11 15:04:47 +08:00
6 changed files with 82 additions and 78 deletions
Showing only changes of commit f7766c022c - Show all commits

Binary file not shown.

View File

@ -122,6 +122,11 @@ if command -v jq >/dev/null 2>&1; then
HAS_JQ="1" HAS_JQ="1"
fi fi
if ! command -v curl >/dev/null 2>&1; then
log_error "curl command not found; please install curl (e.g. apt-get install -y curl)"
exit 2
fi
if [[ "$HAS_JQ" == "0" ]] && ! command -v python3 >/dev/null 2>&1; then if [[ "$HAS_JQ" == "0" ]] && ! command -v python3 >/dev/null 2>&1; then
log_error "Neither jq nor python3 is available for JSON processing" log_error "Neither jq nor python3 is available for JSON processing"
exit 2 exit 2
@ -219,7 +224,11 @@ PY
iso_to_epoch() { iso_to_epoch() {
local value="$1" local value="$1"
python3 - "$value" <<'PY' if command -v date >/dev/null 2>&1; then
date -d "$value" +%s 2>/dev/null && return 0
fi
if command -v python3 >/dev/null 2>&1; then
python3 - "$value" <<'PY'
import sys import sys
from datetime import datetime from datetime import datetime
@ -234,17 +243,28 @@ except ValueError:
sys.exit(1) sys.exit(1)
print(int(dt.timestamp())) print(int(dt.timestamp()))
PY PY
return $?
fi
return 1
} }
validate_json_file() { validate_json_file() {
local path="$1" local path="$1"
python3 - "$path" <<'PY' if [[ "$HAS_JQ" == "1" ]]; then
jq empty "$path" >/dev/null 2>&1 && return 0
return 1
fi
if command -v python3 >/dev/null 2>&1; then
python3 - "$path" <<'PY'
import json import json
import sys import sys
path = sys.argv[1] path = sys.argv[1]
with open(path, 'r', encoding='utf-8') as handle: with open(path, 'r', encoding='utf-8') as handle:
json.load(handle) json.load(handle)
PY PY
return $?
fi
return 0
} }
ensure_directory() { ensure_directory() {
@ -341,20 +361,18 @@ PY
else else
add_result FAIL "Failed to extract node id from master response" add_result FAIL "Failed to extract node id from master response"
fi fi
if NODE_IP=$(json_query "$NODE_ENTRY" '.meta_data.host_ip // empty' 'data.get("meta_data", {}).get("host_ip", "")'); then
if [[ -n "$NODE_IP" ]]; then
add_result PASS "Registered node host_ip=$NODE_IP"
else
add_result WARN "Node host_ip missing in master meta_data"
fi
else
add_result WARN "Unable to read meta_data.host_ip"
fi
fi fi
if [[ -n "$NODE_ENTRY" ]] && NODE_DETAIL=$(curl_json "$MASTER_BASE/api/v1/master/nodes/$NODE_ID" 2>/tmp/agent_verify_node_detail.err); then if [[ -n "$NODE_ENTRY" ]] && NODE_DETAIL=$(curl_json "$MASTER_BASE/api/v1/master/nodes/$NODE_ID" 2>/tmp/agent_verify_node_detail.err); then
NODE_DETAIL_JSON="$NODE_DETAIL" NODE_DETAIL_JSON="$NODE_DETAIL"
add_result PASS "Fetched node detail for $NODE_ID" add_result PASS "Fetched node detail for $NODE_ID"
if NODE_IP=$(json_query "$NODE_DETAIL_JSON" '.meta_data.ip // .meta_data.host_ip // empty' 'data.get("meta_data", {}).get("ip") or data.get("meta_data", {}).get("host_ip") or ""'); then
if [[ -n "$NODE_IP" ]]; then
add_result PASS "Registered node IP=$NODE_IP"
else
add_result INFO "Node detail does not expose IP fields"
fi
fi
else else
error_detail=$(cat /tmp/agent_verify_node_detail.err 2>/dev/null || true) error_detail=$(cat /tmp/agent_verify_node_detail.err 2>/dev/null || true)
add_result FAIL "Failed to fetch node detail for $NODE_ID: $error_detail" add_result FAIL "Failed to fetch node detail for $NODE_ID: $error_detail"
@ -363,28 +381,24 @@ PY
rm -f /tmp/agent_verify_node_detail.err rm -f /tmp/agent_verify_node_detail.err
if stats_json=$(curl_json "$MASTER_BASE/api/v1/master/nodes/statistics" 2>/tmp/agent_verify_stats.err); then if stats_json=$(curl_json "$MASTER_BASE/api/v1/master/nodes/statistics" 2>/tmp/agent_verify_stats.err); then
if total_nodes=$(json_query "$stats_json" '.total_nodes' 'data["total_nodes"]'); then if total_nodes=$(json_query "$stats_json" '.total // .total_nodes' 'data.get("total") or data.get("total_nodes")'); then
if [[ "$total_nodes" =~ ^[0-9]+$ ]] && [[ "$total_nodes" -ge 1 ]]; then if [[ "$total_nodes" =~ ^[0-9]+$ ]] && [[ "$total_nodes" -ge 1 ]]; then
add_result PASS "Statistics total_nodes=$total_nodes" add_result PASS "Statistics total=$total_nodes"
else else
add_result FAIL "Statistics total_nodes invalid: $total_nodes" add_result WARN "Statistics total field not numeric: $total_nodes"
fi fi
else else
add_result FAIL "Unable to read total_nodes from statistics" add_result WARN "Unable to read total field from statistics"
fi fi
if active_nodes=$(json_query "$stats_json" '.active_nodes' 'data["active_nodes"]'); then
if [[ "$active_nodes" =~ ^[0-9]+$ ]]; then active_nodes=""
add_result PASS "Statistics active_nodes=$active_nodes" if [[ "$HAS_JQ" == "1" ]]; then
else active_nodes=$(printf '%s' "$stats_json" | jq -e 'if .status_statistics then (.status_statistics[] | select(.status == "online") | .count) else empty end' 2>/dev/null | head -n1 || true)
add_result WARN "Statistics active_nodes not numeric: $active_nodes" elif command -v python3 >/dev/null 2>&1; then
fi active_nodes=$(printf '%s' "$stats_json" | python3 -c 'import json,sys; data=json.load(sys.stdin); print(next((row.get("count") for row in data.get("status_statistics", []) if row.get("status")=="online"), ""))' 2>/dev/null)
fi fi
if inactive_nodes=$(json_query "$stats_json" '.inactive_nodes' 'data["inactive_nodes"]'); then if [[ -n "$active_nodes" ]]; then
if [[ "$inactive_nodes" =~ ^[0-9]+$ ]]; then add_result PASS "Online nodes reported by master: $active_nodes"
add_result PASS "Statistics inactive_nodes=$inactive_nodes"
else
add_result WARN "Statistics inactive_nodes not numeric: $inactive_nodes"
fi
fi fi
if [[ "$HAS_JQ" == "1" ]]; then if [[ "$HAS_JQ" == "1" ]]; then
@ -392,10 +406,8 @@ PY
else else
node_count=$(json_length "$nodes_json" 'length' 'len(data)') node_count=$(json_length "$nodes_json" 'length' 'len(data)')
fi fi
if [[ "$total_nodes" =~ ^[0-9]+$ ]] && [[ "$node_count" =~ ^[0-9]+$ ]]; then if [[ "$total_nodes" =~ ^[0-9]+$ ]] && [[ "$node_count" =~ ^[0-9]+$ ]] && [[ "$total_nodes" -lt "$node_count" ]]; then
if [[ "$total_nodes" -lt "$node_count" ]]; then add_result WARN "Statistics total=$total_nodes less than nodes list count=$node_count"
add_result WARN "Statistics total_nodes=$total_nodes less than nodes list count=$node_count"
fi
fi fi
else else
error_detail=$(cat /tmp/agent_verify_stats.err 2>/dev/null || true) error_detail=$(cat /tmp/agent_verify_stats.err 2>/dev/null || true)
@ -476,47 +488,11 @@ else
add_result WARN "Health directory $HEALTH_DIR missing" add_result WARN "Health directory $HEALTH_DIR missing"
fi fi
if [[ -f "$DNS_CONF" ]]; then if getent hosts master.argus.com >/dev/null 2>&1; then
nameservers=$(awk '/^nameserver/{print $2}' "$DNS_CONF" | xargs) resolved_ips=$(getent hosts master.argus.com | awk '{print $1}' | xargs)
if [[ -z "$nameservers" ]]; then add_result PASS "master.argus.com resolves to $resolved_ips"
add_result FAIL "dns.conf found but contains no nameserver entries"
else
add_result PASS "dns.conf nameservers: $nameservers"
if getent hosts master.argus.com >/dev/null 2>&1; then
resolved_ips=$(getent hosts master.argus.com | awk '{print $1}' | xargs)
match_found="false"
for ns in $nameservers; do
if grep -qw "$ns" <<<"$resolved_ips"; then
match_found="true"
fi
done
if [[ "$match_found" == "true" ]]; then
add_result PASS "master.argus.com resolves via configured nameserver"
else
add_result WARN "master.argus.com resolved IPs ($resolved_ips) do not match dns.conf nameservers ($nameservers)"
fi
else
add_result WARN "Failed to resolve master.argus.com"
fi
fi
else else
add_result FAIL "dns.conf not found at $DNS_CONF" add_result FAIL "Failed to resolve master.argus.com"
fi
if [[ -f "$UPDATE_SCRIPT" ]]; then
dns_mtime=$(stat -c %Y "$DNS_CONF" 2>/dev/null || echo 0)
upd_mtime=$(stat -c %Y "$UPDATE_SCRIPT" 2>/dev/null || echo 0)
if [[ "$dns_mtime" -gt 0 && "$upd_mtime" -gt 0 ]]; then
diff=$((dns_mtime - upd_mtime))
[[ $diff -lt 0 ]] && diff=$((-diff))
if [[ $diff -le 300 ]]; then
add_result PASS "dns.conf and update-dns.sh timestamps within 5 minutes"
else
add_result WARN "dns.conf and update-dns.sh timestamps differ by more than 5 minutes"
fi
fi
else
add_result WARN "update-dns.sh not found at $UPDATE_SCRIPT"
fi fi
# 4.5 Master-Node status consistency # 4.5 Master-Node status consistency
@ -544,15 +520,15 @@ server_ts_post=""
agent_ts_post="" agent_ts_post=""
if [[ -n "$detail_pre" ]]; then if [[ -n "$detail_pre" ]]; then
server_ts_pre=$(json_query "$detail_pre" '.last_report.server_timestamp' 'data.get("last_report", {}).get("server_timestamp")' || echo "") server_ts_pre=$(json_query "$detail_pre" '.last_report' 'data.get("last_report")' || echo "")
agent_ts_pre=$(json_query "$detail_pre" '.last_report.agent_timestamp' 'data.get("last_report", {}).get("agent_timestamp")' || echo "") agent_ts_pre=$(json_query "$detail_pre" '.agent_last_report' 'data.get("agent_last_report")' || echo "")
log_info "Captured initial last_report timestamps server='$server_ts_pre' agent='$agent_ts_pre'" log_info "Captured initial last_report timestamps server='$server_ts_pre' agent='$agent_ts_pre'"
sleep "$sleep_interval" sleep "$sleep_interval"
if detail_post=$(curl_json "$MASTER_BASE/api/v1/master/nodes/$NODE_ID" 2>/tmp/agent_verify_detail_post.err); then if detail_post=$(curl_json "$MASTER_BASE/api/v1/master/nodes/$NODE_ID" 2>/tmp/agent_verify_detail_post.err); then
server_ts_post=$(json_query "$detail_post" '.last_report.server_timestamp' 'data.get("last_report", {}).get("server_timestamp")' || echo "") server_ts_post=$(json_query "$detail_post" '.last_report' 'data.get("last_report")' || echo "")
agent_ts_post=$(json_query "$detail_post" '.last_report.agent_timestamp' 'data.get("last_report", {}).get("agent_timestamp")' || echo "") agent_ts_post=$(json_query "$detail_post" '.agent_last_report' 'data.get("agent_last_report")' || echo "")
if [[ "$server_ts_post" != "$server_ts_pre" ]]; then if [[ "$server_ts_post" != "$server_ts_pre" ]]; then
add_result PASS "last_report.server_timestamp advanced (pre=$server_ts_pre post=$server_ts_post)" add_result PASS "last_report.server_timestamp advanced (pre=$server_ts_pre post=$server_ts_post)"
else else
@ -611,7 +587,7 @@ validate_health_in_master() {
local expected_message="$1" local expected_message="$1"
local detail_json="$2" local detail_json="$2"
local message local message
if message=$(json_query "$detail_json" '.meta_data.health["verify-master"].message' 'data.get("meta_data", {}).get("health", {}).get("verify-master", {}).get("message")'); then if message=$(json_query "$detail_json" '.health["verify-master"].message' 'data.get("health", {}).get("verify-master", {}).get("message")'); then
if [[ "$message" == "$expected_message" ]]; then if [[ "$message" == "$expected_message" ]]; then
return 0 return 0
fi fi
@ -621,7 +597,7 @@ validate_health_in_master() {
remove_health_from_master() { remove_health_from_master() {
local detail_json="$1" local detail_json="$1"
if json_has_key "$detail_json" '(.meta_data.health | has("verify-master"))' '"verify-master" in data.get("meta_data", {}).get("health", {})'; then if json_has_key "$detail_json" '(.health | has("verify-master"))' '"verify-master" in data.get("health", {})'; then
return 1 return 1
fi fi
return 0 return 0

View File

@ -44,6 +44,7 @@ services:
- ./private/argus/etc:/private/argus/etc - ./private/argus/etc:/private/argus/etc
- ../dist/argus-agent:/usr/local/bin/argus-agent:ro - ../dist/argus-agent:/usr/local/bin/argus-agent:ro
- ./scripts/agent_entrypoint.sh:/usr/local/bin/agent-entrypoint.sh:ro - ./scripts/agent_entrypoint.sh:/usr/local/bin/agent-entrypoint.sh:ro
- ../scripts/agent_deployment_verify.sh:/usr/local/bin/agent_deployment_verify.sh:ro
entrypoint: entrypoint:
- /usr/local/bin/agent-entrypoint.sh - /usr/local/bin/agent-entrypoint.sh
networks: networks:

View File

@ -7,6 +7,7 @@ SCRIPTS=(
"02_up.sh" "02_up.sh"
"03_wait_and_assert_registration.sh" "03_wait_and_assert_registration.sh"
"04_write_health_files.sh" "04_write_health_files.sh"
"08_verify_agent.sh"
"05_assert_status_on_master.sh" "05_assert_status_on_master.sh"
"06_restart_agent_and_reregister.sh" "06_restart_agent_and_reregister.sh"
"07_down.sh" "07_down.sh"

View File

@ -11,7 +11,7 @@ TMP_ROOT="$TEST_ROOT/tmp"
AGENT_HOSTNAME="dev-e2euser-e2einst-pod-0" AGENT_HOSTNAME="dev-e2euser-e2einst-pod-0"
AGENT_CONFIG_DIR="$PRIVATE_ROOT/argus/agent/$AGENT_HOSTNAME" AGENT_CONFIG_DIR="$PRIVATE_ROOT/argus/agent/$AGENT_HOSTNAME"
AGENT_HEALTH_DIR="$PRIVATE_ROOT/argus/agent/health/$AGENT_HOSTNAME" AGENT_HEALTH_DIR="$PRIVATE_ROOT/argus/agent/$AGENT_HOSTNAME/health"
MASTER_PRIVATE_DIR="$PRIVATE_ROOT/argus/master" MASTER_PRIVATE_DIR="$PRIVATE_ROOT/argus/master"
METRIC_PRIVATE_DIR="$PRIVATE_ROOT/argus/metric/prometheus" METRIC_PRIVATE_DIR="$PRIVATE_ROOT/argus/metric/prometheus"
DNS_DIR="$PRIVATE_ROOT/argus/etc" DNS_DIR="$PRIVATE_ROOT/argus/etc"

View File

@ -0,0 +1,26 @@
#!/usr/bin/env bash
#
# E2E step: run agent_deployment_verify.sh inside the agent container.
#
# Flow:
#   1. Skip (exit 0) when the agent container is not running.
#   2. Make sure curl and jq exist in the container (best-effort install).
#   3. Run the verify script from its in-container location; if it is not
#      present there, copy the repository copy into the container and run that.
#
# Exit status: 0 when verification is skipped or succeeds; non-zero when the
# verify script (or copying it into the container) fails.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
TEST_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
VERIFY_SCRIPT="$(cd "$TEST_ROOT/.." && pwd)/scripts/agent_deployment_verify.sh"

readonly AGENT_CONTAINER="argus-agent-e2e"
readonly CONTAINER_VERIFY_PATH="/usr/local/bin/agent_deployment_verify.sh"
readonly CONTAINER_FALLBACK_PATH="/tmp/agent_deployment_verify.sh"

# Capture the container list first instead of piping into `grep -q`: under
# `pipefail`, grep exiting on its first match can make `docker ps` die with
# SIGPIPE and the whole pipeline would then be reported as a failure.
running_containers="$(docker ps --format '{{.Names}}' || true)"
if ! grep -Fxq -- "$AGENT_CONTAINER" <<<"$running_containers"; then
  echo "[WARN] agent container not running; skip verification"
  exit 0
fi

if docker exec -i "$AGENT_CONTAINER" bash -lc 'command -v curl >/dev/null && command -v jq >/dev/null'; then
  echo "[INFO] curl/jq already installed in agent container"
else
  echo "[INFO] Installing curl/jq in agent container"
  # Best-effort: the verify script performs its own dependency checks, so a
  # failed install is reported here but does not abort this step.
  if ! docker exec -i "$AGENT_CONTAINER" bash -lc 'apt-get update >/dev/null 2>&1 && apt-get install -y curl jq >/dev/null 2>&1'; then
    echo "[WARN] Failed to install curl/jq in agent container"
  fi
fi

if docker exec -i "$AGENT_CONTAINER" bash -lc 'command -v "$1" >/dev/null' _ "$CONTAINER_VERIFY_PATH"; then
  docker exec -i "$AGENT_CONTAINER" "$CONTAINER_VERIFY_PATH"
elif [[ -f "$VERIFY_SCRIPT" ]]; then
  # VERIFY_SCRIPT is a host path and does not exist inside the container, so
  # copy it in and run the copy rather than exec'ing the host path directly.
  docker cp "$VERIFY_SCRIPT" "$AGENT_CONTAINER:$CONTAINER_FALLBACK_PATH"
  docker exec -i "$AGENT_CONTAINER" bash "$CONTAINER_FALLBACK_PATH"
else
  echo "[WARN] agent_deployment_verify.sh not found"
fi