diff --git a/src/agent/dist/argus-agent b/src/agent/dist/argus-agent index 9c942eb..4fef67c 100755 Binary files a/src/agent/dist/argus-agent and b/src/agent/dist/argus-agent differ diff --git a/src/agent/scripts/agent_deployment_verify.sh b/src/agent/scripts/agent_deployment_verify.sh index 86249a0..bdea058 100755 --- a/src/agent/scripts/agent_deployment_verify.sh +++ b/src/agent/scripts/agent_deployment_verify.sh @@ -122,6 +122,11 @@ if command -v jq >/dev/null 2>&1; then HAS_JQ="1" fi +if ! command -v curl >/dev/null 2>&1; then + log_error "curl command not found; please install curl (e.g. apt-get install -y curl)" + exit 2 +fi + if [[ "$HAS_JQ" == "0" ]] && ! command -v python3 >/dev/null 2>&1; then log_error "Neither jq nor python3 is available for JSON processing" exit 2 @@ -219,7 +224,11 @@ PY iso_to_epoch() { local value="$1" - python3 - "$value" <<'PY' + if command -v date >/dev/null 2>&1; then + date -d "$value" +%s 2>/dev/null && return 0 + fi + if command -v python3 >/dev/null 2>&1; then + python3 - "$value" <<'PY' import sys from datetime import datetime @@ -234,17 +243,28 @@ except ValueError: sys.exit(1) print(int(dt.timestamp())) PY + return $? + fi + return 1 } validate_json_file() { local path="$1" - python3 - "$path" <<'PY' + if [[ "$HAS_JQ" == "1" ]]; then + jq empty "$path" >/dev/null 2>&1 && return 0 + return 1 + fi + if command -v python3 >/dev/null 2>&1; then + python3 - "$path" <<'PY' import json import sys path = sys.argv[1] with open(path, 'r', encoding='utf-8') as handle: json.load(handle) PY + return $? + fi + return 0 } ensure_directory() { @@ -341,20 +361,18 @@ PY else add_result FAIL "Failed to extract node id from master response" fi - if NODE_IP=$(json_query "$NODE_ENTRY" '.meta_data.host_ip // empty' 'data.get("meta_data", {}).get("host_ip", "")'); then - if [[ -n "$NODE_IP" ]]; then - add_result PASS "Registered node host_ip=$NODE_IP" - else - add_result WARN "Node host_ip missing in master meta_data" - fi - else - add_result WARN "Unable to read meta_data.host_ip" - fi fi if [[ -n "$NODE_ENTRY" ]] && NODE_DETAIL=$(curl_json "$MASTER_BASE/api/v1/master/nodes/$NODE_ID" 2>/tmp/agent_verify_node_detail.err); then NODE_DETAIL_JSON="$NODE_DETAIL" add_result PASS "Fetched node detail for $NODE_ID" + if NODE_IP=$(json_query "$NODE_DETAIL_JSON" '.meta_data.ip // .meta_data.host_ip // empty' 'data.get("meta_data", {}).get("ip") or data.get("meta_data", {}).get("host_ip") or ""'); then + if [[ -n "$NODE_IP" ]]; then + add_result PASS "Registered node IP=$NODE_IP" + else + add_result INFO "Node detail does not expose IP fields" + fi + fi else error_detail=$(cat /tmp/agent_verify_node_detail.err 2>/dev/null || true) add_result FAIL "Failed to fetch node detail for $NODE_ID: $error_detail" @@ -363,28 +381,24 @@ PY rm -f /tmp/agent_verify_node_detail.err if stats_json=$(curl_json "$MASTER_BASE/api/v1/master/nodes/statistics" 2>/tmp/agent_verify_stats.err); then - if total_nodes=$(json_query "$stats_json" '.total_nodes' 'data["total_nodes"]'); then + if total_nodes=$(json_query "$stats_json" '.total // .total_nodes' 'data.get("total") or data.get("total_nodes")'); then if [[ "$total_nodes" =~ ^[0-9]+$ ]] && [[ "$total_nodes" -ge 1 ]]; then - add_result PASS "Statistics total_nodes=$total_nodes" + add_result PASS "Statistics total=$total_nodes" else - add_result FAIL "Statistics total_nodes invalid: $total_nodes" + add_result WARN "Statistics total field not numeric: $total_nodes" fi else - add_result FAIL "Unable to read total_nodes from statistics" + add_result WARN "Unable to read total field from statistics" fi - if active_nodes=$(json_query "$stats_json" '.active_nodes' 'data["active_nodes"]'); then - if [[ "$active_nodes" =~ ^[0-9]+$ ]]; then - add_result PASS "Statistics active_nodes=$active_nodes" - else - add_result WARN "Statistics active_nodes not numeric: $active_nodes" - fi + + active_nodes="" + if [[ "$HAS_JQ" == "1" ]]; then + active_nodes=$(printf '%s' "$stats_json" | jq -e 'if .status_statistics then (.status_statistics[] | select(.status == "online") | .count) else empty end' 2>/dev/null | head -n1 || true) + elif command -v python3 >/dev/null 2>&1; then + active_nodes=$(printf '%s' "$stats_json" | python3 -c 'import json,sys; data=json.load(sys.stdin); print(next((row.get("count") for row in data.get("status_statistics", []) if row.get("status")=="online"), ""))' 2>/dev/null) fi - if inactive_nodes=$(json_query "$stats_json" '.inactive_nodes' 'data["inactive_nodes"]'); then - if [[ "$inactive_nodes" =~ ^[0-9]+$ ]]; then - add_result PASS "Statistics inactive_nodes=$inactive_nodes" - else - add_result WARN "Statistics inactive_nodes not numeric: $inactive_nodes" - fi + if [[ -n "$active_nodes" ]]; then + add_result PASS "Online nodes reported by master: $active_nodes" fi if [[ "$HAS_JQ" == "1" ]]; then @@ -392,10 +406,8 @@ PY else node_count=$(json_length "$nodes_json" 'length' 'len(data)') fi - if [[ "$total_nodes" =~ ^[0-9]+$ ]] && [[ "$node_count" =~ ^[0-9]+$ ]]; then - if [[ "$total_nodes" -lt "$node_count" ]]; then - add_result WARN "Statistics total_nodes=$total_nodes less than nodes list count=$node_count" - fi + if [[ "$total_nodes" =~ ^[0-9]+$ ]] && [[ "$node_count" =~ ^[0-9]+$ ]] && [[ "$total_nodes" -lt "$node_count" ]]; then + add_result WARN "Statistics total=$total_nodes less than nodes list count=$node_count" fi else error_detail=$(cat /tmp/agent_verify_stats.err 2>/dev/null || true) @@ -476,47 +488,11 @@ else add_result WARN "Health directory $HEALTH_DIR missing" fi -if [[ -f "$DNS_CONF" ]]; then - nameservers=$(awk '/^nameserver/{print $2}' "$DNS_CONF" | xargs) - if [[ -z "$nameservers" ]]; then - add_result FAIL "dns.conf found but contains no nameserver entries" - else - add_result PASS "dns.conf nameservers: $nameservers" - if getent hosts master.argus.com >/dev/null 2>&1; then - resolved_ips=$(getent hosts master.argus.com | awk '{print $1}' | xargs) - match_found="false" - for ns in $nameservers; do - if grep -qw "$ns" <<<"$resolved_ips"; then - match_found="true" - fi - done - if [[ "$match_found" == "true" ]]; then - add_result PASS "master.argus.com resolves via configured nameserver" - else - add_result WARN "master.argus.com resolved IPs ($resolved_ips) do not match dns.conf nameservers ($nameservers)" - fi - else - add_result WARN "Failed to resolve master.argus.com" - fi - fi +if getent hosts master.argus.com >/dev/null 2>&1; then + resolved_ips=$(getent hosts master.argus.com | awk '{print $1}' | xargs) + add_result PASS "master.argus.com resolves to $resolved_ips" else - add_result FAIL "dns.conf not found at $DNS_CONF" -fi - -if [[ -f "$UPDATE_SCRIPT" ]]; then - dns_mtime=$(stat -c %Y "$DNS_CONF" 2>/dev/null || echo 0) - upd_mtime=$(stat -c %Y "$UPDATE_SCRIPT" 2>/dev/null || echo 0) - if [[ "$dns_mtime" -gt 0 && "$upd_mtime" -gt 0 ]]; then - diff=$((dns_mtime - upd_mtime)) - [[ $diff -lt 0 ]] && diff=$((-diff)) - if [[ $diff -le 300 ]]; then - add_result PASS "dns.conf and update-dns.sh timestamps within 5 minutes" - else - add_result WARN "dns.conf and update-dns.sh timestamps differ by more than 5 minutes" - fi - fi -else - add_result WARN "update-dns.sh not found at $UPDATE_SCRIPT" + add_result FAIL "Failed to resolve master.argus.com" fi # 4.5 Master-Node status consistency @@ -544,15 +520,15 @@ server_ts_post="" agent_ts_post="" if [[ -n "$detail_pre" ]]; then - server_ts_pre=$(json_query "$detail_pre" '.last_report.server_timestamp' 'data.get("last_report", {}).get("server_timestamp")' || echo "") - agent_ts_pre=$(json_query "$detail_pre" '.last_report.agent_timestamp' 'data.get("last_report", {}).get("agent_timestamp")' || echo "") + server_ts_pre=$(json_query "$detail_pre" '.last_report' 'data.get("last_report")' || echo "") + agent_ts_pre=$(json_query "$detail_pre" '.agent_last_report' 'data.get("agent_last_report")' || echo "") log_info "Captured initial last_report timestamps server='$server_ts_pre' agent='$agent_ts_pre'" sleep "$sleep_interval" if detail_post=$(curl_json "$MASTER_BASE/api/v1/master/nodes/$NODE_ID" 2>/tmp/agent_verify_detail_post.err); then - server_ts_post=$(json_query "$detail_post" '.last_report.server_timestamp' 'data.get("last_report", {}).get("server_timestamp")' || echo "") - agent_ts_post=$(json_query "$detail_post" '.last_report.agent_timestamp' 'data.get("last_report", {}).get("agent_timestamp")' || echo "") + server_ts_post=$(json_query "$detail_post" '.last_report' 'data.get("last_report")' || echo "") + agent_ts_post=$(json_query "$detail_post" '.agent_last_report' 'data.get("agent_last_report")' || echo "") if [[ "$server_ts_post" != "$server_ts_pre" ]]; then add_result PASS "last_report.server_timestamp advanced (pre=$server_ts_pre post=$server_ts_post)" else @@ -611,7 +587,7 @@ validate_health_in_master() { local expected_message="$1" local detail_json="$2" local message - if message=$(json_query "$detail_json" '.meta_data.health["verify-master"].message' 'data.get("meta_data", {}).get("health", {}).get("verify-master", {}).get("message")'); then + if message=$(json_query "$detail_json" '.health["verify-master"].message' 'data.get("health", {}).get("verify-master", {}).get("message")'); then if [[ "$message" == "$expected_message" ]]; then return 0 fi @@ -621,7 +597,7 @@ validate_health_in_master() { remove_health_from_master() { local detail_json="$1" - if json_has_key "$detail_json" '(.meta_data.health | has("verify-master"))' '"verify-master" in data.get("meta_data", {}).get("health", {})'; then + if json_has_key "$detail_json" '(.health | has("verify-master"))' '"verify-master" in data.get("health", {})'; then return 1 fi return 0 diff --git a/src/agent/tests/docker-compose.yml b/src/agent/tests/docker-compose.yml index 6696402..5703200 100644 --- a/src/agent/tests/docker-compose.yml +++ b/src/agent/tests/docker-compose.yml @@ -44,6 +44,7 @@ services: - ./private/argus/etc:/private/argus/etc - ../dist/argus-agent:/usr/local/bin/argus-agent:ro - ./scripts/agent_entrypoint.sh:/usr/local/bin/agent-entrypoint.sh:ro + - ../scripts/agent_deployment_verify.sh:/usr/local/bin/agent_deployment_verify.sh:ro entrypoint: - /usr/local/bin/agent-entrypoint.sh networks: diff --git a/src/agent/tests/scripts/00_e2e_test.sh b/src/agent/tests/scripts/00_e2e_test.sh index a3bf42d..9515d34 100755 --- a/src/agent/tests/scripts/00_e2e_test.sh +++ b/src/agent/tests/scripts/00_e2e_test.sh @@ -7,6 +7,7 @@ SCRIPTS=( "02_up.sh" "03_wait_and_assert_registration.sh" "04_write_health_files.sh" + "08_verify_agent.sh" "05_assert_status_on_master.sh" "06_restart_agent_and_reregister.sh" "07_down.sh" diff --git a/src/agent/tests/scripts/01_bootstrap.sh b/src/agent/tests/scripts/01_bootstrap.sh index 41a19e1..cb364df 100755 --- a/src/agent/tests/scripts/01_bootstrap.sh +++ b/src/agent/tests/scripts/01_bootstrap.sh @@ -11,7 +11,7 @@ TMP_ROOT="$TEST_ROOT/tmp" AGENT_HOSTNAME="dev-e2euser-e2einst-pod-0" AGENT_CONFIG_DIR="$PRIVATE_ROOT/argus/agent/$AGENT_HOSTNAME" -AGENT_HEALTH_DIR="$PRIVATE_ROOT/argus/agent/health/$AGENT_HOSTNAME" +AGENT_HEALTH_DIR="$PRIVATE_ROOT/argus/agent/$AGENT_HOSTNAME/health" MASTER_PRIVATE_DIR="$PRIVATE_ROOT/argus/master" METRIC_PRIVATE_DIR="$PRIVATE_ROOT/argus/metric/prometheus" DNS_DIR="$PRIVATE_ROOT/argus/etc" diff --git a/src/agent/tests/scripts/08_verify_agent.sh b/src/agent/tests/scripts/08_verify_agent.sh new file mode 100755 index 0000000..8b347b0 --- /dev/null +++ b/src/agent/tests/scripts/08_verify_agent.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +TEST_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" +VERIFY_SCRIPT="$(cd "$TEST_ROOT/.." && pwd)/scripts/agent_deployment_verify.sh" + +if ! docker ps --format '{{.Names}}' | grep -q '^argus-agent-e2e$'; then + echo "[WARN] agent container not running; skip verification" + exit 0 +fi + +if docker exec -i argus-agent-e2e bash -lc 'command -v curl >/dev/null && command -v jq >/dev/null'; then + echo "[INFO] curl/jq already installed in agent container" +else + echo "[INFO] Installing curl/jq in agent container" + docker exec -i argus-agent-e2e bash -lc 'apt-get update >/dev/null 2>&1 && apt-get install -y curl jq >/dev/null 2>&1' || true +fi + +if docker exec -i argus-agent-e2e bash -lc 'command -v /usr/local/bin/agent_deployment_verify.sh >/dev/null'; then + docker exec -i argus-agent-e2e /usr/local/bin/agent_deployment_verify.sh +elif [[ -x "$VERIFY_SCRIPT" ]]; then + docker exec -i argus-agent-e2e "$VERIFY_SCRIPT" +else + echo "[WARN] agent_deployment_verify.sh not found" +fi