104 lines
3.2 KiB
Bash
Executable File
104 lines
3.2 KiB
Bash
Executable File
#!/bin/bash
|
|
# verify_alertmanager.sh
|
|
# Verify the communication between Prometheus and Alertmanager after deployment
|
|
|
|
# Strict mode: stop on a failing command, on expansion of an unset variable,
# and when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

printf '%s\n' "[INFO] Verifying Prometheus ↔ Alertmanager communication..."

# Absolute path of the directory holding this script, and of its parent,
# which serves as the root for everything else the script touches.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
TEST_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"

PRIVATE_CORE="${TEST_ROOT}/private"

# Scratch directory under the test root; created if it does not exist yet.
TMP_DIR="${TEST_ROOT}/tmp"
mkdir -p "${TMP_DIR}"
|
|
|
|
#=============================
|
|
# Load environment variables
|
|
#=============================
|
|
# Source the optional .env file from the test root. allexport is switched on
# only for the duration of the source so that every variable the file assigns
# is exported to child processes.
if [[ -f "${TEST_ROOT}/.env" ]]; then
  set -o allexport
  # shellcheck source=/dev/null
  source "${TEST_ROOT}/.env"
  set +o allexport
fi
|
|
|
|
#=============================
|
|
# Basic configuration
|
|
#=============================
|
|
# Service endpoints on localhost. The ports come from PROMETHEUS_PORT and
# ALERTMANAGER_PORT when set and non-empty, otherwise 9090 and 9093.
ALERT_URL="http://localhost:${ALERTMANAGER_PORT:-9093}"
PROM_URL="http://localhost:${PROMETHEUS_PORT:-9090}"

# Locations of the temporary rule file and of the Prometheus rules directory.
TMP_RULE="${TMP_DIR}/test_rule.yml"
RULE_DIR="${PRIVATE_CORE}/argus/metric/prometheus/rules"
|
|
|
|
#=============================
|
|
# Helper functions
|
|
#=============================
|
|
# ANSI color sequences, stored in escaped form (literal backslash-033); the
# log helpers turn them into real escape characters when printing.
RESET="\033[0m"
RED="\033[31m"
GREEN="\033[32m"
YELLOW="\033[33m"
|
|
|
|
#######################################
# Print an informational line with a yellow "[INFO]" tag.
# Globals:   YELLOW, RESET (read)
# Arguments: $1 - message text; backslash escapes in it are expanded
# Outputs:   one line on stdout
#######################################
log_info() {
  local message=$1
  printf '%b\n' "${YELLOW}[INFO]${RESET} ${message}"
}
|
|
#######################################
# Print a success line with a green "[OK]" tag.
# Globals:   GREEN, RESET (read)
# Arguments: $1 - message text; backslash escapes in it are expanded
# Outputs:   one line on stdout
#######################################
log_success() {
  local message=$1
  printf '%b\n' "${GREEN}[OK]${RESET} ${message}"
}
|
|
#######################################
# Print a warning line with a yellow "[WARN]" tag.
# The line goes to stderr so diagnostics stay out of captured or piped
# stdout, and the message is printed literally: only the color variables
# are escape-expanded, so a backslash in the message is not reinterpreted.
# Globals:   YELLOW, RESET (read)
# Arguments: $1 - message text
# Outputs:   one line on stderr
#######################################
log_warn() {
  printf '%b[WARN]%b %s\n' "${YELLOW}" "${RESET}" "$1" >&2
}
|
|
#######################################
# Print an error line with a red "[ERROR]" tag.
# The line goes to stderr so diagnostics stay out of captured or piped
# stdout, and the message is printed literally: only the color variables
# are escape-expanded, so a backslash in the message is not reinterpreted.
# Globals:   RED, RESET (read)
# Arguments: $1 - message text
# Outputs:   one line on stderr
#######################################
log_error() {
  printf '%b[ERROR]%b %s\n' "${RED}" "${RESET}" "$1" >&2
}
|
|
|
|
#######################################
# Report a fatal problem through log_error, then terminate the script.
# Arguments: $1 - message describing the failure
# Returns:   does not return; exits with status 1
#######################################
fail_exit() {
  local reason=$1
  log_error "${reason}"
  exit 1
}
|
|
|
|
#=============================
|
|
# Step 1: Check Alertmanager accessibility
|
|
#=============================
|
|
log_info "Checking Alertmanager status..."

# Probe the status endpoint; only curl's exit code matters, so both output
# streams are discarded. --fail makes an HTTP error response count as failure.
if ! curl --silent --show-error --fail "${ALERT_URL}/api/v2/status" >/dev/null 2>&1; then
  fail_exit "Alertmanager is not reachable. Please check container or port mapping."
fi

log_success "Alertmanager is reachable at ${ALERT_URL}"
|
|
|
|
#=============================
|
|
# Step 2: Create and load a temporary test alert rule
|
|
#=============================
|
|
log_info "Creating temporary alert rule at ${TMP_RULE}..."

#######################################
# Remove the temporary rule on every exit path, so that a failure later in
# the script does not leave an always-firing alert installed in Prometheus.
# Does nothing once the files are already gone.
# Globals: TMP_RULE, RULE_DIR, PROM_URL (read)
#######################################
cleanup_test_rule() {
  local installed_rule="${RULE_DIR}/test_rule.yml"
  rm -f -- "${TMP_RULE}"
  if [[ -e "${installed_rule}" ]]; then
    rm -f -- "${installed_rule}"
    # Best effort: the script is already exiting, so a failed reload here
    # must not replace the original exit status.
    curl -sf -X POST "${PROM_URL}/-/reload" >/dev/null 2>&1 || true
  fi
}
trap cleanup_test_rule EXIT

# Quoted delimiter: the rule text is written verbatim, with no shell expansion.
# vector(1) always returns a sample, so the alert fires as soon as the rule
# group is evaluated.
cat <<'EOF' > "${TMP_RULE}"
groups:
  - name: deploy-verify-group
    rules:
      - alert: DeployVerifyAlert
        expr: vector(1)
        labels:
          severity: warning
        annotations:
          summary: "Deployment verification alert"
EOF

mkdir -p "${RULE_DIR}"
cp -- "${TMP_RULE}" "${RULE_DIR}/test_rule.yml"

log_info "Reloading Prometheus to apply the test rule..."
# --fail is required: without it curl exits 0 on an HTTP 4xx/5xx answer and a
# rejected reload would be reported as success. -S keeps curl's own error
# message visible on stderr.
if curl -sSf -X POST "${PROM_URL}/-/reload" >/dev/null; then
  log_success "Prometheus successfully reloaded rules"
else
  fail_exit "Failed to reload Prometheus. Check API accessibility."
fi
|
|
|
|
#=============================
|
|
# Step 3: Verify alert received by Alertmanager
|
|
#=============================
|
|
log_info "Waiting for alert propagation (~30 seconds)..."
sleep 30

# Fetch first, match afterwards. Piping curl straight into `grep -q` is unsafe
# under pipefail: grep exits on the first match, curl can then fail writing the
# rest of the body, and the pipeline reports failure although the alert was
# found. A failed request (including an HTTP error, via --fail) is treated as
# "no alerts" and falls through to the failure branch below.
alerts_json="$(curl -sSf "${ALERT_URL}/api/v2/alerts")" || alerts_json=""

if [[ "${alerts_json}" == *"DeployVerifyAlert"* ]]; then
  log_success "Prometheus → Alertmanager alert path verified successfully"
else
  fail_exit "DeployVerifyAlert not found in Alertmanager. Check configuration or network."
fi
|
|
|
|
#=============================
|
|
# Step 4: Cleanup test rule
|
|
#=============================
|
|
log_info "Cleaning up temporary alert rule..."
rm -f -- "${RULE_DIR}/test_rule.yml" "${TMP_RULE}"

# Reload so Prometheus drops the removed rule. --fail makes an HTTP 4xx/5xx
# answer count as a failed reload instead of being logged as success. A
# failure here is only a warning, because the verification itself has
# already passed.
if curl -sSf -X POST "${PROM_URL}/-/reload" >/dev/null; then
  log_success "Prometheus successfully reloaded after cleanup"
else
  log_warn "Prometheus reload after cleanup failed. Please check manually."
fi

log_success "Alertmanager verification completed successfully. Communication with Prometheus is healthy."
|