#!/bin/bash
set -euo pipefail

# ==========================================================
# Alertmanager test script
#
# Waits until Alertmanager answers on its v2 status endpoint, then posts
# two synthetic alerts (one critical, one warning) to /api/v2/alerts.
# Exits 0 when both alerts were accepted, 1 otherwise.
#
# Environment:
#   ALERTMANAGER_URL  Base URL of Alertmanager
#                     (default: http://localhost:9093)
# ==========================================================

ALERTMANAGER_URL="${ALERTMANAGER_URL:-http://localhost:9093}"
TEST_ALERT_NAME_CRITICAL="NodeDown"
TEST_ALERT_NAME_WARNING="HighCPU"
TMP_LOG="/tmp/test-alertmanager.log"

# Readiness polling parameters: number of attempts and seconds between them.
am_wait_attempts=30
am_wait_interval=2

# ANSI color sequences, stored with a literal "\033" and expanded when printed.
GREEN="\033[1;32m"
RED="\033[1;31m"
YELLOW="\033[1;33m"
RESET="\033[0m"

# ==========================================================
# Function definitions
# ==========================================================

#######################################
# Print a message wrapped in a color sequence, followed by a reset.
# Only the color codes are backslash-expanded; the message is printed
# verbatim.
# Globals:   RESET (read)
# Arguments: $1 - color sequence (e.g. "${GREEN}")
#            $2... - message text
# Outputs:   the colored message on stdout
#######################################
print_colored() {
  local color=$1
  shift
  printf '%b%s%b\n' "${color}" "$*" "${RESET}"
}

#######################################
# Poll the Alertmanager status endpoint until it responds successfully.
# Globals:   ALERTMANAGER_URL, am_wait_attempts, am_wait_interval (read)
# Arguments: none
# Outputs:   progress messages on stdout
# Returns:   0 once the endpoint answers, 1 after all attempts failed
#######################################
wait_for_alertmanager() {
  local attempt

  echo "[INFO] 等待 Alertmanager 启动中..."
  # A C-style for loop keeps the counter arithmetic out of reach of `set -e`.
  for (( attempt = 1; attempt <= am_wait_attempts; attempt++ )); do
    if curl -fsS "${ALERTMANAGER_URL}/api/v2/status" >/dev/null 2>&1; then
      print_colored "${GREEN}" \
        "[OK] Alertmanager 已就绪 (attempt=${attempt}/${am_wait_attempts})"
      return 0
    fi
    echo "[..] Alertmanager 尚未就绪 (${attempt}/${am_wait_attempts})"
    # No point in sleeping after the final attempt; we are about to give up.
    if (( attempt < am_wait_attempts )); then
      sleep "${am_wait_interval}"
    fi
  done

  print_colored "${RED}" \
    "[ERROR] Alertmanager 在 ${am_wait_attempts} 次尝试后仍未就绪"
  return 1
}

#######################################
# Print a highlighted step banner.
# Globals:   YELLOW (read)
# Arguments: $1 - step title
# Outputs:   the banner on stdout
#######################################
log_step() {
  print_colored "${YELLOW}" "==== $1 ===="
}

#######################################
# Post one synthetic alert for instance "node-1" to Alertmanager.
# The response body is written to TMP_LOG and shown on failure.
# Globals:   ALERTMANAGER_URL, TMP_LOG (read)
# Arguments: $1 - alert name (value of the "alertname" label)
#            $2 - severity label (also used in the progress messages)
#            $3 - summary annotation
#            Arguments are inserted into the JSON payload verbatim, so they
#            must not contain characters that need JSON escaping.
# Outputs:   progress and result messages on stdout
# Returns:   0 if the request succeeded with an HTTP status below 400,
#            1 otherwise
#######################################
send_test_alert() {
  local alert_name=$1
  local severity=$2
  local summary=$3
  local payload
  local http_code=""
  local curl_rc=0

  printf -v payload '[
  {
    "labels": {
      "alertname": "%s",
      "instance": "node-1",
      "severity": "%s"
    },
    "annotations": {
      "summary": "%s"
    }
  }
]' "${alert_name}" "${severity}" "${summary}"

  echo "[INFO] 发送${severity}测试告警..."

  # Start from an empty log so a failure never shows a previous response.
  : >"${TMP_LOG}"

  # -f is deliberately not used: it makes curl discard the response body on
  # HTTP errors, which would leave nothing useful in TMP_LOG. The status code
  # is captured through -w and checked explicitly instead. `|| curl_rc=$?`
  # keeps `set -e` from aborting before the failure can be reported.
  http_code=$(curl -sS -X POST "${ALERTMANAGER_URL}/api/v2/alerts" \
    -H "Content-Type: application/json" \
    -d "${payload}" \
    -o "${TMP_LOG}" \
    -w '%{http_code}') || curl_rc=$?

  if (( curl_rc == 0 )) \
    && [[ "${http_code}" =~ ^[0-9]+$ ]] \
    && (( 10#${http_code} < 400 )); then
    print_colored "${GREEN}" "[OK] 已成功发送${severity}测试告警"
    return 0
  fi

  print_colored "${RED}" "[ERROR] ${severity}告警发送失败!"
  if (( curl_rc == 0 )); then
    # Transport succeeded, so the failure is the HTTP status itself.
    echo "[ERROR] HTTP status: ${http_code}"
  fi
  if [[ -s "${TMP_LOG}" ]]; then
    cat "${TMP_LOG}"
  fi
  return 1
}

# ==========================================================
# Main flow
# ==========================================================

log_step "测试 Alertmanager 开始"
echo "[INFO] Alertmanager 地址: $ALERTMANAGER_URL"

# Step 1: wait for Alertmanager to come up
wait_for_alertmanager || exit 1

# Step 2: fire a critical test alert
send_test_alert "${TEST_ALERT_NAME_CRITICAL}" "critical" \
  "节点 node-1 宕机" || exit 1

# Step 3: fire a warning test alert
send_test_alert "${TEST_ALERT_NAME_WARNING}" "warning" \
  "节点 node-1 CPU 使用率过高" || exit 1