Compare commits

...

3 Commits

Author SHA1 Message Date
sundapeng.sdp
932038ab1d fix: 修复docker-compose中配置路径不正确的问题;
refs #20
2025-10-14 12:16:31 +08:00
sundapeng.sdp
e1b19c04b0 refactor: 优化argus-metric模块e2e测试;
refs #20
2025-10-14 12:16:31 +08:00
8fbe107ac9 dev_1.0.0_xuxt 完成web和alert模块开发,以及模块e2e测试 (#21)
Co-authored-by: xiuting.xu <xiutingxt.xu@gmail.com>
Reviewed-on: #21
Reviewed-by: huhy <husteryezi@163.com>
Reviewed-by: sundapeng <sundp@mail.zgclab.edu.cn>
Reviewed-by: yuyr <yuyr@zgclab.edu.cn>
2025-10-14 10:20:45 +08:00
107 changed files with 7822 additions and 197 deletions

View File

@ -0,0 +1,13 @@
# Alertmanager
## 启动示例
```bash
docker run -d --name alertmanager \
-p 9093:9093 \
-v /opt/alertmanager/data:/alertmanager \
argus-alert:latest
```
## 动态配置
修改 alertmanager.yml 后,向 `/-/reload` 接口发送 POST 请求(例如 `curl -X POST http://localhost:9093/-/reload`)即可重新加载配置。

View File

@ -0,0 +1,86 @@
# Alertmanager image for Argus, based on Ubuntu 24.04.
# supervisord is the container entry point; it starts Alertmanager and the
# DNS monitor script.
FROM ubuntu:24.04

# Build arguments supplied by docker-compose (`build.args`). They must be
# declared with ARG: an undeclared build argument is not visible to RUN
# instructions, which previously left the USE_INTRANET branches below dead and
# silently ignored custom ARGUS_UID / ARGUS_GID values.
ARG USE_INTRANET=false
ARG ARGUS_UID=2133
ARG ARGUS_GID=2015

# Switch to the root user
USER root

# Install required dependencies
RUN apt-get update && \
    apt-get install -y wget supervisor net-tools inetutils-ping vim ca-certificates passwd && \
    apt-get clean && rm -rf /var/lib/apt/lists/*

# Alertmanager version
ARG ALERTMANAGER_VERSION=0.28.1

# Download and unpack the Alertmanager binary release
RUN wget https://github.com/prometheus/alertmanager/releases/download/v${ALERTMANAGER_VERSION}/alertmanager-${ALERTMANAGER_VERSION}.linux-amd64.tar.gz && \
    tar xvf alertmanager-${ALERTMANAGER_VERSION}.linux-amd64.tar.gz && \
    mv alertmanager-${ALERTMANAGER_VERSION}.linux-amd64 /usr/local/alertmanager && \
    rm alertmanager-${ALERTMANAGER_VERSION}.linux-amd64.tar.gz

ENV ALERTMANAGER_BASE_PATH=/private/argus/alert/alertmanager
# Keep the UID/GID available at runtime as before; the defaults are unchanged.
ENV ARGUS_UID=${ARGUS_UID}
ENV ARGUS_GID=${ARGUS_GID}

# /alertmanager is a symlink to the data directory under /private
RUN mkdir -p /usr/share/alertmanager && \
    mkdir -p ${ALERTMANAGER_BASE_PATH} && \
    mkdir -p /private/argus/etc && \
    rm -rf /alertmanager && \
    ln -s ${ALERTMANAGER_BASE_PATH} /alertmanager

# Create the alertmanager group and user (UID/GID are customisable via build args)
RUN groupadd -g ${ARGUS_GID} alertmanager
RUN useradd -M -s /usr/sbin/nologin -u ${ARGUS_UID} -g ${ARGUS_GID} alertmanager

RUN chown -R alertmanager:alertmanager /usr/share/alertmanager && \
    chown -R alertmanager:alertmanager /alertmanager && \
    chown -R alertmanager:alertmanager ${ALERTMANAGER_BASE_PATH} && \
    chown -R alertmanager:alertmanager /private/argus/etc && \
    chown -R alertmanager:alertmanager /usr/local/bin

# Configure the intranet apt source (only when USE_INTRANET=true).
# NOTE(review): the mirror path references ubuntu2204/jammy while the base
# image is 24.04 - confirm this is intended.
RUN if [ "$USE_INTRANET" = "true" ]; then \
        echo "Configuring intranet apt sources..." && \
        cp /etc/apt/sources.list /etc/apt/sources.list.bak && \
        echo "deb [trusted=yes] http://10.68.64.1/ubuntu2204/ jammy main" > /etc/apt/sources.list && \
        echo 'Acquire::https::Verify-Peer "false";' > /etc/apt/apt.conf.d/99disable-ssl-check && \
        echo 'Acquire::https::Verify-Host "false";' >> /etc/apt/apt.conf.d/99disable-ssl-check; \
    fi

# Configure the apt source used at deployment time (overwrites the one above)
RUN if [ "$USE_INTRANET" = "true" ]; then \
        echo "deb [trusted=yes] https://10.92.132.52/mirrors/ubuntu2204/ jammy main" > /etc/apt/sources.list; \
    fi

# Create the supervisor log directory
RUN mkdir -p /var/log/supervisor

# Copy the supervisor configuration
COPY src/alert/alertmanager/build/supervisord.conf /etc/supervisor/conf.d/supervisord.conf

# Copy the start script
COPY src/alert/alertmanager/build/start-am-supervised.sh /usr/local/bin/start-am-supervised.sh
RUN chmod +x /usr/local/bin/start-am-supervised.sh

# Copy the Alertmanager configuration
COPY src/alert/alertmanager/build/alertmanager.yml /etc/alertmanager/alertmanager.yml
RUN chmod +x /etc/alertmanager/alertmanager.yml
# COPY src/alert/alertmanager/build/alertmanager.yml ${ALERTMANAGER_BASE_PATH}/alertmanager.yml

# Copy the DNS monitor script
COPY src/alert/alertmanager/build/dns-monitor.sh /usr/local/bin/dns-monitor.sh
RUN chmod +x /usr/local/bin/dns-monitor.sh

# Stay root; supervisor switches user per program
USER root

# Alertmanager default port
EXPOSE 9093

# supervisor is the entry point
CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]

View File

@ -0,0 +1,19 @@
# Alertmanager configuration: all alerts are routed to the receiver named
# 'null', which has no notification integration configured in this file.
global:
resolve_timeout: 5m
route:
group_by: ['alertname', 'instance'] # Grouping: alerts with the same alertname + instance are merged
group_wait: 30s # After the first alert, wait 30s for other alerts of the same group
group_interval: 5m # After a group changes, wait at least 5 minutes before sending again
repeat_interval: 3h # Repeat the same alert every 3 hours
receiver: 'null'
receivers:
- name: 'null'
inhibit_rules:
- source_match:
severity: 'critical' # While a critical alert exists...
target_match:
severity: 'warning' # ...suppress warning alerts of the same instance
equal: ['instance']

View File

@ -0,0 +1,68 @@
#!/bin/bash
# DNS monitor: every 10 seconds compare dns.conf with the last successfully
# applied copy and run update-dns.sh when it differs (or when no copy exists).
#
# The backup copy is refreshed only after the update script succeeded, so a
# failed update is retried on the next cycle in both the first-run and the
# change-detected case.

DNS_CONF="/private/argus/etc/dns.conf"
DNS_BACKUP="/tmp/dns.conf.backup"
UPDATE_SCRIPT="/private/argus/etc/update-dns.sh"
LOG_FILE="/var/log/supervisor/dns-monitor.log"

# Make sure the log file exists
touch "$LOG_FILE"

# Append a timestamped message to LOG_FILE.
# Arguments: $1 - message text
log_message() {
  echo "$(date '+%Y-%m-%d %H:%M:%S') [DNS-Monitor] $1" >> "$LOG_FILE"
}

# Run UPDATE_SCRIPT, appending its output to LOG_FILE and logging the outcome.
# Returns: 0 if the script exists, is executable and exited 0; 1 otherwise.
run_update_script() {
  if [ ! -x "$UPDATE_SCRIPT" ]; then
    log_message "警告: 更新脚本不存在或不可执行: $UPDATE_SCRIPT"
    return 1
  fi

  log_message "执行DNS更新脚本: $UPDATE_SCRIPT"
  if "$UPDATE_SCRIPT" >> "$LOG_FILE" 2>&1; then
    log_message "DNS更新脚本执行成功"
    return 0
  fi

  log_message "DNS更新脚本执行失败"
  return 1
}

log_message "DNS监控脚本启动"

while true; do
  if [ ! -f "$DNS_CONF" ]; then
    log_message "警告: DNS配置文件不存在: $DNS_CONF"
  elif [ ! -f "$DNS_BACKUP" ]; then
    # First time the config file is seen: apply it, then remember it
    if run_update_script; then
      cp "$DNS_CONF" "$DNS_BACKUP"
      log_message "创建DNS配置备份文件"
    fi
  elif ! cmp -s "$DNS_CONF" "$DNS_BACKUP"; then
    log_message "检测到DNS配置变化"
    # Only record the new content as applied once the update succeeded
    if run_update_script; then
      cp "$DNS_CONF" "$DNS_BACKUP"
    fi
  fi
  sleep 10
done

View File

@ -0,0 +1,26 @@
#!/bin/bash
# Start script run by supervisord: renders the Alertmanager configuration into
# the data directory, records the container IP for the DNS domain file, then
# replaces itself with the Alertmanager process.
set -euo pipefail

echo "[INFO] Starting Alertmanager under supervisor..."

ALERTMANAGER_BASE_PATH=${ALERTMANAGER_BASE_PATH:-/private/argus/alert/alertmanager}
CONFIG_TEMPLATE=/etc/alertmanager/alertmanager.yml
CONFIG_FILE="${ALERTMANAGER_BASE_PATH}/alertmanager.yml"

echo "[INFO] Alertmanager base path: ${ALERTMANAGER_BASE_PATH}"

# Render the configuration: substitute the literal ${ALERTMANAGER_BASE_PATH}
# placeholder in the template and write the result into the data directory.
echo "[INFO] Generating Alertmanager configuration file..."
sed "s|\${ALERTMANAGER_BASE_PATH}|${ALERTMANAGER_BASE_PATH}|g" \
  "${CONFIG_TEMPLATE}" > "${CONFIG_FILE}"

# Record the container IP address in a file named after the service domain
DOMAIN=alertmanager.alert.argus.com
IP=$(ifconfig | grep -A 1 eth0 | grep inet | awk '{print $2}')
echo "current IP: ${IP}"
echo "${IP}" > "/private/argus/etc/${DOMAIN}"

echo "[INFO] Starting Alertmanager process..."
# Start Alertmanager with the rendered configuration; previously the template
# was passed here, so the file generated above was never used.
exec /usr/local/alertmanager/alertmanager \
  --config.file="${CONFIG_FILE}" \
  --storage.path=/alertmanager \
  --cluster.listen-address=""

View File

@ -0,0 +1,39 @@
; supervisord configuration for the Alertmanager container.
; Defines two programs: alertmanager and dns-monitor.
[supervisord]
nodaemon=true
logfile=/var/log/supervisor/supervisord.log
pidfile=/var/run/supervisord.pid
user=root
; Alertmanager, started through the wrapper script as user "alertmanager"
[program:alertmanager]
command=/usr/local/bin/start-am-supervised.sh
user=alertmanager
stdout_logfile=/var/log/supervisor/alertmanager.log
stderr_logfile=/var/log/supervisor/alertmanager_error.log
autorestart=true
startretries=3
startsecs=10
stopwaitsecs=20
killasgroup=true
stopasgroup=true
; DNS monitor script, run as root
[program:dns-monitor]
command=/usr/local/bin/dns-monitor.sh
user=root
stdout_logfile=/var/log/supervisor/dns-monitor.log
stderr_logfile=/var/log/supervisor/dns-monitor_error.log
autorestart=true
startretries=3
startsecs=5
stopwaitsecs=10
killasgroup=true
stopasgroup=true
; Control socket; [supervisorctl] below points at the same path
[unix_http_server]
file=/var/run/supervisor.sock
chmod=0700
[supervisorctl]
serverurl=unix:///var/run/supervisor.sock
[rpcinterface:supervisor]
supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface

View File

@ -0,0 +1,60 @@
# 告警配置
> 参考:[自定义Prometheus告警规则](https://yunlzheng.gitbook.io/prometheus-book/parti-prometheus-ji-chu/alert/prometheus-alert-rule)
在 Prometheus 中配置告警有两个步骤:
1. 编写告警规则文件(rules 文件)
2. 在 prometheus.yml 里加载规则,并配置 Alertmanager
## 1. 编写告警规则文件
告警规则如下:
```yml
groups:
- name: example-rules
interval: 30s # 每30秒评估一次
rules:
- alert: InstanceDown
expr: up == 0
for: 1m
labels:
severity: critical
annotations:
summary: "实例 {{ $labels.instance }} 已宕机"
description: "{{ $labels.instance }} 在 {{ $labels.job }} 中无响应超过 1 分钟。"
- alert: HighCpuUsage
expr: 100 - (avg by (instance) (irate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 80
for: 5m
labels:
severity: warning
annotations:
summary: "CPU 使用率过高"
description: "实例 {{ $labels.instance }} CPU 使用率超过 80% 持续 5 分钟。"
```
其中:
- `alert`:告警规则的名称。
- `expr`:基于 PromQL 表达式的告警触发条件,用于计算是否有时间序列满足该条件。
- `for`:评估等待时间,可选参数。用于表示只有当触发条件持续一段时间后才发送告警。在等待期间,新产生告警的状态为 pending。
- `labels`:自定义标签,允许用户指定要附加到告警上的一组附加标签,可以在 Alertmanager 中做路由和分组。
- `annotations`:用于指定一组附加信息,比如用于描述告警详细信息的文字等。annotations 的内容在告警产生时会一同作为参数发送到 Alertmanager,可以提供告警摘要和详细信息。
## 2. 在 prometheus.yml 里引用
在 prometheus.yml 中加上 `rule_files` 和 `alerting`:
```yml
global:
[ evaluation_interval: <duration> | default = 1m ]
rule_files:
[ - <filepath_glob> ... ]
alerting:
alertmanagers:
- static_configs:
- targets:
- "localhost:9093" # Alertmanager 地址
```

View File

@ -0,0 +1,37 @@
# Example Prometheus alerting rules: instance down, high CPU, high memory
# and low disk space.
groups:
- name: example-rules
interval: 30s # Evaluate every 30 seconds
rules:
# Fires when `up == 0` has held for 1 minute
- alert: InstanceDown
expr: up == 0
for: 1m
labels:
severity: critical
annotations:
summary: "实例 {{ $labels.instance }} 已宕机"
description: "{{ $labels.instance }} 在 {{ $labels.job }} 中无响应超过 1 分钟。"
# Fires when the non-idle CPU percentage per instance stays above 80 for 5 minutes
- alert: HighCpuUsage
expr: 100 - (avg by (instance) (irate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 80
for: 5m
labels:
severity: warning
annotations:
summary: "CPU 使用率过高"
description: "实例 {{ $labels.instance }} CPU 使用率超过 80% 持续 5 分钟。"
# Fires when (MemTotal - MemAvailable) / MemTotal stays above 80% for 5 minutes
- alert: HighMemoryUsage
expr: (node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100 > 80
for: 5m
labels:
severity: warning
annotations:
summary: "内存使用率过高"
description: "实例 {{ $labels.instance }} 内存使用率超过 80% 持续 5 分钟。"
# Fires when used space on a filesystem (tmpfs/overlay excluded) stays above 90% for 10 minutes
- alert: DiskSpaceLow
expr: (node_filesystem_size_bytes{fstype!~"tmpfs|overlay"} - node_filesystem_free_bytes{fstype!~"tmpfs|overlay"}) / node_filesystem_size_bytes{fstype!~"tmpfs|overlay"} * 100 > 90
for: 10m
labels:
severity: warning
annotations:
summary: "磁盘空间不足"
description: "实例 {{ $labels.instance }} 磁盘空间不足超过 90% 持续 10 分钟。"

View File

@ -0,0 +1,19 @@
# Alertmanager configuration: all alerts are routed to the receiver named
# 'null', which has no notification integration configured in this file.
global:
resolve_timeout: 5m
route:
group_by: ['alertname', 'instance'] # Grouping: alerts with the same alertname + instance are merged
group_wait: 30s # After the first alert, wait 30s for other alerts of the same group
group_interval: 5m # After a group changes, wait at least 5 minutes before sending again
repeat_interval: 3h # Repeat the same alert every 3 hours
receiver: 'null'
receivers:
- name: 'null'
inhibit_rules:
- source_match:
severity: 'critical' # While a critical alert exists...
target_match:
severity: 'warning' # ...suppress warning alerts of the same instance
equal: ['instance']

View File

View File

@ -0,0 +1 @@
172.18.0.2

View File

@ -0,0 +1,37 @@
# Compose file for the Alertmanager test stack: builds the image from the
# repository root and runs a single alertmanager service.
version: '3.8'
services:
alertmanager:
build:
context: ../../../
dockerfile: src/alert/alertmanager/build/Dockerfile
# Build arguments forwarded to the Dockerfile
args:
ARGUS_UID: ${ARGUS_UID:-2133}
ARGUS_GID: ${ARGUS_GID:-2015}
USE_INTRANET: ${USE_INTRANET:-false}
image: argus-alertmanager:latest
container_name: argus-alertmanager
environment:
- ALERTMANAGER_BASE_PATH=/private/argus/alert/alertmanager
- ARGUS_UID=${ARGUS_UID:-2133}
- ARGUS_GID=${ARGUS_GID:-2015}
ports:
- "${ARGUS_PORT:-9093}:9093"
# Host directories under DATA_ROOT (default ./data) are bind-mounted
volumes:
- ${DATA_ROOT:-./data}/alertmanager:/private/argus/alert/alertmanager
- ${DATA_ROOT:-./data}/etc:/private/argus/etc
networks:
- argus-network
restart: unless-stopped
logging:
driver: "json-file"
options:
max-size: "10m"
max-file: "3"
networks:
argus-network:
driver: bridge
name: argus-network
# NOTE(review): this named volume is declared but no service in this file
# references it - confirm whether it is still needed.
volumes:
alertmanager_data:
driver: local

View File

@ -0,0 +1,19 @@
#!/usr/bin/env bash
# Bootstrap for the alert tests: creates the private/ directory tree next to
# this script's parent directory and hands it to the build user.
set -euo pipefail

root="$(cd "$(dirname "${BASH_SOURCE[0]}")/../" && pwd)"
project_root="$(cd "$root/../../.." && pwd)"

# Presumably load_build_user defines ARGUS_BUILD_UID / ARGUS_BUILD_GID -
# verify against scripts/common/build_user.sh.
source "$project_root/scripts/common/build_user.sh"
load_build_user

# Create the new private directory structure (mirrors the argus layout)
echo "[INFO] Creating private directory structure for supervisor-based containers..."
mkdir -p "$root/private/argus/alert/alertmanager"
mkdir -p "$root/private/argus/etc/"

# Set ownership of the data directories. This stays best-effort (it may need
# privileges the caller lacks), but a failure is now reported, not hidden.
echo "[INFO] Setting permissions for data directories..."
for data_dir in "$root/private/argus/alert/alertmanager" "$root/private/argus/etc"; do
  if ! chown -R "${ARGUS_BUILD_UID}:${ARGUS_BUILD_GID}" "$data_dir" 2>/dev/null; then
    echo "[WARN] Could not chown $data_dir to ${ARGUS_BUILD_UID}:${ARGUS_BUILD_GID}; continuing" >&2
  fi
done

echo "[INFO] Supervisor-based containers will manage their own scripts and configurations"

View File

@ -0,0 +1,10 @@
#!/usr/bin/env bash
# Start the alert stack with whichever Compose front-end is available
# ("docker compose" preferred, "docker-compose" as fallback).
set -euo pipefail

cd "$(dirname "$0")/.."

compose=(docker compose)
if ! "${compose[@]}" version >/dev/null 2>&1; then
  if ! command -v docker-compose >/dev/null 2>&1; then
    echo "需要 Docker Compose请安装后重试" >&2
    exit 1
  fi
  compose=(docker-compose)
fi

"${compose[@]}" -p alert-mvp up -d --remove-orphans
echo "[OK] 服务已启动Alertmanager http://localhost:9093"

View File

@ -0,0 +1,106 @@
#!/bin/bash
# Alertmanager test script: waits for Alertmanager, then posts one critical
# and one warning test alert for instance node-1.
set -euo pipefail

ALERTMANAGER_URL="http://localhost:9093"
TEST_ALERT_NAME_CRITICAL="NodeDown"
TEST_ALERT_NAME_WARNING="HighCPU"
TMP_LOG="/tmp/test-alertmanager.log"

# Wait parameters
am_wait_attempts=30
am_wait_interval=2

GREEN="\033[1;32m"
RED="\033[1;31m"
YELLOW="\033[1;33m"
RESET="\033[0m"

# Poll the status endpoint until it answers.
# Returns: 0 once reachable, 1 after am_wait_attempts failed attempts.
wait_for_alertmanager() {
  local attempt=1
  echo "[INFO] 等待 Alertmanager 启动中..."
  while (( attempt <= am_wait_attempts )); do
    if curl -fsS "${ALERTMANAGER_URL}/api/v2/status" >/dev/null 2>&1; then
      echo -e "${GREEN}[OK] Alertmanager 已就绪 (attempt=${attempt}/${am_wait_attempts})${RESET}"
      return 0
    fi
    echo "[..] Alertmanager 尚未就绪 (${attempt}/${am_wait_attempts})"
    sleep "${am_wait_interval}"
    attempt=$((attempt + 1))
  done
  echo -e "${RED}[ERROR] Alertmanager 在 ${am_wait_attempts} 次尝试后仍未就绪${RESET}"
  return 1
}

# Print a highlighted step banner.
# Arguments: $1 - banner text
log_step() {
  echo -e "${YELLOW}==== $1 ====${RESET}"
}

# POST a single alert for instance node-1; the response body goes to TMP_LOG.
# Arguments: $1 - alertname, $2 - severity, $3 - summary annotation
# Returns:   curl's exit status
post_alert() {
  local alertname=$1 severity=$2 summary=$3
  curl -fsS -X POST "${ALERTMANAGER_URL}/api/v2/alerts" \
    -H "Content-Type: application/json" \
    -d '[
  {
    "labels": {
      "alertname": "'"${alertname}"'",
      "instance": "node-1",
      "severity": "'"${severity}"'"
    },
    "annotations": {
      "summary": "'"${summary}"'"
    }
  }
]' \
    -o "$TMP_LOG"
}

# Send one test alert and report the outcome; exits 1 on failure.
# curl runs inside an `if` so that `set -e` does not abort before the error
# message is printed (the former `$?` check was unreachable on failure).
# Arguments: $1 - alertname, $2 - severity, $3 - summary annotation
send_test_alert() {
  local alertname=$1 severity=$2 summary=$3
  echo "[INFO] 发送${severity}测试告警..."
  rm -f -- "$TMP_LOG"
  if post_alert "$alertname" "$severity" "$summary"; then
    echo -e "${GREEN}[OK] 已成功发送${severity}测试告警${RESET}"
    return 0
  fi
  echo -e "${RED}[ERROR] ${severity}告警发送失败${RESET}"
  # With curl -f a failed request may leave no response file behind
  if [ -s "$TMP_LOG" ]; then
    cat "$TMP_LOG"
  fi
  exit 1
}

log_step "测试 Alertmanager 开始"
echo "[INFO] Alertmanager 地址: $ALERTMANAGER_URL"

# Step 1: wait for Alertmanager
wait_for_alertmanager

# Step 2: trigger a critical test alert
send_test_alert "${TEST_ALERT_NAME_CRITICAL}" "critical" "节点 node-1 宕机"

# Step 3: trigger a warning test alert
send_test_alert "${TEST_ALERT_NAME_WARNING}" "warning" "节点 node-1 CPU 使用率过高"

View File

@ -0,0 +1,71 @@
#!/bin/bash
# Alertmanager query script: waits for Alertmanager, prints the current alert
# list and verifies that the critical test alert is present.
# Exit status: 0 if the alert was found, 1 otherwise.
set -euo pipefail

ALERTMANAGER_URL="http://localhost:9093"
TEST_ALERT_NAME_CRITICAL="NodeDown"
TEST_ALERT_NAME_WARNING="HighCPU"
TMP_LOG="/tmp/test-alertmanager.log"

# Wait parameters
am_wait_attempts=30
am_wait_interval=2

GREEN="\033[1;32m"
RED="\033[1;31m"
YELLOW="\033[1;33m"
RESET="\033[0m"

# Poll the status endpoint until it answers.
# Returns: 0 once reachable, 1 after am_wait_attempts failed attempts.
wait_for_alertmanager() {
  local attempt=1
  echo "[INFO] 等待 Alertmanager 启动中..."
  while (( attempt <= am_wait_attempts )); do
    if curl -fsS "${ALERTMANAGER_URL}/api/v2/status" >/dev/null 2>&1; then
      echo -e "${GREEN}[OK] Alertmanager 已就绪 (attempt=${attempt}/${am_wait_attempts})${RESET}"
      return 0
    fi
    echo "[..] Alertmanager 尚未就绪 (${attempt}/${am_wait_attempts})"
    sleep "${am_wait_interval}"
    attempt=$((attempt + 1))
  done
  echo -e "${RED}[ERROR] Alertmanager 在 ${am_wait_attempts} 次尝试后仍未就绪${RESET}"
  return 1
}

# Print a highlighted step banner.
# Arguments: $1 - banner text
log_step() {
  echo -e "${YELLOW}==== $1 ====${RESET}"
}

log_step "查询 Alertmanager 当前告警列表开始"
echo "[INFO] Alertmanager 地址: $ALERTMANAGER_URL"

# Step 1: wait for Alertmanager
wait_for_alertmanager

# Step 2: fetch the alert list once and reuse it for display and verification
echo "[INFO] 查询当前告警..."
sleep 1
if ! alerts_json=$(curl -fsS "${ALERTMANAGER_URL}/api/v2/alerts"); then
  echo -e "${RED}[ERROR] 查询告警失败${RESET}" >&2
  exit 1
fi

# Pretty-print with jq when possible, otherwise show the raw response
if ! jq '.' <<<"${alerts_json}"; then
  echo -e "${RED}[WARN] 无法解析返回 JSON请检查 jq 是否安装${RESET}"
  printf '%s\n' "${alerts_json}"
fi

# Step 3: check that the list contains the critical test alert
result=0
if grep -q -- "${TEST_ALERT_NAME_CRITICAL}" <<<"${alerts_json}"; then
  echo -e "${GREEN}✅ 测试通过Alertmanager 已成功接收告警 ${TEST_ALERT_NAME_CRITICAL}${RESET}"
else
  echo -e "${RED}❌ 测试失败:未检测到告警 ${TEST_ALERT_NAME_CRITICAL}${RESET}"
  result=1
fi

log_step "测试结束"
# Propagate the verdict so callers (e.g. the e2e driver) can detect a failure
exit "${result}"

View File

@ -0,0 +1,21 @@
#!/usr/bin/env bash
# Stop the alert stack and remove the private/ directory of the tests folder.
set -euo pipefail

# Resolve the tests directory once, as an absolute path. Repeating
# `cd "$(dirname "$0")/.."` with a relative $0 would be resolved against the
# already-changed working directory, so it could fail or point elsewhere.
tests_dir="$(cd "$(dirname "$0")/.." && pwd)"
cd "$tests_dir"

compose=(docker compose)
if ! "${compose[@]}" version >/dev/null 2>&1; then
  if ! command -v docker-compose >/dev/null 2>&1; then
    echo "需要 Docker Compose请安装后重试" >&2
    exit 1
  fi
  compose=(docker-compose)
fi

"${compose[@]}" -p alert-mvp down
echo "[OK] 已停止所有容器"

# Clean up the private directory
echo "[INFO] 清理private目录内容..."
if [ -d "$tests_dir/private" ]; then
  # Remove the private directory and everything in it
  rm -rf -- "$tests_dir/private"
  echo "[OK] 已清理private目录"
else
  echo "[INFO] private目录不存在无需清理"
fi

View File

@ -0,0 +1,105 @@
#!/usr/bin/env bash
# End-to-end test driver for the ARGUS alert module: bootstrap, start, send
# alerts, query alerts, health check, cleanup. The steps are invoked as
# ./scripts/..., so it must be started from the directory containing scripts/.
set -euo pipefail

echo "======================================="
echo "ARGUS Alert System End-to-End Test"
echo "======================================="
echo ""

# Record the test start time
test_start_time=$(date +%s)
am_status="unknown"

# Wait until the Alertmanager status endpoint answers.
# Environment: SERVICE_WAIT_ATTEMPTS - maximum attempts (default 120), 5s apart
# Returns:     0 once ready, 1 after the last attempt
wait_for_services() {
  echo "[INFO] Waiting for all services to be ready..."
  local max_attempts=${SERVICE_WAIT_ATTEMPTS:-120}
  local attempt=1

  while [ "$attempt" -le "$max_attempts" ]; do
    if curl -fs http://localhost:9093/api/v2/status >/dev/null 2>&1; then
      echo "[OK] All services are ready!"
      return 0
    fi
    echo " Waiting for services... ($attempt/$max_attempts)"
    sleep 5
    attempt=$((attempt + 1))
  done

  echo "[ERROR] Services not ready after $max_attempts attempts"
  return 1
}

# Print a step banner.
# Arguments: $1 - step id, $2 - description
show_step() {
  echo ""
  echo "🔄 Step $1: $2"
  echo "----------------------------------------"
}

# Run a command and report its result; exits 1 if the command fails.
# The command runs as an `if` condition so `set -e` cannot abort the script
# before the FAILED line is printed (the former `$?`-based check could never
# see a failure).
# Arguments: $1 - label, remaining - command and its arguments
run_step() {
  local label=$1
  shift
  if "$@"; then
    echo "$label - SUCCESS"
  else
    echo "$label - FAILED"
    exit 1
  fi
}

# Probe the Alertmanager status endpoint and record the result in am_status.
# Returns: 0 if available, 1 otherwise
check_alertmanager_health() {
  if curl -fs "http://localhost:9093/api/v2/status" >/dev/null 2>&1; then
    am_status="available"
    echo "✅ Alertmanager status: $am_status"
    return 0
  fi
  am_status="unavailable"
  echo "⚠️ Alertmanager status: $am_status"
  return 1
}

# Start of the end-to-end test
show_step "1" "Bootstrap - Initialize environment"
run_step "Bootstrap" ./scripts/01_bootstrap.sh

show_step "2" "Startup - Start all services"
run_step "Service startup" ./scripts/02_up.sh

# Wait until the services are fully ready
wait_for_services || exit 1

# Send alert data
show_step "3" "Add alerts - Send test alerts to Alertmanager"
run_step "Send test alerts" ./scripts/03_alertmanager_add_alert.sh

# Query alert data
show_step "4" "Verify data - Query Alertmanager"
run_step "Data verification" ./scripts/04_query_alerts.sh

# Check service health; an unavailable Alertmanager now fails this step
show_step "Health" "Check service health"
echo "[INFO] Checking service health..."
run_step "Service health check" check_alertmanager_health

# Clean up the environment
show_step "5" "Cleanup - Stop all services"
run_step "Service cleanup" ./scripts/05_down.sh

# Compute the total test time
test_end_time=$(date +%s)
total_time=$((test_end_time - test_start_time))

echo ""
echo "======================================="
echo "🎉 END-TO-END TEST COMPLETED SUCCESSFULLY!"
echo "======================================="
echo "📊 Test Summary:"
echo " • Total time: ${total_time}s"
echo " • Alertmanager status: $am_status"
echo " • All services started and stopped successfully"
echo ""
echo "✅ The ARGUS Alert system is working correctly!"
echo ""

View File

@ -1,7 +1,7 @@
.env
data/
images-cache/
private-test-node/
*.tar
*.log
.DS_Store

View File

@ -1,3 +1,8 @@
networks:
default:
name: argus-debug-net
external: true
services:
ftp:
build:
@ -20,10 +25,11 @@ services:
- "${FTP_DATA_PORT:-20}:20"
- "21100-21110:21100-21110"
volumes:
- ${DATA_ROOT:-./data}/ftp:/private/argus/ftp
- ${DATA_ROOT:-./data}/etc:/private/argus/etc
- ${DATA_ROOT:-/private}/argus/metric/ftp:/private/argus/ftp
- ${DATA_ROOT:-/private}/argus/etc:/private/argus/etc
networks:
- argus-network
default:
ipv4_address: 172.30.0.40
logging:
driver: "json-file"
options:
@ -48,10 +54,11 @@ services:
ports:
- "${PROMETHEUS_PORT:-9090}:9090"
volumes:
- ${DATA_ROOT:-./data}/prometheus:/private/argus/metric/prometheus
- ${DATA_ROOT:-./data}/etc:/private/argus/etc
- ${DATA_ROOT:-/private}/argus/metric/prometheus:/private/argus/metric/prometheus
- ${DATA_ROOT:-/private}/argus/etc:/private/argus/etc
networks:
- argus-network
default:
ipv4_address: 172.30.0.41
logging:
driver: "json-file"
options:
@ -78,10 +85,11 @@ services:
ports:
- "${GRAFANA_PORT:-3000}:3000"
volumes:
- ${DATA_ROOT:-./data}/grafana:/private/argus/metric/grafana
- ${DATA_ROOT:-./data}/etc:/private/argus/etc
- ${DATA_ROOT:-/private}/argus/metric/grafana:/private/argus/metric/grafana
- ${DATA_ROOT:-/private}/argus/etc:/private/argus/etc
networks:
- argus-network
default:
ipv4_address: 172.30.0.42
depends_on:
- prometheus
logging:
@ -90,16 +98,27 @@ services:
max-size: "10m"
max-file: "3"
networks:
argus-network:
driver: bridge
name: argus-network
volumes:
ftp_data:
driver: local
prometheus_data:
driver: local
grafana_data:
driver: local
test-node:
image: ubuntu:22.04
container_name: argus-metric-test-node
hostname: test-metric-node-001
restart: unless-stopped
depends_on:
- ftp
- prometheus
environment:
- FTP_SERVER=${FTP_SERVER:-172.30.0.40}
- FTP_USER=${FTP_USER:-ftpuser}
- FTP_PASSWORD=${FTP_PASSWORD:-ZGClab1234!}
- FTP_PORT=${FTP_PORT:-21}
volumes:
- ${DATA_ROOT:-/private}/argus/agent:/private/argus/agent
command: sleep infinity
networks:
default:
ipv4_address: 172.30.0.50
logging:
driver: "json-file"
options:
max-size: "10m"
max-file: "3"

View File

@ -1,90 +0,0 @@
#!/bin/bash
# 初始化目录脚本
# 用于创建所有必要的数据目录并设置正确的权限
set -e
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$SCRIPT_DIR"
# 加载 .env 文件(如果存在)
if [ -f .env ]; then
echo "加载 .env 配置文件..."
source .env
fi
# 默认配置
FTP_UID=${FTP_UID:-2133}
FTP_GID=${FTP_GID:-2015}
PROMETHEUS_UID=${PROMETHEUS_UID:-2133}
PROMETHEUS_GID=${PROMETHEUS_GID:-2015}
GRAFANA_UID=${GRAFANA_UID:-2133}
GRAFANA_GID=${GRAFANA_GID:-2015}
DATA_ROOT=${DATA_ROOT:-./data}
echo "开始初始化目录结构..."
echo "数据目录: ${DATA_ROOT}"
echo ""
# 创建 FTP 目录
echo "创建 FTP 目录..."
sudo mkdir -p ${DATA_ROOT}/ftp/share
sudo chown -R ${FTP_UID}:${FTP_GID} ${DATA_ROOT}/ftp
sudo chmod -R 755 ${DATA_ROOT}/ftp
# 创建 Prometheus 目录
echo "创建 Prometheus 目录..."
sudo mkdir -p ${DATA_ROOT}/prometheus/{data,rules,targets}
# 创建默认的 targets 文件(先创建文件再改权限)
if [ ! -f "${DATA_ROOT}/prometheus/targets/node_exporter.json" ]; then
echo "创建默认 node_exporter targets..."
echo '[
{
"targets": [],
"labels": {
"job": "node"
}
}
]' | sudo tee ${DATA_ROOT}/prometheus/targets/node_exporter.json > /dev/null
fi
if [ ! -f "${DATA_ROOT}/prometheus/targets/dcgm_exporter.json" ]; then
echo "创建默认 dcgm_exporter targets..."
echo '[
{
"targets": [],
"labels": {
"job": "dcgm"
}
}
]' | sudo tee ${DATA_ROOT}/prometheus/targets/dcgm_exporter.json > /dev/null
fi
# 统一设置 Prometheus 目录权限
sudo chown -R ${PROMETHEUS_UID}:${PROMETHEUS_GID} ${DATA_ROOT}/prometheus
sudo chmod -R 755 ${DATA_ROOT}/prometheus
# 创建 Grafana 目录
echo "创建 Grafana 目录..."
sudo mkdir -p ${DATA_ROOT}/grafana/{data,logs,plugins,provisioning/datasources,provisioning/dashboards,data/sessions,data/dashboards,config}
sudo chown -R ${GRAFANA_UID}:${GRAFANA_GID} ${DATA_ROOT}/grafana
sudo chmod -R 755 ${DATA_ROOT}/grafana
# 创建公共配置目录
sudo mkdir -p ${DATA_ROOT}/etc
sudo chown -R ${FTP_UID}:${FTP_GID} ${DATA_ROOT}/etc
sudo chmod -R 755 ${DATA_ROOT}/etc
echo "目录初始化完成!"
echo ""
echo "目录结构:"
echo " ${DATA_ROOT}/"
echo " ├── ftp/ (UID:${FTP_UID}, GID:${FTP_GID})"
echo " ├── prometheus/ (UID:${PROMETHEUS_UID}, GID:${PROMETHEUS_GID})"
echo " ├── grafana/ (UID:${GRAFANA_UID}, GID:${GRAFANA_GID})"
echo " └── etc/ (UID:${FTP_UID}, GID:${FTP_GID})"
echo ""
echo "您现在可以运行 'docker-compose up -d' 来启动所有服务"

View File

@ -0,0 +1,18 @@
#!/bin/bash
# Runs the Argus Metric end-to-end steps 01-04 in order; with `set -e` the
# first failing step stops the run.
set -e

SCRIPT_DIR="$(dirname "$0")"
banner="=========================================="

echo "$banner"
echo "Argus Metric E2E Test"
echo "$banner"

for step in 01_start_services 02_publish_artifact 03_test_node_install 04_verify_install; do
  bash "$SCRIPT_DIR/${step}.sh"
done

echo "$banner"
echo "E2E 测试完成"
echo "$banner"

View File

@ -0,0 +1,26 @@
#!/bin/bash
# Step 01: start all services through common/start-all.sh and list the
# expected containers. Pass --rebuild or -r to force a rebuild.
set -e

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

# Parse arguments: only the first one is inspected
rebuild_args=()
case "$1" in
  --rebuild | -r)
    rebuild_args=(--rebuild)
    echo "[01] 启用强制重新构建模式"
    ;;
esac

echo "[01] 启动所有服务..."
bash "$SCRIPT_DIR/common/start-all.sh" "${rebuild_args[@]}"

echo "[01] 等待服务就绪..."
sleep 5

echo "[01] 检查服务状态..."
# grep exits non-zero when a container is missing, which aborts via set -e
for container in argus-ftp argus-prometheus argus-grafana argus-metric-test-node; do
  docker ps | grep "$container"
done

echo "[01] 所有服务已启动"

View File

@ -0,0 +1,60 @@
#!/bin/bash
# Step 02: bump the plugin version, build the install package and publish it
# into the FTP share directory, then normalise ownership and permissions.
set -e

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
TEST_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
PLUGIN_DIR="$(cd "$SCRIPT_DIR/../../client-plugins/all-in-one-full" && pwd)"

# Load .env
if [ -f "$TEST_DIR/.env" ]; then
  source "$TEST_DIR/.env"
fi

# Detect the host directory mounted into the running argus-ftp container
if docker ps --format '{{.Names}}' | grep -q '^argus-ftp$'; then
  FTP_MOUNT=$(docker inspect argus-ftp --format '{{range .Mounts}}{{if eq .Destination "/private/argus/ftp"}}{{.Source}}{{end}}{{end}}')
  # An empty result would turn OUTPUT_DIR into "/share", which is later
  # chown'ed and chmod'ed recursively with sudo - refuse to continue.
  if [ -z "$FTP_MOUNT" ]; then
    echo "[02] 错误: 容器 argus-ftp 未挂载 /private/argus/ftp" >&2
    exit 1
  fi
  OUTPUT_DIR="${FTP_MOUNT}/share"
  echo "[02] 容器挂载: $OUTPUT_DIR"
else
  OUTPUT_DIR="${DATA_ROOT:-$TEST_DIR/data}/ftp/share"
  echo "[02] 默认路径: $OUTPUT_DIR"
fi

OWNER="${FTP_UID:-2133}:${FTP_GID:-2015}"

cd "$PLUGIN_DIR"

echo "[02] 递增版本号..."
bash scripts/version-manager.sh bump minor

VERSION_FILE="config/VERSION"
if [ ! -f "$VERSION_FILE" ]; then
  echo "[02] 错误: 未找到 $VERSION_FILE"
  exit 1
fi

VERSION=$(tr -d '[:space:]' < "$VERSION_FILE")
echo "[02] 新版本: $VERSION"

echo "[02] 构建安装包..."
bash scripts/package_artifact.sh --force

echo "[02] 发布到 FTP: $OUTPUT_DIR"
sudo bash scripts/publish_artifact.sh "$VERSION" --output-dir "$OUTPUT_DIR" --owner "$OWNER"

echo "[02] 设置文件权限..."
# Owner; ${OUTPUT_DIR:?} aborts if the path is ever empty
sudo chown -R "$OWNER" "${OUTPUT_DIR:?}"
# Directories: 755 (rwxr-xr-x)
sudo find "$OUTPUT_DIR" -type d -exec chmod 755 {} \;
# Files: 644 (rw-r--r--)
sudo find "$OUTPUT_DIR" -type f -exec chmod 644 {} \;
# .sh files additionally get execute permission (755)
sudo find "$OUTPUT_DIR" -type f -name "*.sh" -exec chmod 755 {} \;
echo "[02] 权限设置完成 (UID:GID=$OWNER, dirs=755, files=644, scripts=755)"

echo "[02] 发布完成,验证文件..."
ls -lh "$OUTPUT_DIR"

echo "[02] 完成"

View File

@ -0,0 +1,30 @@
#!/bin/bash
# Step 03: download setup.sh from the FTP server inside the test-node
# container and run it.
# Environment: FTP_SERVER, FTP_USER, FTP_PASSWORD, FTP_PORT (defaults below).
set -e

FTP_SERVER="${FTP_SERVER:-172.30.0.40}"
FTP_USER="${FTP_USER:-ftpuser}"
# NOTE(review): a default password is hard-coded here; prefer supplying
# FTP_PASSWORD from the environment or .env.
FTP_PASSWORD="${FTP_PASSWORD:-ZGClab1234!}"
FTP_PORT="${FTP_PORT:-21}"

echo "[03] 进入测试节点执行安装..."

# The connection settings are handed over as environment variables and the
# inner script is single-quoted, so the values are never re-parsed by a shell.
# Previously they were spliced unquoted into the command string, which broke
# on (or executed) special characters in the credentials.
docker exec \
  -e FTP_SERVER="$FTP_SERVER" \
  -e FTP_USER="$FTP_USER" \
  -e FTP_PASSWORD="$FTP_PASSWORD" \
  -e FTP_PORT="$FTP_PORT" \
  argus-metric-test-node bash -c '
set -e
if ! command -v curl &>/dev/null; then
  echo "[03] curl 未安装,正在安装..."
  apt-get update && apt-get install -y curl
fi
cd /tmp
echo "[03] 下载 setup.sh..."
curl -u "${FTP_USER}:${FTP_PASSWORD}" "ftp://${FTP_SERVER}:${FTP_PORT}/setup.sh" -o setup.sh
echo "[03] 执行安装..."
chmod +x setup.sh
bash setup.sh --server "${FTP_SERVER}" --user "${FTP_USER}" --password "${FTP_PASSWORD}" --port "${FTP_PORT}"
echo "[03] 安装完成"
'

echo "[03] 完成"

View File

@ -0,0 +1,96 @@
#!/bin/bash
# Step 04: verify the installation inside the test-node container by checking
# that the monitoring ports (9100/9400/2020) are listening and reachable.
# Exit status: 0 if all checks passed, 1 otherwise.
set -e

echo "[04] 验证安装结果 - 检查监控端口..."
echo "=========================================="

# Make sure the container is running
if ! docker ps --format '{{.Names}}' | grep -q '^argus-metric-test-node$'; then
  echo "错误: 容器 argus-metric-test-node 未运行"
  exit 1
fi

ERRORS=0

# ==================== Listening ports ====================
echo ""
echo "[1] 检查监听端口..."
echo "----------------------------------------"
# Capture the exit status explicitly: under `set -e` a failing command
# substitution in a plain assignment would abort the script before the result
# is printed or counted. The inner script exits 1 only when a tool ran and
# found no port; a missing tool exits 0 and is not an error.
port_check_status=0
CHECK_RESULT=$(docker exec argus-metric-test-node bash -c '
if command -v netstat >/dev/null 2>&1; then
echo "使用 netstat 检查端口:"
if netstat -tlnp 2>/dev/null | grep -E ":(9100|9400|2020)"; then
echo "✓ 找到监控端口"
exit 0
else
echo "✗ 未找到监控端口 (9100/9400/2020)"
exit 1
fi
elif command -v ss >/dev/null 2>&1; then
echo "使用 ss 检查端口:"
if ss -tlnp 2>/dev/null | grep -E ":(9100|9400|2020)"; then
echo "✓ 找到监控端口"
exit 0
else
echo "✗ 未找到监控端口 (9100/9400/2020)"
exit 1
fi
elif command -v lsof >/dev/null 2>&1; then
echo "使用 lsof 检查端口:"
if lsof -i :9100 -i :9400 -i :2020 2>/dev/null | grep LISTEN; then
echo "✓ 找到监控端口"
exit 0
else
echo "✗ 未找到监控端口 (9100/9400/2020)"
exit 1
fi
else
echo "? 没有可用的端口检查工具 (netstat/ss/lsof),跳过此检查"
exit 0
fi
') || port_check_status=$?
echo "$CHECK_RESULT"
if [ "$port_check_status" -ne 0 ]; then
  ERRORS=$((ERRORS + 1))
fi

# ==================== Port connectivity ====================
echo ""
echo "[2] 测试端口连通性..."
echo "----------------------------------------"
docker exec argus-metric-test-node bash -c '
if command -v curl >/dev/null 2>&1; then
FAILED=0
for port in 9100 9400 2020; do
echo -n "端口 $port: "
if curl -s --connect-timeout 2 "http://localhost:$port/metrics" > /dev/null 2>&1; then
echo "✓ 可访问 (/metrics)"
elif curl -s --connect-timeout 2 "http://localhost:$port/" > /dev/null 2>&1; then
echo "✓ 可访问 (根路径)"
else
echo "✗ 不可访问"
FAILED=$((FAILED + 1))
fi
done
exit $FAILED
else
echo "? curl 不可用,跳过连通性测试"
exit 0
fi
' || ERRORS=$((ERRORS + 1))

echo ""
echo "=========================================="
if [ "$ERRORS" -eq 0 ]; then
  echo "✓ [04] 验证完成 - 所有端口检查通过"
else
  echo "✗ [04] 验证失败 - 发现 $ERRORS 个问题"
  echo ""
  echo "调试建议:"
  echo " 1. 进入容器检查: docker exec -it argus-metric-test-node bash"
  echo " 2. 查看进程: docker exec argus-metric-test-node ps aux"
  echo " 3. 查看日志: docker exec argus-metric-test-node cat /tmp/argus_install.log"
  exit 1
fi
echo "=========================================="

View File

@ -0,0 +1,11 @@
#!/bin/bash
# Step 05: stop everything through common/stop-all.sh. A failure of the stop
# script is deliberately ignored so cleanup always reports completion.
set -e

here="$(cd "$(dirname "$0")" && pwd)"

echo "[05] 清理环境..."
if ! bash "$here/common/stop-all.sh"; then
  : # best-effort: ignore stop errors
fi
echo "[05] 清理完成"

View File

@ -6,7 +6,8 @@
set -e
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$SCRIPT_DIR"
TEST_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"
cd "$TEST_DIR"
echo "=========================================="
echo " 路径检查脚本"
@ -18,15 +19,15 @@ echo ""
# 检查配置文件
echo "检查配置文件..."
if [ -f "$SCRIPT_DIR/docker-compose.yml" ]; then
if [ -f "$TEST_DIR/docker-compose.yml" ]; then
echo " ✓ docker-compose.yml 存在"
else
echo " ✗ docker-compose.yml 不存在"
fi
if [ -f "$SCRIPT_DIR/.env" ]; then
if [ -f "$TEST_DIR/.env" ]; then
echo " ✓ .env 存在"
elif [ -f "$SCRIPT_DIR/env.example" ]; then
elif [ -f "$TEST_DIR/env.example" ]; then
echo " ⚠ .env 不存在,但 env.example 存在"
else
echo " ✗ .env 和 env.example 都不存在"

View File

@ -0,0 +1,76 @@
#!/bin/bash
# Directory initialisation script: creates every data directory required by
# the metric stack under ${DATA_ROOT}/argus and sets owner and mode (755).
# Settings are read from .env in the test directory when that file exists.
set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
TEST_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"
cd "$TEST_DIR"

# Load the .env file (if present)
if [ -f .env ]; then
  echo "加载 .env 配置文件..."
  source .env
fi

# Defaults
FTP_UID=${FTP_UID:-2133}
FTP_GID=${FTP_GID:-2015}
PROMETHEUS_UID=${PROMETHEUS_UID:-2133}
PROMETHEUS_GID=${PROMETHEUS_GID:-2015}
GRAFANA_UID=${GRAFANA_UID:-2133}
GRAFANA_GID=${GRAFANA_GID:-2015}
DATA_ROOT=${DATA_ROOT:-/private}

# Create a directory tree and recursively set its owner and mode 755.
# All paths are quoted so a DATA_ROOT containing spaces or glob characters
# cannot be word-split before reaching sudo mkdir/chown/chmod.
# Arguments: $1 - owner as UID:GID, $2 - top directory,
#            remaining - optional sub-directories to create below it
init_dir() {
  local owner=$1 top=$2
  shift 2
  sudo mkdir -p "$top" "$@"
  sudo chown -R "$owner" "$top"
  sudo chmod -R 755 "$top"
}

echo "开始初始化目录结构..."
echo "数据根目录: ${DATA_ROOT}"
echo ""

metric_root="${DATA_ROOT}/argus/metric"

# FTP directory
echo "创建 FTP 目录..."
init_dir "${FTP_UID}:${FTP_GID}" "${metric_root}/ftp" "${metric_root}/ftp/share"

# Prometheus directories
echo "创建 Prometheus 目录..."
init_dir "${PROMETHEUS_UID}:${PROMETHEUS_GID}" "${metric_root}/prometheus" \
  "${metric_root}/prometheus"/{data,rules,targets}

# Grafana directories
echo "创建 Grafana 目录..."
init_dir "${GRAFANA_UID}:${GRAFANA_GID}" "${metric_root}/grafana" \
  "${metric_root}/grafana"/{data,logs,plugins,provisioning/datasources,provisioning/dashboards,data/sessions,data/dashboards,config}

# Shared configuration directory
echo "创建公共配置目录..."
init_dir "${FTP_UID}:${FTP_GID}" "${DATA_ROOT}/argus/etc"

# Agent directory
echo "创建 Agent 目录..."
init_dir "${FTP_UID}:${FTP_GID}" "${DATA_ROOT}/argus/agent"

echo "目录初始化完成!"
echo ""
echo "目录结构:"
echo " ${DATA_ROOT}/"
echo " ├── argus/"
echo " │ ├── metric/"
echo " │ │ ├── ftp/ (UID:${FTP_UID}, GID:${FTP_GID})"
echo " │ │ ├── prometheus/ (UID:${PROMETHEUS_UID}, GID:${PROMETHEUS_GID})"
echo " │ │ └── grafana/ (UID:${GRAFANA_UID}, GID:${GRAFANA_GID})"
echo " │ ├── etc/ (UID:${FTP_UID}, GID:${FTP_GID})"
echo " │ └── agent/ (UID:${FTP_UID}, GID:${FTP_GID})"
echo ""
echo "您现在可以运行 'docker-compose up -d' 来启动所有服务"

View File

@ -6,7 +6,8 @@
set -e
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$SCRIPT_DIR"
TEST_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"
cd "$TEST_DIR"
# 检测 docker-compose 命令
if command -v docker-compose &> /dev/null; then
@ -19,7 +20,7 @@ else
fi
# 镜像缓存目录
IMAGE_CACHE_DIR="./images-cache"
IMAGE_CACHE_DIR="$TEST_DIR/images-cache"
mkdir -p "$IMAGE_CACHE_DIR"
# 定义镜像列表

View File

@ -6,13 +6,20 @@
set -e
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$SCRIPT_DIR"
TEST_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"
cd "$TEST_DIR"
# 解析参数
FORCE_REBUILD=false
if [[ "$1" == "--rebuild" ]]; then
FORCE_REBUILD=true
fi
echo "=========================================="
echo " Argus Metrics 一键启动脚本"
echo "=========================================="
echo ""
echo "当前工作目录: $SCRIPT_DIR"
echo "当前工作目录: $TEST_DIR"
echo ""
# 检查 Docker 和 Docker Compose
@ -21,19 +28,13 @@ if ! command -v docker &> /dev/null; then
exit 1
fi
# 检测 docker-compose 命令(兼容新旧版本)
COMPOSE_FILE="$SCRIPT_DIR/docker-compose.yml"
if command -v docker-compose &> /dev/null; then
DOCKER_COMPOSE="docker-compose -f $COMPOSE_FILE"
echo "使用: docker-compose"
elif docker compose version &> /dev/null 2>&1; then
DOCKER_COMPOSE="docker compose -f $COMPOSE_FILE"
echo "使用: docker compose"
else
echo "错误: 未找到 docker-compose 或 docker compose 命令"
# 检查 docker compose 命令
if ! docker compose version &> /dev/null 2>&1; then
echo "错误: 未找到 docker compose 命令,请确保 Docker Compose V2 已安装"
exit 1
fi
echo "Compose 文件: $COMPOSE_FILE"
echo "使用: docker compose"
echo "Compose 文件: $TEST_DIR/docker-compose.yml"
echo ""
# 检查必要的构建目录
@ -65,6 +66,18 @@ fi
# 加载环境变量
source .env
# 检查并创建 Docker 网络
echo "检查 Docker 网络..."
NETWORK_NAME="argus-debug-net"
if docker network inspect "$NETWORK_NAME" >/dev/null 2>&1; then
echo "网络 $NETWORK_NAME 已存在"
else
echo "创建网络 $NETWORK_NAME..."
docker network create --driver bridge --subnet 172.30.0.0/16 "$NETWORK_NAME"
echo "网络创建成功"
fi
echo ""
echo "1. 初始化目录结构..."
bash "$SCRIPT_DIR/init-directories.sh"
@ -72,8 +85,8 @@ echo ""
echo "2. 准备 Docker 镜像..."
# 检查镜像是否存在
IMAGE_CACHE_DIR="./images-cache"
IMAGES=("argus-metric-ftp:latest" "argus-metric-prometheus:latest" "argus-metric-grafana:latest")
IMAGE_CACHE_DIR="$TEST_DIR/images-cache"
IMAGES=("argus-metric-ftp:latest" "argus-metric-prometheus:latest" "argus-metric-grafana:latest" "ubuntu:22.04")
all_images_exist=true
for image in "${IMAGES[@]}"; do
@ -83,7 +96,12 @@ for image in "${IMAGES[@]}"; do
fi
done
if $all_images_exist; then
if $FORCE_REBUILD; then
echo "强制重新构建镜像(--rebuild 模式)..."
cd "$TEST_DIR"
docker compose build --no-cache
echo "镜像重新构建完成"
elif $all_images_exist; then
echo "所有镜像已存在,跳过构建"
else
echo "检测到缺失镜像,尝试从缓存加载..."
@ -104,6 +122,9 @@ else
"argus-metric-grafana:latest")
cache_file="${IMAGE_CACHE_DIR}/argus-grafana.tar"
;;
"ubuntu:22.04")
cache_file="${IMAGE_CACHE_DIR}/test-node.tar"
;;
esac
if [ -f "$cache_file" ]; then
@ -128,8 +149,8 @@ else
echo ""
echo "部分镜像缺失,开始构建..."
echo "工作目录: $(pwd)"
cd "$SCRIPT_DIR"
$DOCKER_COMPOSE build
cd "$TEST_DIR"
docker compose build
# 询问是否保存镜像
echo ""
@ -149,6 +170,9 @@ else
"argus-metric-grafana:latest")
docker save -o "${IMAGE_CACHE_DIR}/argus-grafana.tar" "$image" && echo " 已保存: argus-grafana.tar"
;;
"ubuntu:22.04")
docker save -o "${IMAGE_CACHE_DIR}/test-node.tar" "$image" && echo " 已保存: test-node.tar"
;;
esac
done
echo "镜像已保存到: $IMAGE_CACHE_DIR/"
@ -161,8 +185,8 @@ fi
echo ""
echo "3. 启动服务..."
cd "$SCRIPT_DIR"
$DOCKER_COMPOSE up -d
cd "$TEST_DIR"
docker compose up -d
echo ""
echo "4. 等待服务启动..."
@ -170,8 +194,8 @@ sleep 5
echo ""
echo "5. 检查服务状态..."
cd "$SCRIPT_DIR"
$DOCKER_COMPOSE ps
cd "$TEST_DIR"
docker compose ps
echo ""
echo "=========================================="
@ -190,10 +214,10 @@ echo " 用户名: admin"
echo " 密码: admin"
echo ""
echo "常用命令:"
echo " 查看日志: $DOCKER_COMPOSE logs -f [service]"
echo " 停止服务: $DOCKER_COMPOSE stop"
echo " 重启服务: $DOCKER_COMPOSE restart"
echo " 停止并删除: $DOCKER_COMPOSE down"
echo " 停止并删除卷: $DOCKER_COMPOSE down -v"
echo " 查看日志: docker compose logs -f [service]"
echo " 停止服务: docker compose stop"
echo " 重启服务: docker compose restart"
echo " 停止并删除: docker compose down"
echo " 停止并删除卷: docker compose down -v"
echo ""

View File

@ -0,0 +1,50 @@
#!/bin/bash
# Stop all Argus Metrics services defined by the docker-compose.yml in the
# test root, then optionally remove the containers and their volumes.
# Usage: stop-all.sh   (interactive; both prompts default to "no")
set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
TEST_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"
cd "$TEST_DIR"

# Ask a single-keystroke yes/no question.
# Arguments: $1 - prompt text
# Returns:   0 when the answer is y/Y, non-zero otherwise
confirm() {
  local reply=""
  # read fails at EOF (closed or non-interactive stdin); treat that as "no"
  # rather than letting `set -e` abort the script halfway through.
  read -p "$1" -n 1 -r reply || reply=""
  echo
  [[ $reply =~ ^[Yy]$ ]]
}

# Require Docker Compose V2 (the `docker compose` subcommand).
if ! docker compose version > /dev/null 2>&1; then
  echo "错误: 未找到 docker compose 命令,请确保 Docker Compose V2 已安装" >&2
  exit 1
fi

echo "=========================================="
echo " 停止 Argus Metrics 服务"
echo "=========================================="
echo ""
echo "使用: docker compose"
echo "Compose 文件: $TEST_DIR/docker-compose.yml"
echo ""

# Only act when the compose project reports at least one container.
if [ -n "$(docker compose ps -q)" ]; then
  echo "停止所有服务..."
  docker compose stop
  echo ""
  if confirm "是否要删除容器? (y/N): "; then
    docker compose down
    echo "容器已删除"
    if confirm "是否要删除数据卷? (y/N): "; then
      docker compose down -v
      echo "数据卷已删除"
    fi
  fi
else
  echo "没有运行的服务"
fi

echo ""
echo "完成!"

View File

@ -0,0 +1,109 @@
#!/bin/bash
# Image load script.
# Loads Docker images from tar files in a cache directory.
# Usage: load-images.sh [INPUT_DIR]   (default: $TEST_DIR/images-cache)
# Exit status: 1 when the directory is missing or any tar file is missing or
# fails to load; 0 otherwise.
set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# NOTE(review): the hint printed at the end refers to
# scripts/common/start-all.sh; if this script also lives under scripts/common/,
# "$SCRIPT_DIR/.." resolves to scripts/ rather than the test root - confirm.
TEST_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
INPUT_DIR="${1:-$TEST_DIR/images-cache}"

# Succeeds when an image with exactly this repository:tag exists locally.
# The match is fixed-string and whole-line (-F -x): image names contain '.',
# which a regex match would treat as "any character".
image_exists() {
  docker images --format "{{.Repository}}:{{.Tag}}" | grep -Fxq -- "$1"
}

echo "=========================================="
echo " Docker 镜像加载脚本"
echo "=========================================="
echo ""
echo "输入目录: $INPUT_DIR"
echo ""

# The input directory must exist.
if [ ! -d "$INPUT_DIR" ]; then
  echo "错误: 目录不存在: $INPUT_DIR"
  exit 1
fi

# Tar file name -> image name expected after loading it.
declare -A IMAGES=(
  ["argus-ftp.tar"]="argus-metric-ftp:latest"
  ["argus-prometheus.tar"]="argus-metric-prometheus:latest"
  ["argus-grafana.tar"]="argus-metric-grafana:latest"
  ["test-node.tar"]="ubuntu:22.04"
)

# Counters for the final summary.
total=0
success=0
skipped=0
failed=0

for tar_file in "${!IMAGES[@]}"; do
  total=$((total + 1))
  input_file="${INPUT_DIR}/${tar_file}"
  image_name="${IMAGES[$tar_file]}"
  echo "[$total] 处理: $tar_file"

  # A missing tar file counts as a failure.
  if [ ! -f "$input_file" ]; then
    echo " ✗ 文件不存在: $tar_file"
    failed=$((failed + 1))
    echo ""
    continue
  fi

  # Skip images that are already present locally.
  if image_exists "$image_name"; then
    echo " ⊙ 镜像已存在: $image_name (跳过)"
    skipped=$((skipped + 1))
    echo ""
    continue
  fi

  echo " → 加载镜像..."
  if docker load -i "$input_file"; then
    # Verify the expected name is present; a mismatch is reported but still
    # counted as a successful load.
    if image_exists "$image_name"; then
      echo " ✓ 加载成功: $image_name"
      success=$((success + 1))
    else
      echo " ⚠ 加载完成,但镜像名称可能不匹配"
      echo " 请检查: docker images | grep '${image_name%:*}'"
      success=$((success + 1))
    fi
  else
    echo " ✗ 加载失败: $tar_file"
    failed=$((failed + 1))
  fi
  echo ""
done

echo "=========================================="
echo " 加载完成"
echo "=========================================="
echo ""
echo "统计:"
echo " 总计: $total"
echo " 成功: $success"
echo " 跳过: $skipped"
echo " 失败: $failed"
echo ""

# Show the Argus images that are now present.
echo "当前 Argus 相关镜像:"
docker images | grep -E "REPOSITORY|argus-metric" || echo "未找到 argus-metric 镜像"
echo ""

if [ "$failed" -gt 0 ]; then
  echo "部分镜像加载失败,请检查!"
  exit 1
fi

if [ "$success" -gt 0 ]; then
  echo "✓ 镜像加载成功!"
  echo ""
  echo "下一步:"
  echo " 运行: bash scripts/common/start-all.sh"
  echo " 或者: docker-compose up -d"
fi

View File

@ -0,0 +1,93 @@
#!/bin/bash
# Image save script.
# Saves Docker images to tar files so they can be deployed offline.
# Usage: save-images.sh [OUTPUT_DIR]   (default: $TEST_DIR/images-cache)
# Exit status: 1 when any image is missing or fails to save; 0 otherwise.
set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# NOTE(review): the usage text printed at the end refers to
# scripts/common/start-all.sh; if this script also lives under scripts/common/,
# "$SCRIPT_DIR/.." resolves to scripts/ rather than the test root - confirm.
TEST_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
OUTPUT_DIR="${1:-$TEST_DIR/images-cache}"

# Succeeds when an image with exactly this repository:tag exists locally.
# The match is fixed-string and whole-line (-F -x): image names contain '.',
# which a regex match would treat as "any character".
image_exists() {
  docker images --format "{{.Repository}}:{{.Tag}}" | grep -Fxq -- "$1"
}

echo "=========================================="
echo " Docker 镜像保存脚本"
echo "=========================================="
echo ""
echo "输出目录: $OUTPUT_DIR"
echo ""

# Create the output directory.
mkdir -p "$OUTPUT_DIR"

# Image name -> tar file name (keep in sync with docker-compose.yml).
declare -A IMAGES=(
  ["argus-metric-ftp:latest"]="argus-ftp.tar"
  ["argus-metric-prometheus:latest"]="argus-prometheus.tar"
  ["argus-metric-grafana:latest"]="argus-grafana.tar"
  ["ubuntu:22.04"]="test-node.tar"
)

# Counters for the final summary.
total=0
success=0
failed=0

for image in "${!IMAGES[@]}"; do
  total=$((total + 1))
  output_file="${OUTPUT_DIR}/${IMAGES[$image]}"
  echo "[$total] 检查镜像: $image"

  if image_exists "$image"; then
    echo " ✓ 镜像存在,开始保存..."
    if docker save -o "$output_file" "$image"; then
      file_size=$(ls -lh "$output_file" | awk '{print $5}')
      echo " ✓ 保存成功: ${IMAGES[$image]} ($file_size)"
      success=$((success + 1))
    else
      echo " ✗ 保存失败: $image"
      failed=$((failed + 1))
    fi
  else
    echo " ✗ 镜像不存在,请先构建镜像"
    failed=$((failed + 1))
  fi
  echo ""
done

echo "=========================================="
echo " 保存完成"
echo "=========================================="
echo ""
echo "统计:"
echo " 总计: $total"
echo " 成功: $success"
echo " 失败: $failed"
echo ""
echo "输出目录: $OUTPUT_DIR"
echo ""

# List what was written when at least one image was saved.
if [ "$success" -gt 0 ]; then
  echo "已保存的文件:"
  ls -lh "$OUTPUT_DIR"/*.tar 2>/dev/null || true
  echo ""
  echo "文件列表:"
  for image in "${!IMAGES[@]}"; do
    output_file="${OUTPUT_DIR}/${IMAGES[$image]}"
    if [ -f "$output_file" ]; then
      file_size=$(ls -lh "$output_file" | awk '{print $5}')
      echo " - ${IMAGES[$image]} ($file_size)"
    fi
  done
fi

echo ""
echo "使用说明:"
echo "1. 将 images-cache 目录复制到目标服务器的 ~/argus/src/metric/tests/ 下"
echo "2. 在目标服务器运行: bash scripts/common/start-all.sh"
echo ""

if [ "$failed" -gt 0 ]; then
  exit 1
fi

View File

@ -1,51 +0,0 @@
#!/bin/bash
# 停止所有服务脚本
set -e
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$SCRIPT_DIR"
# 检测 docker-compose 命令(兼容新旧版本)
COMPOSE_FILE="$SCRIPT_DIR/docker-compose.yml"
if command -v docker-compose &> /dev/null; then
DOCKER_COMPOSE="docker-compose -f $COMPOSE_FILE"
elif docker compose version &> /dev/null 2>&1; then
DOCKER_COMPOSE="docker compose -f $COMPOSE_FILE"
else
echo "错误: 未找到 docker-compose 或 docker compose 命令"
exit 1
fi
echo "=========================================="
echo " 停止 Argus Metrics 服务"
echo "=========================================="
echo ""
# 检查是否有运行的容器
if [ "$($DOCKER_COMPOSE ps -q)" ]; then
echo "停止所有服务..."
$DOCKER_COMPOSE stop
echo ""
read -p "是否要删除容器? (y/N): " -n 1 -r
echo
if [[ $REPLY =~ ^[Yy]$ ]]; then
$DOCKER_COMPOSE down
echo "容器已删除"
read -p "是否要删除数据卷? (y/N): " -n 1 -r
echo
if [[ $REPLY =~ ^[Yy]$ ]]; then
$DOCKER_COMPOSE down -v
echo "数据卷已删除"
fi
fi
else
echo "没有运行的服务"
fi
echo ""
echo "完成!"

45
src/web/.gitignore vendored Normal file
View File

@ -0,0 +1,45 @@
# Node modules
node_modules/
# Build output
/dist
/build
# Dependency directories
jspm_packages/
# Logs
npm-debug.log*
yarn-debug.log*
yarn-error.log*
# Editor directories and files
.idea/
.vscode/
*.suo
*.ntvs*
*.njsproj
*.sln
*.sw?
# OS generated files
.DS_Store
Thumbs.db
# Environment variables
.env
.env.local
.env.development.local
.env.test.local
.env.production.local
# Testing
/coverage/
# Optional: service worker cache
/.pwa-cache/
# Misc
*.log
.vite/

View File

@ -0,0 +1,12 @@
# Argus-web
架构:React + Vite + Mantine
## 打包部署
根目录下运行
```bash
bash src/web/build_tools/frontend/build.sh
```

View File

@ -0,0 +1,91 @@
# ========== Build stage ==========
FROM node:20 AS builder

# Working directory for the frontend sources
WORKDIR /app/src/web

# Copy the dependency manifests and install dependencies
COPY src/web/package*.json ./
RUN npm install

# Copy the sources and build the production bundle
COPY src/web ./
RUN npm run build

# ========== Runtime stage ==========
FROM ubuntu:24.04
USER root

# Install nginx and supervisor
RUN apt-get update && \
    apt-get install -y nginx supervisor curl vim net-tools inetutils-ping ca-certificates passwd && \
    apt-get clean && rm -rf /var/lib/apt/lists/*

ENV FRONTEND_BASE_PATH=/private/argus/web/frontend
ENV ARGUS_UID=2133
ENV ARGUS_GID=2015

RUN mkdir -p ${FRONTEND_BASE_PATH} && \
    mkdir -p /private/argus/etc

# Create the web group and user with the configured GID/UID
RUN groupadd -g ${ARGUS_GID} web
RUN useradd -M -s /usr/sbin/nologin -u ${ARGUS_UID} -g ${ARGUS_GID} web

RUN chown -R web:web ${FRONTEND_BASE_PATH} && \
    chown -R web:web /private/argus/etc && \
    chown -R web:web /usr/local/bin

# Build-time switch for the intranet apt mirrors below. It has to be declared
# as ARG in this stage: without the declaration "$USE_INTRANET" is always
# empty inside RUN and both branches are dead code. The default keeps the
# public sources; enable with `--build-arg USE_INTRANET=true`.
ARG USE_INTRANET=false

# Configure the intranet apt source (only when the intranet option is set).
# NOTE(review): this runs after the apt-get install above and points at a
# jammy (22.04) mirror while the base image is 24.04 - confirm the ordering
# and the suite are intended.
RUN if [ "$USE_INTRANET" = "true" ]; then \
        echo "Configuring intranet apt sources..." && \
        cp /etc/apt/sources.list /etc/apt/sources.list.bak && \
        echo "deb [trusted=yes] http://10.68.64.1/ubuntu2204/ jammy main" > /etc/apt/sources.list && \
        echo 'Acquire::https::Verify-Peer "false";' > /etc/apt/apt.conf.d/99disable-ssl-check && \
        echo 'Acquire::https::Verify-Host "false";' >> /etc/apt/apt.conf.d/99disable-ssl-check; \
    fi

# Configure the apt source used at deployment time
RUN if [ "$USE_INTRANET" = "true" ]; then \
        echo "deb [trusted=yes] https://10.92.132.52/mirrors/ubuntu2204/ jammy main" > /etc/apt/sources.list; \
    fi

# Put the frontend build output into the nginx document root
COPY --from=builder /app/src/web/dist /usr/share/nginx/html

# Copy the nginx config (keeps React client-side routing working)
COPY src/web/build_tools/frontend/nginx.conf /etc/nginx/nginx.conf
# COPY src/web/build_tools/frontend/conf.d/ /etc/nginx/conf.d/

# Copy the supervisor config
COPY src/web/build_tools/frontend/supervisord.conf /etc/supervisor/conf.d/supervisord.conf

# Create the supervisor log directory
RUN mkdir -p /var/log/supervisor

# Copy the start script
COPY src/web/build_tools/frontend/start-web-supervised.sh /usr/local/bin/start-web-supervised.sh
RUN chmod +x /usr/local/bin/start-web-supervised.sh

# Copy the DNS monitor script
COPY src/web/build_tools/frontend/dns-monitor.sh /usr/local/bin/dns-monitor.sh
RUN chmod +x /usr/local/bin/dns-monitor.sh

# Copy the health-check script
COPY src/web/build_tools/frontend/health-check.sh /usr/local/bin/health-check.sh
RUN chmod +x /usr/local/bin/health-check.sh

# Expose the HTTP port
EXPOSE 80

# Stay root; supervisor switches users per program
USER root

# supervisor is the container entry point
CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]

View File

@ -0,0 +1,4 @@
#!/usr/bin/env bash
# Build the argus-web image and export it to argus-web-0.1.1.tar.
# The build context is "." and the Dockerfile copies from src/web/..., so run
# this from the repository root.
#
# Strict mode: stop at the first failing step. Without it a failed pull or
# build would still delete the previous tarball and export whatever stale
# argus-web:0.1.1 image happens to exist locally.
set -euo pipefail

docker pull node:20
docker pull ubuntu:24.04
docker build -f src/web/build_tools/frontend/Dockerfile -t argus-web:0.1.1 .
rm -f argus-web-0.1.1.tar && sudo docker image save argus-web:0.1.1 > argus-web-0.1.1.tar

View File

@ -0,0 +1,68 @@
#!/bin/bash
# DNS monitor: every 10 seconds, check whether dns.conf has changed and run
# update-dns.sh when it has (or when it is seen for the first time).

DNS_CONF="/private/argus/etc/dns.conf"
DNS_BACKUP="/tmp/dns.conf.backup"
UPDATE_SCRIPT="/private/argus/etc/update-dns.sh"
LOG_FILE="/var/log/supervisor/dns-monitor.log"

# Make sure the log file exists.
touch "$LOG_FILE"

# Append a timestamped message to the log file.
# Arguments: $1 - message text
log_message() {
  echo "$(date '+%Y-%m-%d %H:%M:%S') [DNS-Monitor] $1" >> "$LOG_FILE"
}

# Run the update script with its output appended to the log file.
# Returns: 0 only when the script exists, is executable and exits 0.
run_update_script() {
  if [ ! -x "$UPDATE_SCRIPT" ]; then
    log_message "警告: 更新脚本不存在或不可执行: $UPDATE_SCRIPT"
    return 1
  fi
  log_message "执行DNS更新脚本: $UPDATE_SCRIPT"
  if "$UPDATE_SCRIPT" >> "$LOG_FILE" 2>&1; then
    log_message "DNS更新脚本执行成功"
    return 0
  fi
  log_message "DNS更新脚本执行失败"
  return 1
}

log_message "DNS监控脚本启动"

while true; do
  if [ -f "$DNS_CONF" ]; then
    if [ -f "$DNS_BACKUP" ]; then
      # Compare against the last configuration that was applied.
      if ! cmp -s "$DNS_CONF" "$DNS_BACKUP"; then
        log_message "检测到DNS配置变化"
        # Refresh the backup only after a successful update. Refreshing it
        # first would make a failed update look applied, so it would never be
        # retried; this also matches the first-run branch below.
        if run_update_script; then
          cp "$DNS_CONF" "$DNS_BACKUP"
        fi
      fi
    else
      # First time the config is seen: apply it, then create the backup.
      if run_update_script; then
        cp "$DNS_CONF" "$DNS_BACKUP"
        log_message "创建DNS配置备份文件"
      fi
    fi
  else
    log_message "警告: DNS配置文件不存在: $DNS_CONF"
  fi
  sleep 10
done

View File

@ -0,0 +1,16 @@
#!/bin/bash
# Health probe for the web frontend: request the local nginx every 10 seconds
# and exit with status 1 on the first failed probe.
set -euo pipefail

URL="http://127.0.0.1:80"
echo "[INFO] Starting Argus web health check loop for $URL..."

while true; do
  # -f makes curl exit non-zero on HTTP status >= 400. Without it an nginx
  # error page (403/404/5xx) still counted as "healthy" because only
  # connection-level failures produced a non-zero exit status.
  if curl -sf --max-time 5 "$URL" > /dev/null; then
    echo "[OK] $(date '+%Y-%m-%d %H:%M:%S') Argus web is healthy"
  else
    echo "[ERROR] $(date '+%Y-%m-%d %H:%M:%S') Argus web health check failed"
    exit 1
  fi
  sleep 10
done

View File

@ -0,0 +1,56 @@
# nginx configuration for the web frontend container.
# Worker processes run as the "web" user.
user web;
worker_processes auto;
events {
worker_connections 1024;
}
http {
include mime.types;
default_type application/octet-stream;
sendfile on;
# React frontend: serves the static build from /usr/share/nginx/html
server {
listen 80;
server_name web.argus.com;
root /usr/share/nginx/html;
index index.html;
# Client-side routing support: any URI that is not an existing file falls back to index.html
location / {
try_files $uri /index.html;
}
}
# Master service, proxied here so CORS headers can be added
# NOTE(review): this vhost proxies to the same hostname it is named after; it
# assumes master.argus.com resolves to the master service from inside this
# container rather than back to this nginx - confirm.
server {
listen 80;
server_name master.argus.com;
location / {
proxy_pass http://master.argus.com;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
# CORS support: allow the frontend origin on every response ("always" includes error responses)
add_header 'Access-Control-Allow-Origin' 'http://web.argus.com' always;
add_header 'Access-Control-Allow-Methods' 'GET, POST, PUT, DELETE, OPTIONS' always;
add_header 'Access-Control-Allow-Headers' 'Origin, Content-Type, Accept, Authorization' always;
# Answer CORS preflight requests directly with 204 instead of proxying them
if ($request_method = OPTIONS) {
add_header 'Access-Control-Allow-Origin' 'http://web.argus.com' always;
add_header 'Access-Control-Allow-Methods' 'GET, POST, PUT, DELETE, OPTIONS' always;
add_header 'Access-Control-Allow-Headers' 'Origin, Content-Type, Accept, Authorization' always;
add_header 'Content-Length' 0;
add_header 'Content-Type' 'text/plain';
return 204;
}
}
}
}

View File

@ -0,0 +1,39 @@
#!/bin/bash
# Start script run by supervisor for the web frontend container.
# Steps: make sure the shared DNS directory exists, run update-dns.sh when it
# is available, record this container's IP in a file named after the web
# domain, then replace this process with nginx in the foreground.
set -euo pipefail

echo "[INFO] Starting React frontend under supervisor..."

DNS_DIR="/private/argus/etc"
DNS_SCRIPT="${DNS_DIR}/update-dns.sh"
DOMAIN=web.argus.com
WEB_DOMAIN_FILE="${DNS_DIR}/${DOMAIN}"
RUNTIME_UID="${ARGUS_BUILD_UID:-2133}"
RUNTIME_GID="${ARGUS_BUILD_GID:-2015}"

mkdir -p "$DNS_DIR"
# Best effort: a chown failure is deliberately ignored.
chown -R "$RUNTIME_UID:$RUNTIME_GID" "$DNS_DIR" 2>/dev/null || true

if [[ -x "$DNS_SCRIPT" ]]; then
  echo "[INFO] Running update-dns.sh before nginx starts"
  # Run the script when it exists so the container uses bind as its DNS.
  "$DNS_SCRIPT" || echo "[WARN] update-dns.sh execution failed"
else
  echo "[WARN] DNS update script not found or not executable: $DNS_SCRIPT"
fi

# Record the container IP. `|| true` keeps pipefail/errexit from aborting the
# script when eth0 or its inet line is absent.
IP=$(ifconfig | grep -A 1 eth0 | grep inet | awk '{print $2}' || true)
if [[ -n "${IP}" ]]; then
  echo "current IP: ${IP}"
  echo "${IP}" > "$WEB_DOMAIN_FILE"
  chown "$RUNTIME_UID:$RUNTIME_GID" "$WEB_DOMAIN_FILE" 2>/dev/null || true
else
  echo "[WARN] Failed to detect web IP via ifconfig"
fi

echo "[INFO] Launching nginx..."
# Run nginx in the foreground; exec replaces this shell with the nginx process.
exec /usr/sbin/nginx -g "daemon off;"

View File

@ -0,0 +1,51 @@
; supervisor configuration for the web frontend container.
; supervisord itself runs in the foreground as root.
[supervisord]
nodaemon=true
logfile=/var/log/supervisor/supervisord.log
pidfile=/var/run/supervisord.pid
user=root
; nginx frontend, started through the wrapper script (runs as root)
[program:web]
command=/usr/local/bin/start-web-supervised.sh
user=root
stdout_logfile=/var/log/supervisor/web-frontend.log
stderr_logfile=/var/log/supervisor/web-frontend_error.log
autorestart=true
startretries=3
startsecs=5
stopwaitsecs=10
killasgroup=true
stopasgroup=true
; HTTP health-check loop, run as the unprivileged "web" user
[program:web-health]
command=/usr/local/bin/health-check.sh
user=web
stdout_logfile=/var/log/supervisor/web-health.log
stderr_logfile=/var/log/supervisor/web-health_error.log
autorestart=true
startretries=3
startsecs=5
stopwaitsecs=10
killasgroup=true
stopasgroup=true
; DNS configuration watcher (runs as root)
; NOTE(review): stdout_logfile is the same path that dns-monitor.sh appends to
; directly (/var/log/supervisor/dns-monitor.log) - confirm the shared file is intended.
[program:dns-monitor]
command=/usr/local/bin/dns-monitor.sh
user=root
stdout_logfile=/var/log/supervisor/dns-monitor.log
stderr_logfile=/var/log/supervisor/dns-monitor_error.log
autorestart=true
startretries=3
startsecs=5
stopwaitsecs=10
killasgroup=true
stopasgroup=true
; Control socket used by supervisorctl
[unix_http_server]
file=/var/run/supervisor.sock
chmod=0700
[supervisorctl]
serverurl=unix:///var/run/supervisor.sock
[rpcinterface:supervisor]
supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface

View File

@ -0,0 +1,69 @@
# Image for the Argus web reverse proxy (nginx managed by supervisor).
FROM ubuntu:24.04
USER root

# Install nginx and supervisor
RUN apt-get update && \
    apt-get install -y nginx supervisor curl vim net-tools inetutils-ping ca-certificates passwd && \
    apt-get clean && rm -rf /var/lib/apt/lists/*

ENV FRONTEND_BASE_PATH=/private/argus/web/proxy
ENV ARGUS_UID=2133
ENV ARGUS_GID=2015

RUN mkdir -p ${FRONTEND_BASE_PATH} && \
    mkdir -p /private/argus/etc

# Create the proxy group and user with the configured GID/UID
RUN groupadd -g ${ARGUS_GID} web_proxy
RUN useradd -M -s /usr/sbin/nologin -u ${ARGUS_UID} -g ${ARGUS_GID} web_proxy

RUN chown -R web_proxy:web_proxy ${FRONTEND_BASE_PATH} && \
    chown -R web_proxy:web_proxy /private/argus/etc && \
    chown -R web_proxy:web_proxy /usr/local/bin

# Build-time switch for the intranet apt mirrors below. It has to be declared
# as ARG: without the declaration "$USE_INTRANET" is always empty inside RUN
# and both branches are dead code. The default keeps the public sources;
# enable with `--build-arg USE_INTRANET=true`.
ARG USE_INTRANET=false

# Configure the intranet apt source (only when the intranet option is set).
# NOTE(review): this runs after the apt-get install above and points at a
# jammy (22.04) mirror while the base image is 24.04 - confirm the ordering
# and the suite are intended.
RUN if [ "$USE_INTRANET" = "true" ]; then \
        echo "Configuring intranet apt sources..." && \
        cp /etc/apt/sources.list /etc/apt/sources.list.bak && \
        echo "deb [trusted=yes] http://10.68.64.1/ubuntu2204/ jammy main" > /etc/apt/sources.list && \
        echo 'Acquire::https::Verify-Peer "false";' > /etc/apt/apt.conf.d/99disable-ssl-check && \
        echo 'Acquire::https::Verify-Host "false";' >> /etc/apt/apt.conf.d/99disable-ssl-check; \
    fi

# Configure the apt source used at deployment time
RUN if [ "$USE_INTRANET" = "true" ]; then \
        echo "deb [trusted=yes] https://10.92.132.52/mirrors/ubuntu2204/ jammy main" > /etc/apt/sources.list; \
    fi

# Copy the nginx config template and the per-service vhost configs
COPY src/web/build_tools/proxy/nginx.conf.template /etc/nginx/nginx.conf.template
COPY src/web/build_tools/proxy/conf.d/ /etc/nginx/conf.d/

# Copy the supervisor config
COPY src/web/build_tools/proxy/supervisord.conf /etc/supervisor/conf.d/supervisord.conf

# Create the supervisor log directory
RUN mkdir -p /var/log/supervisor

# Copy the start script
COPY src/web/build_tools/proxy/start-proxy-supervised.sh /usr/local/bin/start-proxy-supervised.sh
RUN chmod +x /usr/local/bin/start-proxy-supervised.sh

# Copy the DNS monitor script
COPY src/web/build_tools/proxy/dns-monitor.sh /usr/local/bin/dns-monitor.sh
RUN chmod +x /usr/local/bin/dns-monitor.sh

# Expose the HTTP port
EXPOSE 80

# Stay root; supervisor switches users per program
USER root

# supervisor is the container entry point
CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]

View File

@ -0,0 +1,8 @@
# Alertmanager: requests for alertmanager.alert.argus.com on port 80 are
# forwarded to the same hostname on port 9093.
server {
listen 80;
server_name alertmanager.alert.argus.com;
location / {
proxy_pass http://alertmanager.alert.argus.com:9093;
}
}

View File

@ -0,0 +1,19 @@
# Elasticsearch: forwards es.log.argus.com to the same hostname (no port given, so port 80)
server {
listen 80;
server_name es.log.argus.com;
location / {
proxy_pass http://es.log.argus.com;
}
}
# Kibana: forwards kibana.log.argus.com to the same hostname (no port given, so port 80)
server {
listen 80;
server_name kibana.log.argus.com;
location / {
proxy_pass http://kibana.log.argus.com;
}
}

View File

@ -0,0 +1,27 @@
# Master service: forwards master.argus.com to the same hostname.
# The forwarding headers and CORS handling below are currently disabled
# (commented out); only the plain proxy_pass is active.
server {
listen 80;
server_name master.argus.com;
location / {
# proxy_pass http://master.argus.com;
proxy_pass http://master.argus.com;
# proxy_set_header Host $host;
# proxy_set_header X-Real-IP $remote_addr;
# proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
# proxy_set_header X-Forwarded-Proto $scheme;
# # CORS support
# add_header 'Access-Control-Allow-Origin' 'http://web.argus.com' always;
# add_header 'Access-Control-Allow-Methods' 'GET, POST, PUT, DELETE, OPTIONS' always;
# add_header 'Access-Control-Allow-Headers' 'Origin, Content-Type, Accept, Authorization' always;
# if ($request_method = OPTIONS) {
# add_header 'Access-Control-Allow-Origin' 'http://web.argus.com' always;
# add_header 'Access-Control-Allow-Methods' 'GET, POST, PUT, DELETE, OPTIONS' always;
# add_header 'Access-Control-Allow-Headers' 'Origin, Content-Type, Accept, Authorization' always;
# add_header 'Content-Length' 0;
# add_header 'Content-Type' 'text/plain';
# return 204;
# }
}
}

View File

@ -0,0 +1,19 @@
# Prometheus
# NOTE(review): the vhost name is prometheus.metric.argus.com but the upstream
# is prom.metric.argus.com - confirm which hostname the Prometheus service
# actually registers.
server {
listen 80;
server_name prometheus.metric.argus.com;
location / {
proxy_pass http://prom.metric.argus.com;
}
}
# Grafana vhost, currently disabled (commented out)
# # Grafana
# server {
# listen 80;
# server_name grafana.metric.argus.com;
# location / {
# proxy_pass http://grafana.metric.argus.com;
# }
# }

View File

@ -0,0 +1,8 @@
# Web frontend: forwards web.argus.com to the same hostname on port 80.
# NOTE(review): this assumes web.argus.com resolves to the frontend container
# from inside the proxy rather than back to the proxy itself - confirm.
server {
listen 80;
server_name web.argus.com;
location / {
proxy_pass http://web.argus.com:80;
}
}

View File

@ -0,0 +1,68 @@
#!/bin/bash
# DNS monitor: every 10 seconds, check whether dns.conf has changed and run
# update-dns.sh when it has (or when it is seen for the first time).

DNS_CONF="/private/argus/etc/dns.conf"
DNS_BACKUP="/tmp/dns.conf.backup"
UPDATE_SCRIPT="/private/argus/etc/update-dns.sh"
LOG_FILE="/var/log/supervisor/dns-monitor.log"

# Make sure the log file exists.
touch "$LOG_FILE"

# Append a timestamped message to the log file.
# Arguments: $1 - message text
log_message() {
  echo "$(date '+%Y-%m-%d %H:%M:%S') [DNS-Monitor] $1" >> "$LOG_FILE"
}

# Run the update script with its output appended to the log file.
# Returns: 0 only when the script exists, is executable and exits 0.
run_update_script() {
  if [ ! -x "$UPDATE_SCRIPT" ]; then
    log_message "警告: 更新脚本不存在或不可执行: $UPDATE_SCRIPT"
    return 1
  fi
  log_message "执行DNS更新脚本: $UPDATE_SCRIPT"
  if "$UPDATE_SCRIPT" >> "$LOG_FILE" 2>&1; then
    log_message "DNS更新脚本执行成功"
    return 0
  fi
  log_message "DNS更新脚本执行失败"
  return 1
}

log_message "DNS监控脚本启动"

while true; do
  if [ -f "$DNS_CONF" ]; then
    if [ -f "$DNS_BACKUP" ]; then
      # Compare against the last configuration that was applied.
      if ! cmp -s "$DNS_CONF" "$DNS_BACKUP"; then
        log_message "检测到DNS配置变化"
        # Refresh the backup only after a successful update. Refreshing it
        # first would make a failed update look applied, so it would never be
        # retried; this also matches the first-run branch below.
        if run_update_script; then
          cp "$DNS_CONF" "$DNS_BACKUP"
        fi
      fi
    else
      # First time the config is seen: apply it, then create the backup.
      if run_update_script; then
        cp "$DNS_CONF" "$DNS_BACKUP"
        log_message "创建DNS配置备份文件"
      fi
    fi
  else
    log_message "警告: DNS配置文件不存在: $DNS_CONF"
  fi
  sleep 10
done

View File

@ -0,0 +1,36 @@
# nginx configuration template for the Argus web proxy.
# Worker processes run as the "web_proxy" user.
user web_proxy;
worker_processes auto;

events {
    worker_connections 1024;
}

http {
    include mime.types;
    default_type application/octet-stream;
    sendfile on;

    # __RESOLVERS__ is a placeholder: start-proxy-supervised.sh substitutes the
    # DNS server list (from dns.conf or /etc/resolv.conf) when it renders this
    # template to nginx.conf.
    # NOTE(review): nginx consults `resolver` at request time only for
    # proxy_pass targets that contain variables; the literal hostnames used by
    # the vhosts are resolved once when the configuration is loaded - confirm
    # that is acceptable.
    resolver __RESOLVERS__ valid=30s ipv6=off;

    # Access and error logs
    access_log /var/log/nginx/access.log;
    error_log /var/log/nginx/error.log;

    # Default headers for proxied requests
    proxy_set_header Host $host;
    proxy_set_header X-Real-IP $remote_addr;
    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
    proxy_set_header X-Forwarded-Proto $scheme;

    # Catch-all vhost: requests whose Host matches no other server go to the
    # web frontend. A `server` block is only valid inside http{}; placed at
    # the top level, nginx rejects the configuration and does not start.
    server {
        listen 80 default_server;
        server_name _;

        location / {
            proxy_pass http://web.argus.com:80;
        }
    }

    # Per-service vhosts
    include /etc/nginx/conf.d/*.conf;
}

View File

@ -0,0 +1,61 @@
#!/bin/bash
# Start script run by supervisor for the proxy container.
# Steps: make sure the shared DNS directory exists, run update-dns.sh when it
# is available, pick the DNS servers for nginx's `resolver` directive, render
# nginx.conf from the template, then replace this process with nginx.
set -euo pipefail

echo "[INFO] Starting proxy under supervisor..."

TEMPLATE="/etc/nginx/nginx.conf.template"
TARGET="/etc/nginx/nginx.conf"
DNS_CONF_PRIVATE="/private/argus/etc/dns.conf"
DNS_CONF_SYSTEM="/etc/resolv.conf"
DNS_DIR="/private/argus/etc"
DNS_SCRIPT="${DNS_DIR}/update-dns.sh"
RUNTIME_UID="${ARGUS_BUILD_UID:-2133}"
RUNTIME_GID="${ARGUS_BUILD_GID:-2015}"

mkdir -p "$DNS_DIR"
# Best effort: a chown failure is deliberately ignored.
chown -R "$RUNTIME_UID:$RUNTIME_GID" "$DNS_DIR" 2>/dev/null || true

if [[ -x "$DNS_SCRIPT" ]]; then
  echo "[INFO] Running update-dns.sh before nginx starts"
  # Run the script when it exists so the container uses bind as its DNS.
  "$DNS_SCRIPT" || echo "[WARN] update-dns.sh execution failed"
else
  echo "[WARN] DNS update script not found or not executable: $DNS_SCRIPT"
fi

# ========== Read DNS servers ==========
# Preferred source: lines of dns.conf that consist of a bare IPv4 address.
RESOLVERS=""
if [ -f "$DNS_CONF_PRIVATE" ]; then
  echo "$DNS_CONF_PRIVATE 读取 DNS 服务器..."
  RESOLVERS=$(awk '/^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$/ {print $1}' "$DNS_CONF_PRIVATE" | tr '\n' ' ')
fi

# Fallback: nameserver entries from the system resolv.conf. Only IPv4
# addresses are taken: the nginx `resolver` directive needs IPv6 addresses in
# brackets, so passing a raw IPv6 nameserver through would keep nginx from
# starting.
if [ -z "${RESOLVERS:-}" ]; then
  echo "未在 $DNS_CONF_PRIVATE 中找到有效 DNS使用系统 /etc/resolv.conf"
  RESOLVERS=$(awk '$1 == "nameserver" && $2 ~ /^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$/ {print $2}' "$DNS_CONF_SYSTEM" | tr '\n' ' ')
fi

# Last resort: a public DNS server.
if [ -z "$RESOLVERS" ]; then
  echo "警告: 未找到任何 DNS使用默认 8.8.8.8"
  RESOLVERS="8.8.8.8"
fi

echo "检测到 DNS 服务器列表: $RESOLVERS"

# ========== Generate nginx.conf ==========
if [ -f "$TEMPLATE" ]; then
  echo "从模板生成 nginx.conf ..."
  sed "s|__RESOLVERS__|$RESOLVERS|" "$TEMPLATE" > "$TARGET"
else
  echo "错误: 找不到 nginx.conf.template ($TEMPLATE)"
  exit 1
fi

# Print the rendered resolver line for troubleshooting.
grep resolver "$TARGET" || true

echo "[INFO] Launching nginx..."
# Run nginx in the foreground; exec replaces this shell with the nginx process.
exec /usr/sbin/nginx -g "daemon off;"

View File

@ -0,0 +1,39 @@
; supervisor configuration for the proxy container.
; supervisord itself runs in the foreground as root.
[supervisord]
nodaemon=true
logfile=/var/log/supervisor/supervisord.log
pidfile=/var/run/supervisord.pid
user=root
; nginx reverse proxy, started through the wrapper script (runs as root)
[program:proxy]
command=/usr/local/bin/start-proxy-supervised.sh
user=root
stdout_logfile=/var/log/supervisor/web-proxy.log
stderr_logfile=/var/log/supervisor/web-proxy_error.log
autorestart=true
startretries=3
startsecs=5
stopwaitsecs=10
killasgroup=true
stopasgroup=true
; DNS configuration watcher (runs as root)
; NOTE(review): stdout_logfile is the same path that dns-monitor.sh appends to
; directly (/var/log/supervisor/dns-monitor.log) - confirm the shared file is intended.
[program:dns-monitor]
command=/usr/local/bin/dns-monitor.sh
user=root
stdout_logfile=/var/log/supervisor/dns-monitor.log
stderr_logfile=/var/log/supervisor/dns-monitor_error.log
autorestart=true
startretries=3
startsecs=5
stopwaitsecs=10
killasgroup=true
stopasgroup=true
; Control socket used by supervisorctl
[unix_http_server]
file=/var/run/supervisor.sock
chmod=0700
[supervisorctl]
serverurl=unix:///var/run/supervisor.sock
[rpcinterface:supervisor]
supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface

29
src/web/eslint.config.js Normal file
View File

@ -0,0 +1,29 @@
import js from '@eslint/js'
import globals from 'globals'
import reactHooks from 'eslint-plugin-react-hooks'
import reactRefresh from 'eslint-plugin-react-refresh'
import { defineConfig, globalIgnores } from 'eslint/config'
// Language settings shared by every linted source file: browser globals,
// ES modules and JSX syntax.
const languageOptions = {
  ecmaVersion: 2020,
  globals: globals.browser,
  parserOptions: {
    ecmaVersion: 'latest',
    ecmaFeatures: { jsx: true },
    sourceType: 'module',
  },
}

// Lint setup for all .js/.jsx files: the recommended JS rules plus the React
// hooks and React refresh (Vite) presets.
const sourceFiles = {
  files: ['**/*.{js,jsx}'],
  extends: [
    js.configs.recommended,
    reactHooks.configs['recommended-latest'],
    reactRefresh.configs.vite,
  ],
  languageOptions,
  rules: {
    // Unused variables are errors, except names starting with an uppercase
    // letter or an underscore.
    'no-unused-vars': ['error', { varsIgnorePattern: '^[A-Z_]' }],
  },
}

// The build output directory is never linted.
export default defineConfig([globalIgnores(['dist']), sourceFiles])

13
src/web/index.html Normal file
View File

@ -0,0 +1,13 @@
<!DOCTYPE html>
<!-- HTML entry page of the Argus web frontend. -->
<html lang="zh-CN">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>GPU集群运维系统</title>
<!-- Favicon taken from the source assets directory -->
<link rel="icon" type="image/png" href="/src/assets/argus.png" />
</head>
<body>
<!-- Mount point for the application -->
<div id="root"></div>
<!-- Application entry module -->
<script type="module" src="/src/main.jsx"></script>
</body>
</html>

3617
src/web/package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

34
src/web/package.json Normal file
View File

@ -0,0 +1,34 @@
{
"name": "argus-web",
"private": true,
"version": "0.0.0",
"type": "module",
"scripts": {
"dev": "vite",
"build": "vite build",
"lint": "eslint .",
"preview": "vite preview"
},
"dependencies": {
"@emotion/react": "^11.14.0",
"@mantine/core": "^8.3.1",
"@mantine/hooks": "^8.3.1",
"@mantine/notifications": "^8.3.1",
"@tabler/icons-react": "^3.34.1",
"react": "^19.1.1",
"react-dom": "^19.1.1",
"react-router-dom": "^7.8.2",
"tabler-icons-react": "^1.56.0"
},
"devDependencies": {
"@eslint/js": "^9.33.0",
"@types/react": "^19.1.10",
"@types/react-dom": "^19.1.7",
"@vitejs/plugin-react": "^5.0.0",
"eslint": "^9.33.0",
"eslint-plugin-react-hooks": "^5.2.0",
"eslint-plugin-react-refresh": "^0.4.20",
"globals": "^16.3.0",
"vite": "^7.1.2"
}
}

Binary file not shown.

1
src/web/public/vite.svg Normal file
View File

@ -0,0 +1 @@
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" class="iconify iconify--logos" width="31.88" height="32" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 257"><defs><linearGradient id="IconifyId1813088fe1fbc01fb466" x1="-.828%" x2="57.636%" y1="7.652%" y2="78.411%"><stop offset="0%" stop-color="#41D1FF"></stop><stop offset="100%" stop-color="#BD34FE"></stop></linearGradient><linearGradient id="IconifyId1813088fe1fbc01fb467" x1="43.376%" x2="50.316%" y1="2.242%" y2="89.03%"><stop offset="0%" stop-color="#FFEA83"></stop><stop offset="8.333%" stop-color="#FFDD35"></stop><stop offset="100%" stop-color="#FFA800"></stop></linearGradient></defs><path fill="url(#IconifyId1813088fe1fbc01fb466)" d="M255.153 37.938L134.897 252.976c-2.483 4.44-8.862 4.466-11.382.048L.875 37.958c-2.746-4.814 1.371-10.646 6.827-9.67l120.385 21.517a6.537 6.537 0 0 0 2.322-.004l117.867-21.483c5.438-.991 9.574 4.796 6.877 9.62Z"></path><path fill="url(#IconifyId1813088fe1fbc01fb467)" d="M185.432.063L96.44 17.501a3.268 3.268 0 0 0-2.634 3.014l-5.474 92.456a3.268 3.268 0 0 0 3.997 3.378l24.777-5.718c2.318-.535 4.413 1.507 3.936 3.838l-7.361 36.047c-.495 2.426 1.782 4.5 4.151 3.78l15.304-4.649c2.372-.72 4.652 1.36 4.15 3.788l-11.698 56.621c-.732 3.542 3.979 5.473 5.943 2.437l1.313-2.028l72.516-144.72c1.215-2.423-.88-5.186-3.54-4.672l-25.505 4.922c-2.396.462-4.435-1.77-3.759-4.114l16.646-57.705c.677-2.35-1.37-4.583-3.769-4.113Z"></path></svg>

After

Width:  |  Height:  |  Size: 1.5 KiB

42
src/web/src/App.css Normal file
View File

@ -0,0 +1,42 @@
/*
 * Styles for the application root and logo/card elements.
 * NOTE(review): App.jsx as shown uses none of the .logo, .card or
 * .read-the-docs classes; these rules may be unused scaffolding - confirm
 * before removing.
 */

/* Centered root container with a maximum width */
#root {
max-width: 1280px;
margin: 0 auto;
padding: 2rem;
text-align: center;
}
/* Logo image; the filter is animated on hover */
.logo {
height: 6em;
padding: 1.5em;
will-change: filter;
transition: filter 300ms;
}
/* Glow on hover */
.logo:hover {
filter: drop-shadow(0 0 2em #646cffaa);
}
/* Different glow color for the React logo variant */
.logo.react:hover {
filter: drop-shadow(0 0 2em #61dafbaa);
}
/* One full clockwise rotation */
@keyframes logo-spin {
from {
transform: rotate(0deg);
}
to {
transform: rotate(360deg);
}
}
/* Spin the logo inside the second link only when the user has not asked for reduced motion */
@media (prefers-reduced-motion: no-preference) {
a:nth-of-type(2) .logo {
animation: logo-spin infinite 20s linear;
}
}
/* Padded card container */
.card {
padding: 2em;
}
/* Muted helper text */
.read-the-docs {
color: #888;
}

40
src/web/src/App.jsx Normal file
View File

@ -0,0 +1,40 @@
import { AppShell } from "@mantine/core";
import { Routes, Route, Navigate } from "react-router-dom";
import Sidebar from "./components/Sidebar";
import HeaderBar from "./components/HeaderBar";
import Dashboard from "./pages/Dashboard";
import NodePage from "./pages/NodePage";
import Metrics from "./pages/Metrics";
import Logs from "./pages/Logs";
import Alerts from "./pages/Alerts";
export default function App() {
return (
<AppShell
padding="md"
header={{ height: 60 }}
navbar={{ width: 240, breakpoint: "sm" }}
>
<AppShell.Header>
<HeaderBar />
</AppShell.Header>
<AppShell.Navbar>
<Sidebar />
</AppShell.Navbar>
<AppShell.Main>
<Routes>
<Route path="/" element={<Navigate to="/dashboard" replace />} />
<Route path="/dashboard" element={<Dashboard />} />
<Route path="/nodeInfo" element={<NodePage />} />
<Route path="/metrics" element={<Metrics />} />
<Route path="/logs" element={<Logs />} />
<Route path="/alerts" element={<Alerts />} />
<Route path="*" element={<div>404 Not Found</div>} />
</Routes>
</AppShell.Main>
</AppShell>
);
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.3 MiB

BIN
src/web/src/assets/es.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 17 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 26 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 145 KiB

View File

@ -0,0 +1 @@
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" class="iconify iconify--logos" width="35.93" height="32" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 228"><path fill="#00D8FF" d="M210.483 73.824a171.49 171.49 0 0 0-8.24-2.597c.465-1.9.893-3.777 1.273-5.621c6.238-30.281 2.16-54.676-11.769-62.708c-13.355-7.7-35.196.329-57.254 19.526a171.23 171.23 0 0 0-6.375 5.848a155.866 155.866 0 0 0-4.241-3.917C100.759 3.829 77.587-4.822 63.673 3.233C50.33 10.957 46.379 33.89 51.995 62.588a170.974 170.974 0 0 0 1.892 8.48c-3.28.932-6.445 1.924-9.474 2.98C17.309 83.498 0 98.307 0 113.668c0 15.865 18.582 31.778 46.812 41.427a145.52 145.52 0 0 0 6.921 2.165a167.467 167.467 0 0 0-2.01 9.138c-5.354 28.2-1.173 50.591 12.134 58.266c13.744 7.926 36.812-.22 59.273-19.855a145.567 145.567 0 0 0 5.342-4.923a168.064 168.064 0 0 0 6.92 6.314c21.758 18.722 43.246 26.282 56.54 18.586c13.731-7.949 18.194-32.003 12.4-61.268a145.016 145.016 0 0 0-1.535-6.842c1.62-.48 3.21-.974 4.76-1.488c29.348-9.723 48.443-25.443 48.443-41.52c0-15.417-17.868-30.326-45.517-39.844Zm-6.365 70.984c-1.4.463-2.836.91-4.3 1.345c-3.24-10.257-7.612-21.163-12.963-32.432c5.106-11 9.31-21.767 12.459-31.957c2.619.758 5.16 1.557 7.61 2.4c23.69 8.156 38.14 20.213 38.14 29.504c0 9.896-15.606 22.743-40.946 31.14Zm-10.514 20.834c2.562 12.94 2.927 24.64 1.23 33.787c-1.524 8.219-4.59 13.698-8.382 15.893c-8.067 4.67-25.32-1.4-43.927-17.412a156.726 156.726 0 0 1-6.437-5.87c7.214-7.889 14.423-17.06 21.459-27.246c12.376-1.098 24.068-2.894 34.671-5.345a134.17 134.17 0 0 1 1.386 6.193ZM87.276 214.515c-7.882 2.783-14.16 2.863-17.955.675c-8.075-4.657-11.432-22.636-6.853-46.752a156.923 156.923 0 0 1 1.869-8.499c10.486 2.32 22.093 3.988 34.498 4.994c7.084 9.967 14.501 19.128 21.976 27.15a134.668 134.668 0 0 1-4.877 4.492c-9.933 8.682-19.886 14.842-28.658 17.94ZM50.35 144.747c-12.483-4.267-22.792-9.812-29.858-15.863c-6.35-5.437-9.555-10.836-9.555-15.216c0-9.322 
13.897-21.212 37.076-29.293c2.813-.98 5.757-1.905 8.812-2.773c3.204 10.42 7.406 21.315 12.477 32.332c-5.137 11.18-9.399 22.249-12.634 32.792a134.718 134.718 0 0 1-6.318-1.979Zm12.378-84.26c-4.811-24.587-1.616-43.134 6.425-47.789c8.564-4.958 27.502 2.111 47.463 19.835a144.318 144.318 0 0 1 3.841 3.545c-7.438 7.987-14.787 17.08-21.808 26.988c-12.04 1.116-23.565 2.908-34.161 5.309a160.342 160.342 0 0 1-1.76-7.887Zm110.427 27.268a347.8 347.8 0 0 0-7.785-12.803c8.168 1.033 15.994 2.404 23.343 4.08c-2.206 7.072-4.956 14.465-8.193 22.045a381.151 381.151 0 0 0-7.365-13.322Zm-45.032-43.861c5.044 5.465 10.096 11.566 15.065 18.186a322.04 322.04 0 0 0-30.257-.006c4.974-6.559 10.069-12.652 15.192-18.18ZM82.802 87.83a323.167 323.167 0 0 0-7.227 13.238c-3.184-7.553-5.909-14.98-8.134-22.152c7.304-1.634 15.093-2.97 23.209-3.984a321.524 321.524 0 0 0-7.848 12.897Zm8.081 65.352c-8.385-.936-16.291-2.203-23.593-3.793c2.26-7.3 5.045-14.885 8.298-22.6a321.187 321.187 0 0 0 7.257 13.246c2.594 4.48 5.28 8.868 8.038 13.147Zm37.542 31.03c-5.184-5.592-10.354-11.779-15.403-18.433c4.902.192 9.899.29 14.978.29c5.218 0 10.376-.117 15.453-.343c-4.985 6.774-10.018 12.97-15.028 18.486Zm52.198-57.817c3.422 7.8 6.306 15.345 8.596 22.52c-7.422 1.694-15.436 3.058-23.88 4.071a382.417 382.417 0 0 0 7.859-13.026a347.403 347.403 0 0 0 7.425-13.565Zm-16.898 8.101a358.557 358.557 0 0 1-12.281 19.815a329.4 329.4 0 0 1-23.444.823c-7.967 0-15.716-.248-23.178-.732a310.202 310.202 0 0 1-12.513-19.846h.001a307.41 307.41 0 0 1-10.923-20.627a310.278 310.278 0 0 1 10.89-20.637l-.001.001a307.318 307.318 0 0 1 12.413-19.761c7.613-.576 15.42-.876 23.31-.876H128c7.926 0 15.743.303 23.354.883a329.357 329.357 0 0 1 12.335 19.695a358.489 358.489 0 0 1 11.036 20.54a329.472 329.472 0 0 1-11 20.722Zm22.56-122.124c8.572 4.944 11.906 24.881 6.52 51.026c-.344 1.668-.73 3.367-1.15 5.09c-10.622-2.452-22.155-4.275-34.23-5.408c-7.034-10.017-14.323-19.124-21.64-27.008a160.789 160.789 0 0 1 5.888-5.4c18.9-16.447 36.564-22.941 
44.612-18.3ZM128 90.808c12.625 0 22.86 10.235 22.86 22.86s-10.235 22.86-22.86 22.86s-22.86-10.235-22.86-22.86s10.235-22.86 22.86-22.86Z"></path></svg>

After

Width:  |  Height:  |  Size: 4.0 KiB

View File

@ -0,0 +1,38 @@
import { Group, Select } from "@mantine/core";
export function AlertFilters({ filters, setFilters, nodeOptions }) {
return (
<Group spacing="md">
<Select
label="严重性"
value={filters.severity}
onChange={(value) => setFilters((f) => ({ ...f, severity: value }))}
data={[
{ value: "all", label: "全部" },
{ value: "critical", label: "严重" },
{ value: "warning", label: "警告" },
{ value: "info", label: "信息" },
]}
w={150}
/>
<Select
label="状态"
value={filters.state}
onChange={(value) => setFilters((f) => ({ ...f, state: value }))}
data={[
{ value: "all", label: "全部" },
{ value: "active", label: "Active" },
{ value: "resolved", label: "Resolved" },
]}
w={150}
/>
<Select
label="节点"
value={filters.instance}
onChange={(value) => setFilters((f) => ({ ...f, instance: value }))}
data={nodeOptions}
w={150}
/>
</Group>
);
}

View File

@ -0,0 +1,47 @@
import { Card, Group, Text, Badge, Stack, Anchor } from "@mantine/core";
import { Link } from "react-router-dom";
export function AlertStats({ stats, layout = "row", title, link }) {
const Wrapper = layout === "row" ? Group : Stack;
return (
<Card withBorder radius="md" shadow="sm" p="md" mb="md">
{(title || link) && (
<Group position="apart" mb="sm">
{title && <Text fw={700} size="lg">{title}</Text>}
{link && (
<Anchor component={Link} to={link} size="sm" underline>
查看更多
</Anchor>
)}
</Group>
)}
<Wrapper spacing="xl" grow>
<Group spacing="xs">
<Badge color="gray" radius="sm" variant="filled"></Badge>
<Text size="sm" fw={500}>总数</Text>
<Text fw={700} color="gray">{stats.total || 0}</Text>
</Group>
<Group spacing="xs">
<Badge color="red" radius="sm" variant="filled"></Badge>
<Text size="sm" fw={500}>严重</Text>
<Text fw={700} color="red">{stats.critical || 0}</Text>
</Group>
<Group spacing="xs">
<Badge color="orange" radius="sm" variant="filled"></Badge>
<Text size="sm" fw={500}>警告</Text>
<Text fw={700} color="orange">{stats.warning || 0}</Text>
</Group>
<Group spacing="xs">
<Badge color="blue" radius="sm" variant="filled"></Badge>
<Text size="sm" fw={500}>信息</Text>
<Text fw={700} color="blue">{stats.info || 0}</Text>
</Group>
</Wrapper>
</Card>
);
}

View File

@ -0,0 +1,96 @@
import { Table, Group, ActionIcon, Button } from "@mantine/core";
import { IconChevronUp, IconChevronDown } from "@tabler/icons-react";
export function AlertTable({
alerts,
paginatedAlerts,
page,
setPage,
pageSize,
sortedAlerts,
sortConfig,
handleSort,
getRowColor,
getSeverityColor,
getStateBadge,
formatRelativeTime,
}) {
const totalPages = Math.ceil(sortedAlerts.length / pageSize);
return (
<>
<Table striped highlightOnHover>
<Table.Thead>
<Table.Tr>
{[
{ key: "alertname", label: "名称" },
{ key: "instance", label: "节点" },
{ key: "severity", label: "严重性" },
{ key: "state", label: "状态" },
{ key: "startsAt", label: "开始时间" },
{ key: "endsAt", label: "结束时间" },
{ key: "updatedAt", label: "更新时间" },
{ key: "summary", label: "描述" },
].map((col) => (
<Table.Th key={col.key}>
<Group spacing={4}>
{col.label}
{["severity", "startsAt", "instance"].includes(col.key) && (
<ActionIcon size="xs" onClick={() => handleSort(col.key)}>
{sortConfig.key === col.key && sortConfig.direction === "asc" ? (
<IconChevronUp size={14} />
) : (
<IconChevronDown size={14} />
)}
</ActionIcon>
)}
</Group>
</Table.Th>
))}
</Table.Tr>
</Table.Thead>
<Table.Tbody>
{paginatedAlerts.map((alert, i) => (
<Table.Tr key={i} style={{ backgroundColor: getRowColor(alert) }}>
<Table.Td>{alert.labels?.alertname || "-"}</Table.Td>
<Table.Td>{alert.labels?.instance || "-"}</Table.Td>
<Table.Td style={{ color: getSeverityColor(alert.labels?.severity) }}>
{alert.labels?.severity || "info"}
</Table.Td>
<Table.Td>{getStateBadge(alert.status?.state)}</Table.Td>
<Table.Td title={alert.startsAt || "-"}>{formatRelativeTime(alert.startsAt)}</Table.Td>
<Table.Td title={alert.endsAt || "-"}>
{alert.endsAt ? new Date(alert.endsAt).toLocaleString() : "-"}
</Table.Td>
<Table.Td title={alert.updatedAt || "-"}>{formatRelativeTime(alert.updatedAt)}</Table.Td>
<Table.Td>{alert.annotations?.summary || "-"}</Table.Td>
</Table.Tr>
))}
</Table.Tbody>
</Table>
{/* 分页控件 */}
<Group position="apart" mt="sm">
<Button
disabled={page === 1}
onClick={() => setPage((p) => Math.max(1, p - 1))}
variant="outline"
size="xs"
>
上一页
</Button>
<span>
{page} / {totalPages}
</span>
<Button
disabled={page >= totalPages}
onClick={() => setPage((p) => p + 1)}
variant="outline"
size="xs"
>
下一页
</Button>
</Group>
</>
);
}

View File

@ -0,0 +1,66 @@
import { Card, Flex, Image, Text, UnstyledButton } from "@mantine/core";
import { IconArrowRight } from "@tabler/icons-react";
export default function EntryCard({ label, href, icon }) {
return (
<Card
shadow="sm"
p="lg"
withBorder
radius="md"
style={{
position: "relative",
aspectRatio: "1 / 1",
transition: "transform 0.2s, box-shadow 0.2s",
}}
sx={(theme) => ({
'&:hover': {
transform: 'translateY(-4px)',
boxShadow: theme.shadows.md,
},
})}
>
{/* 图标 + 标题 居中 */}
<Flex
direction="column"
align="center"
justify="center"
style={{ flex: 1, textAlign: "center", gap: "12px", height: "100%" }}
>
<Image src={icon} alt={label} width={48} height={48} fit="contain" />
<Text fw={600}>{label}</Text>
</Flex>
{/* 悬浮圆形箭头按钮 + 动画效果 */}
<UnstyledButton
component="a"
href={href}
target="_blank"
rel="noopener noreferrer"
style={{
position: "absolute",
bottom: 16,
right: 16,
width: 40,
height: 40,
borderRadius: "50%",
display: "flex",
alignItems: "center",
justifyContent: "center",
backgroundColor: "rgba(0, 0, 0, 0.05)",
transition: "background-color 0.2s, transform 0.2s",
}}
onMouseEnter={(e) => {
e.currentTarget.style.backgroundColor = "rgba(0, 0, 0, 0.15)";
e.currentTarget.style.transform = "translateX(4px)";
}}
onMouseLeave={(e) => {
e.currentTarget.style.backgroundColor = "rgba(0, 0, 0, 0.05)";
e.currentTarget.style.transform = "translateX(0)";
}}
>
<IconArrowRight size={18} />
</UnstyledButton>
</Card>
);
}

View File

@ -0,0 +1,13 @@
import { Group, Text } from "@mantine/core";
import { SystemIcon } from "../components/SystemIcon";
export default function HeaderBar() {
return (
<Group justify="space-between" h="100%" px="md">
<Group spacing="sm" align="center">
<SystemIcon size={32} />
<Text fw={700}>GPU 集群运维系统</Text>
</Group>
</Group>
);
}

View File

@ -0,0 +1,62 @@
import { Card, Group, Text, RingProgress } from "@mantine/core";
// gray
const statusColors = {
healthy: "green",
warning: "yellow",
error: "red",
online: "green",
offline: "gray",
};
export function HealthCard({ health }) {
const totalNodes = health?.total || 0;
const stats = health?.status_statistics || [];
// sections
const sections = stats.map((s) => ({
value: (s.count / totalNodes) * 100,
color: statusColors[s.status] || "gray",
}));
// 沿 online healthy
const mainStatus = stats.find(
(s) => s.status === "online" || s.status === "healthy"
);
const mainPercent = mainStatus
? ((mainStatus.count / totalNodes) * 100).toFixed(1)
: "0.0";
return (
<Card shadow="sm" radius="md" p="lg">
<Text fw={700} size="lg" mb="md">节点健康状态</Text>
<Group spacing="xl" align="center">
<RingProgress
size={140}
thickness={14}
sections={sections}
label={
<Text fw={700} ta="center" size="lg">
{mainPercent}%
</Text>
}
/>
<div style={{ display: "flex", flexDirection: "column", justifyContent: "center", gap: 8 }}>
{stats.map((s, idx) => (
<div
key={idx}
style={{ display: "flex", justifyContent: "space-between", width: 140 }}
>
<Text size="sm" color={statusColors[s.status] || "gray"}>
{s.status}
</Text>
<Text fw={600}>{s.count}</Text>
</div>
))}
</div>
</Group>
</Card>
);
}

View File

@ -0,0 +1,150 @@
import { useState, useEffect } from "react";
import { Card, Text, Group, TextInput, Stack, ActionIcon } from "@mantine/core";
import { IconEdit, IconX, IconCheck, IconPlus, IconTrash } from "@tabler/icons-react";
import { apiRequest } from "../config/request";
import { MASTER_API } from "../config/api";
export default function NodeConfigCard({ nodeId, config = {}, onSaved }) {
const [editing, setEditing] = useState(false);
const [configList, setConfigList] = useState([]);
const [newKey, setNewKey] = useState("");
const [newValue, setNewValue] = useState("");
const [saving, setSaving] = useState(false);
useEffect(() => {
const arr = Object.entries(config || {});
setConfigList(arr);
}, [config]);
const removeConfig = (index) => {
setConfigList((prev) => prev.filter((_, i) => i !== index));
};
const updateConfig = (index, key, value) => {
setConfigList((prev) =>
prev.map((item, i) => (i === index ? [key, value] : item))
);
};
const addConfig = () => {
if (newKey && !configList.find(([k]) => k === newKey)) {
setConfigList((prev) => [...prev, [newKey, newValue]]);
setNewKey("");
setNewValue("");
}
};
const handleSave = async () => {
setSaving(true);
try {
let finalList = [...configList];
// +
if (newKey && !finalList.find(([k]) => k === newKey)) {
finalList = [...finalList, [newKey, newValue]];
setNewKey("");
setNewValue("");
}
const configObj = Object.fromEntries(finalList);
await apiRequest(MASTER_API.CONFIG(nodeId), {
method: "PUT",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({ config: configObj }),
});
setConfigList(finalList); // state UI
setEditing(false);
onSaved && onSaved();
} finally {
setSaving(false);
}
};
return (
<Card shadow="sm" radius="md" withBorder>
<Group position="apart" mb="sm">
<Text fw={600}>配置信息</Text>
<Group spacing="xs">
{editing ? (
<>
<ActionIcon
color="green"
size="sm"
loading={saving}
onClick={handleSave}
>
<IconCheck size={16} />
</ActionIcon>
<ActionIcon color="red" size="sm" onClick={() => setEditing(false)}>
<IconX size={16} />
</ActionIcon>
</>
) : (
<ActionIcon color="blue" size="sm" onClick={() => setEditing(true)}>
<IconEdit size={16} />
</ActionIcon>
)}
</Group>
</Group>
{editing ? (
<Stack spacing="xs">
{configList.map(([key, value], idx) => (
<Group key={idx} spacing="xs">
<TextInput
placeholder="Key"
value={key}
onChange={(e) => updateConfig(idx, e.target.value, value)}
/>
<TextInput
placeholder="Value"
value={value}
onChange={(e) => updateConfig(idx, key, e.target.value)}
/>
<ActionIcon color="red" onClick={() => removeConfig(idx)}>
<IconTrash size={16} />
</ActionIcon>
</Group>
))}
<Group spacing="xs">
<TextInput
placeholder="新增 Key"
value={newKey}
onChange={(e) => setNewKey(e.target.value)}
/>
<TextInput
placeholder="新增 Value"
value={newValue}
onChange={(e) => setNewValue(e.target.value)}
onKeyDown={(e) => {
if (e.key === "Enter") {
e.preventDefault();
addConfig();
}
}}
/>
<ActionIcon color="blue" onClick={addConfig}>
<IconPlus size={16} />
</ActionIcon>
</Group>
</Stack>
) : (
<Stack spacing="xs">
{configList.length > 0 ? (
configList.map(([key, value], idx) => (
<Group key={idx} spacing="xs">
<Text fw={500}>{key}:</Text>
<Text>{String(value)}</Text>
</Group>
))
) : (
<Text c="dimmed">暂无配置</Text>
)}
</Stack>
)}
</Card>
);
}

View File

@ -0,0 +1,131 @@
import { useState, useEffect } from "react";
import {
Drawer,
Text,
Loader,
Center,
ScrollArea,
Group,
Divider,
ThemeIcon,
Stack,
ActionIcon,
} from "@mantine/core";
import { IconRefresh } from "@tabler/icons-react";
import { healthStatus } from "../config/status";
import { apiRequest } from "../config/request";
import { MASTER_API } from "../config/api";
import NodeConfigCard from "./NodeConfigCard";
import NodeLabelCard from "./NodeLabelCard";
import NodeMetaCard from "./NodeMetaCard";
import NodeHealthCard from "./NodeHealthCard";
export default function NodeDetailDrawer({ opened, nodeId, onClose }) {
const [node, setNode] = useState(null);
const [loading, setLoading] = useState(false);
const fetchNodeDetail = async (id) => {
if (!id) return;
setLoading(true);
try {
const res = await apiRequest(MASTER_API.DETAIL(id));
setNode(res);
} finally {
setLoading(false);
}
};
useEffect(() => {
if (opened && nodeId) fetchNodeDetail(nodeId);
}, [opened, nodeId]);
return (
<Drawer
opened={opened}
onClose={onClose}
position="right"
size="lg"
title="节点详情"
padding="lg"
overlayProps={{ backgroundOpacity: 0.4, blur: 4 }}
>
{loading && !node ? (
<Center h={200}>
<Loader size="sm" />
</Center>
) : node ? (
<div style={{ height: "90vh", display: "flex", flexDirection: "column" }}>
{/* 固定头部基础信息 */}
<div
style={{
position: "sticky",
top: 0,
background: "white",
zIndex: 10,
paddingBottom: 8,
}}
>
<Group spacing="sm" align="center" position="apart">
<Group spacing="sm" align="center">
<ThemeIcon
size="lg"
radius="xl"
color={healthStatus(node.status).color}
variant="light"
>
{healthStatus(node.status).icon}
</ThemeIcon>
<Text fw={700} size="xl">{node.name}</Text>
<Text c="dimmed">{node.type}</Text>
<Text c={healthStatus(node.status).color}>{node.status}</Text>
<Text c="dimmed" size="sm">
最近上报时间: {new Date(node.last_report).toLocaleString()}
</Text>
</Group>
{/* 刷新按钮固定在右侧 */}
<ActionIcon
color="blue"
variant="light"
onClick={() => fetchNodeDetail(node.id)}
disabled={loading}
>
<IconRefresh size={18} />
</ActionIcon>
</Group>
<Divider my="sm" />
</div>
{/* 滚动内容 */}
<ScrollArea style={{ flex: 1 }}>
<Stack spacing="md">
{/* 配置信息 */}
<NodeConfigCard nodeId={node.id} config={node.config || {}} onSaved={() => fetchNodeDetail(node.id)} />
{/* 标签信息 */}
<NodeLabelCard nodeId={node.id} labels={Array.isArray(node.label) ? node.label : []} onSaved={() => fetchNodeDetail(node.id)} />
{/* 元数据 */}
<NodeMetaCard node={node} />
{/* 健康信息 */}
<NodeHealthCard node={node} />
{/* 其他基础信息展示 */}
<Stack spacing="xs">
<Text fw={500}>注册时间: <Text span c="dimmed">{new Date(node.register_time).toLocaleString()}</Text></Text>
<Text fw={500}>最近上报时间: <Text span c="dimmed">{new Date(node.last_report).toLocaleString()}</Text></Text>
<Text fw={500}>最后更新时间: <Text span c="dimmed">{new Date(node.last_updated).toLocaleString()}</Text></Text>
</Stack>
</Stack>
</ScrollArea>
</div>
) : (
<Text c="dimmed">暂无数据</Text>
)}
</Drawer>
);
}

View File

@ -0,0 +1,56 @@
import { useState } from "react";
import { Card, Text, Stack, Group, ActionIcon, Badge, Popover } from "@mantine/core";
import { IconInfoCircle } from "@tabler/icons-react";
export default function NodeHealthCard({ node }) {
const health = node.health || {};
const renderHealthItem = (moduleName, data) => {
const status = data?.status || "unknown";
const color = status === "healthy" ? "green" : status === "unhealthy" ? "red" : "gray";
const [opened, setOpened] = useState(false);
return (
<Group key={moduleName} spacing="xs" align="center">
<Text size="sm" fw={500}>{moduleName}</Text>
<Badge color={color} variant="light">{status}</Badge>
{(data?.error || data?.timestamp) && (
<Popover
opened={opened}
onClose={() => setOpened(false)}
position="bottom"
withArrow
shadow="sm"
>
<Popover.Target>
<ActionIcon size="xs" color="blue" variant="light" onClick={() => setOpened((o) => !o)}>
<IconInfoCircle size={14} />
</ActionIcon>
</Popover.Target>
<Popover.Dropdown>
<Stack spacing={4}>
{data.error && <Text size="xs" c="red">Error: {data.error}</Text>}
{data.timestamp && (
<Text size="xs" c="dimmed">
Updated: {new Date(data.timestamp).toLocaleString()}
</Text>
)}
</Stack>
</Popover.Dropdown>
</Popover>
)}
</Group>
);
};
return (
<Card shadow="xs" radius="md" withBorder>
<Text fw={600} mb="sm">健康信息</Text>
<Stack spacing="xs">
{Object.entries(health).map(([moduleName, data]) =>
renderHealthItem(moduleName, data)
)}
</Stack>
</Card>
);
}

View File

@ -0,0 +1,115 @@
import { useState, useEffect } from "react";
import { Card, Text, Group, TextInput, Stack, ActionIcon, Badge } from "@mantine/core";
import { IconEdit, IconX, IconCheck, IconPlus, IconTrash } from "@tabler/icons-react";
import { apiRequest } from "../config/request";
import { MASTER_API } from "../config/api";
export default function NodeLabelCard({ nodeId, labels = [], onSaved }) {
const [editing, setEditing] = useState(false);
const [tagList, setTagList] = useState([]);
const [tagColors, setTagColors] = useState([]);
const [newTag, setNewTag] = useState("");
const [saving, setSaving] = useState(false);
const randomColor = () => {
const colors = ["red", "pink", "grape", "violet", "indigo", "blue", "cyan", "teal", "green", "lime", "yellow", "orange", "gray"];
return colors[Math.floor(Math.random() * colors.length)];
};
useEffect(() => {
const arr = Array.isArray(labels) ? labels : [];
setTagList(arr);
setTagColors(arr.map(() => randomColor()));
}, [labels]);
const removeTag = (index) => {
setTagList((prev) => prev.filter((_, i) => i !== index));
setTagColors((prev) => prev.filter((_, i) => i !== index));
};
const updateTag = (index, value) => {
setTagList((prev) => prev.map((t, i) => (i === index ? value : t)));
};
const addTag = () => {
if (newTag && !tagList.includes(newTag)) {
setTagList((prev) => [...prev, newTag]);
setTagColors((prev) => [...prev, randomColor()]);
setNewTag("");
}
};
const handleSave = async () => {
setSaving(true);
try {
let finalTags = [...tagList];
if (newTag && !finalTags.includes(newTag)) {
finalTags = [...finalTags, newTag];
setNewTag(""); //
}
await apiRequest(MASTER_API.CONFIG(nodeId), {
method: "PUT",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({ label: finalTags }),
});
setTagList(finalTags);
setEditing(false);
onSaved && onSaved();
} finally {
setSaving(false);
}
};
return (
<Card shadow="sm" radius="md" withBorder>
<Group position="apart" mb="sm">
<Text fw={600}>标签信息</Text>
<Group spacing="xs">
{editing ? (
<>
<ActionIcon color="green" size="sm" loading={saving} onClick={handleSave}><IconCheck size={16} /></ActionIcon>
<ActionIcon color="red" size="sm" onClick={() => setEditing(false)}><IconX size={16} /></ActionIcon>
</>
) : (
<ActionIcon color="blue" size="sm" onClick={() => setEditing(true)}><IconEdit size={16} /></ActionIcon>
)}
</Group>
</Group>
{editing ? (
<Stack spacing="xs">
{tagList.map((tag, idx) => (
<Group key={idx} spacing="xs">
<TextInput value={tag} onChange={(e) => updateTag(idx, e.target.value)} />
<ActionIcon color="red" onClick={() => removeTag(idx)}><IconTrash size={16} /></ActionIcon>
</Group>
))}
<Group spacing="xs">
<TextInput
placeholder="新增标签"
value={newTag}
onChange={(e) => setNewTag(e.target.value)}
onKeyDown={(e) => {
if (e.key === "Enter") {
e.preventDefault(); //
addTag();
}
}}
/>
<ActionIcon color="blue" onClick={addTag}><IconPlus size={16} /></ActionIcon>
</Group>
</Stack>
) : (
<Group spacing="xs" wrap="wrap">
{tagList.length > 0 ? tagList.map((tag, idx) => (
<Badge key={idx} color={tagColors[idx]} variant="light">{tag}</Badge>
)) : <Text c="dimmed">暂无标签</Text>}
</Group>
)}
</Card>
);
}

View File

@ -0,0 +1,21 @@
import { Card, Text, Stack } from "@mantine/core";
export default function NodeMetaCard({ node }) {
const meta = node.meta_data || {};
return (
<Card shadow="xs" radius="md" withBorder>
<Text fw={600} mb="sm">元数据信息</Text>
<Stack spacing="xs">
<Text size="sm">主机名: <Text span c="dimmed">{meta.hostname}</Text></Text>
<Text size="sm">IP: <Text span c="dimmed">{meta.ip}</Text></Text>
<Text size="sm">环境: <Text span c="dimmed">{meta.env}</Text></Text>
<Text size="sm">用户: <Text span c="dimmed">{meta.user}</Text></Text>
<Text size="sm">实例: <Text span c="dimmed">{meta.instance}</Text></Text>
<Text size="sm">CPU 数量: <Text span c="dimmed">{meta.cpu_number}</Text></Text>
<Text size="sm">内存: <Text span c="dimmed">{(meta.memory_in_bytes / 1024 / 1024).toFixed(2)} MB</Text></Text>
<Text size="sm">GPU 数量: <Text span c="dimmed">{meta.gpu_number}</Text></Text>
</Stack>
</Card>
);
}

View File

@ -0,0 +1,21 @@
import { statusMap } from "../config/status";
export default function NodeStatus({ status }) {
const { color, label } = statusMap[status] || { color: "gray", label: "未知" };
return (
<span style={{ display: "flex", alignItems: "center" }}>
<span
style={{
display: "inline-block",
width: 10,
height: 10,
borderRadius: "50%",
background: color,
marginRight: 8,
}}
/>
{label}
</span>
);
}

View File

@ -0,0 +1,132 @@
import { useState, useEffect } from "react";
import { Card, Table, Button, Loader, Center, Group, Anchor, Text } from "@mantine/core";
import { Link } from "react-router-dom";
import NodeStatus from "./NodeStatus";
import PaginationControl from "./PaginationControl";
import { apiRequest } from "../config/request";
import { MASTER_API } from "../config/api";
export function NodeTable({
withSearch = false,
withPagination = false,
withActions = false,
clusterData = null, // Dashboard
fetchDetail, // NodePage
title,
viewMoreLink,
}) {
const [nodes, setNodes] = useState([]);
const [page, setPage] = useState(1);
const [pageSize, setPageSize] = useState(5);
const [loading, setLoading] = useState(false);
// NodePage 使
const fetchNodes = async (params = {}) => {
if (!withPagination && !withSearch) return; // Dashboard clusterData
setLoading(true);
try {
const query = new URLSearchParams({
page: params.page || page,
limit: params.pageSize || pageSize,
}).toString();
const result = await apiRequest(`${MASTER_API.LIST}?${query}`);
setNodes(result);
} finally {
setLoading(false);
}
};
//
useEffect(() => {
if (withPagination || withSearch) {
fetchNodes();
} else if (clusterData) {
setNodes(clusterData || []);
}
}, [clusterData]);
//
const rows = nodes.map((node) => (
<Table.Tr key={node.id}>
<Table.Td>{node.id}</Table.Td>
<Table.Td>{node.name}</Table.Td>
<Table.Td><NodeStatus status={node.status} /></Table.Td>
<Table.Td>{node.type}</Table.Td>
<Table.Td>{node.version}</Table.Td>
{withActions && (
<Table.Td>
<Button
size="xs"
variant="light"
onClick={() => fetchDetail && fetchDetail(node.id)}
>
查看详情
</Button>
</Table.Td>
)}
</Table.Tr>
));
return (
<Card shadow="sm" radius="md" p="lg">
{/* 标题 + 查看更多 */}
{(title || viewMoreLink) && (
<Group position="apart" mb="sm">
{title && <Text fw={700} size="lg">{title}</Text>}
{viewMoreLink && (
<Anchor component={Link} to={viewMoreLink} size="sm" underline>
查看更多
</Anchor>
)}
</Group>
)}
{/* 搜索区域 */}
{withSearch && (
<div style={{ display: "flex", gap: 8, marginBottom: 16 }}>
<Button onClick={() => fetchNodes()} variant="outline">刷新列表</Button>
</div>
)}
{loading ? (
<Center h={200}><Loader size="lg" /></Center>
) : (
<>
<Table striped highlightOnHover withTableBorder>
<Table.Thead>
<Table.Tr>
<Table.Th>ID</Table.Th>
<Table.Th>名称</Table.Th>
<Table.Th>状态</Table.Th>
<Table.Th>类型</Table.Th>
<Table.Th>版本</Table.Th>
{withActions && <Table.Th>操作</Table.Th>}
</Table.Tr>
</Table.Thead>
<Table.Tbody>{rows}</Table.Tbody>
</Table>
{withPagination && (
<PaginationControl
page={page}
pageSize={pageSize}
hasPrevPage={page > 1}
hasNextPage={nodes.length === pageSize}
onPageChange={(p) => {
setPage(p);
fetchNodes({ page: p });
}}
onPageSizeChange={(size) => {
setPageSize(size);
setPage(1);
fetchNodes({ page: 1, pageSize: size });
}}
/>
)}
</>
)}
</Card>
);
}

View File

@ -0,0 +1,44 @@
import { Button, Group, Select, Text } from "@mantine/core";
export default function PaginationControl({
page,
pageSize,
onPageChange,
onPageSizeChange,
hasNextPage,
hasPrevPage,
}) {
const pageSizeValue = pageSize ? String(pageSize) : "10"; // undefined
return (
<div style={{ display: "flex", justifyContent: "space-between", marginTop: 16 }}>
<Select
data={["5", "10", "20", "50"]}
value={pageSizeValue}
onChange={(val) => {
if (val) onPageSizeChange(Number(val));
}}
style={{ width: 100 }}
/>
<Group>
<Button
variant="default"
disabled={!hasPrevPage}
onClick={() => onPageChange(page - 1)}
>
上一页
</Button>
<Text size="sm" style={{ minWidth: 60, textAlign: "center" }}>
{page}
</Text>
<Button
variant="default"
disabled={!hasNextPage}
onClick={() => onPageChange(page + 1)}
>
下一页
</Button>
</Group>
</div>
);
}

View File

@ -0,0 +1,48 @@
import { NavLink, Stack } from "@mantine/core";
import {
IconGauge,
IconServer,
IconActivity,
IconFileText,
IconAlertCircle,
} from "@tabler/icons-react";
import { Link, useLocation } from "react-router-dom";
export default function Sidebar() {
const location = useLocation(); // Sidebar
const links = [
{ to: "/dashboard", label: "概览仪表盘", icon: <IconGauge size={16} /> },
{ to: "/nodeInfo", label: "节点信息", icon: <IconServer size={16} /> },
{ to: "/metrics", label: "指标详情", icon: <IconActivity size={16} /> },
{ to: "/logs", label: "日志详情", icon: <IconFileText size={16} /> },
{ to: "/alerts", label: "告警详情", icon: <IconAlertCircle size={16} /> },
];
return (
<Stack p="md">
{links.map((link) =>
link.external ? (
<NavLink
key={link.to}
component="a"
href={link.to}
target="_blank"
rel="noopener noreferrer"
label={link.label}
leftSection={link.icon}
/>
) : (
<NavLink
key={link.to}
component={Link}
to={link.to}
label={link.label}
leftSection={link.icon}
active={location.pathname === link.to}
/>
)
)}
</Stack>
);
}

View File

@ -0,0 +1,10 @@
import argusIcon from "../assets/argus.png";
/**
* 系统图标组件可在 HeaderBarDashboard 等复用
* @param {number} size 图标大小默认 32
* @param {string} alt 图标替代文本默认 'Argus'
*/
export function SystemIcon({ size = 32, alt = "Argus" }) {
return <img src={argusIcon} alt={alt} style={{ width: size, height: size }} />;
}

30
src/web/src/config/api.js Normal file
View File

@ -0,0 +1,30 @@
// config/api.js
// Master node API endpoints. Every URL hangs off the same node collection.
const MASTER_NODES_URL = "http://master.argus.com/api/v1/master/nodes";

export const MASTER_API = {
  // Node list
  LIST: MASTER_NODES_URL,
  // Node detail (requires nodeId)
  DETAIL: (nodeId) => `${MASTER_NODES_URL}/${nodeId}`,
  // Node configuration (requires nodeId)
  CONFIG: (nodeId) => `${MASTER_NODES_URL}/${nodeId}/config`,
  // Node statistics
  STATISTICS: `${MASTER_NODES_URL}/statistics`,
};
// Alertmanager base URL, shared by the API endpoint and the host entry below.
const ALERTMANAGER_URL = "http://localhost:9093";

// Other external API endpoints
export const EXTERNAL_API = {
  ALERTS_INFOS: `${ALERTMANAGER_URL}/api/v2/alerts`,
};

// Base URLs of the external services
export const EXTERNAL_HOST = {
  ALERTS: ALERTMANAGER_URL,
  GRAFANA: "http://grafana.metric.argus.com",
  PROMETHEUS: "http://prometheus.metric.argus.com",
  ES: "http://es.log.argus.com",
  KIBANA: "http://kibana.log.argus.com",
};

View File

@ -0,0 +1,19 @@
import grafanaLogo from "../assets/grafana.png";
import prometheusLogo from "../assets/prometheus.png";
import esLogo from "../assets/es.png";
import kibanaLogo from "../assets/kibana.png";
import { EXTERNAL_HOST } from "./api";
// Link entries for external tools. Each entry has a display label, the
// service URL from EXTERNAL_HOST, and a logo image.
// NOTE(review): the { label, href, icon } shape matches EntryCard's props;
// presumably these lists are rendered with it - confirm against the pages.

// Metrics tools: Grafana and Prometheus.
export const metricsEntries = [
{ label: "Grafana", href: EXTERNAL_HOST.GRAFANA, icon: grafanaLogo },
{ label: "Prometheus", href: EXTERNAL_HOST.PROMETHEUS, icon: prometheusLogo },
];
// Log tools: Elasticsearch and Kibana.
export const logsEntries = [
{ label: "Elasticsearch", href: EXTERNAL_HOST.ES, icon: esLogo },
{ label: "Kibana", href: EXTERNAL_HOST.KIBANA, icon: kibanaLogo },
];
// Alerting tools. The Alertmanager entry reuses the Prometheus logo.
export const alertsEntries = [
{ label: "Alertmanager", href: EXTERNAL_HOST.ALERTS, icon: prometheusLogo },
];

View File

@ -0,0 +1,47 @@
import { notifications } from "@mantine/notifications";
/**
 * Shared fetch wrapper with user-facing notifications.
 *
 * On a non-2xx response it throws an Error whose message is the `message`
 * field of the JSON error body when present, otherwise "请求失败". Any failure
 * (network, HTTP status, invalid JSON) is logged, shown as a red
 * notification, and re-thrown so callers can react.
 *
 * @param {string} url Request URL.
 * @param {object} [options] Options passed straight to `fetch`.
 * @param {string} [successMsg] When given, shown in a green notification
 *   after a successful request.
 * @returns {Promise<any>} Parsed JSON body, or `null` when a successful
 *   response has no body (e.g. 204 No Content).
 */
export async function apiRequest(url, options = {}, successMsg) {
  try {
    const res = await fetch(url, options);

    if (!res.ok) {
      let msg = "请求失败";
      try {
        const errData = await res.json();
        if (errData && errData.message) msg = errData.message;
      } catch (e) {
        // The error body is not JSON; keep the generic message.
      }
      throw new Error(msg);
    }

    // Read the body as text first: `res.json()` rejects on an empty body,
    // which would turn a successful 204 / empty 200 into a reported failure.
    const raw = await res.text();
    const data = raw ? JSON.parse(raw) : null;

    if (successMsg) {
      notifications.show({
        title: "成功",
        message: successMsg,
        color: "green",
      });
    }

    return data;
  } catch (err) {
    console.error("API 请求错误:", err);
    notifications.show({
      title: "操作失败",
      message: err.message || "接口调用失败",
      color: "red",
    });
    throw err; // re-throw so the caller can handle the failure as well
  }
}

View File

@ -0,0 +1,33 @@
import React from "react";
import {
IconCircleCheck,
IconAlertTriangle,
IconX,
IconCircleDashed,
} from "@tabler/icons-react";
/** Display label and colour for each node status value. */
export const statusMap = {
  online: {
    label: "Online",
    color: "green",
  },
  offline: {
    label: "Offline",
    color: "red",
  },
};
/** `{ value, label }` option list derived from statusMap, in statusMap's key order. */
export const statusOptions = Object.keys(statusMap).map((value) => {
  const { label } = statusMap[value];
  return { value, label };
});
// Status value -> [colour, icon component]. A Map is used so only the listed
// keys can match (no inherited object properties).
const HEALTH_VISUALS = new Map([
  ["activate", ["green", IconCircleCheck]],
  ["healthy", ["green", IconCircleCheck]],
  ["online", ["green", IconCircleCheck]],
  ["warning", ["yellow", IconAlertTriangle]],
  ["error", ["red", IconX]],
  ["fail", ["red", IconX]],
]);

/**
 * Maps a status string to the colour and icon used to display it.
 * Statuses that are not listed fall back to a gray dashed circle.
 * @param {string} status Status value
 * @returns {{color: string, icon: object}} Colour name and a freshly created 16px icon element
 */
export const healthStatus = (status) => {
  const [color, Icon] = HEALTH_VISUALS.get(status) ?? ["gray", IconCircleDashed];
  return { color, icon: React.createElement(Icon, { size: 16 }) };
};

View File

@ -0,0 +1,15 @@
/**
 * Formats a timestamp as a coarse "time ago" string (seconds, minutes, hours or days).
 *
 * @param {string|number|Date} dateStr Any value accepted by the Date constructor
 * @returns {string} Relative time text, or "-" when the value is empty or not a valid date
 */
export function formatRelativeTime(dateStr) {
  if (!dateStr) return "-";

  const timestamp = new Date(dateStr).getTime();
  // An unparsable date yields NaN, which would otherwise render as "NaN 天前".
  if (Number.isNaN(timestamp)) return "-";

  // Clamp at zero so a timestamp slightly in the future (e.g. clock skew
  // between server and browser) does not render a negative duration.
  const diffSec = Math.max(0, Math.floor((Date.now() - timestamp) / 1000));
  const diffMin = Math.floor(diffSec / 60);
  const diffHour = Math.floor(diffMin / 60);
  const diffDay = Math.floor(diffHour / 24);

  if (diffSec < 60) return `${diffSec} 秒前`;
  if (diffMin < 60) return `${diffMin} 分钟前`;
  if (diffHour < 24) return `${diffHour} 小时前`;
  return `${diffDay} 天前`;
}

68
src/web/src/index.css Normal file
View File

@ -0,0 +1,68 @@
/* Global defaults: typography plus the dark palette used unless the light
   override at the bottom of this file applies. */
:root {
  font-family: system-ui, Avenir, Helvetica, Arial, sans-serif;
  line-height: 1.5;
  font-weight: 400;

  color-scheme: light dark;
  color: rgba(255, 255, 255, 0.87);
  background-color: #242424;

  font-synthesis: none;
  text-rendering: optimizeLegibility;
  -webkit-font-smoothing: antialiased;
  -moz-osx-font-smoothing: grayscale;
}

/* Links */
a {
  font-weight: 500;
  color: #646cff;
  text-decoration: inherit;
}

a:hover {
  color: #535bf2;
}

/* Page body: flex container spanning at least the full viewport height */
body {
  margin: 0;
  display: flex;
  place-items: center;
  min-width: 320px;
  min-height: 100vh;
}

h1 {
  font-size: 3.2em;
  line-height: 1.1;
}

/* Buttons */
button {
  border-radius: 8px;
  border: 1px solid transparent;
  padding: 0.6em 1.2em;
  font-size: 1em;
  font-weight: 500;
  font-family: inherit;
  background-color: #1a1a1a;
  cursor: pointer;
  transition: border-color 0.25s;
}

button:hover {
  border-color: #646cff;
}

button:focus,
button:focus-visible {
  outline: 4px auto -webkit-focus-ring-color;
}

/* Light palette, applied when the user agent prefers a light colour scheme */
@media (prefers-color-scheme: light) {
  :root {
    color: #213547;
    background-color: #ffffff;
  }

  a:hover {
    color: #747bff;
  }

  button {
    background-color: #f9f9f9;
  }
}

20
src/web/src/main.jsx Normal file
View File

@ -0,0 +1,20 @@
// main.jsx
import React from 'react';
import ReactDOM from 'react-dom/client';
import '@mantine/core/styles.css';
import { MantineProvider } from '@mantine/core';
import { BrowserRouter } from 'react-router-dom';
import { Notifications } from "@mantine/notifications";
import '@mantine/notifications/styles.css';
import App from './App';
ReactDOM.createRoot(document.getElementById('root')).render(
<React.StrictMode>
<MantineProvider>
<Notifications position="top-right" />
<BrowserRouter>
<App />
</BrowserRouter>
</MantineProvider>
</React.StrictMode>
);

View File

@ -0,0 +1,160 @@
import { useEffect, useState, useMemo } from "react";
import { Stack, Title, Loader, Center, Group, Button, Badge, ActionIcon } from "@mantine/core";
import { IconRefresh } from "@tabler/icons-react";
import { apiRequest } from "../config/request";
import { EXTERNAL_API } from "../config/api";
import { AlertStats } from "../components/AlertStats";
import { AlertFilters } from "../components/AlertFilters";
import { AlertTable } from "../components/AlertTable";
import { formatRelativeTime } from "../config/utils";
import { EXTERNAL_HOST } from "../config/api";
export default function Alerts() {
const [alerts, setAlerts] = useState([]);
const [stats, setStats] = useState({ critical: 0, warning: 0, info: 0 });
const [loading, setLoading] = useState(true);
const [filters, setFilters] = useState({ severity: "all", state: "all", instance: "all" });
const [page, setPage] = useState(1);
const pageSize = 10;
const [sortConfig, setSortConfig] = useState({ key: "startsAt", direction: "desc" });
async function fetchAlerts() {
setLoading(true);
const data = await apiRequest(EXTERNAL_API.ALERTS_INFOS);
if (data && Array.isArray(data)) {
setAlerts(data);
const counts = { critical: 0, warning: 0, info: 0 };
data.forEach((alert) => {
const sev = alert.labels?.severity || "info";
if (sev === "critical") counts.critical++;
else if (sev === "warning") counts.warning++;
else counts.info++;
});
setStats(counts);
}
setLoading(false);
}
useEffect(() => {
fetchAlerts();
const timer = setInterval(fetchAlerts, 30000);
return () => clearInterval(timer);
}, []);
//
const nodeOptions = useMemo(() => {
const nodes = Array.from(new Set(alerts.map((a) => a.labels?.instance).filter(Boolean)));
return [{ value: "all", label: "全部" }, ...nodes.map((n) => ({ value: n, label: n }))];
}, [alerts]);
// & &
const filteredAlerts = useMemo(() => {
return alerts.filter((alert) => {
const sev = alert.labels?.severity || "info";
const state = alert.status?.state || "active";
const instance = alert.labels?.instance || "";
return (
(filters.severity === "all" || filters.severity === sev) &&
(filters.state === "all" || filters.state === state) &&
(filters.instance === "all" || filters.instance === instance)
);
});
}, [alerts, filters]);
const sortedAlerts = useMemo(() => {
const sorted = [...filteredAlerts];
if (sortConfig.key) {
sorted.sort((a, b) => {
let valA, valB;
if (sortConfig.key === "severity") {
const map = { critical: 3, warning: 2, info: 1 };
valA = map[a.labels?.severity] || 0;
valB = map[b.labels?.severity] || 0;
} else if (["startsAt", "endsAt", "updatedAt"].includes(sortConfig.key)) {
valA = new Date(a[sortConfig.key]).getTime() || 0;
valB = new Date(b[sortConfig.key]).getTime() || 0;
} else if (sortConfig.key === "instance") {
valA = a.labels?.instance || "";
valB = b.labels?.instance || "";
} else {
valA = a.labels?.alertname || "";
valB = b.labels?.alertname || "";
}
if (valA < valB) return sortConfig.direction === "asc" ? -1 : 1;
if (valA > valB) return sortConfig.direction === "asc" ? 1 : -1;
return 0;
});
}
return sorted;
}, [filteredAlerts, sortConfig]);
const paginatedAlerts = useMemo(() => {
const start = (page - 1) * pageSize;
return sortedAlerts.slice(start, start + pageSize);
}, [sortedAlerts, page]);
// & Badge
const getRowColor = (alert) => {
if (alert.status?.state === "resolved") return "gray.1";
const sev = alert.labels?.severity;
if (sev === "critical") return "red.0";
if (sev === "warning") return "orange.0";
if (sev === "info") return "blue.0";
return undefined;
};
const getSeverityColor = (sev) => {
if (sev === "critical") return "red";
if (sev === "warning") return "orange";
if (sev === "info") return "blue";
return "gray";
};
const getStateBadge = (state) => (
<Badge color={state === "active" ? "red" : "gray"} variant="filled" size="xs">
{state}
</Badge>
);
const handleSort = (key) => {
setSortConfig((prev) => ({
key,
direction: prev.key === key && prev.direction === "asc" ? "desc" : "asc",
}));
};
return (
<Stack spacing="lg" p="md">
<Group position="apart">
<Title order={2}>告警详情</Title>
<Button component="a" href="{EXTERNAL_HOST.ALERTS}" target="_blank" variant="outline">
打开 Alertmanager
</Button>
<ActionIcon onClick={fetchAlerts} color="blue" variant="filled" size="lg" title="刷新">
<IconRefresh size={20} />
</ActionIcon>
</Group>
<AlertStats stats={stats} />
<AlertFilters filters={filters} setFilters={setFilters} nodeOptions={nodeOptions} />
{loading ? (
<Center>
<Loader />
</Center>
) : (
<AlertTable
alerts={alerts}
paginatedAlerts={paginatedAlerts}
page={page}
setPage={setPage}
pageSize={pageSize}
sortedAlerts={sortedAlerts}
sortConfig={sortConfig}
handleSort={handleSort}
getRowColor={getRowColor}
getSeverityColor={getSeverityColor}
getStateBadge={getStateBadge}
formatRelativeTime={formatRelativeTime}
/>
)}
</Stack>
);
}

View File

@ -0,0 +1,73 @@
import { useEffect, useState } from "react";
import { Grid, Text } from "@mantine/core";
import { NodeTable } from "../components/NodeTable";
import { HealthCard } from "../components/HealthCard";
import { AlertStats } from "../components/AlertStats";
import { apiRequest } from "../config/request";
import { EXTERNAL_API } from "../config/api";
import { MASTER_API } from "../config/api";
export default function Dashboard() {
const [cluster, setCluster] = useState(null);
const [health, setHealth] = useState(null);
const [alerts, setAlerts] = useState(null);
const [loading, setLoading] = useState(true);
const countAlerts = (data) => {
const stats = { critical: 0, warning: 0, info: 0 };
data?.forEach((alert) => {
const severity = alert.labels?.severity || "info";
if (severity === "critical") stats.critical++;
else if (severity === "warning") stats.warning++;
else stats.info++;
});
return stats;
};
useEffect(() => {
async function fetchData() {
setLoading(true);
try {
const [clusterRes, healthRes, alertsRes] = await Promise.all([
apiRequest(MASTER_API.LIST),
apiRequest(MASTER_API.STATISTICS),
apiRequest(EXTERNAL_API.ALERTS_INFOS),
]);
setCluster(clusterRes || []);
setHealth({
total: healthRes?.total || 0,
status_statistics: healthRes?.status_statistics || [],
});
setAlerts(countAlerts(alertsRes?.data || []));
} catch (err) {
console.error("获取 Dashboard 数据失败:", err);
} finally {
setLoading(false);
}
}
fetchData();
}, []);
if (loading) {
return <Text>加载中...</Text>;
}
if (!cluster || !health || !alerts) {
return <Text>数据加载失败</Text>;
}
return (
<Grid>
<Grid.Col span={6}><HealthCard health={health} /></Grid.Col>
<Grid.Col span={6}>
<AlertStats stats={alerts} layout="column" title="告警统计" link="/alerts" />
</Grid.Col>
<Grid.Col span={12}><NodeTable clusterData={cluster} title="集群节点" viewMoreLink="/nodeInfo" /></Grid.Col>
</Grid>
);
}

View File

@ -0,0 +1,18 @@
import { Grid, Stack, Title } from "@mantine/core";
import EntryCard from "../components/EntryCard";
import { logsEntries } from "../config/entries";
export default function Logs() {
return (
<Stack spacing="lg" p="md">
<Title order={2}>日志详情</Title>
<Grid gutter="lg">
{logsEntries.map((entry) => (
<Grid.Col key={entry.href} span={{ base: 12, sm: 4, md: 3 }}>
<EntryCard label={entry.label} href={entry.href} icon={entry.icon} />
</Grid.Col>
))}
</Grid>
</Stack>
);
}

View File

@ -0,0 +1,18 @@
import { Grid, Stack, Title } from "@mantine/core";
import EntryCard from "../components/EntryCard";
import { metricsEntries } from "../config/entries";
export default function Metrics() {
return (
<Stack spacing="lg" p="md">
<Title order={2}>指标入口</Title>
<Grid gutter="lg">
{metricsEntries.map((entry) => (
<Grid.Col key={entry.href} span={{ base: 12, sm: 4, md: 3 }}>
<EntryCard label={entry.label} href={entry.href} icon={entry.icon} />
</Grid.Col>
))}
</Grid>
</Stack>
);
}

View File

@ -0,0 +1,46 @@
import { useState } from "react";
import { Grid } from "@mantine/core";
import { apiRequest } from "../config/request";
import { MASTER_API } from "../config/api";
import { NodeTable } from "../components/NodeTable";
import NodeDetailDrawer from "../components/NodeDetailDrawer";
export default function NodePage() {
const [selectedNodeId, setSelectedNodeId] = useState(null);
const [drawerOpen, setDrawerOpen] = useState(false);
const [detailLoading, setDetailLoading] = useState(false);
//
const fetchNodeDetail = async (id) => {
setDetailLoading(true);
setDrawerOpen(true);
try {
const result = await apiRequest(MASTER_API.DETAIL(id));
setSelectedNodeId(result.id);
} finally {
setDetailLoading(false);
}
};
return (
<Grid gutter="lg">
{/* 左侧:节点表格 */}
<Grid.Col span={drawerOpen ? 8 : 12}>
<NodeTable
withSearch
withPagination
withActions
fetchDetail={fetchNodeDetail}
/>
</Grid.Col>
{/* 节点详情 Drawer */}
<NodeDetailDrawer
opened={drawerOpen}
onClose={() => setDrawerOpen(false)}
nodeId={selectedNodeId}
loading={detailLoading}
/>
</Grid>
);
}

View File

@ -0,0 +1,6 @@
/* Page-level defaults: no outer margin, Inter-first font stack, light gray background. */
body {
  margin: 0;
  font-family:
    Inter,
    -apple-system,
    BlinkMacSystemFont,
    "Segoe UI",
    Roboto,
    Helvetica,
    Arial,
    sans-serif;
  background-color: #f8f9fa;
}

View File

@ -0,0 +1 @@
172.18.0.3

Some files were not shown because too many files have changed in this diff Show More