feat: 测试环境搭建,e2e测试;

refs #3
This commit is contained in:
sundapeng.sdp 2025-10-11 15:52:21 +08:00
parent 28ef5df6e4
commit 5211769ba8
11 changed files with 1848 additions and 0 deletions

7
src/metric/tests/.gitignore vendored Normal file
View File

@ -0,0 +1,7 @@
# Local environment file (start-all.sh copies env.example to .env when it is missing)
.env
# Default data root used by the scripts and compose file when DATA_ROOT is unset
data/
# Image tarballs written by manage-images.sh / start-all.sh
images-cache/
*.tar
# Log files and macOS Finder metadata
*.log
.DS_Store

159
src/metric/tests/README.md Normal file
View File

@ -0,0 +1,159 @@
# E2E Test - Argus Metric 部署测试
## 概述
本项目用于对 Argus Metric 模块进行端到端(E2E)部署测试。
通过一键脚本可快速搭建 Prometheus、FTP、Grafana 等服务,验证 Metric 模块的完整部署与运行流程。
## 一键构建与部署 Prometheus / FTP / Grafana
### 1. 修改环境变量文件
将示例配置文件复制为 .env 并根据实际情况修改:
``` bash
cp env.example .env
```
### 2. 一键启动服务
执行以下命令完成环境初始化、镜像构建与服务启动:
``` bash
sudo bash start-all.sh
```
该脚本将自动完成:
- 初始化目录结构(如 /private/argus/metric)
- 构建各服务 Docker 镜像
- 启动 Prometheus、FTP、Grafana 容器
### 3. 检查容器日志
可手动验证容器运行状态:
``` bash
docker logs argus-ftp
docker logs argus-grafana
docker logs argus-prometheus
```
如日志输出中无 ERROR 或 supervisor 报错信息,则表示服务启动正常。
## 客户端安装包打包与分发
打包后服务端会将安装包发布至 FTP 共享目录,默认路径为:
``` bash
$DATA_ROOT/ftp/share
```
发布后的文件权限与 FTP 目录账户保持一致。
### 1. 递增版本号
``` bash
bash scripts/version-manager.sh bump minor
```
该脚本会自动更新版本号(如 1.101.0 → 1.102.0)。
### 2. 打包安装制品
``` bash
bash scripts/package_artifact.sh
```
执行后会在输出目录中生成压缩包或安装脚本。
### 3. 发布制品至 FTP
``` bash
sudo bash scripts/publish_artifact.sh $VERSION --output-dir $OUTPUT_DIR --owner $UID:$GID
```
参数说明:

| 参数 | 说明 |
| --- | --- |
| `$VERSION` | 发布版本号(如 1.102.0) |
| `$OUTPUT_DIR` | 输出目录(默认 /private/argus/ftp/share) |
| `$UID:$GID` | 文件属主(用户ID:组ID) |
示例:
``` bash
sudo bash scripts/publish_artifact.sh 1.102.0 --output-dir /private/argus/ftp/share --owner 2133:2015
```
更多详情可参考 client-plugins/all-in-one/README.md。
## 客户端安装(通过 FTP)
客户端下载与安装步骤如下:
``` bash
curl -u "${USER}:${PASSWD}" "ftp://${FTP_SERVER}:${PORT}/setup.sh" -o setup.sh
chmod +x setup.sh
sudo bash setup.sh --server ${FTP_SERVER} --user ${USER} --password ${PASSWD} --port ${PORT}
```
参数说明:

| 参数 | 说明 |
| --- | --- |
| `$FTP_SERVER` | 服务器 IP 地址 |
| `$USER` | FTP 用户名(默认 ftpuser) |
| `$PASSWD` | FTP 密码(默认 ZGClab1234!) |
| `$PORT` | FTP 服务端口(需与 .env 保持一致) |
示例:
``` bash
curl -u 'ftpuser:ZGClab1234!' ftp://10.211.55.4:2122/setup.sh -o setup.sh
chmod +x setup.sh
sudo bash setup.sh --server 10.211.55.4 --user ftpuser --password 'ZGClab1234!' --port 2122
```
更多细节可参考 client-plugins/all-in-one/README.md。
## 模拟 Argus-Master 配置下发
可通过手动写入 nodes.json 文件模拟 Argus-Master 对 Argus-Metric 的配置下发:
``` json
[
  {
    "node_id": "A1",
    "user_id": "sundapeng",
    "ip": "10.211.55.4",
    "hostname": "dev-sundapeng-nsche-wohen-pod-0",
    "labels": ["label-a", "label-b"]
  }
]
```
路径:
``` bash
${DATA_ROOT}/prometheus/nodes.json
```
Argus-Metric 中的 prometheus 模块会自动解析该文件,并将其拆分生成目标配置:
``` bash
${DATA_ROOT}/prometheus/targets/
```
## Grafana 手动配置(如未自动接入 Prometheus)
如 Grafana 未自动导入 Prometheus 数据源,可手动执行以下操作:
1. 添加数据源
- 进入 Grafana → Data sources
- 选择 Add data source → Prometheus
- URL 填写:http://prometheus:9090(Docker 内部 DNS 地址)
2. 导入测试 Dashboard
- 打开 Grafana → Dashboards → Import
- 上传或粘贴 test_grafana_dashboard.json
## 查看监控数据
Prometheus 访问以下地址查看节点活性(端口以 .env 中的 PROMETHEUS_PORT 为准,默认 9090):
``` bash
http://127.0.0.1:9090/targets
```
Grafana 访问以下地址查看监控大屏:
``` bash
http://127.0.0.1:3000/d/node_gpu_metrics/node-and-gpu-metrics
```

106
src/metric/tests/check-paths.sh Executable file
View File

@ -0,0 +1,106 @@
#!/bin/bash
# Path check script.
# Verifies that the files and directories needed to build the test stack exist
# next to this script: docker-compose.yml, .env / env.example, the three build
# directories and their Dockerfiles. If a .env file is present it is sourced and
# the configured data directory is reported as well.
# Missing paths are reported in the output; they do not change the exit status.
set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$SCRIPT_DIR"

#######################################
# Resolve a DATA_ROOT setting to the directory that should be inspected.
# An absolute DATA_ROOT (e.g. /private/argus) is used as-is; a relative one
# (e.g. ./data) is taken relative to the given base directory.
# Arguments: $1 - base directory, $2 - DATA_ROOT value
# Outputs:   the resolved path on stdout
#######################################
resolve_data_root() {
  local base=$1
  local root=$2
  case "$root" in
    /*) printf '%s\n' "$root" ;;
    *) printf '%s\n' "$base/$root" ;;
  esac
}

echo "=========================================="
echo " 路径检查脚本"
echo "=========================================="
echo ""
echo "当前脚本目录: $SCRIPT_DIR"
echo "当前工作目录: $(pwd)"
echo ""

# Check configuration files
echo "检查配置文件..."
if [ -f "$SCRIPT_DIR/docker-compose.yml" ]; then
  echo " ✓ docker-compose.yml 存在"
else
  echo " ✗ docker-compose.yml 不存在"
fi
if [ -f "$SCRIPT_DIR/.env" ]; then
  echo " ✓ .env 存在"
elif [ -f "$SCRIPT_DIR/env.example" ]; then
  echo " ⚠ .env 不存在,但 env.example 存在"
else
  echo " ✗ .env 和 env.example 都不存在"
fi
echo ""

# Check build directories
echo "检查构建目录..."
BUILD_DIRS=(
  "../ftp/build"
  "../prometheus/build"
  "../grafana/build"
)
all_exist=true
for dir in "${BUILD_DIRS[@]}"; do
  full_path="$SCRIPT_DIR/$dir"
  if [ -d "$full_path" ]; then
    echo "$dir"
    echo " 完整路径: $full_path"
  else
    echo "$dir 不存在"
    echo " 查找路径: $full_path"
    all_exist=false
  fi
done
echo ""

# Check Dockerfiles
echo "检查 Dockerfile..."
DOCKERFILES=(
  "../ftp/build/Dockerfile"
  "../prometheus/build/Dockerfile"
  "../grafana/build/Dockerfile"
)
for dockerfile in "${DOCKERFILES[@]}"; do
  full_path="$SCRIPT_DIR/$dockerfile"
  if [ -f "$full_path" ]; then
    echo "$dockerfile"
  else
    echo "$dockerfile 不存在"
    echo " 查找路径: $full_path"
    all_exist=false
  fi
done
echo ""

# Check the data directory (optional; only when a .env file exists)
if [ -f "$SCRIPT_DIR/.env" ]; then
  source "$SCRIPT_DIR/.env"
  DATA_ROOT=${DATA_ROOT:-./data}
  # Do not prefix SCRIPT_DIR onto an absolute DATA_ROOT.
  data_dir="$(resolve_data_root "$SCRIPT_DIR" "$DATA_ROOT")"
  echo "检查数据目录..."
  echo " 数据根目录: $DATA_ROOT"
  if [ -d "$data_dir" ]; then
    echo " ✓ 数据目录存在"
    ls -la "$data_dir" | head -10
  else
    echo " ⚠ 数据目录不存在(首次运行时会自动创建)"
  fi
  echo ""
fi

# Summary
echo "=========================================="
if $all_exist; then
  echo " ✓ 所有必要的文件和目录都存在"
  echo " 可以运行 ./start-all.sh 启动服务"
else
  echo " ✗ 部分文件或目录缺失"
  echo " 请检查项目结构是否完整"
fi
echo "=========================================="
echo ""

View File

@ -0,0 +1,105 @@
# Compose stack for the Argus metric E2E tests: FTP, Prometheus and Grafana.
# Every ${VAR:-default} value falls back to the default shown when VAR is unset.
# All three services bind-mount sub-directories of DATA_ROOT (default ./data) and
# share the same ${DATA_ROOT}/etc directory.
services:
  # FTP server image built from ../ftp/build.
  ftp:
    build:
      context: ../ftp/build
      dockerfile: Dockerfile
      args:
        FTP_UID: ${FTP_UID:-2133}
        FTP_GID: ${FTP_GID:-2015}
    image: argus-metric-ftp:latest
    container_name: argus-ftp
    restart: unless-stopped
    environment:
      - FTP_BASE_PATH=/private/argus/ftp
      - FTP_PASSWORD=${FTP_PASSWORD:-ZGClab1234!}
      - DOMAIN=${FTP_DOMAIN:-prom.ftp.argus.com}
      - FTP_UID=${FTP_UID:-2133}
      - FTP_GID=${FTP_GID:-2015}
    ports:
      # Host control/data ports are configurable; the 21100-21110 range is fixed.
      - "${FTP_PORT:-21}:21"
      - "${FTP_DATA_PORT:-20}:20"
      - "21100-21110:21100-21110"
    volumes:
      - ${DATA_ROOT:-./data}/ftp:/private/argus/ftp
      - ${DATA_ROOT:-./data}/etc:/private/argus/etc
    networks:
      - argus-network
    logging:
      # Rotate container logs: at most 3 files of 10 MB each.
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"
  # Prometheus image built from ../prometheus/build.
  prometheus:
    build:
      context: ../prometheus/build
      dockerfile: Dockerfile
      args:
        PROMETHEUS_UID: ${PROMETHEUS_UID:-2133}
        PROMETHEUS_GID: ${PROMETHEUS_GID:-2015}
        USE_INTRANET: ${USE_INTRANET:-false}
    image: argus-metric-prometheus:latest
    container_name: argus-prometheus
    restart: unless-stopped
    environment:
      - PROMETHEUS_BASE_PATH=/private/argus/metric/prometheus
      - PROMETHEUS_UID=${PROMETHEUS_UID:-2133}
      - PROMETHEUS_GID=${PROMETHEUS_GID:-2015}
    ports:
      - "${PROMETHEUS_PORT:-9090}:9090"
    volumes:
      - ${DATA_ROOT:-./data}/prometheus:/private/argus/metric/prometheus
      - ${DATA_ROOT:-./data}/etc:/private/argus/etc
    networks:
      - argus-network
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"
  # Grafana image built from ../grafana/build; started after prometheus.
  grafana:
    build:
      context: ../grafana/build
      dockerfile: Dockerfile
      args:
        GRAFANA_UID: ${GRAFANA_UID:-2133}
        GRAFANA_GID: ${GRAFANA_GID:-2015}
    image: argus-metric-grafana:latest
    container_name: argus-grafana
    restart: unless-stopped
    environment:
      - GRAFANA_BASE_PATH=/private/argus/metric/grafana
      - GRAFANA_UID=${GRAFANA_UID:-2133}
      - GRAFANA_GID=${GRAFANA_GID:-2015}
      - GF_SERVER_HTTP_PORT=3000
      - GF_LOG_LEVEL=warn
      - GF_LOG_MODE=console
    ports:
      - "${GRAFANA_PORT:-3000}:3000"
    volumes:
      - ${DATA_ROOT:-./data}/grafana:/private/argus/metric/grafana
      - ${DATA_ROOT:-./data}/etc:/private/argus/etc
    networks:
      - argus-network
    depends_on:
      - prometheus
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"
# Single bridge network shared by the three services, with a fixed name.
networks:
  argus-network:
    driver: bridge
    name: argus-network
# Named volumes declared here; no service in this file references them
# (the services above use bind mounts under DATA_ROOT instead).
volumes:
  ftp_data:
    driver: local
  prometheus_data:
    driver: local
  grafana_data:
    driver: local

View File

@ -0,0 +1,26 @@
# Example environment file. start-all.sh copies it to .env when .env is missing;
# the scripts source .env and docker-compose.yml substitutes these variables.

# User and group IDs: passed to the image builds and containers, and used by
# init-directories.sh as the owners of the data directories
FTP_UID=2133
FTP_GID=2015
PROMETHEUS_UID=2133
PROMETHEUS_GID=2015
GRAFANA_UID=2133
GRAFANA_GID=2015
# Data root directory on the host (bind-mounted into the containers)
DATA_ROOT=/private/argus
# FTP settings: host ports mapped to container ports 21 and 20, account
# password, and the domain passed to the container as DOMAIN
FTP_PORT=2122
FTP_DATA_PORT=2022
FTP_PASSWORD=ZGClab1234!
FTP_DOMAIN=prom.ftp.argus.com
# Prometheus settings: host port mapped to container port 9090
PROMETHEUS_PORT=9090
# Grafana settings: host port mapped to container port 3000
GRAFANA_PORT=3000
# Network settings: passed as a build argument to the Prometheus image
USE_INTRANET=false

View File

@ -0,0 +1,90 @@
#!/bin/bash
# Directory initialisation script.
# Creates the data directories used by the FTP, Prometheus and Grafana
# containers under DATA_ROOT, writes empty default Prometheus target files when
# they do not exist yet, and sets ownership (UID:GID) and mode 755 on each tree.
# Settings come from .env next to this script when present, otherwise from the
# defaults below. Directory operations are executed through sudo.
set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$SCRIPT_DIR"

# Load the .env file (if it exists)
if [ -f .env ]; then
  echo "加载 .env 配置文件..."
  source .env
fi

# Defaults
FTP_UID=${FTP_UID:-2133}
FTP_GID=${FTP_GID:-2015}
PROMETHEUS_UID=${PROMETHEUS_UID:-2133}
PROMETHEUS_GID=${PROMETHEUS_GID:-2015}
GRAFANA_UID=${GRAFANA_UID:-2133}
GRAFANA_GID=${GRAFANA_GID:-2015}
DATA_ROOT=${DATA_ROOT:-./data}

#######################################
# Write an empty file-based target list for one Prometheus job.
# Arguments: $1 - destination file, $2 - value of the "job" label
#######################################
write_default_targets() {
  local path=$1
  local job=$2
  sudo tee "$path" > /dev/null << EOF
[
{
"targets": [],
"labels": {
"job": "${job}"
}
}
]
EOF
}

echo "开始初始化目录结构..."
echo "数据目录: ${DATA_ROOT}"
echo ""

# All paths are quoted so that a DATA_ROOT containing spaces is handled correctly.

# FTP directories
echo "创建 FTP 目录..."
sudo mkdir -p "${DATA_ROOT}/ftp/share"
sudo chown -R "${FTP_UID}:${FTP_GID}" "${DATA_ROOT}/ftp"
sudo chmod -R 755 "${DATA_ROOT}/ftp"

# Prometheus directories
echo "创建 Prometheus 目录..."
sudo mkdir -p "${DATA_ROOT}/prometheus/"{data,rules,targets}

# Default target files are created before ownership is set, so the chown below covers them.
if [ ! -f "${DATA_ROOT}/prometheus/targets/node_exporter.json" ]; then
  echo "创建默认 node_exporter targets..."
  write_default_targets "${DATA_ROOT}/prometheus/targets/node_exporter.json" "node"
fi
if [ ! -f "${DATA_ROOT}/prometheus/targets/dcgm_exporter.json" ]; then
  echo "创建默认 dcgm_exporter targets..."
  write_default_targets "${DATA_ROOT}/prometheus/targets/dcgm_exporter.json" "dcgm"
fi

# Set ownership and mode on the whole Prometheus tree
sudo chown -R "${PROMETHEUS_UID}:${PROMETHEUS_GID}" "${DATA_ROOT}/prometheus"
sudo chmod -R 755 "${DATA_ROOT}/prometheus"

# Grafana directories
echo "创建 Grafana 目录..."
sudo mkdir -p "${DATA_ROOT}/grafana/"{data,logs,plugins,provisioning/datasources,provisioning/dashboards,data/sessions,data/dashboards,config}
sudo chown -R "${GRAFANA_UID}:${GRAFANA_GID}" "${DATA_ROOT}/grafana"
sudo chmod -R 755 "${DATA_ROOT}/grafana"

# Shared configuration directory (owned by the FTP user/group)
sudo mkdir -p "${DATA_ROOT}/etc"
sudo chown -R "${FTP_UID}:${FTP_GID}" "${DATA_ROOT}/etc"
sudo chmod -R 755 "${DATA_ROOT}/etc"

echo "目录初始化完成!"
echo ""
echo "目录结构:"
echo " ${DATA_ROOT}/"
echo " ├── ftp/ (UID:${FTP_UID}, GID:${FTP_GID})"
echo " ├── prometheus/ (UID:${PROMETHEUS_UID}, GID:${PROMETHEUS_GID})"
echo " ├── grafana/ (UID:${GRAFANA_UID}, GID:${GRAFANA_GID})"
echo " └── etc/ (UID:${FTP_UID}, GID:${FTP_GID})"
echo ""
echo "您现在可以运行 'docker-compose up -d' 来启动所有服务"

View File

@ -0,0 +1,105 @@
#!/bin/bash
################################################################################
# Ubuntu 22.04 environment bootstrap script
# Purpose: install the base tools needed by the development/test environment
#          (common CLI utilities, Python 3, Docker Engine and the Compose plugin)
# System requirement: Ubuntu 22.04
# Usage: sudo ./init_environment.sh
################################################################################
set -e
# Make a pipeline fail when any stage fails, so a failed key download
# is not hidden behind the exit status of gpg.
set -o pipefail

# Every step below changes system packages or files under /etc, so stop early
# with a clear message instead of failing halfway through apt-get.
if [ "$(id -u)" -ne 0 ]; then
  echo "错误: 请使用 root 权限运行此脚本 (sudo ./init_environment.sh)" >&2
  exit 1
fi

echo "==================================="
echo "开始安装环境依赖..."
echo "==================================="

# Refresh the package index
echo "[1/4] 更新系统包列表..."
apt-get update -y

# Install base tools
echo "[2/4] 安装基础工具..."
apt-get install -y \
  vim \
  curl \
  wget \
  git \
  htop \
  tree \
  net-tools \
  dnsutils \
  iputils-ping \
  telnet \
  traceroute \
  lsof \
  unzip \
  zip \
  tar \
  jq \
  ca-certificates \
  gnupg \
  lsb-release \
  software-properties-common \
  apt-transport-https \
  build-essential \
  python3 \
  python3-pip \
  python3-venv \
  tmux \
  ncdu

# Install Docker
echo "[3/4] 安装 Docker..."

# Remove old packages; they may not be installed, so failure is ignored on purpose.
apt-get remove -y docker docker-engine docker.io containerd runc 2>/dev/null || true

# Add Docker's official GPG key.
# --batch --yes lets a re-run overwrite an existing keyring instead of prompting.
install -m 0755 -d /etc/apt/keyrings
curl -fsSL https://download.docker.com/linux/ubuntu/gpg \
  | gpg --batch --yes --dearmor -o /etc/apt/keyrings/docker.gpg
chmod a+r /etc/apt/keyrings/docker.gpg

# Add the Docker apt repository for this architecture and release codename
docker_arch="$(dpkg --print-architecture)"
docker_codename="$(. /etc/os-release && echo "$VERSION_CODENAME")"
echo "deb [arch=${docker_arch} signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu ${docker_codename} stable" \
  | tee /etc/apt/sources.list.d/docker.list > /dev/null

# Refresh the index and install Docker
apt-get update -y
apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin

# Start Docker now and enable it at boot
systemctl start docker
systemctl enable docker

# Add the invoking (sudo) user to the docker group
if [ -n "$SUDO_USER" ]; then
  usermod -aG docker "$SUDO_USER"
  echo "✓ 用户 $SUDO_USER 已添加到 docker 组"
fi

# Clean up
echo "[4/4] 清理..."
apt-get autoremove -y
apt-get autoclean -y

# Report what was installed
echo ""
echo "==================================="
echo "安装完成!"
echo "==================================="
echo ""
echo "已安装:"
echo " ✓ vim"
echo " ✓ curl, wget, git"
echo " ✓ Docker: $(docker --version)"
echo " ✓ Docker Compose: $(docker compose version)"
echo " ✓ Python: $(python3 --version)"
echo " ✓ 其他基础工具 (htop, tree, jq, tmux 等)"
echo ""
if [ -n "$SUDO_USER" ]; then
  echo "提示:请重新登录以使 docker 组权限生效"
fi
echo ""

371
src/metric/tests/manage-images.sh Executable file
View File

@ -0,0 +1,371 @@
#!/bin/bash
# Docker image management script.
# Supports building, saving, loading and cleaning the images of the test stack.
set -e

# Run from the directory that contains this script.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$SCRIPT_DIR"

# Select the Compose front end: the standalone docker-compose binary if it is
# on PATH, otherwise the "docker compose" plugin; give up when neither works.
if command -v docker-compose > /dev/null 2>&1; then
  DOCKER_COMPOSE="docker-compose"
elif docker compose version > /dev/null 2>&1; then
  DOCKER_COMPOSE="docker compose"
else
  echo "错误: 未找到 docker-compose 或 docker compose 命令"
  exit 1
fi

# Directory that holds the saved image tarballs
IMAGE_CACHE_DIR="./images-cache"
mkdir -p "$IMAGE_CACHE_DIR"

# Images managed by this script
IMAGES=(
  "argus-metric-ftp:latest"
  "argus-metric-prometheus:latest"
  "argus-metric-grafana:latest"
)

# Image name -> tarball file name inside IMAGE_CACHE_DIR
declare -A IMAGE_FILES
IMAGE_FILES["argus-metric-ftp:latest"]="argus-ftp.tar"
IMAGE_FILES["argus-metric-prometheus:latest"]="argus-prometheus.tar"
IMAGE_FILES["argus-metric-grafana:latest"]="argus-grafana.tar"
#######################################
# Check whether an image is present in the local Docker image list.
# The name is compared as a fixed string against whole "repository:tag" lines,
# so characters such as "." in the name are not treated as regex wildcards.
# Arguments: $1 - image reference in "repository:tag" form
# Returns:   0 if the image is listed, 1 otherwise
#######################################
check_image_exists() {
  local image=$1
  if docker images --format "{{.Repository}}:{{.Tag}}" | grep -Fxq -- "$image"; then
    return 0
  else
    return 1
  fi
}
#######################################
# Load one image from its cached tarball.
# Globals:   IMAGE_CACHE_DIR, IMAGE_FILES (read)
# Arguments: $1 - image reference (key of IMAGE_FILES)
# Returns:   1 when the tarball does not exist, 0 after running docker load
#######################################
load_image() {
  local image_ref=$1
  local tarball="${IMAGE_CACHE_DIR}/${IMAGE_FILES[$image_ref]}"

  # Nothing cached for this image.
  [ -f "$tarball" ] || return 1

  echo "正在从缓存加载镜像: $image_ref"
  docker load -i "$tarball"
  return 0
}
#######################################
# Save one image to its tarball in the cache directory.
# Globals:   IMAGE_CACHE_DIR, IMAGE_FILES (read)
# Arguments: $1 - image reference (key of IMAGE_FILES)
# Returns:   1 when the image is not present locally, 0 after saving
#######################################
save_image() {
  local image_ref=$1
  local tarball="${IMAGE_CACHE_DIR}/${IMAGE_FILES[$image_ref]}"

  if ! check_image_exists "$image_ref"; then
    echo "镜像不存在: $image_ref"
    return 1
  fi

  echo "正在保存镜像到缓存: $image_ref"
  docker save -o "$tarball" "$image_ref"
  # Report the tarball together with its human-readable size.
  echo "已保存: $tarball ($(du -h "$tarball" | cut -f1))"
  return 0
}
#######################################
# Build all images with Compose.
# Globals:   DOCKER_COMPOSE (read) - compose command line, word-split on use
# Arguments: $1 - optional build flag(s). When omitted, "--no-cache" is used.
#                 An explicit empty string means "no extra flags", i.e. build
#                 with the layer cache (this is what build-cache and prepare pass).
#######################################
build_all() {
  echo "=========================================="
  echo " 构建所有 Docker 镜像"
  echo "=========================================="
  echo ""

  # "${1-...}" (no colon) applies the default only when the argument is absent;
  # the ":-" form would also replace an explicit "" and force --no-cache.
  local build_flag="${1---no-cache}"

  echo "开始构建镜像..."
  # DOCKER_COMPOSE and build_flag are intentionally unquoted: each may hold
  # several words ("docker compose", multiple flags) or be empty.
  # shellcheck disable=SC2086
  $DOCKER_COMPOSE build $build_flag
  echo ""
  echo "构建完成!"
}
#######################################
# Save every image in IMAGES to the cache directory and print a summary.
# Images that cannot be saved are listed with a "skipped" marker.
# Globals: IMAGES, IMAGE_CACHE_DIR (read)
#######################################
save_all() {
  printf '%s\n' \
    "==========================================" \
    " 保存所有 Docker 镜像到缓存" \
    "==========================================" \
    ""

  local marker
  for image in "${IMAGES[@]}"; do
    marker=" (跳过)"
    if save_image "$image"; then
      marker=""
    fi
    # One result line per image, followed by a blank separator line.
    printf '%s\n\n' "${image}${marker}"
  done

  printf '%s\n' "缓存目录: $IMAGE_CACHE_DIR"
  printf '%s\n' "总大小: $(du -sh "$IMAGE_CACHE_DIR" | cut -f1)"
}
#######################################
# Load every image in IMAGES from the cache, skipping images that already
# exist locally, and print how many were loaded and skipped.
# Globals: IMAGES (read)
#######################################
load_all() {
  echo "=========================================="
  echo " 从缓存加载所有 Docker 镜像"
  echo "=========================================="
  echo ""

  local loaded=0
  local skipped=0
  local image

  for image in "${IMAGES[@]}"; do
    if check_image_exists "$image"; then
      echo "镜像已存在,跳过: $image"
      # Plain assignment instead of ((skipped++)): the arithmetic command
      # returns status 1 when the old value is 0, which aborts under set -e.
      skipped=$((skipped + 1))
    elif load_image "$image"; then
      echo "✓ 已加载: $image"
      loaded=$((loaded + 1))
    else
      echo "✗ 缓存不存在: $image"
    fi
    echo ""
  done

  echo "加载: $loaded, 跳过: $skipped"
}
#######################################
# Print, for every image in IMAGES, whether it exists locally (with its size)
# and whether its cached tarball exists (with its size).
# Globals: IMAGES, IMAGE_FILES, IMAGE_CACHE_DIR (read)
#######################################
status() {
  echo "=========================================="
  echo " 镜像状态"
  echo "=========================================="
  echo ""

  local image size file

  echo "Docker 镜像:"
  for image in "${IMAGES[@]}"; do
    if check_image_exists "$image"; then
      size=$(docker images --format "{{.Size}}" "$image" | head -1)
      echo "$image ($size)"
    else
      echo "$image (未构建)"
    fi
  done

  echo ""
  echo "缓存文件:"
  # The directory is quoted so that a cache path containing spaces is not
  # split into several ls arguments (which would report it as empty).
  if [ -d "$IMAGE_CACHE_DIR" ] && [ -n "$(ls -A "$IMAGE_CACHE_DIR" 2>/dev/null)" ]; then
    for image in "${IMAGES[@]}"; do
      file="${IMAGE_CACHE_DIR}/${IMAGE_FILES[$image]}"
      if [ -f "$file" ]; then
        echo "${IMAGE_FILES[$image]} ($(du -h "$file" | cut -f1))"
      else
        echo "${IMAGE_FILES[$image]} (不存在)"
      fi
    done
    echo ""
    echo "缓存总大小: $(du -sh "$IMAGE_CACHE_DIR" | cut -f1)"
  else
    echo " (无缓存文件)"
  fi
}
#######################################
# Delete the cached image tarballs (*.tar) after interactive confirmation.
# Only an answer of "y" or "Y" deletes; anything else, including end of input,
# cancels.
# Globals: IMAGE_CACHE_DIR (read)
#######################################
clean_cache() {
  echo "=========================================="
  echo " 清理镜像缓存"
  echo "=========================================="
  echo ""

  # Quoted so that a cache path containing spaces is inspected correctly.
  if [ -d "$IMAGE_CACHE_DIR" ] && [ -n "$(ls -A "$IMAGE_CACHE_DIR" 2>/dev/null)" ]; then
    echo "缓存目录: $IMAGE_CACHE_DIR"
    echo "大小: $(du -sh "$IMAGE_CACHE_DIR" | cut -f1)"
    echo ""
    # read fails at end of input; treat that as "no" instead of letting
    # set -e abort the script.
    read -p "确认删除所有缓存文件? (y/N): " -n 1 -r || REPLY=""
    echo
    if [[ $REPLY =~ ^[Yy]$ ]]; then
      # ":?" aborts instead of expanding to "/*.tar" if the variable is ever empty.
      rm -rf -- "${IMAGE_CACHE_DIR:?}"/*.tar
      echo "已清理缓存文件"
    else
      echo "已取消"
    fi
  else
    echo "没有缓存文件"
  fi
}
#######################################
# Remove the locally present images of IMAGES after interactive confirmation.
# Prints the images that would be removed first; only "y" or "Y" removes them.
# Globals: IMAGES (read)
#######################################
clean_images() {
  printf '%s\n' \
    "==========================================" \
    " 清理 Docker 镜像" \
    "==========================================" \
    ""

  # Collect the images that currently exist.
  local -a present=()
  for image in "${IMAGES[@]}"; do
    if check_image_exists "$image"; then
      present+=("$image")
    fi
  done

  if [ "${#present[@]}" -eq 0 ]; then
    echo "没有需要清理的镜像"
    return
  fi

  echo "将删除以下镜像:"
  printf ' - %s\n' "${present[@]}"
  echo ""

  read -p "确认删除这些镜像? (y/N): " -n 1 -r
  echo
  case "$REPLY" in
    [Yy])
      # Existence is re-checked right before each removal.
      for image in "${IMAGES[@]}"; do
        if check_image_exists "$image"; then
          docker rmi "$image"
          echo "已删除: $image"
        fi
      done
      ;;
    *)
      echo "已取消"
      ;;
  esac
}
#######################################
# Make sure every image in IMAGES is available: keep images that already
# exist, load missing ones from the cache, and offer to build whatever is
# still missing (then offer to save the images to the cache).
# Both prompts default to "yes"; only "n" or "N" declines.
# Globals: IMAGES (read)
#######################################
prepare() {
  echo "=========================================="
  echo " 智能准备 Docker 镜像"
  echo "=========================================="
  echo ""

  local need_build=()
  local loaded=0
  local existed=0
  local image

  for image in "${IMAGES[@]}"; do
    if check_image_exists "$image"; then
      echo "✓ 镜像已存在: $image"
      # Plain assignment instead of ((existed++)): the arithmetic command
      # returns status 1 when the old value is 0, which aborts under set -e.
      existed=$((existed + 1))
    elif load_image "$image"; then
      echo "✓ 已从缓存加载: $image"
      loaded=$((loaded + 1))
    else
      echo "✗ 需要构建: $image"
      need_build+=("$image")
    fi
  done

  echo ""
  echo "统计: 已存在 $existed, 已加载 $loaded, 需构建 ${#need_build[@]}"

  if [ ${#need_build[@]} -gt 0 ]; then
    echo ""
    echo "需要构建以下镜像:"
    for image in "${need_build[@]}"; do
      echo " - $image"
    done
    echo ""
    # read fails at end of input; fall back to the default answer instead of
    # letting set -e abort the script.
    read -p "是否现在构建? (Y/n): " -n 1 -r || REPLY=""
    echo
    if [[ ! $REPLY =~ ^[Nn]$ ]]; then
      # An empty flag requests a build without extra flags.
      build_all ""
      echo ""
      read -p "是否保存新构建的镜像到缓存? (Y/n): " -n 1 -r || REPLY=""
      echo
      if [[ ! $REPLY =~ ^[Nn]$ ]]; then
        save_all
      fi
    fi
  else
    echo ""
    echo "所有镜像已就绪!"
  fi
}
#######################################
# Print the usage text: available commands, examples and the cache directory.
# Globals: IMAGE_CACHE_DIR (read)
# Outputs: help text on stdout
#######################################
show_help() {
  local prog=$0
  local -a help_lines=(
    "Docker 镜像管理工具"
    "用法: ${prog} <command>"
    "命令:"
    "prepare 智能准备镜像(推荐)- 自动检测、加载或构建"
    "build 构建所有镜像"
    "build-cache 使用缓存构建"
    "save 保存所有镜像到缓存"
    "load 从缓存加载所有镜像"
    "status 查看镜像状态"
    "clean-cache 清理缓存文件"
    "clean-images 清理 Docker 镜像"
    "clean-all 清理缓存和镜像"
    "help 显示此帮助信息"
    "示例:"
    "# 智能准备(首次使用或镜像丢失时)"
    "${prog} prepare"
    "# 构建并保存镜像"
    "${prog} build"
    "${prog} save"
    "# 从缓存加载镜像"
    "${prog} load"
    "# 查看状态"
    "${prog} status"
    "镜像缓存目录: ${IMAGE_CACHE_DIR}/"
  )
  printf '%s\n' "${help_lines[@]}"
}
# Entry point: dispatch on the first argument ("help" when none is given).
# An unknown command prints an error plus the help text and exits with status 1.
case "${1:-help}" in
  prepare)          prepare ;;
  build)            build_all "--no-cache" ;;
  build-cache)      build_all "" ;;
  save)             save_all ;;
  load)             load_all ;;
  status)           status ;;
  clean-cache)      clean_cache ;;
  clean-images)     clean_images ;;
  clean-all)
    clean_cache
    clean_images
    ;;
  help | --help | -h) show_help ;;
  *)
    printf '%s\n' "错误: 未知命令 '$1'" ""
    show_help
    exit 1
    ;;
esac

199
src/metric/tests/start-all.sh Executable file
View File

@ -0,0 +1,199 @@
#!/bin/bash
# One-shot start script.
# Checks prerequisites, creates .env from env.example when missing, initialises
# the data directories, makes the three images available (existing -> cache ->
# build) and starts all services with Compose.
set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$SCRIPT_DIR"

#######################################
# Check whether an image is present in the local Docker image list.
# Arguments: $1 - image reference in "repository:tag" form
# Returns:   0 if listed, non-zero otherwise
#######################################
image_exists() {
  docker images --format "{{.Repository}}:{{.Tag}}" | grep -Fxq -- "$1"
}

#######################################
# Map an image reference to its tarball path inside the cache directory.
# Globals:   IMAGE_CACHE_DIR (read)
# Arguments: $1 - image reference
# Outputs:   tarball path on stdout (nothing for an unknown image)
#######################################
cache_file_for() {
  case "$1" in
    "argus-metric-ftp:latest") echo "${IMAGE_CACHE_DIR}/argus-ftp.tar" ;;
    "argus-metric-prometheus:latest") echo "${IMAGE_CACHE_DIR}/argus-prometheus.tar" ;;
    "argus-metric-grafana:latest") echo "${IMAGE_CACHE_DIR}/argus-grafana.tar" ;;
  esac
}

echo "=========================================="
echo " Argus Metrics 一键启动脚本"
echo "=========================================="
echo ""
echo "当前工作目录: $SCRIPT_DIR"
echo ""

# Check Docker and Docker Compose
if ! command -v docker &> /dev/null; then
  echo "错误: 未找到 docker 命令,请先安装 Docker"
  exit 1
fi

# Detect the compose command (standalone binary or plugin).
# The command is kept as an array so a script path containing spaces stays a
# single -f argument; DOCKER_COMPOSE is the same command as text, for display.
COMPOSE_FILE="$SCRIPT_DIR/docker-compose.yml"
if command -v docker-compose &> /dev/null; then
  COMPOSE_CMD=(docker-compose -f "$COMPOSE_FILE")
  echo "使用: docker-compose"
elif docker compose version &> /dev/null; then
  COMPOSE_CMD=(docker compose -f "$COMPOSE_FILE")
  echo "使用: docker compose"
else
  echo "错误: 未找到 docker-compose 或 docker compose 命令"
  exit 1
fi
DOCKER_COMPOSE="${COMPOSE_CMD[*]}"
echo "Compose 文件: $COMPOSE_FILE"
echo ""

# Check the required build directories
echo "检查构建目录..."
BUILD_DIRS=(
  "../ftp/build"
  "../prometheus/build"
  "../grafana/build"
)
for dir in "${BUILD_DIRS[@]}"; do
  if [ ! -d "$dir" ]; then
    echo "错误: 构建目录不存在: $dir"
    echo "完整路径: $(cd "$(dirname "$dir")" 2>/dev/null && pwd)/$(basename "$dir")"
    exit 1
  else
    echo " ✓ 找到: $dir"
  fi
done
echo ""

# Create the .env file when it is missing
if [ ! -f .env ]; then
  echo "未找到 .env 文件,从 env.example 创建..."
  cp env.example .env
  echo "已创建 .env 文件,请根据需要修改配置"
fi

# Load environment variables
source .env

echo "1. 初始化目录结构..."
bash "$SCRIPT_DIR/init-directories.sh"
echo ""

echo "2. 准备 Docker 镜像..."
IMAGE_CACHE_DIR="./images-cache"
IMAGES=("argus-metric-ftp:latest" "argus-metric-prometheus:latest" "argus-metric-grafana:latest")

# Are all images already present?
all_images_exist=true
for image in "${IMAGES[@]}"; do
  if ! image_exists "$image"; then
    all_images_exist=false
    break
  fi
done

if $all_images_exist; then
  echo "所有镜像已存在,跳过构建"
else
  echo "检测到缺失镜像,尝试从缓存加载..."

  # Try to load each missing image from its cached tarball
  loaded_from_cache=false
  if [ -d "$IMAGE_CACHE_DIR" ]; then
    for image in "${IMAGES[@]}"; do
      if ! image_exists "$image"; then
        cache_file="$(cache_file_for "$image")"
        if [ -f "$cache_file" ]; then
          echo " 从缓存加载: $image"
          docker load -i "$cache_file"
          loaded_from_cache=true
        fi
      fi
    done
  fi

  # Is anything still missing after loading from the cache?
  need_build=false
  for image in "${IMAGES[@]}"; do
    if ! image_exists "$image"; then
      need_build=true
      break
    fi
  done

  if $need_build; then
    echo ""
    echo "部分镜像缺失,开始构建..."
    echo "工作目录: $(pwd)"
    "${COMPOSE_CMD[@]}" build

    # Offer to save the images. read fails at end of input (non-interactive
    # run); fall back to the default answer so set -e does not abort the
    # script before the services are started.
    echo ""
    read -p "是否保存镜像到缓存以便下次快速启动? (Y/n): " -n 1 -r || REPLY=""
    echo
    if [[ ! $REPLY =~ ^[Nn]$ ]]; then
      mkdir -p "$IMAGE_CACHE_DIR"
      echo "保存镜像到缓存..."
      for image in "${IMAGES[@]}"; do
        cache_file="$(cache_file_for "$image")"
        # A failed save is reported by docker itself and does not stop the start-up.
        if docker save -o "$cache_file" "$image"; then
          echo " 已保存: ${cache_file##*/}"
        fi
      done
      echo "镜像已保存到: $IMAGE_CACHE_DIR/"
    fi
  elif $loaded_from_cache; then
    echo ""
    echo "所有镜像已从缓存加载完成!"
  fi
fi
echo ""

echo "3. 启动服务..."
"${COMPOSE_CMD[@]}" up -d
echo ""

echo "4. 等待服务启动..."
sleep 5
echo ""

echo "5. 检查服务状态..."
"${COMPOSE_CMD[@]}" ps
echo ""

echo "=========================================="
echo " 服务启动完成!"
echo "=========================================="
echo ""
echo "服务访问地址:"
echo " - FTP: ftp://localhost:${FTP_PORT:-21}"
echo " 用户名: ftpuser"
echo " 密码: ${FTP_PASSWORD:-ZGClab1234!}"
echo ""
echo " - Prometheus: http://localhost:${PROMETHEUS_PORT:-9090}"
echo ""
echo " - Grafana: http://localhost:${GRAFANA_PORT:-3000}"
echo " 用户名: admin"
echo " 密码: admin"
echo ""
echo "常用命令:"
echo " 查看日志: $DOCKER_COMPOSE logs -f [service]"
echo " 停止服务: $DOCKER_COMPOSE stop"
echo " 重启服务: $DOCKER_COMPOSE restart"
echo " 停止并删除: $DOCKER_COMPOSE down"
echo " 停止并删除卷: $DOCKER_COMPOSE down -v"
echo ""

51
src/metric/tests/stop-all.sh Executable file
View File

@ -0,0 +1,51 @@
#!/bin/bash
# Stop-all script.
# Stops the Compose services and then optionally removes the containers and,
# after a second confirmation, the volumes. Both prompts default to "no".
set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$SCRIPT_DIR"

# Detect the compose command (standalone binary or plugin).
# Kept as an array so a script path containing spaces stays a single -f argument.
COMPOSE_FILE="$SCRIPT_DIR/docker-compose.yml"
if command -v docker-compose &> /dev/null; then
  COMPOSE_CMD=(docker-compose -f "$COMPOSE_FILE")
elif docker compose version &> /dev/null; then
  COMPOSE_CMD=(docker compose -f "$COMPOSE_FILE")
else
  echo "错误: 未找到 docker-compose 或 docker compose 命令"
  exit 1
fi

echo "=========================================="
echo " 停止 Argus Metrics 服务"
echo "=========================================="
echo ""

# Proceed only when "ps -q" lists at least one container
if [ -n "$("${COMPOSE_CMD[@]}" ps -q)" ]; then
  echo "停止所有服务..."
  "${COMPOSE_CMD[@]}" stop
  echo ""
  # read fails at end of input; treat that as "no" instead of letting
  # set -e abort the script.
  read -p "是否要删除容器? (y/N): " -n 1 -r || REPLY=""
  echo
  if [[ $REPLY =~ ^[Yy]$ ]]; then
    "${COMPOSE_CMD[@]}" down
    echo "容器已删除"
    read -p "是否要删除数据卷? (y/N): " -n 1 -r || REPLY=""
    echo
    if [[ $REPLY =~ ^[Yy]$ ]]; then
      "${COMPOSE_CMD[@]}" down -v
      echo "数据卷已删除"
    fi
  fi
else
  echo "没有运行的服务"
fi
echo ""
echo "完成!"

View File

@ -0,0 +1,629 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": 9,
"links": [],
"panels": [
{
"datasource": {
"type": "prometheus"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "Load",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 0
},
"id": 101,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"targets": [
{
"expr": "node_load1{hostname=\"$hostname\"}",
"legendFormat": "{{hostname}} load1",
"refId": "A"
},
{
"expr": "node_load5{hostname=\"$hostname\"}",
"legendFormat": "{{hostname}} load5",
"refId": "B"
},
{
"expr": "node_load15{hostname=\"$hostname\"}",
"legendFormat": "{{hostname}} load15",
"refId": "C"
}
],
"title": "System Load",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "percent"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 0
},
"id": 1,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"expr": "100 * (1 - avg by(hostname) (irate(node_cpu_seconds_total{mode=\"idle\",hostname=\"$hostname\"}[5m])))",
"legendFormat": "{{hostname}}",
"refId": "A"
}
],
"title": "CPU Usage",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "%",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 20,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 4,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "orange",
"value": 70
},
{
"color": "red",
"value": 90
}
]
},
"unit": "percent"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 8
},
"id": 5,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"expr": "100 * (1 - (node_memory_MemAvailable_bytes{hostname=\"$hostname\"} / node_memory_MemTotal_bytes{hostname=\"$hostname\"}))",
"legendFormat": "{{hostname}}",
"refId": "B"
}
],
"title": "Node Memory Usage",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "Bytes/s",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 20,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "Bps"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 8
},
"id": 6,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"targets": [
{
"expr": "sum by(hostname) (rate(node_disk_read_bytes_total{device!~\"^(loop|ram|sr0).*\",hostname=\"$hostname\"}[5m]))",
"legendFormat": "{{hostname}} read",
"refId": "A"
},
{
"expr": "sum by(hostname) (rate(node_disk_written_bytes_total{device!~\"^(loop|ram|sr0).*\",hostname=\"$hostname\"}[5m]))",
"legendFormat": "{{hostname}} write",
"refId": "B"
}
],
"title": "Node Disk I/O (Bytes/s)",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "Bytes/s",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "Bps"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 16
},
"id": 102,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"targets": [
{
"expr": "sum by(hostname)(rate(node_network_receive_bytes_total{device!~\"^(lo|docker.*)\",hostname=\"$hostname\"}[5m]))",
"legendFormat": "{{hostname}} RX",
"refId": "A"
},
{
"expr": "sum by(hostname)(rate(node_network_transmit_bytes_total{device!~\"^(lo|docker.*)\",hostname=\"$hostname\"}[5m]))",
"legendFormat": "{{hostname}} TX",
"refId": "B"
}
],
"title": "Network Traffic",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "Processes",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 4,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "orange",
"value": 200
},
{
"color": "red",
"value": 500
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 16
},
"id": 104,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"targets": [
{
"expr": "node_procs_running{hostname=\"$hostname\"}",
"legendFormat": "{{hostname}} Running",
"refId": "A"
},
{
"expr": "node_procs_blocked{hostname=\"$hostname\"}",
"legendFormat": "{{hostname}} Blocked",
"refId": "B"
}
],
"title": "Node Process Count",
"type": "timeseries"
}
],
"refresh": "15s",
"schemaVersion": 39,
"tags": [],
"templating": {
"list": [
{
"current": {
"selected": true,
"text": "node-exporter-A1",
"value": "node-exporter-A1"
},
"datasource": {
"type": "prometheus"
},
"definition": "label_values(node_cpu_seconds_total,hostname)",
"hide": 0,
"includeAll": false,
"label": "hostname",
"multi": false,
"name": "hostname",
"options": [],
"query": {
"qryType": 1,
"query": "label_values(node_cpu_seconds_total,hostname)",
"refId": "PrometheusVariableQueryEditor-VariableQuery"
},
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
}
]
},
"time": {
"from": "now-12h",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "Node and GPU Metrics",
"uid": "node_gpu_metrics",
"weekStart": ""
}