完成a6000测试系统构建、部署、测试整合 #35

Merged
yuyr merged 18 commits from dev_1.0.0_yuyr_5 into dev_1.0.0 2025-10-29 10:04:29 +08:00
3 changed files with 116 additions and 14 deletions
Showing only changes of commit c4582c99bc - Show all commits

View File

@ -1,6 +1,9 @@
#!/bin/bash #!/bin/bash
set -e set -e
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
COMMON_DIR="$SCRIPT_DIR/common"
FTP_SERVER="${FTP_SERVER:-172.30.0.40}" FTP_SERVER="${FTP_SERVER:-172.30.0.40}"
FTP_USER="${FTP_USER:-ftpuser}" FTP_USER="${FTP_USER:-ftpuser}"
FTP_PASSWORD="${FTP_PASSWORD:-ZGClab1234!}" FTP_PASSWORD="${FTP_PASSWORD:-ZGClab1234!}"
@ -8,26 +11,37 @@ FTP_PORT="${FTP_PORT:-21}"
FTP_HOST="${FTP_SERVER}" FTP_HOST="${FTP_SERVER}"
echo "[03] 进入测试节点执行安装..." echo "[04] 检测GPU环境..."
echo "[03] 使用 FTP 地址: ${FTP_HOST}:${FTP_PORT}" # 检测GPU环境
if bash "$COMMON_DIR/check-gpu.sh"; then
echo "[04] GPU环境可用继续执行GPU节点安装"
GPU_AVAILABLE=true
else
echo "[04] GPU环境不可用跳过GPU节点安装"
GPU_AVAILABLE=false
exit 0
fi
echo "[04] 进入测试节点执行安装..."
echo "[04] 使用 FTP 地址: ${FTP_HOST}:${FTP_PORT}"
docker exec argus-metric-test-gpu-node bash -c " docker exec argus-metric-test-gpu-node bash -c "
set -e set -e
if ! command -v curl &>/dev/null; then if ! command -v curl &>/dev/null; then
echo '[03] curl 未安装,正在安装...' echo '[04] curl 未安装,正在安装...'
apt-get update && apt-get install -y curl apt-get update && apt-get install -y curl
fi fi
cd /tmp cd /tmp
echo '[03] 下载 setup.sh...' echo '[04] 下载 setup.sh...'
curl -u ${FTP_USER}:${FTP_PASSWORD} ftp://${FTP_HOST}:${FTP_PORT}/setup.sh -o setup.sh curl -u ${FTP_USER}:${FTP_PASSWORD} ftp://${FTP_HOST}:${FTP_PORT}/setup.sh -o setup.sh
echo '[03] 执行安装...' echo '[04] 执行安装...'
chmod +x setup.sh chmod +x setup.sh
bash setup.sh --server ${FTP_HOST} --user ${FTP_USER} --password '${FTP_PASSWORD}' --port ${FTP_PORT} bash setup.sh --server ${FTP_HOST} --user ${FTP_USER} --password '${FTP_PASSWORD}' --port ${FTP_PORT}
echo '[03] 安装完成' echo '[04] 安装完成'
" "
echo "[03] 完成" echo "[04] 完成"

View File

@ -0,0 +1,59 @@
#!/bin/bash
# GPU environment detection script.
# Checks whether the system has NVIDIA GPU hardware.
# Abort on the first unhandled command failure.
set -e
# Probe the host for an NVIDIA GPU.
# Three checks are tried in order and the first one that succeeds wins:
#   1. device nodes matching /dev/nvidia*
#   2. an lspci entry mentioning "nvidia" (case-insensitive), if lspci exists
#   3. a successful nvidia-smi run, if nvidia-smi exists
# Outputs: a progress line followed by one result line on stdout.
# Returns: 0 when any check succeeds, 1 otherwise.
check_gpu_support() {
    local hit=""

    echo "检测GPU环境..."

    if ls /dev/nvidia* >/dev/null 2>&1; then
        # Check 1: NVIDIA device files are present.
        hit="✓ 检测到NVIDIA GPU设备文件"
    elif command -v lspci >/dev/null 2>&1 && lspci | grep -i nvidia >/dev/null 2>&1; then
        # Check 2: lspci lists an NVIDIA device.
        hit="✓ 检测到NVIDIA GPU硬件"
    elif command -v nvidia-smi >/dev/null 2>&1 && nvidia-smi >/dev/null 2>&1; then
        # Check 3: nvidia-smi runs successfully.
        hit="✓ 检测到NVIDIA GPU硬件"
    fi

    if [ -z "$hit" ]; then
        echo "✗ 未检测到NVIDIA GPU硬件"
        return 1
    fi

    echo "$hit"
    return 0
}
# Entry point: print a banner, run check_gpu_support, print the verdict.
# Arguments: none are read.
# Outputs: banner, any check_gpu_support output, and a result line on stdout.
# Exits (does not return): 0 when a GPU was detected, 1 otherwise.
main() {
    local rule="=========================================="
    local verdict
    local rc

    printf '%s\n' "$rule" " GPU环境检测" "$rule" ""

    if check_gpu_support; then
        verdict="结果: GPU环境可用"
        rc=0
    else
        verdict="结果: GPU环境不可用将跳过GPU相关服务"
        rc=1
    fi

    # A blank separator line, then the verdict.
    printf '\n%s\n' "$verdict"
    exit "$rc"
}
# Run main only when this script is executed directly, i.e. when the first
# BASH_SOURCE entry equals $0; otherwise nothing further is executed here.
if [ "${BASH_SOURCE[0]}" = "${0}" ]; then
main "$@"
fi

View File

@ -59,18 +59,40 @@ echo "1. 初始化目录结构..."
bash "$SCRIPT_DIR/init-directories.sh" bash "$SCRIPT_DIR/init-directories.sh"
echo "" echo ""
echo "2. 检查 Docker 镜像..." echo "2. 检测GPU环境..."
# 检测GPU环境
if bash "$SCRIPT_DIR/check-gpu.sh"; then
echo "GPU环境可用将启动GPU节点"
GPU_AVAILABLE=true
else
echo "GPU环境不可用跳过GPU节点"
GPU_AVAILABLE=false
fi
echo ""
echo "3. 检查 Docker 镜像..."
# 检查必要的镜像是否存在 # 检查必要的镜像是否存在
IMAGES=("argus-metric-ftp:latest" "argus-metric-prometheus:latest" "argus-metric-grafana:latest" "argus-metric-test-node:latest" "argus-metric-test-gpu-node:latest") BASE_IMAGES=("argus-metric-ftp:latest" "argus-metric-prometheus:latest" "argus-metric-grafana:latest" "argus-metric-test-node:latest")
missing_images=() GPU_IMAGES=("argus-metric-test-gpu-node:latest")
for image in "${IMAGES[@]}"; do # 先检查基础镜像
missing_images=()
for image in "${BASE_IMAGES[@]}"; do
if ! docker images --format "{{.Repository}}:{{.Tag}}" | grep -q "^${image}$"; then if ! docker images --format "{{.Repository}}:{{.Tag}}" | grep -q "^${image}$"; then
missing_images+=("$image") missing_images+=("$image")
fi fi
done done
# 检查GPU镜像如果GPU环境可用
if [ "$GPU_AVAILABLE" = true ]; then
for image in "${GPU_IMAGES[@]}"; do
if ! docker images --format "{{.Repository}}:{{.Tag}}" | grep -q "^${image}$"; then
missing_images+=("$image")
fi
done
fi
if [ ${#missing_images[@]} -gt 0 ]; then if [ ${#missing_images[@]} -gt 0 ]; then
echo "以下镜像缺失,请先运行 build/build_images.sh 构建镜像:" echo "以下镜像缺失,请先运行 build/build_images.sh 构建镜像:"
for image in "${missing_images[@]}"; do for image in "${missing_images[@]}"; do
@ -85,10 +107,17 @@ else
fi fi
echo "" echo ""
echo "3. 启动基础服务..." echo "4. 启动基础服务..."
cd "$TEST_DIR" cd "$TEST_DIR"
# 启动除GPU节点外的所有服务
docker compose up -d ftp prometheus grafana test-node test-gpu-node # 根据GPU环境决定启动的服务
if [ "$GPU_AVAILABLE" = true ]; then
echo "启动所有服务包括GPU节点..."
docker compose up -d ftp prometheus grafana test-node test-gpu-node
else
echo "启动基础服务跳过GPU节点..."
docker compose up -d ftp prometheus grafana test-node
fi
echo "" echo ""
echo "4. 等待服务启动..." echo "4. 等待服务启动..."