完成a6000测试系统构建、部署、测试整合 #35

Merged
yuyr merged 18 commits from dev_1.0.0_yuyr_5 into dev_1.0.0 2025-10-29 10:04:29 +08:00
7 changed files with 122 additions and 10 deletions
Showing only changes of commit b0d451cbe7 - Show all commits

View File

@ -3,6 +3,7 @@ set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
TEST_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" TEST_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
REPO_ROOT="$(cd "$TEST_ROOT/../../.." && pwd)"
compose() { compose() {
if docker compose version >/dev/null 2>&1; then if docker compose version >/dev/null 2>&1; then
@ -14,12 +15,19 @@ compose() {
echo "[INFO] Bringing up system stack..." echo "[INFO] Bringing up system stack..."
# 加载环境变量 # 检测GPU环境
if [ -f "$TEST_ROOT/.env" ]; then echo "[INFO] 检测GPU环境..."
source "$TEST_ROOT/.env" GPU_CHECK_SCRIPT="$REPO_ROOT/src/metric/tests/scripts/common/check-gpu.sh"
echo "[INFO] 已加载环境变量GPU_AVAILABLE=$GPU_AVAILABLE" if [ -f "$GPU_CHECK_SCRIPT" ]; then
if bash "$GPU_CHECK_SCRIPT" >/dev/null 2>&1; then
echo "[INFO] GPU环境可用将启动GPU测试节点"
GPU_AVAILABLE=true
else else
echo "[WARN] 未找到.env文件默认GPU不可用" echo "[INFO] GPU环境不可用将跳过GPU测试节点"
GPU_AVAILABLE=false
fi
else
echo "[WARN] 未找到GPU检测脚本: $GPU_CHECK_SCRIPT跳过GPU检测"
GPU_AVAILABLE=false GPU_AVAILABLE=false
fi fi

View File

@ -2,7 +2,8 @@
set -e set -e
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
COMMON_DIR="$SCRIPT_DIR/common" TEST_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
REPO_ROOT="$(cd "$TEST_ROOT/../../.." && pwd)"
FTP_SERVER="${FTP_SERVER:-172.29.0.40}" FTP_SERVER="${FTP_SERVER:-172.29.0.40}"
FTP_USER="${FTP_USER:-ftpuser}" FTP_USER="${FTP_USER:-ftpuser}"
@ -13,7 +14,9 @@ FTP_HOST="${FTP_SERVER}"
echo "[04] 检测GPU环境..." echo "[04] 检测GPU环境..."
# 检测GPU环境 # 检测GPU环境
if bash "$COMMON_DIR/check-gpu.sh"; then GPU_CHECK_SCRIPT="$REPO_ROOT/metric/tests/scripts/common/check-gpu.sh"
if [ -f "$GPU_CHECK_SCRIPT" ]; then
if bash "$GPU_CHECK_SCRIPT"; then
echo "[04] GPU环境可用继续执行GPU节点安装" echo "[04] GPU环境可用继续执行GPU节点安装"
GPU_AVAILABLE=true GPU_AVAILABLE=true
else else
@ -21,6 +24,11 @@ else
GPU_AVAILABLE=false GPU_AVAILABLE=false
exit 0 exit 0
fi fi
else
echo "[04] 未找到GPU检测脚本: $GPU_CHECK_SCRIPT跳过GPU节点安装"
GPU_AVAILABLE=false
exit 0
fi
echo "[04] 进入测试节点执行安装..." echo "[04] 进入测试节点执行安装..."
echo "[04] 使用 FTP 地址: ${FTP_HOST}:${FTP_PORT}" echo "[04] 使用 FTP 地址: ${FTP_HOST}:${FTP_PORT}"

View File

@ -0,0 +1,96 @@
#!/bin/bash
# Verify the installation result inside the argus-metric-test-node container.
#
# Checks performed:
#   [1] At least one of the monitoring ports 9100/9400/2020 shows up as a
#       listening socket (via netstat, ss or lsof, whichever is available
#       inside the container). If none of the tools exists the check is
#       skipped and not counted as a failure.
#   [2] Each of the ports 9100, 9400 and 2020 answers an HTTP request on
#       /metrics or on / (via curl inside the container). If curl is missing
#       the check is skipped and not counted as a failure.
#
# Exit status:
#   0  every executed check passed
#   1  the container is not running, or at least one check failed
set -e

CONTAINER="argus-metric-test-node"

echo "[04] 验证安装结果 - 检查监控端口..."
echo "=========================================="

# Make sure the container is running before trying to exec into it.
# -F/-x: compare the whole line against the literal container name.
if ! docker ps --format '{{.Names}}' | grep -Fxq -- "$CONTAINER"; then
  echo "错误: 容器 $CONTAINER 未运行"
  exit 1
fi

ERRORS=0

# ==================== Check listening ports ====================
echo ""
echo "[1] 检查监听端口..."
echo "----------------------------------------"

# The in-container script exits 1 when no monitoring port is listening.
# Under `set -e` a plain `VAR=$(cmd)` assignment aborts the whole script when
# cmd fails, which would skip the report below. Capture the status with
# `|| ...` so the failure is recorded and execution continues.
PORT_CHECK_RC=0
CHECK_RESULT=$(docker exec "$CONTAINER" bash -c '
  if command -v netstat >/dev/null 2>&1; then
    echo "使用 netstat 检查端口:"
    if netstat -tlnp 2>/dev/null | grep -E ":(9100|9400|2020)"; then
      echo "✓ 找到监控端口"
      exit 0
    else
      echo "✗ 未找到监控端口 (9100/9400/2020)"
      exit 1
    fi
  elif command -v ss >/dev/null 2>&1; then
    echo "使用 ss 检查端口:"
    if ss -tlnp 2>/dev/null | grep -E ":(9100|9400|2020)"; then
      echo "✓ 找到监控端口"
      exit 0
    else
      echo "✗ 未找到监控端口 (9100/9400/2020)"
      exit 1
    fi
  elif command -v lsof >/dev/null 2>&1; then
    echo "使用 lsof 检查端口:"
    if lsof -i :9100 -i :9400 -i :2020 2>/dev/null | grep LISTEN; then
      echo "✓ 找到监控端口"
      exit 0
    else
      echo "✗ 未找到监控端口 (9100/9400/2020)"
      exit 1
    fi
  else
    echo "? 没有可用的端口检查工具 (netstat/ss/lsof),跳过此检查"
    exit 0
  fi
') || PORT_CHECK_RC=$?

echo "$CHECK_RESULT"

# Count an error when the check failed: either the in-container script (or
# docker exec itself) returned non-zero, or the explicit failure marker was
# printed. The "no tool available" branch exits 0 and is not an error.
if [ "$PORT_CHECK_RC" -ne 0 ] || grep -q "✗ 未找到监控端口" <<<"$CHECK_RESULT"; then
  ERRORS=$((ERRORS + 1))
fi

# ==================== Test port connectivity ====================
echo ""
echo "[2] 测试端口连通性..."
echo "----------------------------------------"

# The in-container script exits with the number of unreachable ports; any
# non-zero status is counted as a single error here.
docker exec "$CONTAINER" bash -c '
  if command -v curl >/dev/null 2>&1; then
    FAILED=0
    for port in 9100 9400 2020; do
      echo -n "端口 $port: "
      if curl -s --connect-timeout 2 "http://localhost:$port/metrics" > /dev/null 2>&1; then
        echo "✓ 可访问 (/metrics)"
      elif curl -s --connect-timeout 2 "http://localhost:$port/" > /dev/null 2>&1; then
        echo "✓ 可访问 (根路径)"
      else
        echo "✗ 不可访问"
        FAILED=$((FAILED + 1))
      fi
    done
    exit $FAILED
  else
    echo "? curl 不可用,跳过连通性测试"
    exit 0
  fi
' || ERRORS=$((ERRORS + 1))

# ==================== Summary ====================
echo ""
echo "=========================================="
if [ "$ERRORS" -eq 0 ]; then
  echo "✓ [04] 验证完成 - 所有端口检查通过"
else
  echo "✗ [04] 验证失败 - 发现 $ERRORS 个问题"
  echo ""
  echo "调试建议:"
  echo " 1. 进入容器检查: docker exec -it $CONTAINER bash"
  echo " 2. 查看进程: docker exec $CONTAINER ps aux"
  echo " 3. 查看日志: docker exec $CONTAINER cat /tmp/argus_install.log"
  exit 1
fi
echo "=========================================="