refactor: metric e2e测试流程融合到 sys/tests 步骤中(test-gpu-node/check-service-installed);
refs #29
This commit is contained in:
parent
835e81282f
commit
b0d451cbe7
@ -3,6 +3,7 @@ set -euo pipefail
|
|||||||
|
|
||||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
TEST_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
|
TEST_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
|
||||||
|
REPO_ROOT="$(cd "$TEST_ROOT/../../.." && pwd)"
|
||||||
|
|
||||||
compose() {
|
compose() {
|
||||||
if docker compose version >/dev/null 2>&1; then
|
if docker compose version >/dev/null 2>&1; then
|
||||||
@ -14,12 +15,19 @@ compose() {
|
|||||||
|
|
||||||
echo "[INFO] Bringing up system stack..."
|
echo "[INFO] Bringing up system stack..."
|
||||||
|
|
||||||
# 加载环境变量
|
# 检测GPU环境
|
||||||
if [ -f "$TEST_ROOT/.env" ]; then
|
echo "[INFO] 检测GPU环境..."
|
||||||
source "$TEST_ROOT/.env"
|
GPU_CHECK_SCRIPT="$REPO_ROOT/src/metric/tests/scripts/common/check-gpu.sh"
|
||||||
echo "[INFO] 已加载环境变量,GPU_AVAILABLE=$GPU_AVAILABLE"
|
if [ -f "$GPU_CHECK_SCRIPT" ]; then
|
||||||
|
if bash "$GPU_CHECK_SCRIPT" >/dev/null 2>&1; then
|
||||||
|
echo "[INFO] GPU环境可用,将启动GPU测试节点"
|
||||||
|
GPU_AVAILABLE=true
|
||||||
else
|
else
|
||||||
echo "[WARN] 未找到.env文件,默认GPU不可用"
|
echo "[INFO] GPU环境不可用,将跳过GPU测试节点"
|
||||||
|
GPU_AVAILABLE=false
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
echo "[WARN] 未找到GPU检测脚本: $GPU_CHECK_SCRIPT,跳过GPU检测"
|
||||||
GPU_AVAILABLE=false
|
GPU_AVAILABLE=false
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|||||||
@ -2,7 +2,8 @@
|
|||||||
set -e
|
set -e
|
||||||
|
|
||||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
COMMON_DIR="$SCRIPT_DIR/common"
|
TEST_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
|
||||||
|
REPO_ROOT="$(cd "$TEST_ROOT/../../.." && pwd)"
|
||||||
|
|
||||||
FTP_SERVER="${FTP_SERVER:-172.29.0.40}"
|
FTP_SERVER="${FTP_SERVER:-172.29.0.40}"
|
||||||
FTP_USER="${FTP_USER:-ftpuser}"
|
FTP_USER="${FTP_USER:-ftpuser}"
|
||||||
@ -13,7 +14,9 @@ FTP_HOST="${FTP_SERVER}"
|
|||||||
|
|
||||||
echo "[04] 检测GPU环境..."
|
echo "[04] 检测GPU环境..."
|
||||||
# 检测GPU环境
|
# 检测GPU环境
|
||||||
if bash "$COMMON_DIR/check-gpu.sh"; then
|
# The GPU probe lives in the metric module's test tree under src/; REPO_ROOT
# is the repository root, so the src/ prefix is required for the -f test
# below to find it (otherwise GPU node installation is always skipped).
GPU_CHECK_SCRIPT="$REPO_ROOT/src/metric/tests/scripts/common/check-gpu.sh"
|
||||||
|
if [ -f "$GPU_CHECK_SCRIPT" ]; then
|
||||||
|
if bash "$GPU_CHECK_SCRIPT"; then
|
||||||
echo "[04] GPU环境可用,继续执行GPU节点安装"
|
echo "[04] GPU环境可用,继续执行GPU节点安装"
|
||||||
GPU_AVAILABLE=true
|
GPU_AVAILABLE=true
|
||||||
else
|
else
|
||||||
@ -21,6 +24,11 @@ else
|
|||||||
GPU_AVAILABLE=false
|
GPU_AVAILABLE=false
|
||||||
exit 0
|
exit 0
|
||||||
fi
|
fi
|
||||||
|
else
|
||||||
|
echo "[04] 未找到GPU检测脚本: $GPU_CHECK_SCRIPT,跳过GPU节点安装"
|
||||||
|
GPU_AVAILABLE=false
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
echo "[04] 进入测试节点执行安装..."
|
echo "[04] 进入测试节点执行安装..."
|
||||||
echo "[04] 使用 FTP 地址: ${FTP_HOST}:${FTP_PORT}"
|
echo "[04] 使用 FTP 地址: ${FTP_HOST}:${FTP_PORT}"
|
||||||
|
|||||||
96
src/sys/tests/scripts/08_check_services_installed.sh
Executable file
96
src/sys/tests/scripts/08_check_services_installed.sh
Executable file
@ -0,0 +1,96 @@
|
|||||||
|
#!/bin/bash
#
# Verify that the monitoring services were installed in the metric test node.
#
# Runs two checks inside the container "argus-metric-test-node":
#   [1] at least one of the ports 9100 / 9400 / 2020 shows up as a listening
#       socket (via netstat, ss or lsof, whichever is available);
#   [2] each of the three ports answers an HTTP request on /metrics or /.
#
# Exit status: 0 when every check passes (or had to be skipped because the
# required tool is missing in the container), 1 when the container is not
# running or at least one check failed.
#
# NOTE(review): the "[04]" prefix in the messages does not match this script's
# 08_ file name; it is left unchanged in case logs are matched on it - confirm.
set -e

echo "[04] 验证安装结果 - 检查监控端口..."
echo "=========================================="

# Make sure the container is running before trying to exec into it.
if ! docker ps --format '{{.Names}}' | grep -q '^argus-metric-test-node$'; then
    echo "错误: 容器 argus-metric-test-node 未运行"
    exit 1
fi

ERRORS=0

# ==================== Check listening ports ====================
echo ""
echo "[1] 检查监听端口..."
echo "----------------------------------------"

# The in-container script exits 1 only when a tool was available and none of
# the ports was found; a missing tool exits 0 and is not treated as an error.
# The status is captured with "||" because, under "set -e", a failing command
# substitution in a plain assignment would otherwise abort the script right
# here, before the output and the failure summary are printed.
PORT_CHECK_STATUS=0
CHECK_RESULT=$(docker exec argus-metric-test-node bash -c '
    if command -v netstat >/dev/null 2>&1; then
        echo "使用 netstat 检查端口:"
        if netstat -tlnp 2>/dev/null | grep -E ":(9100|9400|2020)"; then
            echo "✓ 找到监控端口"
            exit 0
        else
            echo "✗ 未找到监控端口 (9100/9400/2020)"
            exit 1
        fi
    elif command -v ss >/dev/null 2>&1; then
        echo "使用 ss 检查端口:"
        if ss -tlnp 2>/dev/null | grep -E ":(9100|9400|2020)"; then
            echo "✓ 找到监控端口"
            exit 0
        else
            echo "✗ 未找到监控端口 (9100/9400/2020)"
            exit 1
        fi
    elif command -v lsof >/dev/null 2>&1; then
        echo "使用 lsof 检查端口:"
        if lsof -i :9100 -i :9400 -i :2020 2>/dev/null | grep LISTEN; then
            echo "✓ 找到监控端口"
            exit 0
        else
            echo "✗ 未找到监控端口 (9100/9400/2020)"
            exit 1
        fi
    else
        echo "? 没有可用的端口检查工具 (netstat/ss/lsof),跳过此检查"
        exit 0
    fi
') || PORT_CHECK_STATUS=$?
echo "$CHECK_RESULT"

# Any non-zero status (ports not found, or docker exec itself failing) counts
# as one error.
if [ "$PORT_CHECK_STATUS" -ne 0 ]; then
    ERRORS=$((ERRORS + 1))
fi

# ==================== Test port connectivity ====================
echo ""
echo "[2] 测试端口连通性..."
echo "----------------------------------------"

# The in-container script exits with the number of unreachable ports; any
# non-zero status is counted as a single error here. A missing curl skips the
# test and exits 0.
docker exec argus-metric-test-node bash -c '
    if command -v curl >/dev/null 2>&1; then
        FAILED=0
        for port in 9100 9400 2020; do
            echo -n "端口 $port: "
            if curl -s --connect-timeout 2 "http://localhost:$port/metrics" > /dev/null 2>&1; then
                echo "✓ 可访问 (/metrics)"
            elif curl -s --connect-timeout 2 "http://localhost:$port/" > /dev/null 2>&1; then
                echo "✓ 可访问 (根路径)"
            else
                echo "✗ 不可访问"
                FAILED=$((FAILED + 1))
            fi
        done
        exit "$FAILED"
    else
        echo "? curl 不可用,跳过连通性测试"
        exit 0
    fi
' || ERRORS=$((ERRORS + 1))

# ==================== Summary ====================
echo ""
echo "=========================================="
if [ "$ERRORS" -eq 0 ]; then
    echo "✓ [04] 验证完成 - 所有端口检查通过"
else
    echo "✗ [04] 验证失败 - 发现 $ERRORS 个问题"
    echo ""
    echo "调试建议:"
    echo "  1. 进入容器检查: docker exec -it argus-metric-test-node bash"
    echo "  2. 查看进程: docker exec argus-metric-test-node ps aux"
    echo "  3. 查看日志: docker exec argus-metric-test-node cat /tmp/argus_install.log"
    exit 1
fi
echo "=========================================="
|
||||||
Loading…
x
Reference in New Issue
Block a user