refactor: metric e2e测试流程融合到 sys/tests 步骤中(bootstrap/up/publish/installer);

refs #29
This commit is contained in:
sundapeng.sdp 2025-10-20 17:59:17 +08:00
parent c4582c99bc
commit 835e81282f
6 changed files with 379 additions and 5 deletions

View File

@ -1,7 +1,8 @@
version: "3.8" -version: "3.8"
networks: networks:
default: default:
external: true
name: argus-sys-net name: argus-sys-net
driver: bridge driver: bridge
ipam: ipam:
@ -137,3 +138,156 @@ services:
ports: ports:
- "2021:2020" - "2021:2020"
restart: unless-stopped restart: unless-stopped
# FTP service: runs the argus-metric-ftp image as container "argus-ftp".
ftp:
image: argus-metric-ftp:latest
container_name: argus-ftp
restart: unless-stopped
environment:
- TZ=Asia/Shanghai
# Matches the container-side target of the first volume below.
- FTP_BASE_PATH=/private/argus/ftp
# Password and domain fall back to the literal defaults when the variables are unset or empty.
- FTP_PASSWORD=${FTP_PASSWORD:-ZGClab1234!}
- DOMAIN=${FTP_DOMAIN:-ftp.metric.argus.com}
- ARGUS_BUILD_UID=${ARGUS_BUILD_UID:-2133}
- ARGUS_BUILD_GID=${ARGUS_BUILD_GID:-2015}
ports:
# Host-side ports are configurable; container-side ports are fixed at 21 and 20.
- "${FTP_PORT:-21}:21"
- "${FTP_DATA_PORT:-20}:20"
# Fixed range published 1:1 — presumably the FTP passive-mode data ports; confirm against the image config.
- "21100-21110:21100-21110"
volumes:
- ./private/argus/metric/ftp:/private/argus/ftp
- ./private/argus/etc:/private/argus/etc
# Host time settings are mounted read-only.
- /etc/localtime:/etc/localtime:ro
- /etc/timezone:/etc/timezone:ro
networks:
default:
# Static address on the default network.
ipv4_address: 172.29.0.40
logging:
driver: "json-file"
options:
# Log rotation: at most 3 files of 10m each.
max-size: "10m"
max-file: "3"
# Prometheus service: runs the argus-metric-prometheus image as container "argus-prometheus".
prometheus:
image: argus-metric-prometheus:latest
container_name: argus-prometheus
restart: unless-stopped
environment:
- TZ=Asia/Shanghai
# Matches the container-side target of the first volume below.
- PROMETHEUS_BASE_PATH=/private/argus/metric/prometheus
- ARGUS_BUILD_UID=${ARGUS_BUILD_UID:-2133}
- ARGUS_BUILD_GID=${ARGUS_BUILD_GID:-2015}
ports:
# Host-side port is configurable (default 9090); container-side port is fixed at 9090.
- "${PROMETHEUS_PORT:-9090}:9090"
volumes:
- ./private/argus/metric/prometheus:/private/argus/metric/prometheus
- ./private/argus/etc:/private/argus/etc
# Host time settings are mounted read-only.
- /etc/localtime:/etc/localtime:ro
- /etc/timezone:/etc/timezone:ro
networks:
default:
# Static address on the default network.
ipv4_address: 172.29.0.41
logging:
driver: "json-file"
options:
# Log rotation: at most 3 files of 10m each.
max-size: "10m"
max-file: "3"
# Grafana service: runs the argus-metric-grafana image as container "argus-grafana".
grafana:
image: argus-metric-grafana:latest
container_name: argus-grafana
restart: unless-stopped
environment:
- TZ=Asia/Shanghai
# Matches the container-side target of the first volume below.
- GRAFANA_BASE_PATH=/private/argus/metric/grafana
- ARGUS_BUILD_UID=${ARGUS_BUILD_UID:-2133}
- ARGUS_BUILD_GID=${ARGUS_BUILD_GID:-2015}
# Same port as the container side of the published port mapping below.
- GF_SERVER_HTTP_PORT=3000
- GF_LOG_LEVEL=warn
- GF_LOG_MODE=console
ports:
# Host-side port is configurable (default 3000); container-side port is fixed at 3000.
- "${GRAFANA_PORT:-3000}:3000"
volumes:
- ./private/argus/metric/grafana:/private/argus/metric/grafana
- ./private/argus/etc:/private/argus/etc
# Host time settings are mounted read-only.
- /etc/localtime:/etc/localtime:ro
- /etc/timezone:/etc/timezone:ro
networks:
default:
# Static address on the default network.
ipv4_address: 172.29.0.42
# Start order only: the prometheus container is started before this one.
depends_on:
- prometheus
logging:
driver: "json-file"
options:
# Log rotation: at most 3 files of 10m each.
max-size: "10m"
max-file: "3"
# CPU test node: an idle, privileged container used as an install target by the test scripts.
test-node:
image: argus-metric-test-node:latest
container_name: argus-metric-test-node
hostname: test-metric-node-001
restart: unless-stopped
privileged: true
# Start order only: ftp and prometheus are started before this container.
depends_on:
- ftp
- prometheus
environment:
- TZ=Asia/Shanghai
- DEBIAN_FRONTEND=noninteractive
# FTP connection settings; the FTP_SERVER default equals the ftp service's static address.
- FTP_DOMAIN=${FTP_DOMAIN:-ftp.metric.argus.com}
- FTP_SERVER=${FTP_SERVER:-172.29.0.40}
- FTP_USER=${FTP_USER:-ftpuser}
- FTP_PASSWORD=${FTP_PASSWORD:-ZGClab1234!}
- FTP_PORT=${FTP_PORT:-21}
volumes:
- ./private/argus/agent:/private/argus/agent
# Host time settings are mounted read-only.
- /etc/localtime:/etc/localtime:ro
- /etc/timezone:/etc/timezone:ro
# Keeps the container running with no workload; the installer script enters it via docker exec.
command: sleep infinity
networks:
default:
# Static address on the default network.
ipv4_address: 172.29.0.50
logging:
driver: "json-file"
options:
# Log rotation: at most 3 files of 10m each.
max-size: "10m"
max-file: "3"
# GPU test node: an idle, privileged container with NVIDIA GPU access, used as an install target.
test-gpu-node:
image: argus-metric-test-gpu-node:latest
container_name: argus-metric-test-gpu-node
hostname: test-metric-gpu-node-001
restart: unless-stopped
privileged: true
# GPU access is requested twice: via the nvidia runtime here and via the device reservation below.
runtime: nvidia
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: all
capabilities:
- gpu
# Start order only: ftp and prometheus are started before this container.
depends_on:
- ftp
- prometheus
# Unlike test-node, no FTP_* variables are set here.
environment:
- TZ=Asia/Shanghai
- DEBIAN_FRONTEND=noninteractive
- NVIDIA_VISIBLE_DEVICES=all
- NVIDIA_DRIVER_CAPABILITIES=compute,utility
- GPU_MODE=gpu
volumes:
- ./private/argus/agent:/private/argus/agent
# Host time settings are mounted read-only.
- /etc/localtime:/etc/localtime:ro
- /etc/timezone:/etc/timezone:ro
# Keeps the container running with no workload; the GPU installer script enters it via docker exec.
command: sleep infinity
networks:
default:
# Static address on the default network.
ipv4_address: 172.29.0.51
logging:
driver: "json-file"
options:
# Log rotation: at most 3 files of 10m each.
max-size: "10m"
max-file: "3"

View File

@ -27,17 +27,34 @@ mkdir -p \
"$PRIVATE_CORE/argus/bind" \ "$PRIVATE_CORE/argus/bind" \
"$PRIVATE_CORE/argus/master" \ "$PRIVATE_CORE/argus/master" \
"$PRIVATE_CORE/argus/metric/prometheus" \ "$PRIVATE_CORE/argus/metric/prometheus" \
"$PRIVATE_CORE/argus/metric/ftp/share" \
"$PRIVATE_CORE/argus/metric/grafana/data" \
"$PRIVATE_CORE/argus/metric/grafana/logs" \
"$PRIVATE_CORE/argus/metric/grafana/plugins" \
"$PRIVATE_CORE/argus/metric/grafana/provisioning/datasources" \
"$PRIVATE_CORE/argus/metric/grafana/provisioning/dashboards" \
"$PRIVATE_CORE/argus/metric/grafana/data/sessions" \
"$PRIVATE_CORE/argus/metric/grafana/data/dashboards" \
"$PRIVATE_CORE/argus/metric/grafana/config" \
"$PRIVATE_CORE/argus/metric/prometheus/data" \
"$PRIVATE_CORE/argus/metric/prometheus/rules" \
"$PRIVATE_CORE/argus/metric/prometheus/targets" \
"$PRIVATE_CORE/argus/agent" \
"$PRIVATE_CORE/argus/log/elasticsearch" \ "$PRIVATE_CORE/argus/log/elasticsearch" \
"$PRIVATE_CORE/argus/log/kibana" \ "$PRIVATE_CORE/argus/log/kibana" \
"$PRIVATE_NODEA/argus/agent/dev-yyrshare-nbnyx10-cp2f-pod-0/health" \ "$PRIVATE_NODEA/argus/agent/dev-yyrshare-nbnyx10-cp2f-pod-0/health" \
"$PRIVATE_NODEB/argus/agent/dev-yyrshare-uuuu10-ep2f-pod-0/health" \ "$PRIVATE_NODEB/argus/agent/dev-yyrshare-uuuu10-ep2f-pod-0/health" \
"$TMP_DIR" "$TMP_DIR"
# Align ownership for supervisor-managed services (ES/Kibana expect UID/GID inside container) # Align ownership for supervisor-managed services (ES/Kibana/Grafana expect UID/GID inside container)
echo "[INFO] Fixing ownership for core private directories..." echo "[INFO] Fixing ownership for core private directories..."
chown -R "${ARGUS_BUILD_UID}:${ARGUS_BUILD_GID}" \ chown -R "${ARGUS_BUILD_UID}:${ARGUS_BUILD_GID}" \
"$PRIVATE_CORE/argus/log/elasticsearch" \ "$PRIVATE_CORE/argus/log/elasticsearch" \
"$PRIVATE_CORE/argus/log/kibana" \ "$PRIVATE_CORE/argus/log/kibana" \
"$PRIVATE_CORE/argus/metric/grafana" \
"$PRIVATE_CORE/argus/metric/prometheus" \
"$PRIVATE_CORE/argus/metric/ftp" \
"$PRIVATE_CORE/argus/agent" \
"$PRIVATE_CORE/argus/etc" 2>/dev/null || true "$PRIVATE_CORE/argus/etc" 2>/dev/null || true
echo "[INFO] Distributing update-dns.sh for core services (bind/master/es/kibana)" echo "[INFO] Distributing update-dns.sh for core services (bind/master/es/kibana)"
@ -55,6 +72,11 @@ ensure_image "argus-elasticsearch:latest"
ensure_image "argus-kibana:latest" ensure_image "argus-kibana:latest"
ensure_image "argus-bind9:latest" ensure_image "argus-bind9:latest"
ensure_image "argus-master:latest" ensure_image "argus-master:latest"
ensure_image "argus-metric-ftp:latest"
ensure_image "argus-metric-prometheus:latest"
ensure_image "argus-metric-grafana:latest"
ensure_image "argus-metric-test-node:latest"
ensure_image "argus-metric-test-gpu-node:latest"
echo "[INFO] Building agent binary..." echo "[INFO] Building agent binary..."
pushd "$REPO_ROOT/src/agent" >/dev/null pushd "$REPO_ROOT/src/agent" >/dev/null
@ -68,10 +90,44 @@ if [[ ! -x "$AGENT_BIN" ]]; then
fi fi
echo "$AGENT_BIN" > "$TMP_DIR/agent_binary_path" echo "$AGENT_BIN" > "$TMP_DIR/agent_binary_path"
echo "[INFO] Writing .env with UID/GID" # 检测GPU环境
echo "[INFO] 检测GPU环境..."
GPU_CHECK_SCRIPT="$REPO_ROOT/src/metric/tests/scripts/common/check-gpu.sh"
# Start from "no GPU"; only a present check script that exits 0 flips the flag.
GPU_AVAILABLE=false
if [ ! -f "$GPU_CHECK_SCRIPT" ]; then
  # Missing check script is not fatal: warn and keep GPU disabled.
  echo "[WARN] 未找到GPU检测脚本: $GPU_CHECK_SCRIPT跳过GPU检测"
elif bash "$GPU_CHECK_SCRIPT" >/dev/null 2>&1; then
  # The check script's own output is discarded; only its exit status matters.
  echo "[INFO] GPU环境可用将启动test-gpu-node容器"
  GPU_AVAILABLE=true
else
  echo "[INFO] GPU环境不可用跳过test-gpu-node容器"
fi
echo "[INFO] Writing .env with UID/GID and metric configuration"
cat > "$TEST_ROOT/.env" <<EOF cat > "$TEST_ROOT/.env" <<EOF
ARGUS_BUILD_UID=$ARGUS_BUILD_UID ARGUS_BUILD_UID=$ARGUS_BUILD_UID
ARGUS_BUILD_GID=$ARGUS_BUILD_GID ARGUS_BUILD_GID=$ARGUS_BUILD_GID
# GPU 配置
GPU_AVAILABLE=$GPU_AVAILABLE
# FTP 配置
FTP_PORT=21
FTP_DATA_PORT=20
FTP_PASSWORD=ZGClab1234!
FTP_DOMAIN=ftp.metric.argus.com
# Prometheus 配置
PROMETHEUS_PORT=9090
# Grafana 配置
GRAFANA_PORT=3000
# 网络配置
USE_INTRANET=false
EOF EOF
echo "[OK] Bootstrap completed" echo "[OK] Bootstrap completed"

View File

@ -13,10 +13,33 @@ compose() {
} }
echo "[INFO] Bringing up system stack..." echo "[INFO] Bringing up system stack..."
# Load environment variables from the .env file next to the compose file.
# GPU_AVAILABLE decides later whether the test-gpu-node service is started.
if [ -f "$TEST_ROOT/.env" ]; then
  source "$TEST_ROOT/.env"
  # An .env that does not define GPU_AVAILABLE must not leave the variable
  # unset: default to "false" so later comparisons (and `set -u`) are safe.
  GPU_AVAILABLE="${GPU_AVAILABLE:-false}"
  echo "[INFO] 已加载环境变量GPU_AVAILABLE=$GPU_AVAILABLE"
else
  echo "[WARN] 未找到.env文件默认GPU不可用"
  GPU_AVAILABLE=false
fi
pushd "$TEST_ROOT" >/dev/null pushd "$TEST_ROOT" >/dev/null
compose -p argus-sys down --remove-orphans || true compose -p argus-sys down --remove-orphans || true
compose -p argus-sys up -d
# Pick the services to start based on GPU availability: without a GPU the
# test-gpu-node service is scaled to zero instances.
up_args=(up -d)
if [ "$GPU_AVAILABLE" = true ]; then
  echo "[INFO] 启动所有服务包括test-gpu-node..."
else
  echo "[INFO] 启动基础服务跳过test-gpu-node..."
  up_args+=(--scale test-gpu-node=0)
fi
compose -p argus-sys "${up_args[@]}"
popd >/dev/null popd >/dev/null
echo "[OK] Services started: master:32300 es:9200 kibana:5601 node-a:2020 node-b:2021" if [ "$GPU_AVAILABLE" = true ]; then
echo "[OK] Services started: master:32300 es:9200 kibana:5601 node-a:2020 node-b:2021 test-gpu-node:172.29.0.51"
else
echo "[OK] Services started: master:32300 es:9200 kibana:5601 node-a:2020 node-b:2021 (test-gpu-node skipped)"
fi

View File

@ -0,0 +1,61 @@
#!/bin/bash
# Step 02: bump the plugin version, build the all-in-one-full package, publish
# it into the FTP share directory, then normalize ownership and permissions.
#
# Environment (optionally loaded from $TEST_DIR/.env):
#   ARGUS_BUILD_UID / ARGUS_BUILD_GID  owner applied to published files (default 2133:2015)
#   DATA_ROOT                          base of the fallback output path used when the
#                                      argus-ftp container is not running
set -e

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
TEST_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
REPO_ROOT="$(cd "$TEST_DIR/../../.." && pwd)"
PLUGIN_DIR="$REPO_ROOT/src/metric/client-plugins/all-in-one-full"

# Load .env
if [ -f "$TEST_DIR/.env" ]; then
  source "$TEST_DIR/.env"
fi

# Resolve the output directory from the running container's bind mount.
if docker ps --format '{{.Names}}' | grep -q '^argus-ftp$'; then
  FTP_MOUNT=$(docker inspect argus-ftp --format '{{range .Mounts}}{{if eq .Destination "/private/argus/ftp"}}{{.Source}}{{end}}{{end}}')
  # Without this guard an empty result would make OUTPUT_DIR "/share" and the
  # recursive sudo chown/chmod below would run on a root-level path.
  if [ -z "$FTP_MOUNT" ]; then
    echo "[02] 错误: 未找到 argus-ftp 的 /private/argus/ftp 挂载目录" >&2
    exit 1
  fi
  OUTPUT_DIR="${FTP_MOUNT}/share"
  echo "[02] 容器挂载: $OUTPUT_DIR"
else
  # NOTE(review): the compose file mounts ./private/argus/metric/ftp into
  # argus-ftp, while this fallback resolves to <root>/ftp/share — confirm
  # which layout DATA_ROOT is meant to follow.
  OUTPUT_DIR="${DATA_ROOT:-$TEST_DIR/private}/ftp/share"
  echo "[02] 默认路径: $OUTPUT_DIR"
fi

OWNER="${ARGUS_BUILD_UID:-2133}:${ARGUS_BUILD_GID:-2015}"

cd "$PLUGIN_DIR"

echo "[02] 递增版本号..."
bash scripts/version-manager.sh bump minor

VERSION_FILE="config/VERSION"
if [ ! -f "$VERSION_FILE" ]; then
  echo "[02] 错误: 未找到 $VERSION_FILE"
  exit 1
fi

# Strip all whitespace (including the trailing newline) from the version string.
VERSION=$(tr -d '[:space:]' < "$VERSION_FILE")
if [ -z "$VERSION" ]; then
  echo "[02] 错误: $VERSION_FILE 为空" >&2
  exit 1
fi
echo "[02] 新版本: $VERSION"

echo "[02] 构建安装包..."
bash scripts/package_artifact.sh --force

echo "[02] 发布到 FTP: $OUTPUT_DIR"
sudo bash scripts/publish_artifact.sh "$VERSION" --output-dir "$OUTPUT_DIR" --owner "$OWNER"

echo "[02] 设置文件权限..."
# Set the owner
sudo chown -R "$OWNER" "$OUTPUT_DIR"
# Directories: 755 (rwxr-xr-x)
sudo find "$OUTPUT_DIR" -type d -exec chmod 755 {} \;
# Regular files: 644 (rw-r--r--)
sudo find "$OUTPUT_DIR" -type f -exec chmod 644 {} \;
# Shell scripts get the execute bit back: 755. Must run after the 644 pass.
sudo find "$OUTPUT_DIR" -type f -name "*.sh" -exec chmod 755 {} \;
echo "[02] 权限设置完成 (UID:GID=$OWNER, dirs=755, files=644, scripts=755)"

echo "[02] 发布完成,验证文件..."
ls -lh "$OUTPUT_DIR"
echo "[02] 完成"

View File

@ -0,0 +1,33 @@
#!/bin/bash
# Step 03: download setup.sh from the FTP server inside the argus-metric-test-node
# container and run it there.
#
# Environment (all optional):
#   FTP_SERVER    FTP host; the default is the static address the compose file
#                 assigns to the ftp service (172.29.0.40)
#   FTP_USER      FTP user name (default ftpuser)
#   FTP_PASSWORD  FTP password
#   FTP_PORT      FTP control port (default 21)
set -e

FTP_SERVER="${FTP_SERVER:-172.29.0.40}"
FTP_USER="${FTP_USER:-ftpuser}"
FTP_PASSWORD="${FTP_PASSWORD:-ZGClab1234!}"
FTP_PORT="${FTP_PORT:-21}"
FTP_HOST="${FTP_SERVER}"

echo "[03] 进入测试节点执行安装..."
echo "[03] 使用 FTP 地址: ${FTP_HOST}:${FTP_PORT}"

# The connection values are handed to the inner shell as positional arguments
# instead of being spliced into the script text, so credentials containing
# spaces or shell metacharacters are passed through verbatim.
docker exec argus-metric-test-node bash -c '
set -e
ftp_host=$1
ftp_user=$2
ftp_password=$3
ftp_port=$4

if ! command -v curl &>/dev/null; then
  echo "[03] curl 未安装,正在安装..."
  apt-get update && apt-get install -y curl
fi

cd /tmp
echo "[03] 下载 setup.sh..."
curl -u "${ftp_user}:${ftp_password}" "ftp://${ftp_host}:${ftp_port}/setup.sh" -o setup.sh

echo "[03] 执行安装..."
chmod +x setup.sh
bash setup.sh --server "${ftp_host}" --user "${ftp_user}" --password "${ftp_password}" --port "${ftp_port}"

echo "[03] 安装完成"
' _ "$FTP_HOST" "$FTP_USER" "$FTP_PASSWORD" "$FTP_PORT"

echo "[03] 完成"

View File

@ -0,0 +1,47 @@
#!/bin/bash
# Step 04: when a GPU is detected on the host, download setup.sh from the FTP
# server inside the argus-metric-test-gpu-node container and run it there.
# Exits 0 without doing anything when the GPU check fails.
#
# Environment (all optional):
#   FTP_SERVER    FTP host (default 172.29.0.40)
#   FTP_USER      FTP user name (default ftpuser)
#   FTP_PASSWORD  FTP password
#   FTP_PORT      FTP control port (default 21)
set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
COMMON_DIR="$SCRIPT_DIR/common"

FTP_SERVER="${FTP_SERVER:-172.29.0.40}"
FTP_USER="${FTP_USER:-ftpuser}"
FTP_PASSWORD="${FTP_PASSWORD:-ZGClab1234!}"
FTP_PORT="${FTP_PORT:-21}"
FTP_HOST="${FTP_SERVER}"

echo "[04] 检测GPU环境..."
# Any non-zero exit status from the check script is treated as "no GPU":
# the step is skipped and reported as success.
if bash "$COMMON_DIR/check-gpu.sh"; then
  echo "[04] GPU环境可用继续执行GPU节点安装"
else
  echo "[04] GPU环境不可用跳过GPU节点安装"
  exit 0
fi

echo "[04] 进入测试节点执行安装..."
echo "[04] 使用 FTP 地址: ${FTP_HOST}:${FTP_PORT}"

# The connection values are handed to the inner shell as positional arguments
# instead of being spliced into the script text, so credentials containing
# spaces or shell metacharacters are passed through verbatim.
docker exec argus-metric-test-gpu-node bash -c '
set -e
ftp_host=$1
ftp_user=$2
ftp_password=$3
ftp_port=$4

if ! command -v curl &>/dev/null; then
  echo "[04] curl 未安装,正在安装..."
  apt-get update && apt-get install -y curl
fi

cd /tmp
echo "[04] 下载 setup.sh..."
curl -u "${ftp_user}:${ftp_password}" "ftp://${ftp_host}:${ftp_port}/setup.sh" -o setup.sh

echo "[04] 执行安装..."
chmod +x setup.sh
bash setup.sh --server "${ftp_host}" --user "${ftp_user}" --password "${ftp_password}" --port "${ftp_port}"

echo "[04] 安装完成"
' _ "$FTP_HOST" "$FTP_USER" "$FTP_PASSWORD" "$FTP_PORT"

echo "[04] 完成"