diff --git a/src/sys/tests/docker-compose.yml b/src/sys/tests/docker-compose.yml
index 03b9f76..badf4ec 100644
--- a/src/sys/tests/docker-compose.yml
+++ b/src/sys/tests/docker-compose.yml
@@ -137,3 +137,156 @@ services:
     ports:
       - "2021:2020"
     restart: unless-stopped
+
+  ftp:
+    image: argus-metric-ftp:latest
+    container_name: argus-ftp
+    restart: unless-stopped
+    environment:
+      - TZ=Asia/Shanghai
+      - FTP_BASE_PATH=/private/argus/ftp
+      - FTP_PASSWORD=${FTP_PASSWORD:-ZGClab1234!}
+      - DOMAIN=${FTP_DOMAIN:-ftp.metric.argus.com}
+      - ARGUS_BUILD_UID=${ARGUS_BUILD_UID:-2133}
+      - ARGUS_BUILD_GID=${ARGUS_BUILD_GID:-2015}
+    ports:
+      - "${FTP_PORT:-21}:21"
+      - "${FTP_DATA_PORT:-20}:20"
+      - "21100-21110:21100-21110"
+    volumes:
+      - ./private/argus/metric/ftp:/private/argus/ftp
+      - ./private/argus/etc:/private/argus/etc
+      - /etc/localtime:/etc/localtime:ro
+      - /etc/timezone:/etc/timezone:ro
+    networks:
+      default:
+        ipv4_address: 172.29.0.40
+    logging:
+      driver: "json-file"
+      options:
+        max-size: "10m"
+        max-file: "3"
+
+  prometheus:
+    image: argus-metric-prometheus:latest
+    container_name: argus-prometheus
+    restart: unless-stopped
+    environment:
+      - TZ=Asia/Shanghai
+      - PROMETHEUS_BASE_PATH=/private/argus/metric/prometheus
+      - ARGUS_BUILD_UID=${ARGUS_BUILD_UID:-2133}
+      - ARGUS_BUILD_GID=${ARGUS_BUILD_GID:-2015}
+    ports:
+      - "${PROMETHEUS_PORT:-9090}:9090"
+    volumes:
+      - ./private/argus/metric/prometheus:/private/argus/metric/prometheus
+      - ./private/argus/etc:/private/argus/etc
+      - /etc/localtime:/etc/localtime:ro
+      - /etc/timezone:/etc/timezone:ro
+    networks:
+      default:
+        ipv4_address: 172.29.0.41
+    logging:
+      driver: "json-file"
+      options:
+        max-size: "10m"
+        max-file: "3"
+
+  grafana:
+    image: argus-metric-grafana:latest
+    container_name: argus-grafana
+    restart: unless-stopped
+    environment:
+      - TZ=Asia/Shanghai
+      - GRAFANA_BASE_PATH=/private/argus/metric/grafana
+      - ARGUS_BUILD_UID=${ARGUS_BUILD_UID:-2133}
+      - ARGUS_BUILD_GID=${ARGUS_BUILD_GID:-2015}
+      - GF_SERVER_HTTP_PORT=3000
+      - GF_LOG_LEVEL=warn
+      - GF_LOG_MODE=console
+    ports:
+      - "${GRAFANA_PORT:-3000}:3000"
+    volumes:
+      - ./private/argus/metric/grafana:/private/argus/metric/grafana
+      - ./private/argus/etc:/private/argus/etc
+      - /etc/localtime:/etc/localtime:ro
+      - /etc/timezone:/etc/timezone:ro
+    networks:
+      default:
+        ipv4_address: 172.29.0.42
+    depends_on:
+      - prometheus
+    logging:
+      driver: "json-file"
+      options:
+        max-size: "10m"
+        max-file: "3"
+
+  test-node:
+    image: argus-metric-test-node:latest
+    container_name: argus-metric-test-node
+    hostname: test-metric-node-001
+    restart: unless-stopped
+    privileged: true
+    depends_on:
+      - ftp
+      - prometheus
+    environment:
+      - TZ=Asia/Shanghai
+      - DEBIAN_FRONTEND=noninteractive
+      - FTP_DOMAIN=${FTP_DOMAIN:-ftp.metric.argus.com}
+      - FTP_SERVER=${FTP_SERVER:-172.29.0.40}
+      - FTP_USER=${FTP_USER:-ftpuser}
+      - FTP_PASSWORD=${FTP_PASSWORD:-ZGClab1234!}
+      - FTP_PORT=${FTP_PORT:-21}
+    volumes:
+      - ./private/argus/agent:/private/argus/agent
+      - /etc/localtime:/etc/localtime:ro
+      - /etc/timezone:/etc/timezone:ro
+    command: sleep infinity
+    networks:
+      default:
+        ipv4_address: 172.29.0.50
+    logging:
+      driver: "json-file"
+      options:
+        max-size: "10m"
+        max-file: "3"
+
+  test-gpu-node:
+    image: argus-metric-test-gpu-node:latest
+    container_name: argus-metric-test-gpu-node
+    hostname: test-metric-gpu-node-001
+    restart: unless-stopped
+    privileged: true
+    runtime: nvidia
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities:
+                - gpu
+    depends_on:
+      - ftp
+      - prometheus
+    environment:
+      - TZ=Asia/Shanghai
+      - DEBIAN_FRONTEND=noninteractive
+      - NVIDIA_VISIBLE_DEVICES=all
+      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
+      - GPU_MODE=gpu
+    volumes:
+      - ./private/argus/agent:/private/argus/agent
+      - /etc/localtime:/etc/localtime:ro
+      - /etc/timezone:/etc/timezone:ro
+    command: sleep infinity
+    networks:
+      default:
+        ipv4_address: 172.29.0.51
+    logging:
+      driver: "json-file"
+      options:
+        max-size: "10m"
+        max-file: "3"
diff --git a/src/sys/tests/scripts/01_bootstrap.sh b/src/sys/tests/scripts/01_bootstrap.sh
index e550a43..130eb63 100755
--- a/src/sys/tests/scripts/01_bootstrap.sh
+++ b/src/sys/tests/scripts/01_bootstrap.sh
@@ -27,17 +27,34 @@ mkdir -p \
   "$PRIVATE_CORE/argus/bind" \
   "$PRIVATE_CORE/argus/master" \
   "$PRIVATE_CORE/argus/metric/prometheus" \
+  "$PRIVATE_CORE/argus/metric/ftp/share" \
+  "$PRIVATE_CORE/argus/metric/grafana/data" \
+  "$PRIVATE_CORE/argus/metric/grafana/logs" \
+  "$PRIVATE_CORE/argus/metric/grafana/plugins" \
+  "$PRIVATE_CORE/argus/metric/grafana/provisioning/datasources" \
+  "$PRIVATE_CORE/argus/metric/grafana/provisioning/dashboards" \
+  "$PRIVATE_CORE/argus/metric/grafana/data/sessions" \
+  "$PRIVATE_CORE/argus/metric/grafana/data/dashboards" \
+  "$PRIVATE_CORE/argus/metric/grafana/config" \
+  "$PRIVATE_CORE/argus/metric/prometheus/data" \
+  "$PRIVATE_CORE/argus/metric/prometheus/rules" \
+  "$PRIVATE_CORE/argus/metric/prometheus/targets" \
+  "$PRIVATE_CORE/argus/agent" \
   "$PRIVATE_CORE/argus/log/elasticsearch" \
   "$PRIVATE_CORE/argus/log/kibana" \
   "$PRIVATE_NODEA/argus/agent/dev-yyrshare-nbnyx10-cp2f-pod-0/health" \
   "$PRIVATE_NODEB/argus/agent/dev-yyrshare-uuuu10-ep2f-pod-0/health" \
   "$TMP_DIR"
 
-# Align ownership for supervisor-managed services (ES/Kibana expect UID/GID inside container)
+# Align ownership for supervisor-managed services (ES/Kibana/Grafana expect UID/GID inside container)
 echo "[INFO] Fixing ownership for core private directories..."
 chown -R "${ARGUS_BUILD_UID}:${ARGUS_BUILD_GID}" \
   "$PRIVATE_CORE/argus/log/elasticsearch" \
   "$PRIVATE_CORE/argus/log/kibana" \
+  "$PRIVATE_CORE/argus/metric/grafana" \
+  "$PRIVATE_CORE/argus/metric/prometheus" \
+  "$PRIVATE_CORE/argus/metric/ftp" \
+  "$PRIVATE_CORE/argus/agent" \
   "$PRIVATE_CORE/argus/etc" 2>/dev/null || true
 
 echo "[INFO] Distributing update-dns.sh for core services (bind/master/es/kibana)"
@@ -55,6 +72,11 @@ ensure_image "argus-elasticsearch:latest"
 ensure_image "argus-kibana:latest"
 ensure_image "argus-bind9:latest"
 ensure_image "argus-master:latest"
+ensure_image "argus-metric-ftp:latest"
+ensure_image "argus-metric-prometheus:latest"
+ensure_image "argus-metric-grafana:latest"
+ensure_image "argus-metric-test-node:latest"
+ensure_image "argus-metric-test-gpu-node:latest"
 
 echo "[INFO] Building agent binary..."
 pushd "$REPO_ROOT/src/agent" >/dev/null
@@ -68,10 +90,44 @@ if [[ ! -x "$AGENT_BIN" ]]; then
 fi
 echo "$AGENT_BIN" > "$TMP_DIR/agent_binary_path"
 
-echo "[INFO] Writing .env with UID/GID"
+# Detect the GPU environment
+echo "[INFO] 检测GPU环境..."
+GPU_CHECK_SCRIPT="$REPO_ROOT/src/metric/tests/scripts/common/check-gpu.sh"
+if [ -f "$GPU_CHECK_SCRIPT" ]; then
+    if bash "$GPU_CHECK_SCRIPT" >/dev/null 2>&1; then
+        echo "[INFO] GPU环境可用,将启动test-gpu-node容器"
+        GPU_AVAILABLE=true
+    else
+        echo "[INFO] GPU环境不可用,跳过test-gpu-node容器"
+        GPU_AVAILABLE=false
+    fi
+else
+    echo "[WARN] 未找到GPU检测脚本: $GPU_CHECK_SCRIPT,跳过GPU检测"
+    GPU_AVAILABLE=false
+fi
+
+echo "[INFO] Writing .env with UID/GID and metric configuration"
 cat > "$TEST_ROOT/.env" </dev/null
 compose -p argus-sys down --remove-orphans || true
-compose -p argus-sys up -d
+
+# Choose which services to start based on GPU availability
+if [ "$GPU_AVAILABLE" = true ]; then
+    echo "[INFO] 启动所有服务(包括test-gpu-node)..."
+    compose -p argus-sys up -d
+else
+    echo "[INFO] 启动基础服务(跳过test-gpu-node)..."
+    compose -p argus-sys up -d --scale test-gpu-node=0
+fi
+
 popd >/dev/null
-echo "[OK] Services started: master:32300 es:9200 kibana:5601 node-a:2020 node-b:2021"
+if [ "$GPU_AVAILABLE" = true ]; then
+    echo "[OK] Services started: master:32300 es:9200 kibana:5601 node-a:2020 node-b:2021 test-gpu-node:172.29.0.51"
+else
+    echo "[OK] Services started: master:32300 es:9200 kibana:5601 node-a:2020 node-b:2021 (test-gpu-node skipped)"
+fi
diff --git a/src/sys/tests/scripts/05_publish_artifact.sh b/src/sys/tests/scripts/05_publish_artifact.sh
new file mode 100755
index 0000000..fd51850
--- /dev/null
+++ b/src/sys/tests/scripts/05_publish_artifact.sh
@@ -0,0 +1,61 @@
+#!/bin/bash
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+TEST_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
+REPO_ROOT="$(cd "$TEST_DIR/../../.." && pwd)"
+PLUGIN_DIR="$REPO_ROOT/src/metric/client-plugins/all-in-one-full"
+
+# Load .env
+if [ -f "$TEST_DIR/.env" ]; then
+    source "$TEST_DIR/.env"
+fi
+
+# Detect the host directory mounted into the FTP container
+if docker ps --format '{{.Names}}' | grep -q '^argus-ftp$'; then
+    FTP_MOUNT=$(docker inspect argus-ftp --format '{{range .Mounts}}{{if eq .Destination "/private/argus/ftp"}}{{.Source}}{{end}}{{end}}')
+    OUTPUT_DIR="${FTP_MOUNT}/share"
+    echo "[02] 容器挂载: $OUTPUT_DIR"
+else
+    OUTPUT_DIR="${DATA_ROOT:-$TEST_DIR/private}/ftp/share"
+    echo "[02] 默认路径: $OUTPUT_DIR"
+fi
+
+OWNER="${ARGUS_BUILD_UID:-2133}:${ARGUS_BUILD_GID:-2015}"
+
+cd "$PLUGIN_DIR"
+
+echo "[02] 递增版本号..."
+bash scripts/version-manager.sh bump minor
+
+VERSION_FILE="config/VERSION"
+if [ ! -f "$VERSION_FILE" ]; then
+    echo "[02] 错误: 未找到 $VERSION_FILE"
+    exit 1
+fi
+
+VERSION=$(tr -d '[:space:]' < "$VERSION_FILE")
+echo "[02] 新版本: $VERSION"
+
+echo "[02] 构建安装包..."
+bash scripts/package_artifact.sh --force
+
+echo "[02] 发布到 FTP: $OUTPUT_DIR"
+sudo bash scripts/publish_artifact.sh "$VERSION" --output-dir "$OUTPUT_DIR" --owner "$OWNER"
+
+echo "[02] 设置文件权限..."
+# Set ownership
+sudo chown -R "$OWNER" "$OUTPUT_DIR"
+# Set directory permissions to 755 (rwxr-xr-x)
+sudo find "$OUTPUT_DIR" -type d -exec chmod 755 {} \;
+# Set file permissions to 644 (rw-r--r--)
+sudo find "$OUTPUT_DIR" -type f -exec chmod 644 {} \;
+# Special-case .sh files: make them executable (755)
+sudo find "$OUTPUT_DIR" -type f -name "*.sh" -exec chmod 755 {} \;
+echo "[02] 权限设置完成 (UID:GID=$OWNER, dirs=755, files=644, scripts=755)"
+
+echo "[02] 发布完成,验证文件..."
+ls -lh "$OUTPUT_DIR"
+
+echo "[02] 完成"
+
diff --git a/src/sys/tests/scripts/06_test_node_install.sh b/src/sys/tests/scripts/06_test_node_install.sh
new file mode 100755
index 0000000..af8200f
--- /dev/null
+++ b/src/sys/tests/scripts/06_test_node_install.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+set -e
+
+# Default must match the static address of the ftp service in docker-compose.yml (172.29.0.40).
+FTP_SERVER="${FTP_SERVER:-172.29.0.40}"
+FTP_USER="${FTP_USER:-ftpuser}"
+FTP_PASSWORD="${FTP_PASSWORD:-ZGClab1234!}"
+FTP_PORT="${FTP_PORT:-21}"
+
+FTP_HOST="${FTP_SERVER}"
+
+echo "[03] 进入测试节点执行安装..."
+echo "[03] 使用 FTP 地址: ${FTP_HOST}:${FTP_PORT}"
+
+docker exec argus-metric-test-node bash -c "
+set -e
+
+if ! command -v curl &>/dev/null; then
+    echo '[03] curl 未安装,正在安装...'
+    apt-get update && apt-get install -y curl
+fi
+
+cd /tmp
+echo '[03] 下载 setup.sh...'
+curl -u '${FTP_USER}:${FTP_PASSWORD}' ftp://${FTP_HOST}:${FTP_PORT}/setup.sh -o setup.sh
+
+echo '[03] 执行安装...'
+chmod +x setup.sh
+bash setup.sh --server ${FTP_HOST} --user ${FTP_USER} --password '${FTP_PASSWORD}' --port ${FTP_PORT}
+
+echo '[03] 安装完成'
+"
+
+echo "[03] 完成"
diff --git a/src/sys/tests/scripts/07_test_gpu_node_install.sh b/src/sys/tests/scripts/07_test_gpu_node_install.sh
new file mode 100755
index 0000000..e8fa4e3
--- /dev/null
+++ b/src/sys/tests/scripts/07_test_gpu_node_install.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+COMMON_DIR="$SCRIPT_DIR/common"
+
+FTP_SERVER="${FTP_SERVER:-172.29.0.40}"
+FTP_USER="${FTP_USER:-ftpuser}"
+FTP_PASSWORD="${FTP_PASSWORD:-ZGClab1234!}"
+FTP_PORT="${FTP_PORT:-21}"
+
+FTP_HOST="${FTP_SERVER}"
+
+echo "[04] 检测GPU环境..."
+# Detect the GPU environment; exit successfully (skip) when no GPU is available
+if bash "$COMMON_DIR/check-gpu.sh"; then
+    echo "[04] GPU环境可用,继续执行GPU节点安装"
+    GPU_AVAILABLE=true
+else
+    echo "[04] GPU环境不可用,跳过GPU节点安装"
+    GPU_AVAILABLE=false
+    exit 0
+fi
+
+echo "[04] 进入测试节点执行安装..."
+echo "[04] 使用 FTP 地址: ${FTP_HOST}:${FTP_PORT}"
+
+docker exec argus-metric-test-gpu-node bash -c "
+set -e
+
+if ! command -v curl &>/dev/null; then
+    echo '[04] curl 未安装,正在安装...'
+    apt-get update && apt-get install -y curl
+fi
+
+cd /tmp
+echo '[04] 下载 setup.sh...'
+curl -u '${FTP_USER}:${FTP_PASSWORD}' ftp://${FTP_HOST}:${FTP_PORT}/setup.sh -o setup.sh
+
+echo '[04] 执行安装...'
+chmod +x setup.sh
+bash setup.sh --server ${FTP_HOST} --user ${FTP_USER} --password '${FTP_PASSWORD}' --port ${FTP_PORT}
+
+echo '[04] 安装完成'
+"
+
+echo "[04] 完成"