diff --git a/deployment_new/build/make_client_gpu_package.sh b/deployment_new/build/make_client_gpu_package.sh index 9e2d5ac..fc9b480 100755 --- a/deployment_new/build/make_client_gpu_package.sh +++ b/deployment_new/build/make_client_gpu_package.sh @@ -73,6 +73,9 @@ PKG_VERSION=$VERSION NODE_GPU_BUNDLE_IMAGE_TAG=${REPO}:${VERSION} +# Compose project name (isolation from server stack) +COMPOSE_PROJECT_NAME=argus-client + # Required (no defaults). Must be filled before install. AGENT_ENV= AGENT_USER= diff --git a/deployment_new/build/make_server_package.sh b/deployment_new/build/make_server_package.sh index 16d81f9..a29bffc 100755 --- a/deployment_new/build/make_server_package.sh +++ b/deployment_new/build/make_server_package.sh @@ -113,6 +113,9 @@ FTP_PASSWORD=NASPlab1234! # UID/GID for volume ownership ARGUS_BUILD_UID=2133 ARGUS_BUILD_GID=2015 + +# Compose project name (isolation from other stacks on same host) +COMPOSE_PROJECT_NAME=argus-server EOF # 3) Docs (from deployment_new templates) diff --git a/deployment_new/templates/client_gpu/scripts/config.sh b/deployment_new/templates/client_gpu/scripts/config.sh index 773e29c..dff103e 100644 --- a/deployment_new/templates/client_gpu/scripts/config.sh +++ b/deployment_new/templates/client_gpu/scripts/config.sh @@ -9,7 +9,14 @@ ENV_OUT="$PKG_ROOT/compose/.env" info(){ echo -e "\033[34m[CONFIG-GPU]\033[0m $*"; } err(){ echo -e "\033[31m[ERROR]\033[0m $*" >&2; } require(){ local ok=1; for c in "$@"; do command -v "$c" >/dev/null 2>&1 || { err "缺少依赖: $c"; ok=0; }; done; [[ $ok -eq 1 ]]; } -require docker curl jq awk sed tar gzip nvidia-smi +# Compose 检测:优先 docker compose(v2),回退 docker-compose(v1) +require_compose(){ + if docker compose version >/dev/null 2>&1; then return 0; fi + if command -v docker-compose >/dev/null 2>&1 && docker-compose version >/dev/null 2>&1; then return 0; fi + err "未检测到 Docker Compose,请安装 docker compose v2 或 docker-compose v1"; exit 1 +} +require docker curl jq awk sed tar gzip +require_compose # 磁盘空间检查(MB) check_disk(){ local p="$1"; local need=10240; local free diff --git a/deployment_new/templates/client_gpu/scripts/install.sh b/deployment_new/templates/client_gpu/scripts/install.sh index dd7b66e..e66cdad 100644 --- a/deployment_new/templates/client_gpu/scripts/install.sh +++ b/deployment_new/templates/client_gpu/scripts/install.sh @@ -9,7 +9,14 @@ COMPOSE_FILE="$PKG_ROOT/compose/docker-compose.yml" info(){ echo -e "\033[34m[INSTALL-GPU]\033[0m $*"; } err(){ echo -e "\033[31m[ERROR]\033[0m $*" >&2; } require(){ local ok=1; for c in "$@"; do command -v "$c" >/dev/null 2>&1 || { err "缺少依赖: $c"; ok=0; }; done; [[ $ok -eq 1 ]]; } -require docker docker compose nvidia-smi +# Compose 检测:优先 docker compose(v2),回退 docker-compose(v1) +require_compose(){ + if docker compose version >/dev/null 2>&1; then return 0; fi + if command -v docker-compose >/dev/null 2>&1 && docker-compose version >/dev/null 2>&1; then return 0; fi + err "未检测到 Docker Compose,请安装 docker compose v2 或 docker-compose v1"; exit 1 +} +require docker nvidia-smi +require_compose [[ -f "$ENV_FILE" ]] || { err "缺少 compose/.env,请先运行 scripts/config.sh"; exit 1; } info "使用环境文件: $ENV_FILE" @@ -52,9 +59,10 @@ info "日志目录已准备并赋权 1777: logs/infer logs/train" stat -c '%a %U:%G %n' "$PKG_ROOT/logs/infer" "$PKG_ROOT/logs/train" 2>/dev/null || true # 启动 compose 并跟踪日志 -info "启动 GPU 节点 (docker compose up -d)" -docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" up -d -docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" ps +PROJECT="${COMPOSE_PROJECT_NAME:-argus-client}" +info "启动 GPU 节点 (docker compose -p $PROJECT up -d)" +docker compose -p "$PROJECT" --env-file "$ENV_FILE" -f "$COMPOSE_FILE" up -d +docker compose -p "$PROJECT" --env-file "$ENV_FILE" -f "$COMPOSE_FILE" ps # 再次校准宿主日志目录权限,避免容器内脚本对 bind mount 权限回退 chmod 1777 "$PKG_ROOT/logs/infer" "$PKG_ROOT/logs/train" || true diff --git a/deployment_new/templates/client_gpu/scripts/uninstall.sh b/deployment_new/templates/client_gpu/scripts/uninstall.sh index 2be7d6d..ff4c8d8 100644 --- a/deployment_new/templates/client_gpu/scripts/uninstall.sh +++ b/deployment_new/templates/client_gpu/scripts/uninstall.sh @@ -6,11 +6,23 @@ PKG_ROOT="$ROOT_DIR" ENV_FILE="$PKG_ROOT/compose/.env" COMPOSE_FILE="$PKG_ROOT/compose/docker-compose.yml" +# load COMPOSE_PROJECT_NAME if provided in compose/.env +if [[ -f "$ENV_FILE" ]]; then set -a; source "$ENV_FILE"; set +a; fi +PROJECT="${COMPOSE_PROJECT_NAME:-argus-client}" + info(){ echo -e "\033[34m[UNINSTALL-GPU]\033[0m $*"; } +err(){ echo -e "\033[31m[ERROR]\033[0m $*" >&2; } +# Compose 检测:优先 docker compose(v2),回退 docker-compose(v1) +require_compose(){ + if docker compose version >/dev/null 2>&1; then return 0; fi + if command -v docker-compose >/dev/null 2>&1 && docker-compose version >/dev/null 2>&1; then return 0; fi + err "未检测到 Docker Compose,请安装 docker compose v2 或 docker-compose v1"; exit 1 +} +require_compose if [[ -f "$ENV_FILE" ]]; then - info "stopping compose project" - docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" down --remove-orphans || true + info "stopping compose project (project=$PROJECT)" + docker compose -p "$PROJECT" --env-file "$ENV_FILE" -f "$COMPOSE_FILE" down --remove-orphans || true else info "compose/.env not found; attempting to remove container by name" fi @@ -22,4 +34,3 @@ docker rm -f argus-net-warmup >/dev/null 2>&1 || true docker rm -f argus-metric-gpu-node-swarm >/dev/null 2>&1 || true info "uninstall completed" - diff --git a/deployment_new/templates/server/scripts/config.sh b/deployment_new/templates/server/scripts/config.sh index d8d0339..8ff27dc 100644 --- a/deployment_new/templates/server/scripts/config.sh +++ b/deployment_new/templates/server/scripts/config.sh @@ -9,8 +9,15 @@ ENV_OUT="$PKG_ROOT/compose/.env" info(){ echo -e "\033[34m[CONFIG]\033[0m $*"; } err(){ echo -e "\033[31m[ERROR]\033[0m $*" >&2; } require(){ local ok=1; for c in "$@"; do command -v "$c" >/dev/null 2>&1 || { err "缺少依赖: $c"; ok=0; }; done; [[ $ok -eq 1 ]]; } +# Compose 检测:优先 docker compose(v2),回退 docker-compose(v1) +require_compose(){ + if docker compose version >/dev/null 2>&1; then return 0; fi + if command -v docker-compose >/dev/null 2>&1 && docker-compose version >/dev/null 2>&1; then return 0; fi + err "未检测到 Docker Compose,请安装 docker compose v2 或 docker-compose v1"; exit 1 +} require docker curl jq awk sed tar gzip +require_compose # 磁盘空间检查(MB) check_disk(){ local p="$1"; local need=10240; local free diff --git a/deployment_new/templates/server/scripts/diagnose.sh b/deployment_new/templates/server/scripts/diagnose.sh index e93d69d..7eb77aa 100644 --- a/deployment_new/templates/server/scripts/diagnose.sh +++ b/deployment_new/templates/server/scripts/diagnose.sh @@ -9,6 +9,11 @@ ENV_FILE="$ROOT/compose/.env"; [[ -f "$ENV_FILE" ]] && set -a && source "$ENV_FI ts="$(date -u +%Y%m%d-%H%M%SZ)" LOG_DIR="$ROOT/logs"; mkdir -p "$LOG_DIR" || true if ! ( : > "$LOG_DIR/.w" 2>/dev/null ); then LOG_DIR="/tmp/argus-logs"; mkdir -p "$LOG_DIR" || true; fi + +# load compose project for accurate ps output +ENV_FILE="$ROOT/compose/.env" +if [[ -f "$ENV_FILE" ]]; then set -a; source "$ENV_FILE"; set +a; fi +PROJECT="${COMPOSE_PROJECT_NAME:-argus-server}" DETAILS="$LOG_DIR/diagnose_details_${ts}.log"; ERRORS="$LOG_DIR/diagnose_error_${ts}.log"; : > "$DETAILS"; : > "$ERRORS" logd() { echo "$(date '+%F %T') $*" >> "$DETAILS"; } @@ -85,7 +90,7 @@ docker exec argus-ftp sh -lc 'ls -ld /private/argus/ftp /private/argus/ftp/share section SYSTEM logd "uname -a:"; uname -a >> "$DETAILS" logd "docker version:"; docker version --format '{{.Server.Version}}' >> "$DETAILS" 2>&1 || true -logd "compose ps:"; (cd "$ROOT/compose" && docker compose ps) >> "$DETAILS" 2>&1 || true +logd "compose ps (project=$PROJECT):"; (cd "$ROOT/compose" && docker compose -p "$PROJECT" --env-file "$ENV_FILE" -f docker-compose.yml ps) >> "$DETAILS" 2>&1 || true section SUMMARY [[ $(http_code "http://localhost:${ES_HTTP_PORT:-9200}/_cluster/health") != 200 ]] && echo "[es][http] health not 200" >> "$ERRORS" diff --git a/deployment_new/templates/server/scripts/install.sh b/deployment_new/templates/server/scripts/install.sh index 81e2258..1725980 100644 --- a/deployment_new/templates/server/scripts/install.sh +++ b/deployment_new/templates/server/scripts/install.sh @@ -9,7 +9,14 @@ COMPOSE_FILE="$PKG_ROOT/compose/docker-compose.yml" info(){ echo -e "\033[34m[INSTALL]\033[0m $*"; } err(){ echo -e "\033[31m[ERROR]\033[0m $*" >&2; } require(){ local ok=1; for c in "$@"; do command -v "$c" >/dev/null 2>&1 || { err "缺少依赖: $c"; ok=0; }; done; [[ $ok -eq 1 ]]; } +# Compose 检测:优先 docker compose(v2),回退 docker-compose(v1) +require_compose(){ + if docker compose version >/dev/null 2>&1; then return 0; fi + if command -v docker-compose >/devnull 2>&1 && docker-compose version >/dev/null 2>&1; then return 0; fi + err "未检测到 Docker Compose,请安装 docker compose v2 或 docker-compose v1"; exit 1 +} require docker curl jq awk sed tar gzip +require_compose [[ -f "$ENV_FILE" ]] || { err "缺少 compose/.env,请先运行 scripts/config.sh"; exit 1; } info "使用环境文件: $ENV_FILE" @@ -52,9 +59,10 @@ done shopt -u nullglob # Compose up -info "启动服务栈 (docker compose up -d)" -docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" up -d -docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" ps +PROJECT="${COMPOSE_PROJECT_NAME:-argus-server}" +info "启动服务栈 (docker compose -p $PROJECT up -d)" +docker compose -p "$PROJECT" --env-file "$ENV_FILE" -f "$COMPOSE_FILE" up -d +docker compose -p "$PROJECT" --env-file "$ENV_FILE" -f "$COMPOSE_FILE" ps # Wait readiness (best-effort) code(){ curl -4 -s -o /dev/null -w "%{http_code}" "$1" || echo 000; } diff --git a/deployment_new/templates/server/scripts/status.sh b/deployment_new/templates/server/scripts/status.sh index c555cb8..84694c2 100644 --- a/deployment_new/templates/server/scripts/status.sh +++ b/deployment_new/templates/server/scripts/status.sh @@ -4,4 +4,6 @@ ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" PKG_ROOT="$ROOT_DIR" ENV_FILE="$PKG_ROOT/compose/.env" COMPOSE_FILE="$PKG_ROOT/compose/docker-compose.yml" -docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" ps +if [[ -f "$ENV_FILE" ]]; then set -a; source "$ENV_FILE"; set +a; fi +PROJECT="${COMPOSE_PROJECT_NAME:-argus-server}" +docker compose -p "$PROJECT" --env-file "$ENV_FILE" -f "$COMPOSE_FILE" ps diff --git a/deployment_new/templates/server/scripts/uninstall.sh b/deployment_new/templates/server/scripts/uninstall.sh index c63bb24..4a7afa7 100644 --- a/deployment_new/templates/server/scripts/uninstall.sh +++ b/deployment_new/templates/server/scripts/uninstall.sh @@ -4,6 +4,20 @@ ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" PKG_ROOT="$ROOT_DIR" ENV_FILE="$PKG_ROOT/compose/.env" COMPOSE_FILE="$PKG_ROOT/compose/docker-compose.yml" -echo "[UNINSTALL] stopping compose" -docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" down --remove-orphans || true + +# load COMPOSE_PROJECT_NAME from env file if present +if [[ -f "$ENV_FILE" ]]; then set -a; source "$ENV_FILE"; set +a; fi +PROJECT="${COMPOSE_PROJECT_NAME:-argus-server}" + +err(){ echo -e "\033[31m[ERROR]\033[0m $*" >&2; } +# Compose 检测:优先 docker compose(v2),回退 docker-compose(v1) +require_compose(){ + if docker compose version >/dev/null 2>&1; then return 0; fi + if command -v docker-compose >/dev/null 2>&1 && docker-compose version >/dev/null 2>&1; then return 0; fi + err "未检测到 Docker Compose,请安装 docker compose v2 或 docker-compose v1"; exit 1 +} +require_compose + +echo "[UNINSTALL] stopping compose (project=$PROJECT)" +docker compose -p "$PROJECT" --env-file "$ENV_FILE" -f "$COMPOSE_FILE" down --remove-orphans || true echo "[UNINSTALL] done"