完成H20服务器部署及重启测试 #51
@ -73,6 +73,9 @@ PKG_VERSION=$VERSION
|
|||||||
|
|
||||||
NODE_GPU_BUNDLE_IMAGE_TAG=${REPO}:${VERSION}
|
NODE_GPU_BUNDLE_IMAGE_TAG=${REPO}:${VERSION}
|
||||||
|
|
||||||
|
# Compose project name (isolation from server stack)
|
||||||
|
COMPOSE_PROJECT_NAME=argus-client
|
||||||
|
|
||||||
# Required (no defaults). Must be filled before install.
|
# Required (no defaults). Must be filled before install.
|
||||||
AGENT_ENV=
|
AGENT_ENV=
|
||||||
AGENT_USER=
|
AGENT_USER=
|
||||||
|
|||||||
@ -113,6 +113,9 @@ FTP_PASSWORD=NASPlab1234!
|
|||||||
# UID/GID for volume ownership
|
# UID/GID for volume ownership
|
||||||
ARGUS_BUILD_UID=2133
|
ARGUS_BUILD_UID=2133
|
||||||
ARGUS_BUILD_GID=2015
|
ARGUS_BUILD_GID=2015
|
||||||
|
|
||||||
|
# Compose project name (isolation from other stacks on same host)
|
||||||
|
COMPOSE_PROJECT_NAME=argus-server
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
# 3) Docs (from deployment_new templates)
|
# 3) Docs (from deployment_new templates)
|
||||||
|
|||||||
@ -9,7 +9,14 @@ ENV_OUT="$PKG_ROOT/compose/.env"
|
|||||||
info(){ echo -e "\033[34m[CONFIG-GPU]\033[0m $*"; }
|
info(){ echo -e "\033[34m[CONFIG-GPU]\033[0m $*"; }
|
||||||
err(){ echo -e "\033[31m[ERROR]\033[0m $*" >&2; }
|
err(){ echo -e "\033[31m[ERROR]\033[0m $*" >&2; }
|
||||||
require(){ local ok=1; for c in "$@"; do command -v "$c" >/dev/null 2>&1 || { err "缺少依赖: $c"; ok=0; }; done; [[ $ok -eq 1 ]]; }
|
# Check that every command named in "$@" can be resolved with `command -v`.
# Each missing command is reported through err, and scanning continues so that
# all missing dependencies are listed in a single run.
#
# Arguments: command names to look up
# Returns:   0 when all are found, 1 when at least one is missing
require() {
  # `c` is declared local so the loop does not overwrite a caller's $c.
  local ok=1 c
  for c in "$@"; do
    if ! command -v "$c" >/dev/null 2>&1; then
      err "缺少依赖: $c"
      ok=0
    fi
  done
  [[ $ok -eq 1 ]]
}
|
||||||
require docker curl jq awk sed tar gzip nvidia-smi
|
# Compose detection: prefer the `docker compose` plugin (v2) and fall back to
# the standalone `docker-compose` binary (v1).
#
# Outputs:  on failure, one error line via err
# Returns:  0 when either Compose flavour answers `version` successfully;
#           otherwise terminates the script with exit status 1.
require_compose() {
  # v2: Compose shipped as a docker CLI plugin.
  if docker compose version >/dev/null 2>&1; then
    return 0
  fi

  # v1: standalone binary; it must both exist and run.
  if command -v docker-compose >/dev/null 2>&1; then
    if docker-compose version >/dev/null 2>&1; then
      return 0
    fi
  fi

  err "未检测到 Docker Compose,请安装 docker compose v2 或 docker-compose v1"
  exit 1
}
|
||||||
|
require docker curl jq awk sed tar gzip
|
||||||
|
require_compose
|
||||||
|
|
||||||
# 磁盘空间检查(MB)
|
# 磁盘空间检查(MB)
|
||||||
check_disk(){ local p="$1"; local need=10240; local free
|
check_disk(){ local p="$1"; local need=10240; local free
|
||||||
|
|||||||
@ -9,7 +9,14 @@ COMPOSE_FILE="$PKG_ROOT/compose/docker-compose.yml"
|
|||||||
info(){ echo -e "\033[34m[INSTALL-GPU]\033[0m $*"; }
|
info(){ echo -e "\033[34m[INSTALL-GPU]\033[0m $*"; }
|
||||||
err(){ echo -e "\033[31m[ERROR]\033[0m $*" >&2; }
|
err(){ echo -e "\033[31m[ERROR]\033[0m $*" >&2; }
|
||||||
require(){ local ok=1; for c in "$@"; do command -v "$c" >/dev/null 2>&1 || { err "缺少依赖: $c"; ok=0; }; done; [[ $ok -eq 1 ]]; }
|
# Check that every command named in "$@" can be resolved with `command -v`.
# Each missing command is reported through err, and scanning continues so that
# all missing dependencies are listed in a single run.
#
# Arguments: command names to look up
# Returns:   0 when all are found, 1 when at least one is missing
require() {
  # `c` is declared local so the loop does not overwrite a caller's $c.
  local ok=1 c
  for c in "$@"; do
    if ! command -v "$c" >/dev/null 2>&1; then
      err "缺少依赖: $c"
      ok=0
    fi
  done
  [[ $ok -eq 1 ]]
}
|
||||||
require docker docker compose nvidia-smi
|
# Compose detection: prefer the `docker compose` plugin (v2) and fall back to
# the standalone `docker-compose` binary (v1).
#
# Outputs:  on failure, one error line via err
# Returns:  0 when either Compose flavour answers `version` successfully;
#           otherwise terminates the script with exit status 1.
#
# NOTE(review): the compose invocations later in this script call
# `docker compose ...` directly; on a v1-only host this check passes but those
# calls would presumably fail - confirm.
require_compose() {
  # v2: Compose shipped as a docker CLI plugin.
  if docker compose version >/dev/null 2>&1; then
    return 0
  fi

  # v1: standalone binary; it must both exist and run.
  if command -v docker-compose >/dev/null 2>&1; then
    if docker-compose version >/dev/null 2>&1; then
      return 0
    fi
  fi

  err "未检测到 Docker Compose,请安装 docker compose v2 或 docker-compose v1"
  exit 1
}
|
||||||
|
require docker nvidia-smi
|
||||||
|
require_compose
|
||||||
|
|
||||||
[[ -f "$ENV_FILE" ]] || { err "缺少 compose/.env,请先运行 scripts/config.sh"; exit 1; }
|
[[ -f "$ENV_FILE" ]] || { err "缺少 compose/.env,请先运行 scripts/config.sh"; exit 1; }
|
||||||
info "使用环境文件: $ENV_FILE"
|
info "使用环境文件: $ENV_FILE"
|
||||||
@ -52,9 +59,10 @@ info "日志目录已准备并赋权 1777: logs/infer logs/train"
|
|||||||
stat -c '%a %U:%G %n' "$PKG_ROOT/logs/infer" "$PKG_ROOT/logs/train" 2>/dev/null || true
|
stat -c '%a %U:%G %n' "$PKG_ROOT/logs/infer" "$PKG_ROOT/logs/train" 2>/dev/null || true
|
||||||
|
|
||||||
# 启动 compose 并跟踪日志
|
# 启动 compose 并跟踪日志
|
||||||
info "启动 GPU 节点 (docker compose up -d)"
|
PROJECT="${COMPOSE_PROJECT_NAME:-argus-client}"
|
||||||
docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" up -d
|
info "启动 GPU 节点 (docker compose -p $PROJECT up -d)"
|
||||||
docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" ps
|
docker compose -p "$PROJECT" --env-file "$ENV_FILE" -f "$COMPOSE_FILE" up -d
|
||||||
|
docker compose -p "$PROJECT" --env-file "$ENV_FILE" -f "$COMPOSE_FILE" ps
|
||||||
|
|
||||||
# 再次校准宿主日志目录权限,避免容器内脚本对 bind mount 权限回退
|
# 再次校准宿主日志目录权限,避免容器内脚本对 bind mount 权限回退
|
||||||
chmod 1777 "$PKG_ROOT/logs/infer" "$PKG_ROOT/logs/train" || true
|
chmod 1777 "$PKG_ROOT/logs/infer" "$PKG_ROOT/logs/train" || true
|
||||||
|
|||||||
@ -6,11 +6,23 @@ PKG_ROOT="$ROOT_DIR"
|
|||||||
ENV_FILE="$PKG_ROOT/compose/.env"
|
ENV_FILE="$PKG_ROOT/compose/.env"
|
||||||
COMPOSE_FILE="$PKG_ROOT/compose/docker-compose.yml"
|
COMPOSE_FILE="$PKG_ROOT/compose/docker-compose.yml"
|
||||||
|
|
||||||
|
# load COMPOSE_PROJECT_NAME if provided in compose/.env
|
||||||
|
if [[ -f "$ENV_FILE" ]]; then set -a; source "$ENV_FILE"; set +a; fi
|
||||||
|
PROJECT="${COMPOSE_PROJECT_NAME:-argus-client}"
|
||||||
|
|
||||||
info(){ echo -e "\033[34m[UNINSTALL-GPU]\033[0m $*"; }
|
info(){ echo -e "\033[34m[UNINSTALL-GPU]\033[0m $*"; }
|
||||||
|
err(){ echo -e "\033[31m[ERROR]\033[0m $*" >&2; }
|
||||||
|
# Compose detection: prefer the `docker compose` plugin (v2) and fall back to
# the standalone `docker-compose` binary (v1).
#
# Outputs:  on failure, one error line via err
# Returns:  0 when either Compose flavour answers `version` successfully;
#           otherwise terminates the script with exit status 1.
#
# NOTE(review): the `down` call later in this script uses `docker compose ...`
# directly; on a v1-only host this check passes but that call would presumably
# fail - confirm.
require_compose() {
  # v2: Compose shipped as a docker CLI plugin.
  if docker compose version >/dev/null 2>&1; then
    return 0
  fi

  # v1: standalone binary; it must both exist and run.
  if command -v docker-compose >/dev/null 2>&1; then
    if docker-compose version >/dev/null 2>&1; then
      return 0
    fi
  fi

  err "未检测到 Docker Compose,请安装 docker compose v2 或 docker-compose v1"
  exit 1
}
|
||||||
|
require_compose
|
||||||
|
|
||||||
if [[ -f "$ENV_FILE" ]]; then
|
if [[ -f "$ENV_FILE" ]]; then
|
||||||
info "stopping compose project"
|
info "stopping compose project (project=$PROJECT)"
|
||||||
docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" down --remove-orphans || true
|
docker compose -p "$PROJECT" --env-file "$ENV_FILE" -f "$COMPOSE_FILE" down --remove-orphans || true
|
||||||
else
|
else
|
||||||
info "compose/.env not found; attempting to remove container by name"
|
info "compose/.env not found; attempting to remove container by name"
|
||||||
fi
|
fi
|
||||||
@ -22,4 +34,3 @@ docker rm -f argus-net-warmup >/dev/null 2>&1 || true
|
|||||||
docker rm -f argus-metric-gpu-node-swarm >/dev/null 2>&1 || true
|
docker rm -f argus-metric-gpu-node-swarm >/dev/null 2>&1 || true
|
||||||
|
|
||||||
info "uninstall completed"
|
info "uninstall completed"
|
||||||
|
|
||||||
|
|||||||
@ -9,8 +9,15 @@ ENV_OUT="$PKG_ROOT/compose/.env"
|
|||||||
info(){ echo -e "\033[34m[CONFIG]\033[0m $*"; }
|
info(){ echo -e "\033[34m[CONFIG]\033[0m $*"; }
|
||||||
err(){ echo -e "\033[31m[ERROR]\033[0m $*" >&2; }
|
err(){ echo -e "\033[31m[ERROR]\033[0m $*" >&2; }
|
||||||
require(){ local ok=1; for c in "$@"; do command -v "$c" >/dev/null 2>&1 || { err "缺少依赖: $c"; ok=0; }; done; [[ $ok -eq 1 ]]; }
|
# Check that every command named in "$@" can be resolved with `command -v`.
# Each missing command is reported through err, and scanning continues so that
# all missing dependencies are listed in a single run.
#
# Arguments: command names to look up
# Returns:   0 when all are found, 1 when at least one is missing
require() {
  # `c` is declared local so the loop does not overwrite a caller's $c.
  local ok=1 c
  for c in "$@"; do
    if ! command -v "$c" >/dev/null 2>&1; then
      err "缺少依赖: $c"
      ok=0
    fi
  done
  [[ $ok -eq 1 ]]
}
|
||||||
|
# Compose detection: prefer the `docker compose` plugin (v2) and fall back to
# the standalone `docker-compose` binary (v1).
#
# Outputs:  on failure, one error line via err
# Returns:  0 when either Compose flavour answers `version` successfully;
#           otherwise terminates the script with exit status 1.
require_compose() {
  # v2: Compose shipped as a docker CLI plugin.
  if docker compose version >/dev/null 2>&1; then
    return 0
  fi

  # v1: standalone binary; it must both exist and run.
  if command -v docker-compose >/dev/null 2>&1; then
    if docker-compose version >/dev/null 2>&1; then
      return 0
    fi
  fi

  err "未检测到 Docker Compose,请安装 docker compose v2 或 docker-compose v1"
  exit 1
}
|
||||||
|
|
||||||
require docker curl jq awk sed tar gzip
|
require docker curl jq awk sed tar gzip
|
||||||
|
require_compose
|
||||||
|
|
||||||
# 磁盘空间检查(MB)
|
# 磁盘空间检查(MB)
|
||||||
check_disk(){ local p="$1"; local need=10240; local free
|
check_disk(){ local p="$1"; local need=10240; local free
|
||||||
|
|||||||
@ -9,6 +9,11 @@ ENV_FILE="$ROOT/compose/.env"; [[ -f "$ENV_FILE" ]] && set -a && source "$ENV_FI
|
|||||||
ts="$(date -u +%Y%m%d-%H%M%SZ)"
|
ts="$(date -u +%Y%m%d-%H%M%SZ)"
|
||||||
LOG_DIR="$ROOT/logs"; mkdir -p "$LOG_DIR" || true
|
LOG_DIR="$ROOT/logs"; mkdir -p "$LOG_DIR" || true
|
||||||
if ! ( : > "$LOG_DIR/.w" 2>/dev/null ); then LOG_DIR="/tmp/argus-logs"; mkdir -p "$LOG_DIR" || true; fi
|
if ! ( : > "$LOG_DIR/.w" 2>/dev/null ); then LOG_DIR="/tmp/argus-logs"; mkdir -p "$LOG_DIR" || true; fi
|
||||||
|
|
||||||
|
# load compose project for accurate ps output
|
||||||
|
ENV_FILE="$ROOT/compose/.env"
|
||||||
|
if [[ -f "$ENV_FILE" ]]; then set -a; source "$ENV_FILE"; set +a; fi
|
||||||
|
PROJECT="${COMPOSE_PROJECT_NAME:-argus-server}"
|
||||||
DETAILS="$LOG_DIR/diagnose_details_${ts}.log"; ERRORS="$LOG_DIR/diagnose_error_${ts}.log"; : > "$DETAILS"; : > "$ERRORS"
|
DETAILS="$LOG_DIR/diagnose_details_${ts}.log"; ERRORS="$LOG_DIR/diagnose_error_${ts}.log"; : > "$DETAILS"; : > "$ERRORS"
|
||||||
|
|
||||||
logd() { echo "$(date '+%F %T') $*" >> "$DETAILS"; }
|
logd() { echo "$(date '+%F %T') $*" >> "$DETAILS"; }
|
||||||
@ -85,7 +90,7 @@ docker exec argus-ftp sh -lc 'ls -ld /private/argus/ftp /private/argus/ftp/share
|
|||||||
section SYSTEM
|
section SYSTEM
|
||||||
logd "uname -a:"; uname -a >> "$DETAILS"
|
logd "uname -a:"; uname -a >> "$DETAILS"
|
||||||
logd "docker version:"; docker version --format '{{.Server.Version}}' >> "$DETAILS" 2>&1 || true
|
logd "docker version:"; docker version --format '{{.Server.Version}}' >> "$DETAILS" 2>&1 || true
|
||||||
logd "compose ps:"; (cd "$ROOT/compose" && docker compose ps) >> "$DETAILS" 2>&1 || true
|
logd "compose ps (project=$PROJECT):"; (cd "$ROOT/compose" && docker compose -p "$PROJECT" --env-file "$ENV_FILE" -f docker-compose.yml ps) >> "$DETAILS" 2>&1 || true
|
||||||
|
|
||||||
section SUMMARY
|
section SUMMARY
|
||||||
[[ $(http_code "http://localhost:${ES_HTTP_PORT:-9200}/_cluster/health") != 200 ]] && echo "[es][http] health not 200" >> "$ERRORS"
|
[[ $(http_code "http://localhost:${ES_HTTP_PORT:-9200}/_cluster/health") != 200 ]] && echo "[es][http] health not 200" >> "$ERRORS"
|
||||||
|
|||||||
@ -9,7 +9,14 @@ COMPOSE_FILE="$PKG_ROOT/compose/docker-compose.yml"
|
|||||||
info(){ echo -e "\033[34m[INSTALL]\033[0m $*"; }
|
info(){ echo -e "\033[34m[INSTALL]\033[0m $*"; }
|
||||||
err(){ echo -e "\033[31m[ERROR]\033[0m $*" >&2; }
|
err(){ echo -e "\033[31m[ERROR]\033[0m $*" >&2; }
|
||||||
require(){ local ok=1; for c in "$@"; do command -v "$c" >/dev/null 2>&1 || { err "缺少依赖: $c"; ok=0; }; done; [[ $ok -eq 1 ]]; }
|
# Check that every command named in "$@" can be resolved with `command -v`.
# Each missing command is reported through err, and scanning continues so that
# all missing dependencies are listed in a single run.
#
# Arguments: command names to look up
# Returns:   0 when all are found, 1 when at least one is missing
require() {
  # `c` is declared local so the loop does not overwrite a caller's $c.
  local ok=1 c
  for c in "$@"; do
    if ! command -v "$c" >/dev/null 2>&1; then
      err "缺少依赖: $c"
      ok=0
    fi
  done
  [[ $ok -eq 1 ]]
}
|
||||||
|
# Compose detection: prefer the `docker compose` plugin (v2) and fall back to
# the standalone `docker-compose` binary (v1).
#
# Globals:  none
# Outputs:  on failure, one error line via err
# Returns:  0 when either Compose flavour answers `version` successfully;
#           otherwise terminates the script with exit status 1.
#
# NOTE(review): the compose invocations later in this script call
# `docker compose ...` directly; on a host that only has v1 this check passes
# but those calls would presumably fail - confirm whether a v1 code path is
# really intended.
require_compose() {
  # v2: Compose shipped as a docker CLI plugin.
  if docker compose version >/dev/null 2>&1; then
    return 0
  fi

  # v1: standalone binary. Probe output must go to /dev/null; the previous
  # `>/devnull` tried to create a file in the filesystem root, which fails for
  # non-root users and made this fallback unreachable.
  if command -v docker-compose >/dev/null 2>&1; then
    if docker-compose version >/dev/null 2>&1; then
      return 0
    fi
  fi

  err "未检测到 Docker Compose,请安装 docker compose v2 或 docker-compose v1"
  exit 1
}
|
||||||
require docker curl jq awk sed tar gzip
|
require docker curl jq awk sed tar gzip
|
||||||
|
require_compose
|
||||||
|
|
||||||
[[ -f "$ENV_FILE" ]] || { err "缺少 compose/.env,请先运行 scripts/config.sh"; exit 1; }
|
[[ -f "$ENV_FILE" ]] || { err "缺少 compose/.env,请先运行 scripts/config.sh"; exit 1; }
|
||||||
info "使用环境文件: $ENV_FILE"
|
info "使用环境文件: $ENV_FILE"
|
||||||
@ -52,9 +59,10 @@ done
|
|||||||
shopt -u nullglob
|
shopt -u nullglob
|
||||||
|
|
||||||
# Compose up
|
# Compose up
|
||||||
info "启动服务栈 (docker compose up -d)"
|
PROJECT="${COMPOSE_PROJECT_NAME:-argus-server}"
|
||||||
docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" up -d
|
info "启动服务栈 (docker compose -p $PROJECT up -d)"
|
||||||
docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" ps
|
docker compose -p "$PROJECT" --env-file "$ENV_FILE" -f "$COMPOSE_FILE" up -d
|
||||||
|
docker compose -p "$PROJECT" --env-file "$ENV_FILE" -f "$COMPOSE_FILE" ps
|
||||||
|
|
||||||
# Wait readiness (best-effort)
|
# Wait readiness (best-effort)
|
||||||
code(){ curl -4 -s -o /dev/null -w "%{http_code}" "$1" || echo 000; }
|
code(){ curl -4 -s -o /dev/null -w "%{http_code}" "$1" || echo 000; }
|
||||||
|
|||||||
@ -4,4 +4,6 @@ ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
|
|||||||
PKG_ROOT="$ROOT_DIR"
|
PKG_ROOT="$ROOT_DIR"
|
||||||
ENV_FILE="$PKG_ROOT/compose/.env"
|
ENV_FILE="$PKG_ROOT/compose/.env"
|
||||||
COMPOSE_FILE="$PKG_ROOT/compose/docker-compose.yml"
|
COMPOSE_FILE="$PKG_ROOT/compose/docker-compose.yml"
|
||||||
docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" ps
|
if [[ -f "$ENV_FILE" ]]; then set -a; source "$ENV_FILE"; set +a; fi
|
||||||
|
PROJECT="${COMPOSE_PROJECT_NAME:-argus-server}"
|
||||||
|
docker compose -p "$PROJECT" --env-file "$ENV_FILE" -f "$COMPOSE_FILE" ps
|
||||||
|
|||||||
@ -4,6 +4,20 @@ ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
|
|||||||
PKG_ROOT="$ROOT_DIR"
|
PKG_ROOT="$ROOT_DIR"
|
||||||
ENV_FILE="$PKG_ROOT/compose/.env"
|
ENV_FILE="$PKG_ROOT/compose/.env"
|
||||||
COMPOSE_FILE="$PKG_ROOT/compose/docker-compose.yml"
|
COMPOSE_FILE="$PKG_ROOT/compose/docker-compose.yml"
|
||||||
echo "[UNINSTALL] stopping compose"
|
|
||||||
docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" down --remove-orphans || true
|
# load COMPOSE_PROJECT_NAME from env file if present
|
||||||
|
if [[ -f "$ENV_FILE" ]]; then set -a; source "$ENV_FILE"; set +a; fi
|
||||||
|
PROJECT="${COMPOSE_PROJECT_NAME:-argus-server}"
|
||||||
|
|
||||||
|
err(){ echo -e "\033[31m[ERROR]\033[0m $*" >&2; }
|
||||||
|
# Compose detection: prefer the `docker compose` plugin (v2) and fall back to
# the standalone `docker-compose` binary (v1).
#
# Outputs:  on failure, one error line via err
# Returns:  0 when either Compose flavour answers `version` successfully;
#           otherwise terminates the script with exit status 1.
#
# NOTE(review): the `down` call later in this script uses `docker compose ...`
# directly; on a v1-only host this check passes but that call would presumably
# fail - confirm.
require_compose() {
  # v2: Compose shipped as a docker CLI plugin.
  if docker compose version >/dev/null 2>&1; then
    return 0
  fi

  # v1: standalone binary; it must both exist and run.
  if command -v docker-compose >/dev/null 2>&1; then
    if docker-compose version >/dev/null 2>&1; then
      return 0
    fi
  fi

  err "未检测到 Docker Compose,请安装 docker compose v2 或 docker-compose v1"
  exit 1
}
|
||||||
|
require_compose
|
||||||
|
|
||||||
|
echo "[UNINSTALL] stopping compose (project=$PROJECT)"
|
||||||
|
docker compose -p "$PROJECT" --env-file "$ENV_FILE" -f "$COMPOSE_FILE" down --remove-orphans || true
|
||||||
echo "[UNINSTALL] done"
|
echo "[UNINSTALL] done"
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user