argus/deployment_new/build/make_client_gpu_package.sh
yuyr 34cb239bf4 完成H20服务器部署及重启测试 (#51)
当前部署情况
- h1: 部署server & client
- h2: 部署client
- 部署日期:2025-11-25
- 部署目录:  /home2/argus/server  ,  /home2/argus/client
- 部署使用账号:argus

网络拓扑:
- h1 作为docker swarm manager
- h2 作为worker加入docker swarm
- docker swarm 上创建overlay network

访问方式:
- 通过ssh到h1服务器,端口转发 20006-20011 端口到笔记本本地;
- 门户网址:http://localhost:20006/dashboard

部署截图:
![image.png](/attachments/86c1a7af-dacc-4ba7-a182-f7cefd4e6427)
![image.png](/attachments/06f20852-771c-4264-b031-e6acd0f6ea1c)
![image.png](/attachments/091ab5a8-95bf-466f-a394-3255dcb49735)

注意事项:
- server 各容器使用域名作为 overlay network 上的 alias 别名,实现域名访问;当前版本禁用 bind 作为域名解析,原因是在容器重启后 IP 变化的场景下,bind 机制复杂且不稳定。
- client 镜像在构建时内置安装包:容器首次启动时执行安装流程,后续重启容器时跳过安装步骤。
- UID/GID:部署使用 argus账号 uid=2133, gid=2015。

Reviewed-on: #51
Reviewed-by: sundapeng <sundp@mail.zgclab.edu.cn>
Reviewed-by: xuxt <xuxt@zgclab.edu.cn>
Reviewed-by: huhy <husteryezi@163.com>
2025-11-25 15:54:29 +08:00

132 lines
4.0 KiB
Bash
Executable File

#!/usr/bin/env bash
# Make client GPU package (versioned gpu bundle image, compose, env, docs, busybox)
#
# Strict mode: abort on command failure, on use of an unset variable, and on
# failure of any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# Repository root: two directory levels above the directory holding this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"

# Template inputs and artifact output root for the client_gpu package.
TEMPL_DIR="${ROOT_DIR}/deployment_new/templates/client_gpu"
ART_ROOT="${ROOT_DIR}/deployment_new/artifact/client_gpu"

# Use deployment_new local common helpers.
# NOTE(review): common.sh is not visible here; it is expected to provide the
# helper functions this script calls later — confirm against that file.
COMMON_SH="${ROOT_DIR}/deployment_new/build/common.sh"
# shellcheck source=/dev/null
source "$COMMON_SH"
# Print the command-line help for this script on stdout.
# The script name shown in the "Usage:" line is the basename of $0.
usage() {
  printf '%s\n' \
    'Build Client-GPU Package (deployment_new)' \
    "Usage: $(basename "$0") --version YYYYMMDD [--image IMAGE[:TAG]]" \
    'Defaults:' \
    'image = argus-sys-metric-test-node-bundle-gpu:latest' \
    'Outputs: deployment_new/artifact/client_gpu/<YYYYMMDD>/ and client_gpu_YYYYMMDD.tar.gz'
}
# Defaults for the command-line options; overwritten by parse_client_gpu_args.
VERSION=""
IMAGE="argus-sys-metric-test-node-bundle-gpu:latest"

# Parse command-line options into the globals VERSION and IMAGE.
#
# Arguments: the script's positional parameters ("$@").
# Recognised options:
#   --version VALUE   package version (sets VERSION)
#   --image VALUE     source image reference (sets IMAGE)
#   -h | --help       print usage and exit 0
# Exits 1 (after printing an error and the usage text) on an unknown option
# or when --version/--image is given without a value.
parse_client_gpu_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --version | --image)
        # Without this check a trailing option would read an unset "$2" and,
        # under `set -u`, abort with an opaque "unbound variable" message.
        if [[ $# -lt 2 ]]; then
          err "missing value for $1"
          usage
          exit 1
        fi
        if [[ "$1" == "--version" ]]; then
          VERSION="$2"
        else
          IMAGE="$2"
        fi
        shift 2
        ;;
      -h | --help)
        usage
        exit 0
        ;;
      *)
        err "unknown arg: $1"
        usage
        exit 1
        ;;
    esac
  done
}
parse_client_gpu_args "$@"
# No --version given: take the default from the today_version helper
# (presumably today's date as YYYYMMDD — defined outside this view, confirm).
[[ -n "$VERSION" ]] || VERSION="$(today_version)"

# NOTE(review): require_cmd is defined elsewhere; it is presumably a check that
# these external tools are available — confirm.
require_cmd docker tar gzip

# Build everything in a throw-away staging directory that is removed on exit,
# whatever the exit path.
STAGE="$(mktemp -d)"
trap 'rm -rf "$STAGE"' EXIT

# Final per-version artifact directory, plus the staging layout.
PKG_DIR="$ART_ROOT/$VERSION"
mkdir -p \
  "$PKG_DIR" \
  "$STAGE/images" \
  "$STAGE/compose" \
  "$STAGE/docs" \
  "$STAGE/scripts" \
  "$STAGE/private/argus"
# 1) Save GPU bundle image with version tag
# The source image must already exist in the local docker daemon.
if ! docker image inspect "$IMAGE" >/dev/null 2>&1; then
  err "missing image: $IMAGE"
  exit 1
fi

# Derive the repository name (image reference without tag or digest).
# Cutting at the first ':' is wrong for references whose registry host carries
# a port (e.g. "registry:5000/ns/img:tag" would become "registry"), so:
#   - drop a trailing "@<digest>" if present;
#   - treat a ':' as the tag separator only when it appears in the last
#     path component.
REPO="${IMAGE%%@*}"
if [[ "${REPO##*/}" == *:* ]]; then
  REPO="${REPO%:*}"
fi

# Re-tag the image as <repo>:<version> and export that tag.
TAG_VER="$REPO:$VERSION"
docker tag "$IMAGE" "$TAG_VER"

# Flatten '/' (and the ':' of a registry port) to '-' for the tar file name.
out_tar="$STAGE/images/${REPO//[\/:]/-}-$VERSION.tar"
docker save -o "$out_tar" "$TAG_VER"
# Compress in place (produces <out_tar>.gz); -f overwrites an existing .gz.
gzip -f "$out_tar"
# 2) Busybox tar for connectivity/overlay warmup.
# Order of preference:
#   a) a busybox.tar shipped with the templates;
#   b) busybox:latest from the local docker daemon, pulling it if absent;
#   c) nothing - emit a warning and continue without busybox.tar.
BB_SRC="$TEMPL_DIR/images/busybox.tar"
bb_dst="$STAGE/images/busybox.tar"
if [[ -f "$BB_SRC" ]]; then
  cp "$BB_SRC" "$bb_dst"
elif docker image inspect busybox:latest >/dev/null 2>&1 \
  || docker pull busybox:latest >/dev/null 2>&1; then
  docker save -o "$bb_dst" busybox:latest
  log "Included busybox from local docker daemon"
else
  warn "busybox image not found and cannot pull; skipping busybox.tar"
fi
# 3) Compose + env template and docs/scripts from templates
# The compose file is copied verbatim from the templates.
cp "$TEMPL_DIR/compose/docker-compose.yml" "$STAGE/compose/docker-compose.yml"

# Generate .env.example: version and image tag are filled in from this build,
# the remaining keys are left empty (or at fixed values) for the installer.
ENV_EX="$STAGE/compose/.env.example"
printf '%s\n' \
  '# Generated by make_client_gpu_package.sh' \
  "PKG_VERSION=$VERSION" \
  "NODE_GPU_BUNDLE_IMAGE_TAG=${REPO}:${VERSION}" \
  '# Compose project name (isolation from server stack)' \
  'COMPOSE_PROJECT_NAME=argus-client' \
  '# Required (no defaults). Must be filled before install.' \
  'AGENT_ENV=' \
  'AGENT_USER=' \
  'AGENT_INSTANCE=' \
  'GPU_NODE_HOSTNAME=' \
  '# Overlay network (should match server包 overlay)' \
  'ARGUS_OVERLAY_NET=argus-sys-net' \
  '# From cluster-info.env (server package output)' \
  'SWARM_MANAGER_ADDR=' \
  'SWARM_JOIN_TOKEN_WORKER=' \
  'SWARM_JOIN_TOKEN_MANAGER=' \
  >"$ENV_EX"
# 4) Docs from deployment_new templates
# Copy the docs tree when it exists; prefer rsync and fall back to cp -r when
# rsync fails for any reason.
CLIENT_DOC_SRC="$TEMPL_DIR/docs"
if [[ -d "$CLIENT_DOC_SRC" ]]; then
  if ! rsync -a "$CLIENT_DOC_SRC/" "$STAGE/docs/" >/dev/null 2>&1; then
    cp -r "$CLIENT_DOC_SRC/." "$STAGE/docs/"
  fi
fi

# Placeholder scripts (will be implemented in M2): write a README into the
# scripts directory of the staged package.
printf '%s\n' \
  '# Client-GPU Scripts (Placeholder)' \
  '本目录将在 M2 引入:' \
  '- config.sh / install.sh' \
  '当前为占位,便于包结构审阅。' \
  >"$STAGE/scripts/README.md"
# 5) Scripts (from deployment_new templates) and Private skeleton
# Copy template scripts over the staged scripts directory (rsync first, cp -r
# as fallback), then make every *.sh below it executable.
SCRIPTS_SRC="$TEMPL_DIR/scripts"
if [[ -d "$SCRIPTS_SRC" ]]; then
  if ! rsync -a "$SCRIPTS_SRC/" "$STAGE/scripts/" >/dev/null 2>&1; then
    cp -r "$SCRIPTS_SRC/." "$STAGE/scripts/"
  fi
  # Best effort: a chmod failure must not abort the build.
  find "$STAGE/scripts" -type f -name '*.sh' -exec chmod +x {} + 2>/dev/null || true
fi

# Empty directory skeleton shipped inside the package.
mkdir -p "$STAGE/private/argus/agent"
# 6) Manifest & checksums
# NOTE(review): gen_manifest and checksum_dir are defined outside this view;
# each is called with the staging dir and an output file inside it.
gen_manifest "$STAGE" "$STAGE/manifest.txt"
checksum_dir "$STAGE" "$STAGE/checksums.txt"

# 7) Move to artifact dir and pack
# Publish the staged tree into the per-version artifact directory
# (rsync first, cp -r as fallback).
mkdir -p "$PKG_DIR"
if ! rsync -a "$STAGE/" "$PKG_DIR/" >/dev/null 2>&1; then
  cp -r "$STAGE/." "$PKG_DIR/"
fi

# The tarball sits next to the version directory and contains that directory
# as its single top-level entry.
OUT_TAR_DIR="$(dirname "$PKG_DIR")"
OUT_TAR="$OUT_TAR_DIR/client_gpu_${VERSION}.tar.gz"
log "Creating tarball: $OUT_TAR"
tar -czf "$OUT_TAR" -C "$OUT_TAR_DIR" "$(basename "$PKG_DIR")"
log "Client-GPU package ready: $PKG_DIR"

# Emit the tarball path on stdout.
printf '%s\n' "$OUT_TAR"