From a6e1bf7fe73807754f0f377105c30b1036cb8285 Mon Sep 17 00:00:00 2001 From: yuyr Date: Mon, 1 Dec 2025 15:27:12 +0800 Subject: [PATCH] =?UTF-8?q?[#50]=20x86=E6=9C=BA=E5=99=A8=E4=BD=BF=E7=94=A8?= =?UTF-8?q?qemu=E6=A8=A1=E6=8B=9F=E6=96=B9=E5=BC=8F=E5=AE=89=E8=A3=85?= =?UTF-8?q?=E9=83=A8=E7=BD=B2arm=E7=89=88=E6=9C=AC=E5=AE=89=E8=A3=85?= =?UTF-8?q?=E5=8C=85=EF=BC=8C=E5=BD=93=E5=89=8D=E6=9C=AA=E7=B2=BE=E7=AE=80?= =?UTF-8?q?=E7=89=88=EF=BC=8C=E5=8F=AA=E6=9C=89=E7=AE=80=E5=8D=95=E7=9A=84?= =?UTF-8?q?node=20exporter=E5=8A=9F=E8=83=BD=EF=BC=8C=E5=88=A0=E5=87=8F?= =?UTF-8?q?=E6=97=A5=E5=BF=97=EF=BC=8CGPU=E6=8C=87=E6=A0=87=E5=8A=9F?= =?UTF-8?q?=E8=83=BD=EF=BC=8C=E4=BB=8D=E4=BD=BF=E7=94=A8Swarm=E7=BB=84?= =?UTF-8?q?=E7=BD=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- build/build_images_for_arm.sh | 84 +++++++-- .../build/make_arm_client_package.sh | 112 +++++++++++ .../build/make_arm_server_package.sh | 175 ++++++++++++++++++ .../client_arm/compose/docker-compose.yml | 29 +++ .../client_arm/docs/INSTALL_CLIENT_ARM_zh.md | 154 +++++++++++++++ .../templates/client_arm/scripts/config.sh | 105 +++++++++++ .../templates/client_arm/scripts/install.sh | 48 +++++ .../templates/client_arm/scripts/uninstall.sh | 28 +++ .../client_arm/scripts_for_x86/config.sh | 96 ++++++++++ .../client_arm/scripts_for_x86/install.sh | 76 ++++++++ .../client_arm/scripts_for_x86/uninstall.sh | 6 + .../server_arm/compose/docker-compose.yml | 153 +++++++++++++++ .../server_arm/docs/INSTALL_SERVER_ARM_zh.md | 158 ++++++++++++++++ .../templates/server_arm/scripts/config.sh | 111 +++++++++++ .../templates/server_arm/scripts/diagnose.sh | 91 +++++++++ .../templates/server_arm/scripts/install.sh | 130 +++++++++++++ .../templates/server_arm/scripts/selfcheck.sh | 74 ++++++++ .../templates/server_arm/scripts/status.sh | 15 ++ .../templates/server_arm/scripts/uninstall.sh | 28 +++ .../server_arm/scripts_for_x86/config.sh | 116 ++++++++++++ 
.../server_arm/scripts_for_x86/diagnose.sh | 6 + .../server_arm/scripts_for_x86/install.sh | 77 ++++++++ .../server_arm/scripts_for_x86/selfcheck.sh | 6 + .../server_arm/scripts_for_x86/status.sh | 6 + .../server_arm/scripts_for_x86/uninstall.sh | 6 + src/metric/grafana/build/grafana.ini | 5 + 26 files changed, 1875 insertions(+), 20 deletions(-) create mode 100755 deployment_new/build/make_arm_client_package.sh create mode 100755 deployment_new/build/make_arm_server_package.sh create mode 100644 deployment_new/templates/client_arm/compose/docker-compose.yml create mode 100644 deployment_new/templates/client_arm/docs/INSTALL_CLIENT_ARM_zh.md create mode 100644 deployment_new/templates/client_arm/scripts/config.sh create mode 100644 deployment_new/templates/client_arm/scripts/install.sh create mode 100644 deployment_new/templates/client_arm/scripts/uninstall.sh create mode 100644 deployment_new/templates/client_arm/scripts_for_x86/config.sh create mode 100644 deployment_new/templates/client_arm/scripts_for_x86/install.sh create mode 100644 deployment_new/templates/client_arm/scripts_for_x86/uninstall.sh create mode 100644 deployment_new/templates/server_arm/compose/docker-compose.yml create mode 100644 deployment_new/templates/server_arm/docs/INSTALL_SERVER_ARM_zh.md create mode 100644 deployment_new/templates/server_arm/scripts/config.sh create mode 100644 deployment_new/templates/server_arm/scripts/diagnose.sh create mode 100644 deployment_new/templates/server_arm/scripts/install.sh create mode 100644 deployment_new/templates/server_arm/scripts/selfcheck.sh create mode 100644 deployment_new/templates/server_arm/scripts/status.sh create mode 100644 deployment_new/templates/server_arm/scripts/uninstall.sh create mode 100644 deployment_new/templates/server_arm/scripts_for_x86/config.sh create mode 100644 deployment_new/templates/server_arm/scripts_for_x86/diagnose.sh create mode 100644 deployment_new/templates/server_arm/scripts_for_x86/install.sh create mode 100644 
deployment_new/templates/server_arm/scripts_for_x86/selfcheck.sh create mode 100644 deployment_new/templates/server_arm/scripts_for_x86/status.sh create mode 100644 deployment_new/templates/server_arm/scripts_for_x86/uninstall.sh diff --git a/build/build_images_for_arm.sh b/build/build_images_for_arm.sh index 9766543..61144f5 100755 --- a/build/build_images_for_arm.sh +++ b/build/build_images_for_arm.sh @@ -45,8 +45,8 @@ Options: --master-offline Build master offline image (requires src/master/offline_wheels.tar.gz) --metric Build metric module images (ftp, prometheus, grafana, test nodes) --no-cache Build all images without using Docker layer cache - --only LIST Comma-separated targets to build: core,master,metric,web,alert,sys,gpu_bundle,cpu_bundle,server_pkg,client_pkg,all - --version DATE Date tag used by gpu_bundle/server_pkg/client_pkg (e.g. 20251112) + --only LIST Comma-separated targets to build: core,master,metric,web,alert,sys,gpu_bundle,cpu_bundle,server_pkg,client_pkg,arm_server_pkg,arm_client_pkg,all + --version DATE Date tag used by gpu_bundle/server_pkg/client_pkg/arm_server_pkg/arm_client_pkg (e.g. 
20251112) --client-semver X.Y.Z Override client semver used in all-in-one-full artifact (optional) --cuda VER CUDA runtime version for NVIDIA base (default: 12.2.2) --tag-latest Also tag bundle image as :latest (for cpu_bundle only; default off) @@ -73,6 +73,8 @@ build_gpu_bundle=false build_cpu_bundle=false build_server_pkg=false build_client_pkg=false +build_server_pkg_arm=false +build_client_pkg_arm=false no_cache=false bundle_date="" @@ -109,24 +111,28 @@ while [[ $# -gt 0 ]]; do echo "--only requires a target list" >&2; exit 1 fi sel="$2"; shift 2 - # reset all, then enable selected - build_core=false; build_master=false; build_metric=false; build_web=false; build_alert=false; build_sys=false; build_gpu_bundle=false; build_cpu_bundle=false; build_server_pkg=false; build_client_pkg=false + # reset all, then enable selected + build_core=false; build_master=false; build_metric=false; build_web=false; build_alert=false; build_sys=false; build_gpu_bundle=false; build_cpu_bundle=false; build_server_pkg=false; build_client_pkg=false; build_server_pkg_arm=false; build_client_pkg_arm=false IFS=',' read -ra parts <<< "$sel" for p in "${parts[@]}"; do - case "$p" in - core) build_core=true ;; - master) build_master=true ;; - metric) build_metric=true ;; - web) build_web=true ;; - alert) build_alert=true ;; - sys) build_sys=true ;; - gpu_bundle) build_gpu_bundle=true ;; - cpu_bundle) build_cpu_bundle=true ;; - server_pkg) build_server_pkg=true; build_core=true; build_master=true; build_metric=true; build_web=true; build_alert=true ;; - client_pkg) build_client_pkg=true ;; - all) build_core=true; build_master=true; build_metric=true; build_web=true; build_alert=true; build_sys=true ;; - *) echo "Unknown --only target: $p" >&2; exit 1 ;; - esac + case "$p" in + core) build_core=true ;; + master) build_master=true ;; + metric) build_metric=true ;; + web) build_web=true ;; + alert) build_alert=true ;; + sys) build_sys=true ;; + gpu_bundle) build_gpu_bundle=true ;; + 
cpu_bundle) build_cpu_bundle=true ;; + # 在 ARM 构建脚本中,server_pkg 视作 arm_server_pkg 的别名,避免误用 x86 打包逻辑 + server_pkg) build_server_pkg_arm=true; build_core=true; build_master=true; build_metric=true; build_web=true; build_alert=true ;; + # 在 ARM 构建脚本中,client_pkg 视作 arm_client_pkg 的别名,避免误用 x86 打包逻辑 + client_pkg) build_client_pkg_arm=true; build_sys=true ;; + arm_server_pkg) build_server_pkg_arm=true; build_core=true; build_master=true; build_metric=true; build_web=true; build_alert=true ;; + arm_client_pkg) build_client_pkg_arm=true; build_sys=true ;; + all) build_core=true; build_master=true; build_metric=true; build_web=true; build_alert=true; build_sys=true ;; + *) echo "Unknown --only target: $p" >&2; exit 1 ;; + esac done ;; --version) @@ -169,12 +175,12 @@ fi cd "$root" # Set default image tag policy before building -if [[ "$build_server_pkg" == true ]]; then +if [[ "$build_server_pkg" == true || "$build_client_pkg" == true || "$build_server_pkg_arm" == true || "$build_client_pkg_arm" == true ]]; then DEFAULT_IMAGE_TAG="${bundle_date:-latest}" fi # Select build user profile for pkg vs default -if [[ "$build_server_pkg" == true || "$build_client_pkg" == true ]]; then +if [[ "$build_server_pkg" == true || "$build_client_pkg" == true || "$build_server_pkg_arm" == true || "$build_client_pkg_arm" == true ]]; then export ARGUS_BUILD_PROFILE=pkg fi @@ -902,6 +908,44 @@ if [[ "$build_client_pkg" == true ]]; then fi fi +# ARM 专用:Server/Client 部署包 +if [[ "$build_server_pkg_arm" == true ]]; then + if [[ -z "$bundle_date" ]]; then + echo "❌ arm_server_pkg requires --version YYYYMMDD" >&2 + build_failed=true + else + echo "" + echo "🧳 Building ARM Server package..." + if ! 
"$root/deployment_new/build/make_arm_server_package.sh" --version "$bundle_date"; then + build_failed=true + fi + fi +fi + +if [[ "$build_client_pkg_arm" == true ]]; then + if [[ -z "$bundle_date" ]]; then + echo "❌ arm_client_pkg requires --version YYYYMMDD" >&2 + build_failed=true + else + echo "" + echo "🧳 Building ARM Client-CPU package..." + cpu_repo="argus-sys-metric-test-node-arm64" + cpu_image_arg="$cpu_repo:latest" + # 若已存在 :,优先使用;否则退回 latest 交由打包脚本打 tag + if docker image inspect "${cpu_repo}:${bundle_date}" >/dev/null 2>&1; then + cpu_image_arg="${cpu_repo}:${bundle_date}" + elif ! docker image inspect "${cpu_repo}:latest" >/dev/null 2>&1; then + echo "❌ required CPU node image missing: ${cpu_repo}:{${bundle_date},latest}" >&2 + build_failed=true + fi + if [[ "$build_failed" != "true" ]]; then + if ! "$root/deployment_new/build/make_arm_client_package.sh" --version "$bundle_date" --image "$cpu_image_arg"; then + build_failed=true + fi + fi + fi +fi + echo "=======================================" echo "📦 Build Summary" echo "=======================================" diff --git a/deployment_new/build/make_arm_client_package.sh b/deployment_new/build/make_arm_client_package.sh new file mode 100755 index 0000000..67516d5 --- /dev/null +++ b/deployment_new/build/make_arm_client_package.sh @@ -0,0 +1,112 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Make ARM Client-CPU package (versioned CPU node image, compose, env, docs) + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" +TEMPL_DIR="$ROOT_DIR/deployment_new/templates/client_arm" +ART_ROOT="$ROOT_DIR/deployment_new/artifact/client_arm" + +COMMON_SH="$ROOT_DIR/deployment_new/build/common.sh" +. 
"$COMMON_SH" + +usage(){ cat </ and client_arm_YYYYMMDD.tar.gz +EOF +} + +VERSION="" +IMAGE="argus-sys-metric-test-node-arm64:latest" +while [[ $# -gt 0 ]]; do + case "$1" in + --version) VERSION="$2"; shift 2;; + --image) IMAGE="$2"; shift 2;; + -h|--help) usage; exit 0;; + *) err "unknown arg: $1"; usage; exit 1;; + esac +done +if [[ -z "$VERSION" ]]; then VERSION="$(today_version)"; fi + +require_cmd docker tar gzip + +STAGE="$(mktemp -d)"; trap 'rm -rf "$STAGE"' EXIT +PKG_DIR="$ART_ROOT/$VERSION" +mkdir -p "$PKG_DIR" "$STAGE/images" "$STAGE/compose" "$STAGE/docs" "$STAGE/scripts" "$STAGE/private/argus" + +# 1) Save CPU node image with version tag +if ! docker image inspect "$IMAGE" >/dev/null 2>&1; then + err "missing image: $IMAGE"; exit 1; fi + +REPO="${IMAGE%%:*}"; TAG_VER="$REPO:$VERSION" +docker tag "$IMAGE" "$TAG_VER" +out_tar="$STAGE/images/${REPO//\//-}-$VERSION.tar" +docker save -o "$out_tar" "$TAG_VER" +gzip -f "$out_tar" + +# 2) Compose + env template +cp "$TEMPL_DIR/compose/docker-compose.yml" "$STAGE/compose/docker-compose.yml" +ENV_EX="$STAGE/compose/.env.example" +cat >"$ENV_EX" </dev/null || true +fi + +# x86 专用脚本:用于在 x86_64 主机上运行 ARM Client 包 +SCRIPTS_X86_SRC="$TEMPL_DIR/scripts_for_x86" +if [[ -d "$SCRIPTS_X86_SRC" ]]; then + copy_tree "$SCRIPTS_X86_SRC" "$STAGE/scripts_for_x86" + find "$STAGE/scripts_for_x86" -type f -name '*.sh' -exec chmod +x {} + 2>/dev/null || true +fi + +mkdir -p "$STAGE/private/argus/agent" + +# 5) Manifest & checksums +gen_manifest "$STAGE" "$STAGE/manifest.txt" +checksum_dir "$STAGE" "$STAGE/checksums.txt" + +# 6) Move to artifact dir and pack +mkdir -p "$PKG_DIR" +copy_tree "$STAGE" "$PKG_DIR" + +OUT_TAR_DIR="$(dirname "$PKG_DIR")" +OUT_TAR="$OUT_TAR_DIR/client_arm_${VERSION}.tar.gz" +log "Creating tarball: $OUT_TAR" +(cd "$PKG_DIR/.." 
&& tar -czf "$OUT_TAR" "$(basename "$PKG_DIR")") +log "Client-ARM package ready: $PKG_DIR" +echo "$OUT_TAR" diff --git a/deployment_new/build/make_arm_server_package.sh b/deployment_new/build/make_arm_server_package.sh new file mode 100755 index 0000000..3f31eef --- /dev/null +++ b/deployment_new/build/make_arm_server_package.sh @@ -0,0 +1,175 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Make ARM server deployment package (master + prometheus + grafana + alertmanager + web) + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" +TEMPL_DIR="$ROOT_DIR/deployment_new/templates/server_arm" +ART_ROOT="$ROOT_DIR/deployment_new/artifact/server_arm" + +COMMON_SH="$ROOT_DIR/deployment_new/build/common.sh" +. "$COMMON_SH" + +usage(){ cat </ and server_arm_YYYYMMDD.tar.gz +EOF +} + +VERSION="" +while [[ $# -gt 0 ]]; do + case "$1" in + --version) VERSION="$2"; shift 2;; + -h|--help) usage; exit 0;; + *) err "unknown arg: $1"; usage; exit 1;; + esac +done +if [[ -z "$VERSION" ]]; then VERSION="$(today_version)"; fi + +require_cmd docker tar gzip awk sed + +IMAGES=( + argus-master-arm64 + argus-metric-prometheus-arm64 + argus-metric-prometheus-targets-updater-arm64 + argus-metric-grafana-arm64 + argus-alertmanager-arm64 + argus-web-frontend-arm64 + argus-web-proxy-arm64 +) + +STAGE="$(mktemp -d)"; trap 'rm -rf "$STAGE"' EXIT +PKG_DIR="$ART_ROOT/$VERSION" +mkdir -p "$PKG_DIR" "$STAGE/images" "$STAGE/compose" "$STAGE/docs" "$STAGE/scripts" "$STAGE/private/argus" + +# 1) Save per-image tars with version tag +log "Tagging and saving ARM server images (version=$VERSION)" +for repo in "${IMAGES[@]}"; do + if ! docker image inspect "$repo:latest" >/dev/null 2>&1 && ! 
docker image inspect "$repo:$VERSION" >/dev/null 2>&1; then + err "missing image: $repo (need :latest or :$VERSION)"; exit 1; fi + if docker image inspect "$repo:$VERSION" >/dev/null 2>&1; then + tag="$repo:$VERSION" + else + docker tag "$repo:latest" "$repo:$VERSION" + tag="$repo:$VERSION" + fi + out_tar="$STAGE/images/${repo//\//-}-$VERSION.tar" + docker save -o "$out_tar" "$tag" + gzip -f "$out_tar" +done + +# 1.1) (可选)打包 x86 ARM 测试用基础镜像 ubuntu:22.04(供 scripts_for_x86/install.sh 使用) +TEST_BASE="ubuntu:22.04" +if docker image inspect "$TEST_BASE" >/dev/null 2>&1 || docker pull "$TEST_BASE" >/dev/null 2>&1; then + log "Saving ARM test base image for x86 host: $TEST_BASE" + test_tar="$STAGE/images/ubuntu-22.04-arm-test.tar" + docker save -o "$test_tar" "$TEST_BASE" + gzip -f "$test_tar" +else + warn "无法获取 $TEST_BASE,x86 上的 ARM 运行能力将无法使用该镜像做快速自检(可忽略,或在目标机自行拉取)。" +fi + +# 1.2) 随包提供 tonistiigi/binfmt 镜像,便于 x86 离线机器安装 binfmt/QEMU +BINFMT_IMAGE="tonistiigi/binfmt:latest" +if docker image inspect "$BINFMT_IMAGE" >/dev/null 2>&1 || docker pull "$BINFMT_IMAGE" >/dev/null 2>&1; then + log "Saving binfmt helper image for x86 host: $BINFMT_IMAGE" + binfmt_tar="$STAGE/images/tonistiigi-binfmt.tar" + docker save -o "$binfmt_tar" "$BINFMT_IMAGE" + gzip -f "$binfmt_tar" +else + warn "无法获取 $BINFMT_IMAGE,x86 上将无法通过离线方式安装 binfmt(可在目标机上自行 docker pull)。" +fi + +# 2) Compose + env template(ARM 专用) +cp "$TEMPL_DIR/compose/docker-compose.yml" "$STAGE/compose/docker-compose.yml" +ENV_EX="$STAGE/compose/.env.example" +cat >"$ENV_EX" </dev/null || true +fi + +# x86 专用脚本:用于在 x86_64 主机上运行 ARM server 包 +SCRIPTS_X86_SRC="$TEMPL_DIR/scripts_for_x86" +if [[ -d "$SCRIPTS_X86_SRC" ]]; then + copy_tree "$SCRIPTS_X86_SRC" "$STAGE/scripts_for_x86" + find "$STAGE/scripts_for_x86" -type f -name '*.sh' -exec chmod +x {} + 2>/dev/null || true +fi + +# 4) Private skeleton (最小 ARM server 所需目录) +mkdir -p \ + "$STAGE/private/argus/etc" \ + "$STAGE/private/argus/master" \ + 
"$STAGE/private/argus/metric/prometheus" \ + "$STAGE/private/argus/metric/prometheus/data" \ + "$STAGE/private/argus/metric/prometheus/rules" \ + "$STAGE/private/argus/metric/prometheus/targets" \ + "$STAGE/private/argus/metric/grafana" \ + "$STAGE/private/argus/metric/grafana/data" \ + "$STAGE/private/argus/metric/grafana/logs" \ + "$STAGE/private/argus/metric/grafana/plugins" \ + "$STAGE/private/argus/metric/grafana/provisioning/datasources" \ + "$STAGE/private/argus/metric/grafana/provisioning/dashboards" \ + "$STAGE/private/argus/metric/grafana/data/sessions" \ + "$STAGE/private/argus/metric/grafana/data/dashboards" \ + "$STAGE/private/argus/metric/grafana/config" + +# 5) Manifest & checksums +gen_manifest "$STAGE" "$STAGE/manifest.txt" +checksum_dir "$STAGE" "$STAGE/checksums.txt" + +# 6) Move to artifact dir and pack +mkdir -p "$PKG_DIR" +copy_tree "$STAGE" "$PKG_DIR" + +OUT_TAR_DIR="$(dirname "$PKG_DIR")" +OUT_TAR="$OUT_TAR_DIR/server_arm_${VERSION}.tar.gz" +log "Creating tarball: $OUT_TAR" +(cd "$PKG_DIR/.." 
&& tar -czf "$OUT_TAR" "$(basename "$PKG_DIR")") +log "ARM Server package ready: $PKG_DIR" +echo "$OUT_TAR" diff --git a/deployment_new/templates/client_arm/compose/docker-compose.yml b/deployment_new/templates/client_arm/compose/docker-compose.yml new file mode 100644 index 0000000..4be0032 --- /dev/null +++ b/deployment_new/templates/client_arm/compose/docker-compose.yml @@ -0,0 +1,29 @@ +version: "3.8" + +networks: + argus-sys-net: + external: true + +services: + metric-cpu-node: + image: ${NODE_CPU_IMAGE_TAG:-argus-sys-metric-test-node-arm64:${PKG_VERSION}} + platform: linux/arm64 + container_name: argus-metric-cpu-node + hostname: ${CPU_NODE_HOSTNAME} + restart: unless-stopped + environment: + - TZ=Asia/Shanghai + - DEBIAN_FRONTEND=noninteractive + - MASTER_ENDPOINT=${MASTER_ENDPOINT} + - ARGUS_BUILD_UID=${ARGUS_BUILD_UID:-2133} + - ARGUS_BUILD_GID=${ARGUS_BUILD_GID:-2015} + - AGENT_ENV=${AGENT_ENV} + - AGENT_USER=${AGENT_USER} + - AGENT_INSTANCE=${AGENT_INSTANCE} + networks: + argus-sys-net: + aliases: + - ${AGENT_INSTANCE}.node.argus.com + volumes: + - ../private/argus/agent:/private/argus/agent + diff --git a/deployment_new/templates/client_arm/docs/INSTALL_CLIENT_ARM_zh.md b/deployment_new/templates/client_arm/docs/INSTALL_CLIENT_ARM_zh.md new file mode 100644 index 0000000..13031b7 --- /dev/null +++ b/deployment_new/templates/client_arm/docs/INSTALL_CLIENT_ARM_zh.md @@ -0,0 +1,154 @@ +# ARM Client-CPU 安装指南(client_arm) + +本包用于在 ARM64 CPU 节点上部署 Argus 的最小客户端: + +- `argus-agent`(负责注册/上报状态到 master) +- `node-exporter`(暴露该节点的系统指标) + +> 注意:本 ARM 客户端包 **不包含 GPU/DCGM exporter 和 Fluent Bit**,仅用于 CPU 节点 metrics。 + +--- + +## 1. 
前置条件 + +- 目标机器为 ARM64(例如:aarch64 服务器 / 节点); +- 已安装 Docker 和 docker compose(v2,支持 `docker compose` 命令); +- 能访问 master 所在网络,或已经通过 overlay 网络加入到与 server 相同的 `argus-sys-net` 中。 + +如采用 overlay 网络(Swarm 模式),需提前创建 `argus-sys-net`: + +```bash +docker network create --driver overlay argus-sys-net || true +``` + +如仅使用本地 bridge 网络,可将 compose 中 `networks: argus-sys-net` 调整为本地自定义 network。 + +--- + +## 2. 解压和加载镜像 + +假设你已经将 `client_arm_YYYYMMDD.tar.gz` 拷贝到 ARM 节点: + +```bash +tar -xzf client_arm_YYYYMMDD.tar.gz +cd YYYYMMDD +``` + +加载 CPU node 镜像: + +```bash +for f in images/*.tar.gz; do + gunzip -c "$f" | docker load +done +``` + +可以通过: + +```bash +docker images | grep 'argus-sys-metric-test-node-arm64' +``` + +确认镜像已加载。 + +--- + +## 3. 配置 .env + +进入 compose 目录: + +```bash +cd compose +cp .env.example .env +``` + +需要填写的关键字段: + +- `MASTER_ENDPOINT` + - 例如:`http://master.argus.com:3000` 或 `http://<master-host-ip>:32300` +- `AGENT_ENV` / `AGENT_USER` / `AGENT_INSTANCE` + - 用于识别环境、用户和实例,在 UI 中区分节点: + - 示例:`AGENT_ENV=prod`, `AGENT_USER=ops`, `AGENT_INSTANCE=node001` +- `CPU_NODE_HOSTNAME` + - 节点在配置中的 hostname,一般与宿主 `hostname` 一致,例如 `swarm-metric-node-001`。 +- `ARGUS_BUILD_UID` / `ARGUS_BUILD_GID` + - 用于挂载目录 `/private/argus/agent` 的权限映射。 + +例如: + +```env +PKG_VERSION=20251124 +NODE_CPU_IMAGE_TAG=argus-sys-metric-test-node-arm64:20251124 + +COMPOSE_PROJECT_NAME=argus-client-arm + +MASTER_ENDPOINT=http://master.argus.com:3000 +AGENT_ENV=dev2 +AGENT_USER=yuyr +AGENT_INSTANCE=node001sX +CPU_NODE_HOSTNAME=swarm-metric-node-001 + +ARGUS_BUILD_UID=1000 +ARGUS_BUILD_GID=1000 +``` + +--- + +## 4. 准备挂载目录 + +包中已经预创建 `private/argus/agent` 目录: + +- `../private/argus/agent` + +可以根据需要设置所有者和权限: + +```bash +cd .. +sudo chown -R "$(id -u):$(id -g)" private/argus +``` + +确保与 `.env` 中的 `ARGUS_BUILD_UID/GID` 一致。 + +--- + +## 5. 启动客户端容器 + +在 `compose/` 目录执行: + +```bash +docker compose --env-file .env up -d +``` + +查看状态: + +```bash +docker compose ps +``` + +预期看到: + +- `argus-metric-cpu-node` 处于 `Up` 状态。 + +--- + +## 6. 
在 server 端验证 + +在 server 侧(master + Prometheus + Grafana 已经通过 ARM server 包部署): + +1. 在 master 的 UI 或 API 中: + - `GET /api/v1/master/nodes` 应能看到新节点,`name=CPU_NODE_HOSTNAME`; +2. 在 Prometheus 中: + - `up{job="node"}` 中应有当前节点记录,且 `value=1`; + - 可以通过 `node_load1{hostname="swarm-metric-node-001"}` 等表达式查看指标; +3. 在 Web 门户 / Grafana: + - 节点列表页面中能看到该节点; + - 通过“按 hostname 的节点面板”查看对应指标。 + +--- + +## 7. 注意事项 + +- ARM 客户端包只负责“CPU 节点 metrics”,不包含 GPU/DCGM 或 Fluent Bit; +- 如需要 GPU 支持,请继续使用 `client_gpu` 包,并部署在 x86_64 + NVIDIA GPU 节点上; +- ARM 节点的网络连通性(到 master / Prometheus)需要运维侧提前规划; + 本包不做网络路由/防火墙配置,仅假定可以直接访问 `MASTER_ENDPOINT`。 + diff --git a/deployment_new/templates/client_arm/scripts/config.sh b/deployment_new/templates/client_arm/scripts/config.sh new file mode 100644 index 0000000..517043e --- /dev/null +++ b/deployment_new/templates/client_arm/scripts/config.sh @@ -0,0 +1,105 @@ +#!/usr/bin/env bash +set -euo pipefail + +# ARM Client-CPU 配置脚本: +# - 读取 server 侧生成的 cluster-info.env(包含 SWARM_MANAGER_ADDR / SWARM_JOIN_TOKEN_WORKER) +# - 加入 Swarm(worker) +# - 预热 overlay 网络,确保能通过 master.argus.com 访问 server +# - 生成/更新 compose/.env(填充 Swarm 相关字段,保留人工填写的 AGENT_*、CPU_NODE_HOSTNAME 等) + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." 
&& pwd)" +PKG_ROOT="$ROOT_DIR" +ENV_EX="$PKG_ROOT/compose/.env.example" +ENV_OUT="$PKG_ROOT/compose/.env" + +info(){ echo -e "\033[34m[CONFIG-ARM-CLIENT]\033[0m $*"; } +err(){ echo -e "\033[31m[ERROR]\033[0m $*" >&2; } + +require(){ local ok=1; for c in "$@"; do command -v "$c" >/dev/null 2>&1 || { err "缺少依赖: $c"; ok=0; }; done; [[ $ok -eq 1 ]]; } + +# Compose 检测:优先 docker compose(v2),回退 docker-compose(v1) +require_compose(){ + if docker compose version >/dev/null 2>&1; then return 0; fi + if command -v docker-compose >/dev/null 2>&1 && docker-compose version >/dev/null 2>&1; then return 0; fi + err "未检测到 Docker Compose,请安装 docker compose v2 或 docker-compose v1"; exit 1 +} + +require docker curl jq awk sed tar gzip +require_compose + +[[ -f "$ENV_EX" ]] || { err "缺少模板文件: $ENV_EX"; exit 1; } + +# 磁盘空间检查(MB) +check_disk(){ local p="$1"; local need=5120; local free + free=$(df -Pm "$p" | awk 'NR==2{print $4+0}') + if [[ -z "$free" || "$free" -lt "$need" ]]; then err "磁盘空间不足: $p 剩余 ${free:-0}MB (<${need}MB)"; fi +} +check_disk "$PKG_ROOT" || true +check_disk "/var/lib/docker" || true + +# 导入 cluster-info.env(默认取当前包根,也可用 CLUSTER_INFO 指定路径) +CI_IN="${CLUSTER_INFO:-$PKG_ROOT/cluster-info.env}" +info "读取 cluster-info.env: $CI_IN" +[[ -f "$CI_IN" ]] || { err "找不到 cluster-info.env(默认当前包根,或设置环境变量 CLUSTER_INFO 指定绝对路径)"; exit 1; } +set -a; source "$CI_IN"; set +a +[[ -n "${SWARM_MANAGER_ADDR:-}" && -n "${SWARM_JOIN_TOKEN_WORKER:-}" ]] || { err "cluster-info.env 缺少 SWARM 信息(SWARM_MANAGER_ADDR/SWARM_JOIN_TOKEN_WORKER)"; exit 1; } + +# 加入 Swarm(幂等) +info "加入 Swarm(幂等):$SWARM_MANAGER_ADDR" +docker swarm join --token "$SWARM_JOIN_TOKEN_WORKER" "$SWARM_MANAGER_ADDR":2377 >/dev/null 2>&1 || true + +# 导入 busybox 并做 overlay 预热与连通性 +NET_NAME="${ARGUS_OVERLAY_NET:-argus-sys-net}" +if ! 
docker image inspect busybox:latest >/dev/null 2>&1; then + if [[ -f "$PKG_ROOT/images/busybox.tar" ]]; then + info "加载 busybox.tar 以预热 overlay" + docker load -i "$PKG_ROOT/images/busybox.tar" >/dev/null + else + err "缺少 busybox 镜像(包内 images/busybox.tar 或本地 busybox:latest),无法预热 overlay $NET_NAME"; exit 1 + fi +fi + +# 预热容器(worker 侧加入 overlay 以便本地可见) +docker rm -f argus-net-warmup >/dev/null 2>&1 || true +info "启动 warmup 容器加入 overlay: $NET_NAME" +docker run -d --rm --name argus-net-warmup --network "$NET_NAME" busybox:latest sleep 600 >/dev/null 2>&1 || true +for i in {1..60}; do docker network inspect "$NET_NAME" >/dev/null 2>&1 && { info "overlay 可见 (t=${i}s)"; break; }; sleep 1; done +docker network inspect "$NET_NAME" >/dev/null 2>&1 || { err "预热后仍未看到 overlay: $NET_NAME;请确认 server 侧 overlay 已创建且可达"; exit 1; } + +# 通过 warmup 容器测试实际数据通路(alias → master) +if ! docker exec argus-net-warmup sh -lc "ping -c 1 -W 2 master.argus.com >/dev/null 2>&1"; then + err "warmup 容器内无法通过别名访问 master.argus.com;请确认 server compose 已启动并加入 overlay $NET_NAME" + exit 1 +fi +info "warmup 容器内可达 master.argus.com(Docker DNS + alias 正常)" + +# 生成/更新 .env(保留人工填写项,不覆盖已有键) +if [[ ! -f "$ENV_OUT" ]]; then + cp "$ENV_EX" "$ENV_OUT" +fi + +set_kv(){ local k="$1" v="$2"; if grep -q "^${k}=" "$ENV_OUT"; then sed -i -E "s#^${k}=.*#${k}=${v}#" "$ENV_OUT"; else echo "${k}=${v}" >> "$ENV_OUT"; fi } + +# 写入与 Swarm 相关的字段,便于后续诊断 +set_kv SWARM_MANAGER_ADDR "${SWARM_MANAGER_ADDR:-}" +set_kv SWARM_JOIN_TOKEN_WORKER "${SWARM_JOIN_TOKEN_WORKER:-}" +set_kv SWARM_JOIN_TOKEN_MANAGER "${SWARM_JOIN_TOKEN_MANAGER:-}" + +# 若未显式设置 MASTER_ENDPOINT,则默认走 overlay 别名 +if ! 
grep -q '^MASTER_ENDPOINT=' "$ENV_OUT"; then + echo "MASTER_ENDPOINT=http://master.argus.com:3000" >> "$ENV_OUT" +fi + +# 检查用户必须填写的字段 +REQ_VARS=(AGENT_ENV AGENT_USER AGENT_INSTANCE CPU_NODE_HOSTNAME) +missing=() +for v in "${REQ_VARS[@]}"; do + val=$(grep -E "^$v=" "$ENV_OUT" | head -1 | cut -d= -f2-) + if [[ -z "$val" ]]; then missing+=("$v"); fi +done +if [[ ${#missing[@]} -gt 0 ]]; then + err "以下变量必须在 compose/.env 中填写:${missing[*]}(已保留你现有的内容,不会被覆盖)" + exit 1 +fi + +info "已生成 compose/.env;可执行 scripts/install.sh 启动 ARM Client-CPU" diff --git a/deployment_new/templates/client_arm/scripts/install.sh b/deployment_new/templates/client_arm/scripts/install.sh new file mode 100644 index 0000000..a378c90 --- /dev/null +++ b/deployment_new/templates/client_arm/scripts/install.sh @@ -0,0 +1,48 @@ +#!/usr/bin/env bash +set -euo pipefail + +# ARM Client-CPU 安装脚本: +# - 导入 ARM CPU node 镜像 +# - 启动 compose 中的 metric-cpu-node 服务 + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +PKG_ROOT="$ROOT_DIR" +ENV_FILE="$PKG_ROOT/compose/.env" +COMPOSE_FILE="$PKG_ROOT/compose/docker-compose.yml" + +info(){ echo -e "\033[34m[INSTALL-ARM-CLIENT]\033[0m $*"; } +err(){ echo -e "\033[31m[ERROR]\033[0m $*" >&2; } + +require(){ local ok=1; for c in "$@"; do command -v "$c" >/dev/null 2>&1 || { err "缺少依赖: $c"; ok=0; }; done; [[ $ok -eq 1 ]]; } + +require_compose(){ + if docker compose version >/dev/null 2>&1; then return 0; fi + if command -v docker-compose >/dev/null 2>&1 && docker-compose version >/dev/null 2>&1; then return 0; fi + err "未检测到 Docker Compose,请安装 docker compose v2 或 docker-compose v1"; exit 1 +} + +require docker +require_compose + +[[ -f "$ENV_FILE" ]] || { err "缺少 compose/.env,请先运行 scripts/config.sh"; exit 1; } +info "使用环境文件: $ENV_FILE" + +set -a; source "$ENV_FILE"; set +a + +# 导入 ARM CPU node 镜像 +IMG_TGZ=$(ls -1 "$PKG_ROOT"/images/argus-sys-metric-test-node-arm64-*.tar.gz 2>/dev/null | head -1 || true) +[[ -n "$IMG_TGZ" ]] || { err "找不到 ARM CPU node 镜像 
tar.gz(argus-sys-metric-test-node-arm64-*.tar.gz)"; exit 1; } +info "导入 ARM CPU node 镜像: $(basename "$IMG_TGZ")" +tmp=$(mktemp); gunzip -c "$IMG_TGZ" > "$tmp"; docker load -i "$tmp" >/dev/null; rm -f "$tmp" + +# 确保 agent 挂载目录存在 +mkdir -p "$PKG_ROOT/private/argus/agent" + +# 启动 compose +PROJECT="${COMPOSE_PROJECT_NAME:-argus-client-arm}" +info "启动 ARM CPU 节点 (docker compose -p $PROJECT up -d)" +docker compose -p "$PROJECT" --env-file "$ENV_FILE" -f "$COMPOSE_FILE" up -d +docker compose -p "$PROJECT" --env-file "$ENV_FILE" -f "$COMPOSE_FILE" ps + +info "ARM Client-CPU 安装完成。可通过 docker logs -f argus-metric-cpu-node 查看节点容器日志。" + diff --git a/deployment_new/templates/client_arm/scripts/uninstall.sh b/deployment_new/templates/client_arm/scripts/uninstall.sh new file mode 100644 index 0000000..54664df --- /dev/null +++ b/deployment_new/templates/client_arm/scripts/uninstall.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +PKG_ROOT="$ROOT_DIR" +ENV_FILE="$PKG_ROOT/compose/.env" +COMPOSE_FILE="$PKG_ROOT/compose/docker-compose.yml" + +if [[ -f "$ENV_FILE" ]]; then + set -a; source "$ENV_FILE"; set +a +fi + +PROJECT="${COMPOSE_PROJECT_NAME:-argus-client-arm}" + +err(){ echo -e "\033[31m[ERROR]\033[0m $*" >&2; } + +require_compose(){ + if docker compose version >/dev/null 2>&1; then return 0; fi + if command -v docker-compose >/dev/null 2>&1 && docker-compose version >/dev/null 2>&1; then return 0; fi + err "未检测到 Docker Compose,请安装 docker compose v2 或 docker-compose v1"; exit 1 +} + +require_compose + +echo "[UNINSTALL-ARM-CLIENT] stopping compose (project=$PROJECT)" +docker compose -p "$PROJECT" --env-file "$ENV_FILE" -f "$COMPOSE_FILE" down --remove-orphans || true +echo "[UNINSTALL-ARM-CLIENT] done" + diff --git a/deployment_new/templates/client_arm/scripts_for_x86/config.sh b/deployment_new/templates/client_arm/scripts_for_x86/config.sh new file mode 100644 index 0000000..4bd661f --- /dev/null +++ 
b/deployment_new/templates/client_arm/scripts_for_x86/config.sh @@ -0,0 +1,96 @@ +#!/usr/bin/env bash +set -euo pipefail + +# x86 上安装 ARM Client-CPU 的配置脚本: +# 逻辑与 ARM 版基本一致(读取 cluster-info、加入 Swarm、预热 overlay、生成 .env),只是日志前缀不同。 + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +PKG_ROOT="$ROOT_DIR" +ENV_EX="$PKG_ROOT/compose/.env.example" +ENV_OUT="$PKG_ROOT/compose/.env" + +info(){ echo -e "\033[34m[CONFIG-ARM-CLIENT-X86]\033[0m $*"; } +err(){ echo -e "\033[31m[ERROR]\033[0m $*" >&2; } + +require(){ local ok=1; for c in "$@"; do command -v "$c" >/dev/null 2>&1 || { err "缺少依赖: $c"; ok=0; }; done; [[ $ok -eq 1 ]]; } + +require_compose(){ + if docker compose version >/dev/null 2>&1; then return 0; fi + if command -v docker-compose >/dev/null 2>&1 && docker-compose version >/dev/null 2>&1; then return 0; fi + err "未检测到 Docker Compose,请安装 docker compose v2 或 docker-compose v1"; exit 1 +} + +require docker curl jq awk sed tar gzip +require_compose + +[[ -f "$ENV_EX" ]] || { err "缺少模板文件: $ENV_EX"; exit 1; } + +# 磁盘空间检查 +check_disk(){ local p="$1"; local need=5120; local free + free=$(df -Pm "$p" | awk 'NR==2{print $4+0}') + if [[ -z "$free" || "$free" -lt "$need" ]]; then err "磁盘空间不足: $p 剩余 ${free:-0}MB (<${need}MB)"; fi +} +check_disk "$PKG_ROOT" || true +check_disk "/var/lib/docker" || true + +# 导入 cluster-info.env +CI_IN="${CLUSTER_INFO:-$PKG_ROOT/cluster-info.env}" +info "读取 cluster-info.env: $CI_IN" +[[ -f "$CI_IN" ]] || { err "找不到 cluster-info.env(默认当前包根,或设置环境变量 CLUSTER_INFO 指定绝对路径)"; exit 1; } +set -a; source "$CI_IN"; set +a +[[ -n "${SWARM_MANAGER_ADDR:-}" && -n "${SWARM_JOIN_TOKEN_WORKER:-}" ]] || { err "cluster-info.env 缺少 SWARM 信息(SWARM_MANAGER_ADDR/SWARM_JOIN_TOKEN_WORKER)"; exit 1; } + +# 加入 Swarm(幂等) +info "加入 Swarm(幂等):$SWARM_MANAGER_ADDR" +docker swarm join --token "$SWARM_JOIN_TOKEN_WORKER" "$SWARM_MANAGER_ADDR":2377 >/dev/null 2>&1 || true + +# 导入 busybox 并做 overlay 预热与连通性 +NET_NAME="${ARGUS_OVERLAY_NET:-argus-sys-net}" +if ! 
docker image inspect busybox:latest >/dev/null 2>&1; then + if [[ -f "$PKG_ROOT/images/busybox.tar" ]]; then + info "加载 busybox.tar 以预热 overlay" + docker load -i "$PKG_ROOT/images/busybox.tar" >/dev/null + else + err "缺少 busybox 镜像(包内 images/busybox.tar 或本地 busybox:latest),无法预热 overlay $NET_NAME"; exit 1 + fi +fi + +docker rm -f argus-net-warmup >/dev/null 2>&1 || true +info "启动 warmup 容器加入 overlay: $NET_NAME" +docker run -d --rm --name argus-net-warmup --network "$NET_NAME" busybox:latest sleep 600 >/dev/null 2>&1 || true +for i in {1..60}; do docker network inspect "$NET_NAME" >/dev/null 2>&1 && { info "overlay 可见 (t=${i}s)"; break; }; sleep 1; done +docker network inspect "$NET_NAME" >/dev/null 2>&1 || { err "预热后仍未看到 overlay: $NET_NAME;请确认 server 侧 overlay 已创建且可达"; exit 1; } + +if ! docker exec argus-net-warmup sh -lc "ping -c 1 -W 2 master.argus.com >/dev/null 2>&1"; then + err "warmup 容器内无法通过别名访问 master.argus.com;请确认 server compose 已启动并加入 overlay $NET_NAME" + exit 1 +fi +info "warmup 容器内可达 master.argus.com(Docker DNS + alias 正常)" + +if [[ ! -f "$ENV_OUT" ]]; then + cp "$ENV_EX" "$ENV_OUT" +fi + +set_kv(){ local k="$1" v="$2"; if grep -q "^${k}=" "$ENV_OUT"; then sed -i -E "s#^${k}=.*#${k}=${v}#" "$ENV_OUT"; else echo "${k}=${v}" >> "$ENV_OUT"; fi } + +set_kv SWARM_MANAGER_ADDR "${SWARM_MANAGER_ADDR:-}" +set_kv SWARM_JOIN_TOKEN_WORKER "${SWARM_JOIN_TOKEN_WORKER:-}" +set_kv SWARM_JOIN_TOKEN_MANAGER "${SWARM_JOIN_TOKEN_MANAGER:-}" + +if ! 
# Verify that every mandatory key in compose/.env has a non-empty value and
# report all offenders at once.
REQ_VARS=(AGENT_ENV AGENT_USER AGENT_INSTANCE CPU_NODE_HOSTNAME)
missing=()
for v in "${REQ_VARS[@]}"; do
  # A single awk call exits 0 even when the key is absent. The previous
  # grep|head|cut pipeline returned non-zero in that case, which under
  # `set -euo pipefail` killed the script silently before the message below
  # could be printed.
  val="$(awk -v k="$v" 'index($0, k "=") == 1 { print substr($0, length(k) + 2); exit }' "$ENV_OUT")"
  if [[ -z "$val" ]]; then
    missing+=("$v")
  fi
done
if [[ ${#missing[@]} -gt 0 ]]; then
  err "以下变量必须在 compose/.env 中填写:${missing[*]}(已保留你现有的内容,不会被覆盖)"
  exit 1
fi

info "已生成 compose/.env;可执行 scripts_for_x86/install.sh 启动 ARM Client-CPU"
# Print the first bundled archive under $ROOT_DIR/images matching a glob.
# Arguments: $1 - file-name glob, e.g. 'tonistiigi-binfmt*.tar.gz'
# Outputs:   the matching path on stdout, or nothing when no file matches
_first_bundled_tar() {
  local pattern="$1" candidate
  # $pattern is deliberately unquoted so the shell expands the glob; this
  # replaces parsing the output of ls.
  for candidate in "$ROOT_DIR"/images/$pattern; do
    if [[ -f "$candidate" ]]; then
      printf '%s\n' "$candidate"
      return 0
    fi
  done
  return 0
}

# Load a gzip-compressed image archive ($1) into Docker by streaming it,
# so no temporary file is left behind when the load fails.
_load_bundled_image() {
  gunzip -c "$1" | docker load >/dev/null
}

# Succeed when an arm64 container from image $1 starts and reports aarch64.
_arm64_runnable() {
  docker run --rm --platform=linux/arm64 "$1" uname -m 2>/dev/null | grep -q 'aarch64'
}

#######################################
# Make sure this host can run linux/arm64 containers, installing
# binfmt/QEMU through tonistiigi/binfmt when it cannot.
# Globals:   ROOT_DIR (read) - package root holding the images/ directory
# Arguments: none
# Outputs:   progress through info(), failures through err()
# Returns:   0 when arm64 containers run, or when the host is not x86_64
#            (nothing is installed in that case); exits 1 on failure
#######################################
ensure_binfmt_arm64() {
  local arch
  local test_img="ubuntu:22.04"
  local binfmt_img="tonistiigi/binfmt:latest"
  local test_tar binfmt_tar

  arch="$(uname -m)"
  info "检查当前主机架构: ${arch}"
  if [[ "$arch" != "x86_64" && "$arch" != "amd64" ]]; then
    info "当前并非 x86_64(${arch}),通常应直接在 ARM 服务器上使用 scripts/install.sh。继续执行但不强制安装 binfmt。"
    return 0
  fi

  require docker || exit 1

  # Prefer the ARM test base image shipped inside the package, if any.
  test_tar="$(_first_bundled_tar 'ubuntu-22.04-arm-test*.tar.gz')"
  if [[ -n "$test_tar" ]] && ! docker image inspect "$test_img" >/dev/null 2>&1; then
    info "从安装包加载 ARM 测试基础镜像: $(basename "$test_tar")"
    _load_bundled_image "$test_tar" || exit 1
  fi

  info "验证是否已能运行 linux/arm64 容器..."
  if _arm64_runnable "$test_img"; then
    info "检测到本机已支持 ARM64 容器运行 (uname -m = aarch64)"
    return 0
  fi

  info "未检测到 ARM64 运行能力,尝试通过 tonistiigi/binfmt 安装 binfmt/QEMU ..."
  binfmt_tar="$(_first_bundled_tar 'tonistiigi-binfmt*.tar.gz')"
  if [[ -n "$binfmt_tar" ]] && ! docker image inspect "$binfmt_img" >/dev/null 2>&1; then
    info "从安装包加载 tonistiigi/binfmt 离线镜像: $(basename "$binfmt_tar")"
    _load_bundled_image "$binfmt_tar" || exit 1
  fi

  if ! docker run --privileged --rm "$binfmt_img" --install all; then
    err "tonistiigi/binfmt 安装失败,请检查 Docker 权限或确认离线镜像是否完整。"
    exit 1
  fi

  info "再次验证 ARM64 容器可运行状态..."
  if ! _arm64_runnable "$test_img"; then
    err "安装 binfmt/QEMU 后仍无法运行 linux/arm64 容器,请手工排查 binfmt_misc 配置。"
    exit 1
  fi
  info "ARM64 容器运行能力检查通过。"
}
PROMETHEUS_BASE_PATH=/private/argus/metric/prometheus + - ARGUS_BUILD_UID=${ARGUS_BUILD_UID:-2133} + - ARGUS_BUILD_GID=${ARGUS_BUILD_GID:-2015} + ports: + - "${PROMETHEUS_PORT:-9090}:9090" + volumes: + - ../private/argus/metric/prometheus:/private/argus/metric/prometheus + - ../private/argus/etc:/private/argus/etc + networks: + argus-sys-net: + aliases: + - prom.metric.argus.com + + prometheus-targets-updater: + image: ${PROM_UPDATER_IMAGE_TAG:-argus-metric-prometheus-targets-updater-arm64:${PKG_VERSION}} + platform: linux/arm64 + container_name: argus-prometheus-targets-updater + restart: unless-stopped + environment: + - TZ=Asia/Shanghai + volumes: + - ../private/argus/metric/prometheus:/private/argus/metric/prometheus + networks: + argus-sys-net: + aliases: + - prom-updater.metric.argus.com + depends_on: + - master + - prometheus + + grafana: + image: ${GRAFANA_IMAGE_TAG:-argus-metric-grafana-arm64:${PKG_VERSION}} + platform: linux/arm64 + container_name: argus-grafana + restart: unless-stopped + environment: + - TZ=Asia/Shanghai + - GRAFANA_BASE_PATH=/private/argus/metric/grafana + - ARGUS_BUILD_UID=${ARGUS_BUILD_UID:-2133} + - ARGUS_BUILD_GID=${ARGUS_BUILD_GID:-2015} + - GF_SERVER_HTTP_PORT=3000 + - GF_LOG_LEVEL=warn + - GF_LOG_MODE=console + - GF_PATHS_PROVISIONING=/private/argus/metric/grafana/provisioning + - GF_AUTH_ANONYMOUS_ENABLED=true + - GF_AUTH_ANONYMOUS_ORG_ROLE=Viewer + ports: + - "${GRAFANA_PORT:-3000}:3000" + volumes: + - ../private/argus/metric/grafana:/private/argus/metric/grafana + - ../private/argus/etc:/private/argus/etc + depends_on: [prometheus] + networks: + argus-sys-net: + aliases: + - grafana.metric.argus.com + + alertmanager: + image: ${ALERT_IMAGE_TAG:-argus-alertmanager-arm64:${PKG_VERSION}} + platform: linux/arm64 + container_name: argus-alertmanager + environment: + - ARGUS_BUILD_UID=${ARGUS_BUILD_UID:-2133} + - ARGUS_BUILD_GID=${ARGUS_BUILD_GID:-2015} + volumes: + - ../private/argus/etc:/private/argus/etc + - 
../private/argus/alert/alertmanager:/private/argus/alert/alertmanager + networks: + argus-sys-net: + aliases: + - alertmanager.alert.argus.com + ports: + - "${ALERTMANAGER_PORT:-9093}:9093" + restart: unless-stopped + + web-frontend: + image: ${FRONT_IMAGE_TAG:-argus-web-frontend-arm64:${PKG_VERSION}} + platform: linux/arm64 + container_name: argus-web-frontend + environment: + - ARGUS_BUILD_UID=${ARGUS_BUILD_UID:-2133} + - ARGUS_BUILD_GID=${ARGUS_BUILD_GID:-2015} + - EXTERNAL_MASTER_PORT=${WEB_PROXY_PORT_8085:-8085} + - EXTERNAL_ALERTMANAGER_PORT=${WEB_PROXY_PORT_8084:-8084} + - EXTERNAL_GRAFANA_PORT=${WEB_PROXY_PORT_8081:-8081} + - EXTERNAL_PROMETHEUS_PORT=${WEB_PROXY_PORT_8082:-8082} + volumes: + - ../private/argus/etc:/private/argus/etc + networks: + argus-sys-net: + aliases: + - web.argus.com + restart: unless-stopped + + web-proxy: + image: ${WEB_PROXY_IMAGE_TAG:-argus-web-proxy-arm64:${PKG_VERSION}} + platform: linux/arm64 + container_name: argus-web-proxy + depends_on: [master, grafana, prometheus, alertmanager] + environment: + - ARGUS_BUILD_UID=${ARGUS_BUILD_UID:-2133} + - ARGUS_BUILD_GID=${ARGUS_BUILD_GID:-2015} + volumes: + - ../private/argus/etc:/private/argus/etc + networks: + argus-sys-net: + aliases: + - proxy.argus.com + ports: + - "${WEB_PROXY_PORT_8080:-8080}:8080" + - "${WEB_PROXY_PORT_8081:-8081}:8081" + - "${WEB_PROXY_PORT_8082:-8082}:8082" + - "${WEB_PROXY_PORT_8083:-8083}:8083" + - "${WEB_PROXY_PORT_8084:-8084}:8084" + - "${WEB_PROXY_PORT_8085:-8085}:8085" + restart: unless-stopped + diff --git a/deployment_new/templates/server_arm/docs/INSTALL_SERVER_ARM_zh.md b/deployment_new/templates/server_arm/docs/INSTALL_SERVER_ARM_zh.md new file mode 100644 index 0000000..e624e96 --- /dev/null +++ b/deployment_new/templates/server_arm/docs/INSTALL_SERVER_ARM_zh.md @@ -0,0 +1,158 @@ +# ARM Server 安装指南(server_arm) + +本包用于在 ARM64 服务器上部署 Argus 的最小 server 端组件集: + +- master +- Prometheus +- Prometheus targets-updater sidecar +- Grafana +- Alertmanager +- 
Web 前端 + Web Proxy + +> 注意:本 ARM 版本 **不包含 Elasticsearch / Kibana / FTP / BIND9**,仅提供指标与告警能力。 + +--- + +## 1. 前置条件 + +- 目标机器为 ARM64(例如:aarch64 服务器); +- 已安装 Docker 和 docker compose(v2,支持 `docker compose` 命令); +- 已准备好 overlay 网络 `argus-sys-net`(如不需要也可将 compose 改为 `bridge` 网络)。 + +创建 overlay 网络示例(需先完成 `docker swarm init`;必须带 `--attachable`,否则 compose 启动的普通容器无法加入该网络): + +```bash +docker network create --driver overlay --attachable argus-sys-net || true +``` + +如未使用 Swarm,也可以在 compose 中将 `networks: argus-sys-net` 改成本地 `bridge` 网络。 + +--- + +## 2. 解压和加载镜像 + +假设你已经将 `server_arm_YYYYMMDD.tar.gz` 拷贝到目标 ARM 服务器: + +```bash +tar -xzf server_arm_YYYYMMDD.tar.gz +cd server_arm/YYYYMMDD +``` + +加载镜像: + +```bash +for f in images/*.tar.gz; do + gunzip -c "$f" | docker load +done +``` + +加载后可以用: + +```bash +docker images | grep 'argus-.*arm64' +``` + +确认 master / prometheus / grafana / alertmanager / web 镜像已就绪。 + +--- + +## 3. 配置 .env + +在 `compose/` 目录下有自动生成的 `.env.example`: + +```bash +cd compose +cp .env.example .env +``` + +`.env` 中主要字段: + +- `PKG_VERSION`:包版本(不建议修改)。 +- 镜像 tag(默认使用构建时的 `*-arm64:${PKG_VERSION}`): + - `MASTER_IMAGE_TAG` + - `PROM_IMAGE_TAG` + - `PROM_UPDATER_IMAGE_TAG` + - `GRAFANA_IMAGE_TAG` + - `ALERT_IMAGE_TAG` + - `FRONT_IMAGE_TAG` + - `WEB_PROXY_IMAGE_TAG` +- 端口: + - `MASTER_PORT`(默认 32300) + - `PROMETHEUS_PORT`(默认 9090) + - `GRAFANA_PORT`(默认 3000) + - `ALERTMANAGER_PORT`(默认 9093) + - `WEB_PROXY_PORT_8080` ~ `WEB_PROXY_PORT_8085` +- 其他: + - `ARGUS_BUILD_UID` / `ARGUS_BUILD_GID`(用于挂载目录的权限对齐)。 + +根据实际环境按需调整端口和 UID/GID。 + +--- + +## 4. 准备挂载目录 + +在 package 根目录(与 `compose/` 同级)下,已预创建所需的 `private/argus` 目录骨架: + +- `private/argus/etc` +- `private/argus/master` +- `private/argus/metric/prometheus`(含 `data` / `rules` / `targets`) +- `private/argus/metric/grafana`(含 `data` / `logs` / `plugins` / `provisioning` 等) + +可根据需要调整访问权限,例如(bash 中没有 `$GID` 变量,请使用 `id` 命令获取): + +```bash +cd .. +sudo chown -R "$(id -u):$(id -g)" private/argus +``` + +确保与 `.env` 中的 `ARGUS_BUILD_UID/GID` 一致。 + +--- + +## 5.
启动服务 + +在 `compose/` 目录执行: + +```bash +docker compose --env-file .env up -d +``` + +检查服务状态: + +```bash +docker compose ps +``` + +预期看到: + +- `argus-master-sys` +- `argus-prometheus` +- `argus-prometheus-targets-updater` +- `argus-grafana` +- `argus-alertmanager` +- `argus-web-frontend` +- `argus-web-proxy` + +--- + +## 6. 验证 + +1. Master: + - `curl http://localhost:${MASTER_PORT}/readyz` +2. Prometheus: + - 浏览器访问:`http://<server-ip>:${PROMETHEUS_PORT}` +3. Grafana: + - 浏览器访问:`http://<server-ip>:${GRAFANA_PORT}` +4. Web 门户: + - 浏览器访问:`http://<server-ip>:${WEB_PROXY_PORT_8080}` + +确认这些页面能够正常打开,并且在没有任何节点注册时,Prometheus 中 `job="node"` 可为空,但基础 UI 应可访问。 + +--- + +## 7. 注意事项 + +- 本 ARM server 包仅包含 metrics + alert 相关组件,不提供日志链路(ES/Kibana),也不包含 DNS/FTP 相关服务; +- 若后续需要扩展 ES/Kibana,可在新的 spec 中单独设计 ARM 日志栈; +- 对于多节点/Swarm 部署,请结合 `src/sys/arm_swarm_tests` 中的 compose 与脚本进行扩展。 + diff --git a/deployment_new/templates/server_arm/scripts/config.sh b/deployment_new/templates/server_arm/scripts/config.sh new file mode 100644 index 0000000..60caf08 --- /dev/null +++ b/deployment_new/templates/server_arm/scripts/config.sh @@ -0,0 +1,111 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.."
#######################################
# Warn (never abort) when the filesystem holding a path has less free
# space than required.
# Globals:   none
# Arguments: $1 - path whose filesystem should be checked
#            $2 - optional minimum free space in MB (default 5120)
# Outputs:   one error line through err() when space is short or unknown
# Returns:   0 always; this is an advisory check
#######################################
check_disk() {
  local path="$1"
  local need="${2:-5120}"
  local probe="$path"
  local free=""

  # df fails on a path that does not exist yet (e.g. /var/lib/docker when the
  # Docker data-root was relocated), which used to be reported as "0MB free".
  # Measure the nearest existing ancestor instead.
  while [[ -n "$probe" && ! -e "$probe" ]]; do
    probe="$(dirname "$probe")"
  done

  free="$(df -Pm "${probe:-/}" 2>/dev/null | awk 'NR==2{print $4+0}')" || free=""
  if [[ -z "$free" || "$free" -lt "$need" ]]; then
    err "磁盘空间不足: $path 剩余 ${free:-0}MB (<${need}MB),请清理后再继续"
  fi
  return 0
}
#######################################
# Report whether a TCP/UDP port is currently bound on this host.
# Arguments: $1 - port number
# Returns:   0 when some listening socket uses the port, non-zero otherwise
#            (also non-zero when ss is unavailable, as before)
#######################################
is_port_used() {
  local port="$1"
  # One awk stage reads ALL of ss's output and compares the text after the
  # last ':' of the local-address column as a string. The former
  # `... | grep -qx` could exit on the first match while upstream stages were
  # still writing; under `set -o pipefail` the resulting SIGPIPE turned a
  # match into "port is free".
  ss -tulnH 2>/dev/null \
    | awk -v want="$port" '
        { n = split($5, parts, ":"); if ((parts[n] "") == (want "")) found = 1 }
        END { exit(found ? 0 : 1) }'
}
"s#^ARGUS_BUILD_GID=.*#ARGUS_BUILD_GID=${RGID}#" "$ENV_OUT" +else + echo "ARGUS_BUILD_GID=${RGID}" >> "$ENV_OUT" +fi + +# 更新 cluster-info.env 的 SWARM_MANAGER_ADDR(ARM 版本也保留该机制) +CI="$PKG_ROOT/cluster-info.env" +if [[ -f "$CI" ]]; then + if grep -q '^SWARM_MANAGER_ADDR=' "$CI"; then + sed -i -E "s#^SWARM_MANAGER_ADDR=.*#SWARM_MANAGER_ADDR=${SWARM_MANAGER_ADDR}#" "$CI" + else + echo "SWARM_MANAGER_ADDR=${SWARM_MANAGER_ADDR}" >> "$CI" + fi +else + echo "SWARM_MANAGER_ADDR=${SWARM_MANAGER_ADDR}" > "$CI" +fi + +info "已生成 compose/.env 并更新 cluster-info.env 的 SWARM_MANAGER_ADDR。" +info "下一步可执行: scripts/install.sh" diff --git a/deployment_new/templates/server_arm/scripts/diagnose.sh b/deployment_new/templates/server_arm/scripts/diagnose.sh new file mode 100644 index 0000000..a33e7dd --- /dev/null +++ b/deployment_new/templates/server_arm/scripts/diagnose.sh @@ -0,0 +1,91 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" + +ENV_FILE="$ROOT/compose/.env"; [[ -f "$ENV_FILE" ]] && set -a && source "$ENV_FILE" && set +a + +ts="$(date -u +%Y%m%d-%H%M%SZ)" +LOG_DIR="$ROOT/logs"; mkdir -p "$LOG_DIR" || true +if ! 
#######################################
# Print the HTTP status code returned for a URL.
# Arguments: $1 - URL to request
# Outputs:   a three-digit status on stdout; "000" when no response arrived
# Returns:   0 always
#######################################
http_code() {
  local url="$1" status
  # curl already prints 000 through -w when the request fails, so the old
  # `|| echo 000` produced "000000". Capture the output and normalise it.
  # --max-time keeps a hung endpoint from stalling the whole diagnosis
  # (http_body_head already bounds its request the same way).
  status="$(curl -s -o /dev/null --max-time 5 -w '%{http_code}' "$url" 2>/dev/null)" || true
  if [[ ! "$status" =~ ^[0-9]{3}$ ]]; then
    status="000"
  fi
  printf '%s\n' "$status"
}
# Probe every published HTTP endpoint and record the status codes.
# Quoting restarts inside $(...), so the URLs use plain double quotes there.
# The earlier \"...\" form passed literal quote characters to curl as part of
# the URL, which made every probe in this section log 000.
section HTTP
logd "Master: $(http_code "http://localhost:${MASTER_PORT:-32300}/readyz")"
logd "Prometheus: $(http_code "http://localhost:${PROMETHEUS_PORT:-9090}/-/ready")"
logd "Grafana: $(http_code "http://localhost:${GRAFANA_PORT:-3000}/api/health")"
http_body_head "http://localhost:${GRAFANA_PORT:-3000}/api/health" >> "$DETAILS" 2>&1 || true
logd "Alertmanager: $(http_code "http://localhost:${ALERTMANAGER_PORT:-9093}/api/v2/status")"
# CORS headers are requested as if coming from the portal on port 8080.
cors8084=$(header_val -H "Origin: http://localhost:${WEB_PROXY_PORT_8080:-8080}" "http://localhost:${WEB_PROXY_PORT_8084:-8084}/api/v2/status" || true)
cors8085=$(header_val -H "Origin: http://localhost:${WEB_PROXY_PORT_8080:-8080}" "http://localhost:${WEB_PROXY_PORT_8085:-8085}/api/v1/master/nodes" || true)
logd "Web-Proxy 8080: $(http_code "http://localhost:${WEB_PROXY_PORT_8080:-8080}/")"
logd "Web-Proxy 8083: $(http_code "http://localhost:${WEB_PROXY_PORT_8083:-8083}/")"
logd "Web-Proxy 8084 CORS: ${cors8084}"
logd "Web-Proxy 8085 CORS: ${cors8085}"
"http://localhost:${PROMETHEUS_PORT:-9090}/-/ready") != 200 ]] && echo "[prometheus][http] /-/ready not 200" >> "$ERRORS" +gfcode=$(http_code "http://localhost:${GRAFANA_PORT:-3000}/api/health"); [[ "$gfcode" != 200 ]] && echo "[grafana][http] /api/health=$gfcode" >> "$ERRORS" +[[ $(http_code "http://localhost:${ALERTMANAGER_PORT:-9093}/api/v2/status") != 200 ]] && echo "[alertmanager][http] /api/v2/status not 200" >> "$ERRORS" +[[ -z "$cors8084" ]] && echo "[web-proxy][cors] 8084 missing Access-Control-Allow-Origin" >> "$ERRORS" +[[ -z "$cors8085" ]] && echo "[web-proxy][cors] 8085 missing Access-Control-Allow-Origin" >> "$ERRORS" +sort -u -o "$ERRORS" "$ERRORS" + +echo "Diagnostic details -> $DETAILS" +echo "Detected errors -> $ERRORS" + +if [[ "$LOG_DIR" == "$ROOT/logs" ]]; then + ln -sfn "$(basename "$DETAILS")" "$ROOT/logs/diagnose_arm_server_details.log" 2>/dev/null || cp "$DETAILS" "$ROOT/logs/diagnose_arm_server_details.log" 2>/dev/null || true + ln -sfn "$(basename "$ERRORS")" "$ROOT/logs/diagnose_arm_server_error.log" 2>/dev/null || cp "$ERRORS" "$ROOT/logs/diagnose_arm_server_error.log" 2>/dev/null || true +fi + +exit 0 + diff --git a/deployment_new/templates/server_arm/scripts/install.sh b/deployment_new/templates/server_arm/scripts/install.sh new file mode 100644 index 0000000..641924a --- /dev/null +++ b/deployment_new/templates/server_arm/scripts/install.sh @@ -0,0 +1,130 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." 
# Swarm init & overlay: the ARM package enables Swarm like production does.
NET_NAME="${ARGUS_OVERLAY_NET:-argus-sys-net}"
# Ask Docker for the machine-readable Swarm state. Grepping the
# human-readable `docker info` text with `grep -q` under pipefail can
# misreport an active Swarm when grep exits before docker finishes writing.
swarm_state="$(docker info --format '{{.Swarm.LocalNodeState}}' 2>/dev/null || true)"
if [[ "$swarm_state" != "active" ]]; then
  [[ -n "${SWARM_MANAGER_ADDR:-}" ]] || { err "SWARM_MANAGER_ADDR 未设置,请在 scripts/config.sh 中配置"; exit 1; }
  info "初始化 Swarm (--advertise-addr $SWARM_MANAGER_ADDR)"
  # Best-effort by design: a failure surfaces when the overlay is created.
  docker swarm init --advertise-addr "$SWARM_MANAGER_ADDR" >/dev/null 2>&1 || true
else
  info "Swarm 已激活"
fi
# Import every bundled image archive into the local Docker daemon.
IMAGES_DIR="$PKG_ROOT/images"
shopt -s nullglob
tars=("$IMAGES_DIR"/*.tar.gz)
shopt -u nullglob
if [[ ${#tars[@]} -eq 0 ]]; then
  err "images 目录为空,缺少镜像 tar.gz"
  exit 1
fi
total=${#tars[@]}
idx=0
for tgz in "${tars[@]}"; do
  idx=$((idx + 1))
  info "导入镜像 ($idx/$total): $(basename "$tgz")"
  # Stream the archive straight into docker load. The previous version
  # decompressed each image to a temp file, which stayed on disk whenever
  # gunzip or docker load failed.
  gunzip -c "$tgz" | docker load >/dev/null
done
# Installation report (trimmed ARM edition).
# Every port falls back to the default used by docker-compose.yml: a key that
# is missing from .env must not abort the script through `set -u` after the
# stack has already been started.
ts=$(date +%Y%m%d-%H%M%S)
RPT="$PKG_ROOT/安装报告_ARM_${ts}.md"
{
  echo "# Argus ARM Server 安装报告 (${ts})"
  echo
  echo "## 端口映射"
  echo "- MASTER_PORT=${MASTER_PORT:-32300}"
  echo "- PROMETHEUS_PORT=${PROMETHEUS_PORT:-9090}"
  echo "- GRAFANA_PORT=${GRAFANA_PORT:-3000}"
  echo "- ALERTMANAGER_PORT=${ALERTMANAGER_PORT:-9093}"
  echo "- WEB_PROXY_PORT_8080=${WEB_PROXY_PORT_8080:-8080} ... 8085=${WEB_PROXY_PORT_8085:-8085}"
  echo
  echo "## 网络"
  echo "- NET=${NET_NAME}"
  echo "- ARGUS_OVERLAY_NET=${ARGUS_OVERLAY_NET:-argus-sys-net}"
  echo
  echo "## 健康检查(简要)"
  echo "- master/readyz=$(code "http://127.0.0.1:${MASTER_PORT:-32300}/readyz")"
  echo "- prometheus/ready=$(code "http://127.0.0.1:${PROMETHEUS_PORT:-9090}/-/ready")"
  echo "- grafana/api/health=$(code "http://127.0.0.1:${GRAFANA_PORT:-3000}/api/health")"
  echo "- alertmanager/api/v2/status=$(code "http://127.0.0.1:${ALERTMANAGER_PORT:-9093}/api/v2/status")"
} > "$RPT"
info "已生成报告: $RPT"
#######################################
# Print the Access-Control-Allow-Origin response header for a request.
# Arguments: "$@" - passed through to curl (options plus the URL)
# Outputs:   the header value on stdout; nothing when the header is absent
# Returns:   exit status of the curl | awk pipeline
#######################################
header_val() {
  # tolower() makes the name match case-insensitively on every awk.
  # IGNORECASE is honoured only by gawk and silently ignored by mawk and
  # busybox awk, where a lower-case header name was missed.
  curl -s -D - -o /dev/null "$@" \
    | awk -F': ' 'tolower($1) == "access-control-allow-origin" { gsub("\r", "", $2); print $2 }'
}
# status.sh: show the state of the ARM server compose project.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
PKG_ROOT="$ROOT_DIR"
ENV_FILE="$PKG_ROOT/compose/.env"
COMPOSE_FILE="$PKG_ROOT/compose/docker-compose.yml"

PROJECT_ARGS=()
if [[ -f "$ENV_FILE" ]]; then
  set -a
  # shellcheck disable=SC1090  # path is only known at run time
  source "$ENV_FILE"
  set +a
fi

PROJECT="${COMPOSE_PROJECT_NAME:-argus-server-arm}"
PROJECT_ARGS+=(-p "$PROJECT")
# Pass --env-file only when the file exists. A missing .env was tolerated
# above, but compose itself aborts when handed a non-existent env file.
if [[ -f "$ENV_FILE" ]]; then
  PROJECT_ARGS+=(--env-file "$ENV_FILE")
fi
PROJECT_ARGS+=(-f "$COMPOSE_FILE")

docker compose "${PROJECT_ARGS[@]}" ps
# uninstall.sh: stop and remove the ARM server compose project.
PKG_ROOT="$ROOT_DIR"
ENV_FILE="$PKG_ROOT/compose/.env"
COMPOSE_FILE="$PKG_ROOT/compose/docker-compose.yml"

if [[ -f "$ENV_FILE" ]]; then
  set -a
  # shellcheck disable=SC1090  # path is only known at run time
  source "$ENV_FILE"
  set +a
fi

PROJECT="${COMPOSE_PROJECT_NAME:-argus-server-arm}"

err(){ echo -e "\033[31m[ERROR]\033[0m $*" >&2; }

# Compose front-end chosen by require_compose (v2 plugin or v1 binary).
COMPOSE_CMD=()

#######################################
# Detect Docker Compose and remember how to invoke it.
# Globals:   COMPOSE_CMD (written) - command words for the detected compose
# Returns:   0 when found; prints an error and exits 1 otherwise
#######################################
require_compose(){
  if docker compose version >/dev/null 2>&1; then
    COMPOSE_CMD=(docker compose)
    return 0
  fi
  if command -v docker-compose >/dev/null 2>&1 && docker-compose version >/dev/null 2>&1; then
    COMPOSE_CMD=(docker-compose)
    return 0
  fi
  err "未检测到 Docker Compose,请安装 docker compose v2 或 docker-compose v1"; exit 1
}

require_compose

# Use whichever front-end was detected: the check accepts docker-compose v1,
# so the call must not hard-code the v2 plugin. --env-file is passed only
# when the file exists, because compose rejects a missing env file.
compose_args=(-p "$PROJECT")
if [[ -f "$ENV_FILE" ]]; then
  compose_args+=(--env-file "$ENV_FILE")
fi
compose_args+=(-f "$COMPOSE_FILE")

echo "[UNINSTALL] stopping compose (project=$PROJECT)"
"${COMPOSE_CMD[@]}" "${compose_args[@]}" down --remove-orphans || true
echo "[UNINSTALL] done"
&& pwd)" +PKG_ROOT="$ROOT_DIR" +ENV_EX="$PKG_ROOT/compose/.env.example" +ENV_OUT="$PKG_ROOT/compose/.env" + +info(){ echo -e "\033[34m[CONFIG_X86]\033[0m $*"; } +err(){ echo -e "\033[31m[ERROR_X86]\033[0m $*" >&2; } + +require(){ local ok=1; for c in "$@"; do command -v "$c" >/dev/null 2>&1 || { err "缺少依赖: $c"; ok=0; }; done; [[ $ok -eq 1 ]]; } + +# Compose 检测:优先 docker compose(v2),回退 docker-compose(v1) +require_compose(){ + if docker compose version >/dev/null 2>&1; then return 0; fi + if command -v docker-compose >/dev/null 2>&1 && docker-compose version >/dev/null 2>&1; then return 0; fi + err "未检测到 Docker Compose,请安装 docker compose v2 或 docker-compose v1"; exit 1 +} + +require docker awk sed +require_compose + +[[ -f "$ENV_EX" ]] || { err "缺少模板文件: $ENV_EX"; exit 1; } + +# 磁盘空间检查(简化版,默认 >= 5GB) +check_disk(){ local p="$1"; local need="${2:-5120}"; local free; + free=$(df -Pm "$p" 2>/dev/null | awk 'NR==2{print $4+0}') + if [[ -z "$free" || "$free" -lt "$need" ]]; then + err "磁盘空间不足: $p 剩余 ${free:-0}MB (<${need}MB),请清理后再继续" + fi +} + +check_disk "$PKG_ROOT" 5120 || true +check_disk "/var/lib/docker" 5120 || true + +cp "$ENV_EX" "$ENV_OUT" + +# 读取/生成 SWARM_MANAGER_ADDR(与 ARM server 配置保持一致) +SWARM_MANAGER_ADDR=${SWARM_MANAGER_ADDR:-} +if [[ -z "${SWARM_MANAGER_ADDR}" ]]; then + read -rp "请输入本机管理地址 SWARM_MANAGER_ADDR: " SWARM_MANAGER_ADDR +fi +info "SWARM_MANAGER_ADDR=$SWARM_MANAGER_ADDR" + +# 校验 IP 属于本机网卡 +if ! 
ip -o addr | awk '{print $4}' | cut -d'/' -f1 | grep -qx "$SWARM_MANAGER_ADDR"; then + err "SWARM_MANAGER_ADDR 非本机地址: $SWARM_MANAGER_ADDR"; exit 1; fi + +info "开始分配服务端口(起始=20000,避免系统占用与相互冲突)" +is_port_used(){ local p="$1"; ss -tulnH 2>/dev/null | awk '{print $5}' | sed 's/.*://g' | grep -qx "$p"; } +declare -A PRESENT=() CHOSEN=() USED=() +START_PORT="${START_PORT:-20000}"; cur=$START_PORT +ORDER=(MASTER_PORT PROMETHEUS_PORT GRAFANA_PORT ALERTMANAGER_PORT \ + WEB_PROXY_PORT_8080 WEB_PROXY_PORT_8081 WEB_PROXY_PORT_8082 \ + WEB_PROXY_PORT_8083 WEB_PROXY_PORT_8084 WEB_PROXY_PORT_8085) + +# 标记 .env.example 中实际存在的键 +for key in "${ORDER[@]}"; do + if grep -q "^${key}=" "$ENV_EX"; then PRESENT[$key]=1; fi +done + +next_free(){ local p="$1"; while :; do if [[ -n "${USED[$p]:-}" ]] || is_port_used "$p"; then p=$((p+1)); else echo "$p"; return; fi; done; } + +for key in "${ORDER[@]}"; do + [[ -z "${PRESENT[$key]:-}" ]] && continue + p=$(next_free "$cur"); CHOSEN[$key]="$p"; USED[$p]=1; cur=$((p+1)) +done + +info "端口分配结果:MASTER=${CHOSEN[MASTER_PORT]:-} PROM=${CHOSEN[PROMETHEUS_PORT]:-} GRAFANA=${CHOSEN[GRAFANA_PORT]:-} ALERT=${CHOSEN[ALERTMANAGER_PORT]:-} WEB_PROXY(8080..8085)=${CHOSEN[WEB_PROXY_PORT_8080]:-}/${CHOSEN[WEB_PROXY_PORT_8081]:-}/${CHOSEN[WEB_PROXY_PORT_8082]:-}/${CHOSEN[WEB_PROXY_PORT_8083]:-}/${CHOSEN[WEB_PROXY_PORT_8084]:-}/${CHOSEN[WEB_PROXY_PORT_8085]:-}" + +# 覆盖端口(按唯一化结果写回) +for key in "${ORDER[@]}"; do + val="${CHOSEN[$key]:-}" + [[ -z "$val" ]] && continue + sed -i -E "s#^$key=.*#$key=${val}#" "$ENV_OUT" +done + +# 覆盖/补充 Overlay 名称 +grep -q '^ARGUS_OVERLAY_NET=' "$ENV_OUT" || echo 'ARGUS_OVERLAY_NET=argus-sys-net' >> "$ENV_OUT" + +# 覆盖/补充构建账户 UID/GID,避免权限问题 +RUID=$(id -u) +RGID=$(id -g) + +if grep -q '^ARGUS_BUILD_UID=' "$ENV_OUT"; then + sed -i -E "s#^ARGUS_BUILD_UID=.*#ARGUS_BUILD_UID=${RUID}#" "$ENV_OUT" +else + echo "ARGUS_BUILD_UID=${RUID}" >> "$ENV_OUT" +fi + +if grep -q '^ARGUS_BUILD_GID=' "$ENV_OUT"; then + sed -i -E 
"s#^ARGUS_BUILD_GID=.*#ARGUS_BUILD_GID=${RGID}#" "$ENV_OUT" +else + echo "ARGUS_BUILD_GID=${RGID}" >> "$ENV_OUT" +fi + +# 更新 cluster-info.env 的 SWARM_MANAGER_ADDR(ARM + x86 一致) +CI="$PKG_ROOT/cluster-info.env" +if [[ -f "$CI" ]]; then + if grep -q '^SWARM_MANAGER_ADDR=' "$CI"; then + sed -i -E "s#^SWARM_MANAGER_ADDR=.*#SWARM_MANAGER_ADDR=${SWARM_MANAGER_ADDR}#" "$CI" + else + echo "SWARM_MANAGER_ADDR=${SWARM_MANAGER_ADDR}" >> "$CI" + fi +else + echo "SWARM_MANAGER_ADDR=${SWARM_MANAGER_ADDR}" > "$CI" +fi + +info "已生成 compose/.env 并更新 cluster-info.env 的 SWARM_MANAGER_ADDR。" +info "下一步可执行: scripts_for_x86/install.sh" diff --git a/deployment_new/templates/server_arm/scripts_for_x86/diagnose.sh b/deployment_new/templates/server_arm/scripts_for_x86/diagnose.sh new file mode 100644 index 0000000..77795bc --- /dev/null +++ b/deployment_new/templates/server_arm/scripts_for_x86/diagnose.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +exec "$SCRIPT_DIR/../scripts/diagnose.sh" "$@" + diff --git a/deployment_new/templates/server_arm/scripts_for_x86/install.sh b/deployment_new/templates/server_arm/scripts_for_x86/install.sh new file mode 100644 index 0000000..4ab1e88 --- /dev/null +++ b/deployment_new/templates/server_arm/scripts_for_x86/install.sh @@ -0,0 +1,77 @@ +#!/usr/bin/env bash +set -euo pipefail + +# 在 x86_64 主机上安装 ARM server 包: +# 1) 确认/安装 binfmt + QEMU(通过 tonistiigi/binfmt); +# 2) 调用通用的 scripts/install.sh 启动服务。 + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +ROOT_DIR="$(cd "$SCRIPT_DIR/.." 
&& pwd)" + +info(){ echo -e "\033[34m[INSTALL_X86]\033[0m $*"; } +err(){ echo -e "\033[31m[ERROR_X86]\033[0m $*" >&2; } + +require(){ local ok=1; for c in "$@"; do command -v "$c" >/dev/null 2>&1 || { err "缺少依赖: $c"; ok=0; }; done; [[ $ok -eq 1 ]]; } + +ensure_binfmt_arm64() { + info "检查当前主机架构: $(uname -m)" + if [[ "$(uname -m)" != "x86_64" && "$(uname -m)" != "amd64" ]]; then + info "当前并非 x86_64($(uname -m)),通常应直接在 ARM 服务器上使用 scripts/install.sh。继续执行但不强制安装 binfmt。" + return 0 + fi + + require docker + + # 优先使用随安装包一起提供的 ubuntu-22.04-arm-test 镜像做自检 + local TEST_IMG="ubuntu:22.04" + local TEST_TAR="" + if [[ -d "$ROOT_DIR/images" ]]; then + TEST_TAR=$(ls "$ROOT_DIR"/images/ubuntu-22.04-arm-test*.tar.gz 2>/dev/null | head -n1 || true) + fi + + if [[ -n "$TEST_TAR" && ! $(docker image inspect "$TEST_IMG" >/dev/null 2>&1; echo $?) -eq 0 ]]; then + info "从安装包加载 ARM 测试基础镜像: $(basename "$TEST_TAR")" + tmp=$(mktemp) + gunzip -c "$TEST_TAR" > "$tmp" + docker load -i "$tmp" >/dev/null + rm -f "$tmp" + fi + + info "验证是否已能运行 linux/arm64 容器..." + if docker run --rm --platform=linux/arm64 "$TEST_IMG" uname -m 2>/dev/null | grep -q 'aarch64'; then + info "检测到本机已支持 ARM64 容器运行 (uname -m = aarch64)" + return 0 + fi + + info "未检测到 ARM64 运行能力,尝试通过 tonistiigi/binfmt 安装 binfmt/QEMU ..." + # 优先从离线包加载 tonistiigi/binfmt 镜像 + local BINFMT_IMG="tonistiigi/binfmt:latest" + local BINFMT_TAR="" + if [[ -d "$ROOT_DIR/images" ]]; then + BINFMT_TAR=$(ls "$ROOT_DIR"/images/tonistiigi-binfmt*.tar.gz 2>/dev/null | head -n1 || true) + fi + if [[ -n "$BINFMT_TAR" && ! $(docker image inspect "$BINFMT_IMG" >/dev/null 2>&1; echo $?) -eq 0 ]]; then + info "从安装包加载 tonistiigi/binfmt 离线镜像: $(basename "$BINFMT_TAR")" + tmp_b=$(mktemp) + gunzip -c "$BINFMT_TAR" > "$tmp_b" + docker load -i "$tmp_b" >/dev/null + rm -f "$tmp_b" + fi + + if ! docker run --privileged --rm "$BINFMT_IMG" --install all; then + err "tonistiigi/binfmt 安装失败,请检查 Docker 权限或确认离线镜像是否完整。" + exit 1 + fi + + info "再次验证 ARM64 容器可运行状态..." + if ! 
docker run --rm --platform=linux/arm64 "$TEST_IMG" uname -m 2>/dev/null | grep -q 'aarch64'; then + err "安装 binfmt/QEMU 后仍无法运行 linux/arm64 容器,请手工排查 binfmt_misc 配置。" + exit 1 + fi + info "ARM64 容器运行能力检查通过。" +} + +ensure_binfmt_arm64 + +# 准备完 x86 -> ARM 运行环境后,交由通用 install.sh 负责后续部署 +exec "$SCRIPT_DIR/../scripts/install.sh" "$@" diff --git a/deployment_new/templates/server_arm/scripts_for_x86/selfcheck.sh b/deployment_new/templates/server_arm/scripts_for_x86/selfcheck.sh new file mode 100644 index 0000000..de9f36a --- /dev/null +++ b/deployment_new/templates/server_arm/scripts_for_x86/selfcheck.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +exec "$SCRIPT_DIR/../scripts/selfcheck.sh" "$@" + diff --git a/deployment_new/templates/server_arm/scripts_for_x86/status.sh b/deployment_new/templates/server_arm/scripts_for_x86/status.sh new file mode 100644 index 0000000..e77e4d3 --- /dev/null +++ b/deployment_new/templates/server_arm/scripts_for_x86/status.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +exec "$SCRIPT_DIR/../scripts/status.sh" "$@" + diff --git a/deployment_new/templates/server_arm/scripts_for_x86/uninstall.sh b/deployment_new/templates/server_arm/scripts_for_x86/uninstall.sh new file mode 100644 index 0000000..343a532 --- /dev/null +++ b/deployment_new/templates/server_arm/scripts_for_x86/uninstall.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +exec "$SCRIPT_DIR/../scripts/uninstall.sh" "$@" + diff --git a/src/metric/grafana/build/grafana.ini b/src/metric/grafana/build/grafana.ini index fea2ada..c6a088a 100644 --- a/src/metric/grafana/build/grafana.ini +++ b/src/metric/grafana/build/grafana.ini @@ -42,6 +42,11 @@ auto_assign_org = true auto_assign_org_role = Viewer verify_email_enabled = false +[auth.anonymous] +enabled = true +org_name = 
Main Org. +org_role = Viewer + [log] mode = console level = info