[#50] x86机器使用qemu模拟方式安装部署arm版本安装包,当前未精简版,只有简单的node exporter功能,删减日志,GPU指标功能,仍使用Swarm组网

This commit is contained in:
yuyr 2025-12-01 15:27:12 +08:00
parent 4c45166b44
commit a6e1bf7fe7
26 changed files with 1875 additions and 20 deletions

View File

@ -45,8 +45,8 @@ Options:
--master-offline Build master offline image (requires src/master/offline_wheels.tar.gz) --master-offline Build master offline image (requires src/master/offline_wheels.tar.gz)
--metric Build metric module images (ftp, prometheus, grafana, test nodes) --metric Build metric module images (ftp, prometheus, grafana, test nodes)
--no-cache Build all images without using Docker layer cache --no-cache Build all images without using Docker layer cache
--only LIST Comma-separated targets to build: core,master,metric,web,alert,sys,gpu_bundle,cpu_bundle,server_pkg,client_pkg,all --only LIST Comma-separated targets to build: core,master,metric,web,alert,sys,gpu_bundle,cpu_bundle,server_pkg,client_pkg,arm_server_pkg,arm_client_pkg,all
--version DATE Date tag used by gpu_bundle/server_pkg/client_pkg (e.g. 20251112) --version DATE Date tag used by gpu_bundle/server_pkg/client_pkg/arm_server_pkg/arm_client_pkg (e.g. 20251112)
--client-semver X.Y.Z Override client semver used in all-in-one-full artifact (optional) --client-semver X.Y.Z Override client semver used in all-in-one-full artifact (optional)
--cuda VER CUDA runtime version for NVIDIA base (default: 12.2.2) --cuda VER CUDA runtime version for NVIDIA base (default: 12.2.2)
--tag-latest Also tag bundle image as :latest (for cpu_bundle only; default off) --tag-latest Also tag bundle image as :latest (for cpu_bundle only; default off)
@ -73,6 +73,8 @@ build_gpu_bundle=false
build_cpu_bundle=false build_cpu_bundle=false
build_server_pkg=false build_server_pkg=false
build_client_pkg=false build_client_pkg=false
build_server_pkg_arm=false
build_client_pkg_arm=false
no_cache=false no_cache=false
bundle_date="" bundle_date=""
@ -110,7 +112,7 @@ while [[ $# -gt 0 ]]; do
fi fi
sel="$2"; shift 2 sel="$2"; shift 2
# reset all, then enable selected # reset all, then enable selected
build_core=false; build_master=false; build_metric=false; build_web=false; build_alert=false; build_sys=false; build_gpu_bundle=false; build_cpu_bundle=false; build_server_pkg=false; build_client_pkg=false build_core=false; build_master=false; build_metric=false; build_web=false; build_alert=false; build_sys=false; build_gpu_bundle=false; build_cpu_bundle=false; build_server_pkg=false; build_client_pkg=false; build_server_pkg_arm=false; build_client_pkg_arm=false
IFS=',' read -ra parts <<< "$sel" IFS=',' read -ra parts <<< "$sel"
for p in "${parts[@]}"; do for p in "${parts[@]}"; do
case "$p" in case "$p" in
@ -122,8 +124,12 @@ while [[ $# -gt 0 ]]; do
sys) build_sys=true ;; sys) build_sys=true ;;
gpu_bundle) build_gpu_bundle=true ;; gpu_bundle) build_gpu_bundle=true ;;
cpu_bundle) build_cpu_bundle=true ;; cpu_bundle) build_cpu_bundle=true ;;
server_pkg) build_server_pkg=true; build_core=true; build_master=true; build_metric=true; build_web=true; build_alert=true ;; # 在 ARM 构建脚本中server_pkg 视作 arm_server_pkg 的别名,避免误用 x86 打包逻辑
client_pkg) build_client_pkg=true ;; server_pkg) build_server_pkg_arm=true; build_core=true; build_master=true; build_metric=true; build_web=true; build_alert=true ;;
# 在 ARM 构建脚本中client_pkg 视作 arm_client_pkg 的别名,避免误用 x86 打包逻辑
client_pkg) build_client_pkg_arm=true; build_sys=true ;;
arm_server_pkg) build_server_pkg_arm=true; build_core=true; build_master=true; build_metric=true; build_web=true; build_alert=true ;;
arm_client_pkg) build_client_pkg_arm=true; build_sys=true ;;
all) build_core=true; build_master=true; build_metric=true; build_web=true; build_alert=true; build_sys=true ;; all) build_core=true; build_master=true; build_metric=true; build_web=true; build_alert=true; build_sys=true ;;
*) echo "Unknown --only target: $p" >&2; exit 1 ;; *) echo "Unknown --only target: $p" >&2; exit 1 ;;
esac esac
@ -169,12 +175,12 @@ fi
cd "$root" cd "$root"
# Set default image tag policy before building # Set default image tag policy before building
if [[ "$build_server_pkg" == true ]]; then if [[ "$build_server_pkg" == true || "$build_client_pkg" == true || "$build_server_pkg_arm" == true || "$build_client_pkg_arm" == true ]]; then
DEFAULT_IMAGE_TAG="${bundle_date:-latest}" DEFAULT_IMAGE_TAG="${bundle_date:-latest}"
fi fi
# Select build user profile for pkg vs default # Select build user profile for pkg vs default
if [[ "$build_server_pkg" == true || "$build_client_pkg" == true ]]; then if [[ "$build_server_pkg" == true || "$build_client_pkg" == true || "$build_server_pkg_arm" == true || "$build_client_pkg_arm" == true ]]; then
export ARGUS_BUILD_PROFILE=pkg export ARGUS_BUILD_PROFILE=pkg
fi fi
@ -902,6 +908,44 @@ if [[ "$build_client_pkg" == true ]]; then
fi fi
fi fi
# ARM-only Server/Client deployment packages.
# Server package: requires --version (bundle_date); the packaging itself is
# delegated to deployment_new/build/make_arm_server_package.sh.
if [[ "$build_server_pkg_arm" == true ]]; then
if [[ -z "$bundle_date" ]]; then
echo "❌ arm_server_pkg requires --version YYYYMMDD" >&2
build_failed=true
else
echo ""
echo "🧳 Building ARM Server package..."
if ! "$root/deployment_new/build/make_arm_server_package.sh" --version "$bundle_date"; then
build_failed=true
fi
fi
fi
# Client package: requires --version and a locally available CPU node image.
if [[ "$build_client_pkg_arm" == true ]]; then
if [[ -z "$bundle_date" ]]; then
echo "❌ arm_client_pkg requires --version YYYYMMDD" >&2
build_failed=true
else
echo ""
echo "🧳 Building ARM Client-CPU package..."
cpu_repo="argus-sys-metric-test-node-arm64"
cpu_image_arg="$cpu_repo:latest"
# Prefer an existing :<version> image; otherwise fall back to :latest and let
# the packaging script apply the version tag.
if docker image inspect "${cpu_repo}:${bundle_date}" >/dev/null 2>&1; then
cpu_image_arg="${cpu_repo}:${bundle_date}"
elif ! docker image inspect "${cpu_repo}:latest" >/dev/null 2>&1; then
echo "❌ required CPU node image missing: ${cpu_repo}:{${bundle_date},latest}" >&2
build_failed=true
fi
# NOTE(review): build_failed is shared with the server-package step above (and
# presumably earlier build steps), so any previous failure also skips this
# packaging call — confirm that is intended.
if [[ "$build_failed" != "true" ]]; then
if ! "$root/deployment_new/build/make_arm_client_package.sh" --version "$bundle_date" --image "$cpu_image_arg"; then
build_failed=true
fi
fi
fi
fi
echo "=======================================" echo "======================================="
echo "📦 Build Summary" echo "📦 Build Summary"
echo "=======================================" echo "======================================="

View File

@ -0,0 +1,112 @@
#!/usr/bin/env bash
set -euo pipefail
# Make ARM Client-CPU package (versioned CPU node image, compose, env, docs)
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
TEMPL_DIR="$ROOT_DIR/deployment_new/templates/client_arm"
ART_ROOT="$ROOT_DIR/deployment_new/artifact/client_arm"
COMMON_SH="$ROOT_DIR/deployment_new/build/common.sh"
. "$COMMON_SH"
# Print the command-line help for the ARM Client-CPU packager to stdout.
usage() {
  local script_name
  script_name="$(basename "$0")"
  printf '%s\n' \
    'Build ARM Client-CPU Package (deployment_new)' \
    "Usage: ${script_name} --version YYYYMMDD [--image IMAGE[:TAG]]" \
    'Defaults:' \
    'image = argus-sys-metric-test-node-arm64:latest' \
    'Outputs: deployment_new/artifact/client_arm/<YYYYMMDD>/ and client_arm_YYYYMMDD.tar.gz'
}
# Parse CLI options. Options that take a value are checked for a following
# argument first: with `set -u`, a trailing bare `--version` or `--image`
# would otherwise abort with an opaque "unbound variable" error.
VERSION=""
IMAGE="argus-sys-metric-test-node-arm64:latest"
while [[ $# -gt 0 ]]; do
  case "$1" in
    --version | --image)
      if [[ $# -lt 2 ]]; then
        err "missing value for $1"
        usage
        exit 1
      fi
      if [[ "$1" == "--version" ]]; then VERSION="$2"; else IMAGE="$2"; fi
      shift 2
      ;;
    -h | --help)
      usage
      exit 0
      ;;
    *)
      err "unknown arg: $1"
      usage
      exit 1
      ;;
  esac
done

# Without --version, fall back to today_version (not defined in this script;
# presumably provided by the sourced common.sh).
if [[ -z "$VERSION" ]]; then VERSION="$(today_version)"; fi

require_cmd docker tar gzip

# The package is assembled in a throw-away staging directory that is removed
# on every exit path, then copied into the versioned artifact directory.
STAGE="$(mktemp -d)"
trap 'rm -rf "$STAGE"' EXIT
PKG_DIR="$ART_ROOT/$VERSION"
mkdir -p "$PKG_DIR" "$STAGE/images" "$STAGE/compose" "$STAGE/docs" "$STAGE/scripts" "$STAGE/private/argus"
# 1) Save CPU node image with version tag
if ! docker image inspect "$IMAGE" >/dev/null 2>&1; then
  err "missing image: $IMAGE"
  exit 1
fi

# Derive the repository part of IMAGE.
# - Drop a digest suffix (repo@sha256:...) first.
# - Strip a tag only when the last path component contains ':'; cutting at the
#   first ':' would truncate references with a registry port, e.g.
#   registry.local:5000/team/image:tag -> registry.local
REPO="${IMAGE%%@*}"
if [[ "${REPO##*/}" == *:* ]]; then
  REPO="${REPO%:*}"
fi
TAG_VER="$REPO:$VERSION"

docker tag "$IMAGE" "$TAG_VER"
# Slashes in the repository name are replaced with '-' for the archive file name.
out_tar="$STAGE/images/${REPO//\//-}-$VERSION.tar"
docker save -o "$out_tar" "$TAG_VER"
gzip -f "$out_tar"
# 2) Compose + env template
cp "$TEMPL_DIR/compose/docker-compose.yml" "$STAGE/compose/docker-compose.yml"
ENV_EX="$STAGE/compose/.env.example"
cat >"$ENV_EX" <<EOF
# Generated by make_arm_client_package.sh
PKG_VERSION=$VERSION
NODE_CPU_IMAGE_TAG=${REPO}:${VERSION}
# Compose project name (isolation from server stack)
COMPOSE_PROJECT_NAME=argus-client-arm
# Required fieldsARM Client 默认通过 overlay alias 访问 master可按需覆盖 MASTER_ENDPOINT
MASTER_ENDPOINT=http://master.argus.com:3000
AGENT_ENV=
AGENT_USER=
AGENT_INSTANCE=
CPU_NODE_HOSTNAME=
# UID/GID for volume ownership
ARGUS_BUILD_UID=2133
ARGUS_BUILD_GID=2015
EOF
# 3) Docs
# Optional: copied only when the template ships a docs/ directory.
# copy_tree is not defined in this script (presumably from the sourced common.sh).
CLIENT_DOC_SRC="$TEMPL_DIR/docs"
if [[ -d "$CLIENT_DOC_SRC" ]]; then
copy_tree "$CLIENT_DOC_SRC" "$STAGE/docs"
fi
# 4) Scripts & private skeleton
SCRIPTS_SRC="$TEMPL_DIR/scripts"
if [[ -d "$SCRIPTS_SRC" ]]; then
copy_tree "$SCRIPTS_SRC" "$STAGE/scripts"
# Best effort: make every shipped *.sh executable; failures are ignored.
find "$STAGE/scripts" -type f -name '*.sh' -exec chmod +x {} + 2>/dev/null || true
fi
# x86-only scripts: used to run the ARM Client package on an x86_64 host.
SCRIPTS_X86_SRC="$TEMPL_DIR/scripts_for_x86"
if [[ -d "$SCRIPTS_X86_SRC" ]]; then
copy_tree "$SCRIPTS_X86_SRC" "$STAGE/scripts_for_x86"
find "$STAGE/scripts_for_x86" -type f -name '*.sh' -exec chmod +x {} + 2>/dev/null || true
fi
# Pre-create the directory that the client compose file bind-mounts for the agent.
mkdir -p "$STAGE/private/argus/agent"
# 5) Manifest & checksums
# gen_manifest / checksum_dir are external helpers (presumably from common.sh);
# both write their output file inside the staging directory.
gen_manifest "$STAGE" "$STAGE/manifest.txt"
checksum_dir "$STAGE" "$STAGE/checksums.txt"
# 6) Move to artifact dir and pack
mkdir -p "$PKG_DIR"
copy_tree "$STAGE" "$PKG_DIR"
OUT_TAR_DIR="$(dirname "$PKG_DIR")"
OUT_TAR="$OUT_TAR_DIR/client_arm_${VERSION}.tar.gz"
log "Creating tarball: $OUT_TAR"
# tar runs from the parent of PKG_DIR and archives only its basename, so the
# archive's top-level entry is the version directory itself.
(cd "$PKG_DIR/.." && tar -czf "$OUT_TAR" "$(basename "$PKG_DIR")")
log "Client-ARM package ready: $PKG_DIR"
# The last line on stdout is the tarball path.
echo "$OUT_TAR"

View File

@ -0,0 +1,175 @@
#!/usr/bin/env bash
set -euo pipefail
# Make ARM server deployment package (master + prometheus + grafana + alertmanager + web)
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
TEMPL_DIR="$ROOT_DIR/deployment_new/templates/server_arm"
ART_ROOT="$ROOT_DIR/deployment_new/artifact/server_arm"
COMMON_SH="$ROOT_DIR/deployment_new/build/common.sh"
. "$COMMON_SH"
# Print the command-line help for the ARM server packager to stdout.
usage() {
  local script_name
  script_name="$(basename "$0")"
  printf '%s\n' \
    'Build ARM Server Deployment Package (deployment_new)' \
    "Usage: ${script_name} --version YYYYMMDD" \
    'Outputs: deployment_new/artifact/server_arm/<YYYYMMDD>/ and server_arm_YYYYMMDD.tar.gz'
}
# Parse CLI options. --version is checked for a following argument first:
# with `set -u`, a trailing bare `--version` would otherwise abort with an
# opaque "unbound variable" error.
VERSION=""
while [[ $# -gt 0 ]]; do
  case "$1" in
    --version)
      if [[ $# -lt 2 ]]; then
        err "missing value for $1"
        usage
        exit 1
      fi
      VERSION="$2"
      shift 2
      ;;
    -h | --help)
      usage
      exit 0
      ;;
    *)
      err "unknown arg: $1"
      usage
      exit 1
      ;;
  esac
done

# Without --version, fall back to today_version (not defined in this script;
# presumably provided by the sourced common.sh).
if [[ -z "$VERSION" ]]; then VERSION="$(today_version)"; fi

require_cmd docker tar gzip awk sed
IMAGES=(
argus-master-arm64
argus-metric-prometheus-arm64
argus-metric-prometheus-targets-updater-arm64
argus-metric-grafana-arm64
argus-alertmanager-arm64
argus-web-frontend-arm64
argus-web-proxy-arm64
)
STAGE="$(mktemp -d)"; trap 'rm -rf "$STAGE"' EXIT
PKG_DIR="$ART_ROOT/$VERSION"
mkdir -p "$PKG_DIR" "$STAGE/images" "$STAGE/compose" "$STAGE/docs" "$STAGE/scripts" "$STAGE/private/argus"
# 1) Save per-image tars with version tag
# For each repository: use :$VERSION when it already exists, otherwise promote
# :latest to :$VERSION; abort when neither tag is available locally.
log "Tagging and saving ARM server images (version=$VERSION)"
for repo in "${IMAGES[@]}"; do
  tag="$repo:$VERSION"
  if docker image inspect "$tag" >/dev/null 2>&1; then
    : # versioned image already present, nothing to tag
  elif docker image inspect "$repo:latest" >/dev/null 2>&1; then
    docker tag "$repo:latest" "$tag"
  else
    err "missing image: $repo (need :latest or :$VERSION)"
    exit 1
  fi

  # One gzip-compressed archive per image; '/' in the name becomes '-'.
  out_tar="$STAGE/images/${repo//\//-}-$VERSION.tar"
  docker save -o "$out_tar" "$tag"
  gzip -f "$out_tar"
done
# 1.1) (Optional) ship the ubuntu:22.04 base image that scripts_for_x86/install.sh
# uses to self-test ARM emulation on an x86 host.
# NOTE(review): the archive contains whatever architecture the local
# ubuntu:22.04 image has, while the x86 install script runs it with
# --platform=linux/arm64 — confirm the build host provides the arm64 variant.
TEST_BASE="ubuntu:22.04"
if docker image inspect "$TEST_BASE" >/dev/null 2>&1 || docker pull "$TEST_BASE" >/dev/null 2>&1; then
  log "Saving ARM test base image for x86 host: $TEST_BASE"
  test_tar="$STAGE/images/ubuntu-22.04-arm-test.tar"
  docker save -o "$test_tar" "$TEST_BASE"
  gzip -f "$test_tar"
else
  # The variable is braced so the text that follows can never be parsed as part
  # of the variable name (an unset name would abort under `set -u`).
  warn "无法获取 ${TEST_BASE}，x86 上的 ARM 运行能力将无法使用该镜像做快速自检（可忽略，或在目标机自行拉取）。"
fi

# 1.2) Ship the tonistiigi/binfmt image so an offline x86 machine can install
# binfmt/QEMU from the package.
BINFMT_IMAGE="tonistiigi/binfmt:latest"
if docker image inspect "$BINFMT_IMAGE" >/dev/null 2>&1 || docker pull "$BINFMT_IMAGE" >/dev/null 2>&1; then
  log "Saving binfmt helper image for x86 host: $BINFMT_IMAGE"
  binfmt_tar="$STAGE/images/tonistiigi-binfmt.tar"
  docker save -o "$binfmt_tar" "$BINFMT_IMAGE"
  gzip -f "$binfmt_tar"
else
  warn "无法获取 ${BINFMT_IMAGE}，x86 上将无法通过离线方式安装 binfmt可在目标机上自行 docker pull"
fi
# 2) Compose + env template (ARM-specific)
cp "$TEMPL_DIR/compose/docker-compose.yml" "$STAGE/compose/docker-compose.yml"
ENV_EX="$STAGE/compose/.env.example"
cat >"$ENV_EX" <<EOF
# Generated by make_arm_server_package.sh
PKG_VERSION=$VERSION
# ARM server image tags
MASTER_IMAGE_TAG=argus-master-arm64:${VERSION}
PROM_IMAGE_TAG=argus-metric-prometheus-arm64:${VERSION}
PROM_UPDATER_IMAGE_TAG=argus-metric-prometheus-targets-updater-arm64:${VERSION}
GRAFANA_IMAGE_TAG=argus-metric-grafana-arm64:${VERSION}
ALERT_IMAGE_TAG=argus-alertmanager-arm64:${VERSION}
FRONT_IMAGE_TAG=argus-web-frontend-arm64:${VERSION}
WEB_PROXY_IMAGE_TAG=argus-web-proxy-arm64:${VERSION}
# Host ports for server compose
MASTER_PORT=32300
PROMETHEUS_PORT=9090
GRAFANA_PORT=3000
ALERTMANAGER_PORT=9093
WEB_PROXY_PORT_8080=8080
WEB_PROXY_PORT_8081=8081
WEB_PROXY_PORT_8082=8082
WEB_PROXY_PORT_8083=8083
WEB_PROXY_PORT_8084=8084
WEB_PROXY_PORT_8085=8085
# Overlay network name
ARGUS_OVERLAY_NET=argus-sys-net
# UID/GID for volume ownership
ARGUS_BUILD_UID=2133
ARGUS_BUILD_GID=2015
# Compose project name (isolation from other stacks on same host)
COMPOSE_PROJECT_NAME=argus-server-arm
EOF
# 3) Docs & scripts from templates/server_arm
DOCS_SRC="$TEMPL_DIR/docs"
if [[ -d "$DOCS_SRC" ]]; then
copy_tree "$DOCS_SRC" "$STAGE/docs"
fi
SCRIPTS_SRC="$TEMPL_DIR/scripts"
if [[ -d "$SCRIPTS_SRC" ]]; then
copy_tree "$SCRIPTS_SRC" "$STAGE/scripts"
find "$STAGE/scripts" -type f -name '*.sh' -exec chmod +x {} + 2>/dev/null || true
fi
# x86-only scripts: used to run the ARM server package on an x86_64 host
SCRIPTS_X86_SRC="$TEMPL_DIR/scripts_for_x86"
if [[ -d "$SCRIPTS_X86_SRC" ]]; then
copy_tree "$SCRIPTS_X86_SRC" "$STAGE/scripts_for_x86"
find "$STAGE/scripts_for_x86" -type f -name '*.sh' -exec chmod +x {} + 2>/dev/null || true
fi
# 4) Private skeleton (the minimal directories the ARM server needs)
mkdir -p \
"$STAGE/private/argus/etc" \
"$STAGE/private/argus/master" \
"$STAGE/private/argus/metric/prometheus" \
"$STAGE/private/argus/metric/prometheus/data" \
"$STAGE/private/argus/metric/prometheus/rules" \
"$STAGE/private/argus/metric/prometheus/targets" \
"$STAGE/private/argus/metric/grafana" \
"$STAGE/private/argus/metric/grafana/data" \
"$STAGE/private/argus/metric/grafana/logs" \
"$STAGE/private/argus/metric/grafana/plugins" \
"$STAGE/private/argus/metric/grafana/provisioning/datasources" \
"$STAGE/private/argus/metric/grafana/provisioning/dashboards" \
"$STAGE/private/argus/metric/grafana/data/sessions" \
"$STAGE/private/argus/metric/grafana/data/dashboards" \
"$STAGE/private/argus/metric/grafana/config"
# 5) Manifest & checksums
gen_manifest "$STAGE" "$STAGE/manifest.txt"
checksum_dir "$STAGE" "$STAGE/checksums.txt"
# 6) Move to artifact dir and pack
mkdir -p "$PKG_DIR"
copy_tree "$STAGE" "$PKG_DIR"
OUT_TAR_DIR="$(dirname "$PKG_DIR")"
OUT_TAR="$OUT_TAR_DIR/server_arm_${VERSION}.tar.gz"
log "Creating tarball: $OUT_TAR"
(cd "$PKG_DIR/.." && tar -czf "$OUT_TAR" "$(basename "$PKG_DIR")")
log "ARM Server package ready: $PKG_DIR"
echo "$OUT_TAR"

View File

@ -0,0 +1,29 @@
# Compose file for the ARM Client-CPU node (a single metric-cpu-node service).
version: "3.8"
# The network is declared external, so it must already exist before `up`;
# Compose will not create it.
networks:
argus-sys-net:
external: true
services:
metric-cpu-node:
# NODE_CPU_IMAGE_TAG wins when set; otherwise the image name is built from PKG_VERSION.
image: ${NODE_CPU_IMAGE_TAG:-argus-sys-metric-test-node-arm64:${PKG_VERSION}}
# Always request the arm64 variant of the image.
platform: linux/arm64
container_name: argus-metric-cpu-node
hostname: ${CPU_NODE_HOSTNAME}
restart: unless-stopped
environment:
- TZ=Asia/Shanghai
- DEBIAN_FRONTEND=noninteractive
- MASTER_ENDPOINT=${MASTER_ENDPOINT}
- ARGUS_BUILD_UID=${ARGUS_BUILD_UID:-2133}
- ARGUS_BUILD_GID=${ARGUS_BUILD_GID:-2015}
- AGENT_ENV=${AGENT_ENV}
- AGENT_USER=${AGENT_USER}
- AGENT_INSTANCE=${AGENT_INSTANCE}
networks:
argus-sys-net:
# Network alias derived from AGENT_INSTANCE.
aliases:
- ${AGENT_INSTANCE}.node.argus.com
# Bind mount, relative to the compose/ directory.
volumes:
- ../private/argus/agent:/private/argus/agent

View File

@ -0,0 +1,154 @@
# ARM Client-CPU 安装指南（client_arm）
本包用于在 ARM64 CPU 节点上部署 Argus 的最小客户端:
- `argus-agent`（负责注册/上报状态到 master）；
- `node-exporter`（暴露该节点的系统指标）。
> 注意:本 ARM 客户端包 **不包含 GPU/DCGM exporter 和 Fluent Bit**,仅用于 CPU 节点 metrics。
---
## 1. 前置条件
- 目标机器为 ARM64（例如：aarch64 服务器 / 节点）；
- 已安装 Docker 和 docker compose（v2，支持 `docker compose` 命令）；
- 能访问 master 所在网络,或已经通过 overlay 网络加入到与 server 相同的 `argus-sys-net` 中。
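如采用 overlay 网络（Swarm 模式），需提前在 Swarm manager 节点上创建 `argus-sys-net`。由于本包通过 `docker compose` 启动普通容器（而非 Swarm service），该网络必须以 `--attachable` 方式创建：
```bash
docker network create --driver overlay --attachable argus-sys-net || true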
```
如仅使用本地 bridge 网络,可将 compose 中 `networks: argus-sys-net` 调整为本地自定义 network。
---
## 2. 解压和加载镜像
假设你已经将 `client_arm_YYYYMMDD.tar.gz` 拷贝到 ARM 节点:
```bash
tar -xzf client_arm_YYYYMMDD.tar.gz
cd YYYYMMDD
```
加载 CPU node 镜像:
```bash
for f in images/*.tar.gz; do
gunzip -c "$f" | docker load
done
```
可以通过:
```bash
docker images | grep 'argus-sys-metric-test-node-arm64'
```
确认镜像已加载。
---
## 3. 配置 .env
进入 compose 目录:
```bash
cd compose
cp .env.example .env
```
需要填写的关键字段:
- `MASTER_ENDPOINT`
- 例如：`http://master.argus.com:3000` 或 `http://<Server-IP>:32300`
- `AGENT_ENV` / `AGENT_USER` / `AGENT_INSTANCE`
- 用于识别环境、用户和实例,在 UI 中区分节点:
- 示例:`AGENT_ENV=prod`, `AGENT_USER=ops`, `AGENT_INSTANCE=node001`
- `CPU_NODE_HOSTNAME`
- 节点在配置中的 hostname一般与宿主 `hostname` 一致,例如 `swarm-metric-node-001`
- `ARGUS_BUILD_UID` / `ARGUS_BUILD_GID`
- 用于挂载目录 `/private/argus/agent` 的权限映射。
例如:
```env
PKG_VERSION=20251124
NODE_CPU_IMAGE_TAG=argus-sys-metric-test-node-arm64:20251124
COMPOSE_PROJECT_NAME=argus-client-arm
MASTER_ENDPOINT=http://master.argus.com:3000
AGENT_ENV=dev2
AGENT_USER=yuyr
AGENT_INSTANCE=node001sX
CPU_NODE_HOSTNAME=swarm-metric-node-001
ARGUS_BUILD_UID=1000
ARGUS_BUILD_GID=1000
```
---
## 4. 准备挂载目录
包中已经预创建 `private/argus/agent` 目录:
- `../private/argus/agent`
可以根据需要设置所有者和权限:
```bash
cd ..
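# 注意：bash 没有内置的 $GID 变量，这里直接读取 compose/.env 中的 ARGUS_BUILD_UID / ARGUS_BUILD_GID
. compose/.env
sudo chown -R "${ARGUS_BUILD_UID}:${ARGUS_BUILD_GID}" private/argus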
```
确保与 `.env` 中的 `ARGUS_BUILD_UID/GID` 一致。
---
## 5. 启动客户端容器
在 `compose/` 目录执行（如果上一步执行过 `cd ..`，请先 `cd compose`）：
```bash
docker compose --env-file .env up -d
```
查看状态:
```bash
docker compose ps
```
预期看到:
- `argus-metric-cpu-node` 处于 `Up` 状态。
---
## 6. 在 server 端验证
在 server 侧（master + Prometheus + Grafana 已经通过 ARM server 包部署）：
1. 在 master 的 UI 或 API 中:
- `GET /api/v1/master/nodes` 应能看到新节点,`name=CPU_NODE_HOSTNAME`
2. 在 Prometheus 中:
- `up{job="node"}` 中应有当前节点记录,且 `value=1`
- 可以通过 `node_load1{hostname="swarm-metric-node-001"}` 等表达式查看指标;
3. 在 Web 门户 / Grafana
- 节点列表页面中能看到该节点;
- 通过“按 hostname 的节点面板”查看对应指标。
---
## 7. 注意事项
- ARM 客户端包只负责“CPU 节点 metrics”不包含 GPU/DCGM 或 Fluent Bit
- 如需要 GPU 支持,请继续使用 `client_gpu` 包,并部署在 x86_64 + NVIDIA GPU 节点上;
- ARM 节点的网络连通性(到 master / Prometheus需要运维侧提前规划
本包不做网络路由/防火墙配置,仅假定可以直接访问 `MASTER_ENDPOINT`

View File

@ -0,0 +1,105 @@
#!/usr/bin/env bash
set -euo pipefail
# ARM Client-CPU 配置脚本:
# - 读取 server 侧生成的 cluster-info.env包含 SWARM_MANAGER_ADDR / SWARM_JOIN_TOKEN_WORKER
# - 加入 Swarmworker
# - 预热 overlay 网络,确保能通过 master.argus.com 访问 server
# - 生成/更新 compose/.env填充 Swarm 相关字段,保留人工填写的 AGENT_*、CPU_NODE_HOSTNAME 等)
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
PKG_ROOT="$ROOT_DIR"
ENV_EX="$PKG_ROOT/compose/.env.example"
ENV_OUT="$PKG_ROOT/compose/.env"
# Blue-prefixed progress message on stdout.
info() {
  echo -e "\033[34m[CONFIG-ARM-CLIENT]\033[0m $*"
}

# Red-prefixed error message on stderr.
err() {
  echo -e "\033[31m[ERROR]\033[0m $*" >&2
}

# Check that every named command is on PATH. Each missing command is reported
# through err; returns 0 only when all are present, 1 otherwise.
require() {
  local cmd status=0
  for cmd in "$@"; do
    if ! command -v "$cmd" >/dev/null 2>&1; then
      err "缺少依赖: $cmd"
      status=1
    fi
  done
  return "$status"
}
# Compose detection: prefer `docker compose` (v2), fall back to `docker-compose` (v1).
# Returns 0 when either front end answers `version`; otherwise prints an error
# and terminates the script with exit code 1.
require_compose(){
if docker compose version >/dev/null 2>&1; then return 0; fi
if command -v docker-compose >/dev/null 2>&1 && docker-compose version >/dev/null 2>&1; then return 0; fi
err "未检测到 Docker Compose请安装 docker compose v2 或 docker-compose v1"; exit 1
}
require docker curl jq awk sed tar gzip
require_compose
[[ -f "$ENV_EX" ]] || { err "缺少模板文件: $ENV_EX"; exit 1; }
# Disk space check (in MB).
# $1 - path whose filesystem is inspected.
# Reads the 4th column of the second `df -Pm` output line and reports an error
# through err when it is empty or below 5120. The check is advisory: the
# function does not exit, and both callers below also append `|| true`.
check_disk(){ local p="$1"; local need=5120; local free
free=$(df -Pm "$p" | awk 'NR==2{print $4+0}')
if [[ -z "$free" || "$free" -lt "$need" ]]; then err "磁盘空间不足: $p 剩余 ${free:-0}MB (<${need}MB)"; fi
}
check_disk "$PKG_ROOT" || true
check_disk "/var/lib/docker" || true
# 导入 cluster-info.env默认取当前包根也可用 CLUSTER_INFO 指定路径)
CI_IN="${CLUSTER_INFO:-$PKG_ROOT/cluster-info.env}"
info "读取 cluster-info.env: $CI_IN"
[[ -f "$CI_IN" ]] || { err "找不到 cluster-info.env默认当前包根或设置环境变量 CLUSTER_INFO 指定绝对路径)"; exit 1; }
set -a; source "$CI_IN"; set +a
[[ -n "${SWARM_MANAGER_ADDR:-}" && -n "${SWARM_JOIN_TOKEN_WORKER:-}" ]] || { err "cluster-info.env 缺少 SWARM 信息SWARM_MANAGER_ADDR/SWARM_JOIN_TOKEN_WORKER"; exit 1; }
# 加入 Swarm幂等
info "加入 Swarm幂等$SWARM_MANAGER_ADDR"
docker swarm join --token "$SWARM_JOIN_TOKEN_WORKER" "$SWARM_MANAGER_ADDR":2377 >/dev/null 2>&1 || true
# 导入 busybox 并做 overlay 预热与连通性
NET_NAME="${ARGUS_OVERLAY_NET:-argus-sys-net}"
if ! docker image inspect busybox:latest >/dev/null 2>&1; then
if [[ -f "$PKG_ROOT/images/busybox.tar" ]]; then
info "加载 busybox.tar 以预热 overlay"
docker load -i "$PKG_ROOT/images/busybox.tar" >/dev/null
else
err "缺少 busybox 镜像(包内 images/busybox.tar 或本地 busybox:latest无法预热 overlay $NET_NAME"; exit 1
fi
fi
# Warm-up container: attach a container to the overlay from this node so the
# network becomes visible locally, then use it to test the real data path.
docker rm -f argus-net-warmup >/dev/null 2>&1 || true
info "启动 warmup 容器加入 overlay: $NET_NAME"
# A failed `docker run` used to be swallowed, which later surfaced as a
# misleading "cannot reach master" error (the `docker exec` below cannot work
# without the container). Fail right away and show Docker's own message.
if ! warmup_out="$(docker run -d --rm --name argus-net-warmup --network "$NET_NAME" busybox:latest sleep 600 2>&1)"; then
  err "warmup 容器启动失败（network=${NET_NAME}）: ${warmup_out}"
  exit 1
fi

# Wait up to 60 seconds for the overlay to show up on this node.
for i in {1..60}; do
  if docker network inspect "$NET_NAME" >/dev/null 2>&1; then
    info "overlay 可见 (t=${i}s)"
    break
  fi
  sleep 1
done
if ! docker network inspect "$NET_NAME" >/dev/null 2>&1; then
  err "预热后仍未看到 overlay: ${NET_NAME}；请确认 server 侧 overlay 已创建且可达"
  exit 1
fi

# Test the actual data path through the warm-up container (alias -> master).
if ! docker exec argus-net-warmup sh -lc "ping -c 1 -W 2 master.argus.com >/dev/null 2>&1"; then
  err "warmup 容器内无法通过别名访问 master.argus.com请确认 server compose 已启动并加入 overlay $NET_NAME"
  exit 1
fi
info "warmup 容器内可达 master.argus.comDocker DNS + alias 正常）"
# 生成/更新 .env保留人工填写项不覆盖已有键
if [[ ! -f "$ENV_OUT" ]]; then
cp "$ENV_EX" "$ENV_OUT"
fi
# Set KEY=VALUE in the env file named by $ENV_OUT.
#   $1 - key, $2 - value (written verbatim)
# Every existing line that starts with "KEY=" is replaced; when there is none,
# the pair is appended. The file is rewritten in pure bash instead of through a
# sed substitution, so values containing '#', '&' or '\' are stored literally
# rather than breaking or corrupting the edit.
set_kv() {
  local k="$1" v="$2" line found=0
  local -a out=()
  if [[ -f "$ENV_OUT" ]]; then
    # `|| [[ -n "$line" ]]` keeps a final line that lacks a trailing newline.
    while IFS= read -r line || [[ -n "$line" ]]; do
      if [[ "$line" == "$k="* ]]; then
        out+=("$k=$v")
        found=1
      else
        out+=("$line")
      fi
    done < "$ENV_OUT"
  fi
  if [[ "$found" -eq 0 ]]; then
    out+=("$k=$v")
  fi
  # Rewriting in place (no rename) keeps the file's existing permissions.
  printf '%s\n' "${out[@]}" > "$ENV_OUT"
}
# 写入与 Swarm 相关的字段,便于后续诊断
set_kv SWARM_MANAGER_ADDR "${SWARM_MANAGER_ADDR:-}"
set_kv SWARM_JOIN_TOKEN_WORKER "${SWARM_JOIN_TOKEN_WORKER:-}"
set_kv SWARM_JOIN_TOKEN_MANAGER "${SWARM_JOIN_TOKEN_MANAGER:-}"
# 若未显式设置 MASTER_ENDPOINT则默认走 overlay 别名
if ! grep -q '^MASTER_ENDPOINT=' "$ENV_OUT"; then
echo "MASTER_ENDPOINT=http://master.argus.com:3000" >> "$ENV_OUT"
fi
# Verify the fields the user has to fill in by hand.
REQ_VARS=(AGENT_ENV AGENT_USER AGENT_INSTANCE CPU_NODE_HOSTNAME)
missing=()
for v in "${REQ_VARS[@]}"; do
  # grep exits 1 when the key is absent from the file. With `set -e` and
  # `pipefail` that would abort the script silently, before the summary below
  # is printed, so a failing pipeline is treated as "no value".
  val="$(grep -E "^$v=" "$ENV_OUT" | head -1 | cut -d= -f2- || true)"
  if [[ -z "$val" ]]; then missing+=("$v"); fi
done
if [[ ${#missing[@]} -gt 0 ]]; then
  err "以下变量必须在 compose/.env 中填写：${missing[*]}（已保留你现有的内容，不会被覆盖）"
  exit 1
fi
info "已生成 compose/.env可执行 scripts/install.sh 启动 ARM Client-CPU"

View File

@ -0,0 +1,48 @@
#!/usr/bin/env bash
set -euo pipefail

# ARM Client-CPU install script:
#   - loads the ARM CPU node image shipped under images/
#   - starts the metric-cpu-node service from compose/docker-compose.yml
# Requires compose/.env; the error message points to scripts/config.sh when it
# is missing.

ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
PKG_ROOT="$ROOT_DIR"
ENV_FILE="$PKG_ROOT/compose/.env"
COMPOSE_FILE="$PKG_ROOT/compose/docker-compose.yml"

info() { echo -e "\033[34m[INSTALL-ARM-CLIENT]\033[0m $*"; }
err() { echo -e "\033[31m[ERROR]\033[0m $*" >&2; }

# Return 0 only when every named command is on PATH; report each missing one.
require() {
  local ok=1 c
  for c in "$@"; do
    command -v "$c" >/dev/null 2>&1 || { err "缺少依赖: $c"; ok=0; }
  done
  [[ $ok -eq 1 ]]
}

# Compose front end selected by require_compose. The detected command is now
# actually used below; previously the v1 fallback passed the check but the
# script still invoked `docker compose` unconditionally.
COMPOSE=()
require_compose() {
  if docker compose version >/dev/null 2>&1; then
    COMPOSE=(docker compose)
    return 0
  fi
  if command -v docker-compose >/dev/null 2>&1 && docker-compose version >/dev/null 2>&1; then
    COMPOSE=(docker-compose)
    return 0
  fi
  err "未检测到 Docker Compose请安装 docker compose v2 或 docker-compose v1"
  exit 1
}

require docker
require_compose

[[ -f "$ENV_FILE" ]] || { err "缺少 compose/.env请先运行 scripts/config.sh"; exit 1; }
info "使用环境文件: $ENV_FILE"
set -a; source "$ENV_FILE"; set +a

# Load the ARM CPU node image. A glob replaces parsing `ls`; the first match in
# sorted order is used.
shopt -s nullglob
image_archives=("$PKG_ROOT"/images/argus-sys-metric-test-node-arm64-*.tar.gz)
shopt -u nullglob
if [[ ${#image_archives[@]} -eq 0 ]]; then
  err "找不到 ARM CPU node 镜像 tar.gzargus-sys-metric-test-node-arm64-*.tar.gz"
  exit 1
fi
IMG_TGZ="${image_archives[0]}"
info "导入 ARM CPU node 镜像: $(basename "$IMG_TGZ")"
# Stream the archive into `docker load` (which reads stdin by default). No
# decompressed temp file is written, so nothing is left behind when the load
# fails; pipefail makes a gunzip error fatal as well.
gunzip -c "$IMG_TGZ" | docker load >/dev/null

# Make sure the agent bind-mount directory exists.
mkdir -p "$PKG_ROOT/private/argus/agent"

# Start the compose project.
PROJECT="${COMPOSE_PROJECT_NAME:-argus-client-arm}"
info "启动 ARM CPU 节点 (${COMPOSE[*]} -p $PROJECT up -d)"
"${COMPOSE[@]}" -p "$PROJECT" --env-file "$ENV_FILE" -f "$COMPOSE_FILE" up -d
"${COMPOSE[@]}" -p "$PROJECT" --env-file "$ENV_FILE" -f "$COMPOSE_FILE" ps

info "ARM Client-CPU 安装完成。可通过 docker logs -f argus-metric-cpu-node 查看节点容器日志。"

View File

@ -0,0 +1,28 @@
#!/usr/bin/env bash
set -euo pipefail

# ARM Client-CPU uninstall script: stops and removes the compose project.
# Best effort: a failing `down` is reported but does not fail the script.

ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
PKG_ROOT="$ROOT_DIR"
ENV_FILE="$PKG_ROOT/compose/.env"
COMPOSE_FILE="$PKG_ROOT/compose/docker-compose.yml"

# compose/.env is optional here; without it the default project name is used.
if [[ -f "$ENV_FILE" ]]; then
  set -a; source "$ENV_FILE"; set +a
fi
PROJECT="${COMPOSE_PROJECT_NAME:-argus-client-arm}"

err() { echo -e "\033[31m[ERROR]\033[0m $*" >&2; }

# Compose front end selected by require_compose (v2 plugin preferred, v1
# binary as fallback). The detected command is what gets invoked below.
COMPOSE=()
require_compose() {
  if docker compose version >/dev/null 2>&1; then
    COMPOSE=(docker compose)
    return 0
  fi
  if command -v docker-compose >/dev/null 2>&1 && docker-compose version >/dev/null 2>&1; then
    COMPOSE=(docker-compose)
    return 0
  fi
  err "未检测到 Docker Compose请安装 docker compose v2 或 docker-compose v1"
  exit 1
}
require_compose

# Pass --env-file only when the file exists. Passing a missing file makes
# compose fail, and that failure used to be hidden by `|| true`, so the
# containers were silently left running.
compose_args=(-p "$PROJECT")
if [[ -f "$ENV_FILE" ]]; then
  compose_args+=(--env-file "$ENV_FILE")
fi
compose_args+=(-f "$COMPOSE_FILE")

echo "[UNINSTALL-ARM-CLIENT] stopping compose (project=$PROJECT)"
if ! "${COMPOSE[@]}" "${compose_args[@]}" down --remove-orphans; then
  err "compose down failed (project=$PROJECT); containers may need manual cleanup"
fi
echo "[UNINSTALL-ARM-CLIENT] done"

View File

@ -0,0 +1,96 @@
#!/usr/bin/env bash
set -euo pipefail
# x86 上安装 ARM Client-CPU 的配置脚本:
# 逻辑与 ARM 版基本一致(读取 cluster-info、加入 Swarm、预热 overlay、生成 .env只是日志前缀不同。
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
PKG_ROOT="$ROOT_DIR"
ENV_EX="$PKG_ROOT/compose/.env.example"
ENV_OUT="$PKG_ROOT/compose/.env"
info(){ echo -e "\033[34m[CONFIG-ARM-CLIENT-X86]\033[0m $*"; }
err(){ echo -e "\033[31m[ERROR]\033[0m $*" >&2; }
require(){ local ok=1; for c in "$@"; do command -v "$c" >/dev/null 2>&1 || { err "缺少依赖: $c"; ok=0; }; done; [[ $ok -eq 1 ]]; }
require_compose(){
if docker compose version >/dev/null 2>&1; then return 0; fi
if command -v docker-compose >/dev/null 2>&1 && docker-compose version >/dev/null 2>&1; then return 0; fi
err "未检测到 Docker Compose请安装 docker compose v2 或 docker-compose v1"; exit 1
}
require docker curl jq awk sed tar gzip
require_compose
[[ -f "$ENV_EX" ]] || { err "缺少模板文件: $ENV_EX"; exit 1; }
# 磁盘空间检查
check_disk(){ local p="$1"; local need=5120; local free
free=$(df -Pm "$p" | awk 'NR==2{print $4+0}')
if [[ -z "$free" || "$free" -lt "$need" ]]; then err "磁盘空间不足: $p 剩余 ${free:-0}MB (<${need}MB)"; fi
}
check_disk "$PKG_ROOT" || true
check_disk "/var/lib/docker" || true
# 导入 cluster-info.env
CI_IN="${CLUSTER_INFO:-$PKG_ROOT/cluster-info.env}"
info "读取 cluster-info.env: $CI_IN"
[[ -f "$CI_IN" ]] || { err "找不到 cluster-info.env默认当前包根或设置环境变量 CLUSTER_INFO 指定绝对路径)"; exit 1; }
set -a; source "$CI_IN"; set +a
[[ -n "${SWARM_MANAGER_ADDR:-}" && -n "${SWARM_JOIN_TOKEN_WORKER:-}" ]] || { err "cluster-info.env 缺少 SWARM 信息SWARM_MANAGER_ADDR/SWARM_JOIN_TOKEN_WORKER"; exit 1; }
# 加入 Swarm幂等
info "加入 Swarm幂等$SWARM_MANAGER_ADDR"
docker swarm join --token "$SWARM_JOIN_TOKEN_WORKER" "$SWARM_MANAGER_ADDR":2377 >/dev/null 2>&1 || true
# 导入 busybox 并做 overlay 预热与连通性
NET_NAME="${ARGUS_OVERLAY_NET:-argus-sys-net}"
if ! docker image inspect busybox:latest >/dev/null 2>&1; then
if [[ -f "$PKG_ROOT/images/busybox.tar" ]]; then
info "加载 busybox.tar 以预热 overlay"
docker load -i "$PKG_ROOT/images/busybox.tar" >/dev/null
else
err "缺少 busybox 镜像(包内 images/busybox.tar 或本地 busybox:latest无法预热 overlay $NET_NAME"; exit 1
fi
fi
# Warm-up container: attach a container to the overlay from this node so the
# network becomes visible locally, then use it to test the real data path.
docker rm -f argus-net-warmup >/dev/null 2>&1 || true
info "启动 warmup 容器加入 overlay: $NET_NAME"
# A failed `docker run` used to be swallowed, which later surfaced as a
# misleading "cannot reach master" error (the `docker exec` below cannot work
# without the container). Fail right away and show Docker's own message.
if ! warmup_out="$(docker run -d --rm --name argus-net-warmup --network "$NET_NAME" busybox:latest sleep 600 2>&1)"; then
  err "warmup 容器启动失败（network=${NET_NAME}）: ${warmup_out}"
  exit 1
fi

# Wait up to 60 seconds for the overlay to show up on this node.
for i in {1..60}; do
  if docker network inspect "$NET_NAME" >/dev/null 2>&1; then
    info "overlay 可见 (t=${i}s)"
    break
  fi
  sleep 1
done
if ! docker network inspect "$NET_NAME" >/dev/null 2>&1; then
  err "预热后仍未看到 overlay: ${NET_NAME}；请确认 server 侧 overlay 已创建且可达"
  exit 1
fi

# Test the actual data path through the warm-up container (alias -> master).
if ! docker exec argus-net-warmup sh -lc "ping -c 1 -W 2 master.argus.com >/dev/null 2>&1"; then
  err "warmup 容器内无法通过别名访问 master.argus.com请确认 server compose 已启动并加入 overlay $NET_NAME"
  exit 1
fi
info "warmup 容器内可达 master.argus.comDocker DNS + alias 正常）"
if [[ ! -f "$ENV_OUT" ]]; then
cp "$ENV_EX" "$ENV_OUT"
fi
# Set KEY=VALUE in the env file named by $ENV_OUT.
#   $1 - key, $2 - value (written verbatim)
# Every existing line that starts with "KEY=" is replaced; when there is none,
# the pair is appended. The file is rewritten in pure bash instead of through a
# sed substitution, so values containing '#', '&' or '\' are stored literally
# rather than breaking or corrupting the edit.
set_kv() {
  local k="$1" v="$2" line found=0
  local -a out=()
  if [[ -f "$ENV_OUT" ]]; then
    # `|| [[ -n "$line" ]]` keeps a final line that lacks a trailing newline.
    while IFS= read -r line || [[ -n "$line" ]]; do
      if [[ "$line" == "$k="* ]]; then
        out+=("$k=$v")
        found=1
      else
        out+=("$line")
      fi
    done < "$ENV_OUT"
  fi
  if [[ "$found" -eq 0 ]]; then
    out+=("$k=$v")
  fi
  # Rewriting in place (no rename) keeps the file's existing permissions.
  printf '%s\n' "${out[@]}" > "$ENV_OUT"
}
set_kv SWARM_MANAGER_ADDR "${SWARM_MANAGER_ADDR:-}"
set_kv SWARM_JOIN_TOKEN_WORKER "${SWARM_JOIN_TOKEN_WORKER:-}"
set_kv SWARM_JOIN_TOKEN_MANAGER "${SWARM_JOIN_TOKEN_MANAGER:-}"
if ! grep -q '^MASTER_ENDPOINT=' "$ENV_OUT"; then
echo "MASTER_ENDPOINT=http://master.argus.com:3000" >> "$ENV_OUT"
fi
# Verify the fields the user has to fill in by hand.
REQ_VARS=(AGENT_ENV AGENT_USER AGENT_INSTANCE CPU_NODE_HOSTNAME)
missing=()
for v in "${REQ_VARS[@]}"; do
  # grep exits 1 when the key is absent from the file. With `set -e` and
  # `pipefail` that would abort the script silently, before the summary below
  # is printed, so a failing pipeline is treated as "no value".
  val="$(grep -E "^$v=" "$ENV_OUT" | head -1 | cut -d= -f2- || true)"
  if [[ -z "$val" ]]; then missing+=("$v"); fi
done
if [[ ${#missing[@]} -gt 0 ]]; then
  err "以下变量必须在 compose/.env 中填写：${missing[*]}（已保留你现有的内容，不会被覆盖）"
  exit 1
fi
info "已生成 compose/.env可执行 scripts_for_x86/install.sh 启动 ARM Client-CPU"

View File

@ -0,0 +1,76 @@
#!/usr/bin/env bash
set -euo pipefail
# 在 x86_64 主机上安装 ARM Client-CPU
# 1) 确认/安装 binfmt + QEMU通过 tonistiigi/binfmt
# 2) 调用通用的 scripts/install.sh 启动 ARM CPU node 容器。
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
ROOT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
info(){ echo -e "\033[34m[INSTALL-ARM-CLIENT-X86]\033[0m $*"; }
err(){ echo -e "\033[31m[ERROR-ARM-CLIENT-X86]\033[0m $*" >&2; }
require(){ local ok=1; for c in "$@"; do command -v "$c" >/dev/null 2>&1 || { err "缺少依赖: $c"; ok=0; }; done; [[ $ok -eq 1 ]]; }
# Print the first images/<prefix>*.tar.gz under the package root, or nothing
# when there is no match (a missing images/ directory simply yields no match).
_first_image_tgz() {
  local f
  for f in "$ROOT_DIR"/images/"$1"*.tar.gz; do
    if [[ -e "$f" ]]; then
      printf '%s\n' "$f"
      return 0
    fi
  done
  return 0
}

# Stream a gzip-compressed `docker save` archive into `docker load`. No
# decompressed temp file is written, so nothing is left behind when the load
# fails.
_load_image_tgz() {
  gunzip -c "$1" | docker load >/dev/null
}

# Make sure this host can run linux/arm64 containers.
# On a non-x86_64 host the function returns immediately. On x86_64 it first
# tries an arm64 test container; when that fails it runs tonistiigi/binfmt
# (privileged) to install binfmt/QEMU and verifies again. Images shipped in
# the package are loaded when they are not available locally.
# Exits the script with status 1 when ARM64 containers still cannot run.
ensure_binfmt_arm64() {
  info "检查当前主机架构: $(uname -m)"
  if [[ "$(uname -m)" != "x86_64" && "$(uname -m)" != "amd64" ]]; then
    info "当前并非 x86_64$(uname -m)），通常应直接在 ARM 服务器上使用 scripts/install.sh。继续执行但不强制安装 binfmt。"
    return 0
  fi

  require docker

  # Prefer the ubuntu-22.04-arm-test image shipped with the package for the self-test.
  # NOTE(review): the packaged image is loaded only when no local ubuntu:22.04
  # exists at all; a local image of another architecture is left in place —
  # confirm that the --platform=linux/arm64 run below behaves as intended then.
  local TEST_IMG="ubuntu:22.04"
  local TEST_TAR
  TEST_TAR="$(_first_image_tgz ubuntu-22.04-arm-test)"
  if [[ -n "$TEST_TAR" ]] && ! docker image inspect "$TEST_IMG" >/dev/null 2>&1; then
    info "从安装包加载 ARM 测试基础镜像: $(basename "$TEST_TAR")"
    _load_image_tgz "$TEST_TAR"
  fi

  info "验证是否已能运行 linux/arm64 容器..."
  if docker run --rm --platform=linux/arm64 "$TEST_IMG" uname -m 2>/dev/null | grep -q 'aarch64'; then
    info "检测到本机已支持 ARM64 容器运行 (uname -m = aarch64)"
    return 0
  fi

  info "未检测到 ARM64 运行能力，尝试通过 tonistiigi/binfmt 安装 binfmt/QEMU ..."
  local BINFMT_IMG="tonistiigi/binfmt:latest"
  local BINFMT_TAR
  BINFMT_TAR="$(_first_image_tgz tonistiigi-binfmt)"
  if [[ -n "$BINFMT_TAR" ]] && ! docker image inspect "$BINFMT_IMG" >/dev/null 2>&1; then
    info "从安装包加载 tonistiigi/binfmt 离线镜像: $(basename "$BINFMT_TAR")"
    _load_image_tgz "$BINFMT_TAR"
  fi

  # Registering binfmt handlers needs a privileged container.
  if ! docker run --privileged --rm "$BINFMT_IMG" --install all; then
    err "tonistiigi/binfmt 安装失败，请检查 Docker 权限或确认离线镜像是否完整。"
    exit 1
  fi

  info "再次验证 ARM64 容器可运行状态..."
  if ! docker run --rm --platform=linux/arm64 "$TEST_IMG" uname -m 2>/dev/null | grep -q 'aarch64'; then
    err "安装 binfmt/QEMU 后仍无法运行 linux/arm64 容器，请手工排查 binfmt_misc 配置。"
    exit 1
  fi
  info "ARM64 容器运行能力检查通过。"
}
ensure_binfmt_arm64
exec "$SCRIPT_DIR/../scripts/install.sh" "$@"

View File

@ -0,0 +1,6 @@
#!/usr/bin/env bash
set -euo pipefail
# x86 entry point for uninstalling the ARM Client-CPU package.
# Thin wrapper: replaces this process with ../scripts/uninstall.sh (resolved
# relative to this file) and forwards all arguments unchanged.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
exec "$SCRIPT_DIR/../scripts/uninstall.sh" "$@"

View File

@ -0,0 +1,153 @@
# Argus ARM64 server stack: master, Prometheus (+ targets updater), Grafana,
# Alertmanager, web frontend and web proxy.
# Every service is pinned to platform linux/arm64 and joins the network
# "argus-sys-net", which is declared external and therefore must already exist.
version: "3.8"

networks:
  argus-sys-net:
    external: true

services:
  master:
    image: ${MASTER_IMAGE_TAG:-argus-master-arm64:${PKG_VERSION}}
    platform: linux/arm64
    container_name: argus-master-sys
    restart: unless-stopped
    environment:
      - OFFLINE_THRESHOLD_SECONDS=180
      - ONLINE_THRESHOLD_SECONDS=120
      - SCHEDULER_INTERVAL_SECONDS=30
      - ARGUS_BUILD_UID=${ARGUS_BUILD_UID:-2133}
      - ARGUS_BUILD_GID=${ARGUS_BUILD_GID:-2015}
    ports:
      - "${MASTER_PORT:-32300}:3000"
    volumes:
      - ../private/argus/master:/private/argus/master
      - ../private/argus/metric/prometheus:/private/argus/metric/prometheus
      - ../private/argus/etc:/private/argus/etc
    networks:
      argus-sys-net:
        aliases:
          - master.argus.com

  prometheus:
    image: ${PROM_IMAGE_TAG:-argus-metric-prometheus-arm64:${PKG_VERSION}}
    platform: linux/arm64
    container_name: argus-prometheus
    restart: unless-stopped
    environment:
      - TZ=Asia/Shanghai
      - PROMETHEUS_BASE_PATH=/private/argus/metric/prometheus
      - ARGUS_BUILD_UID=${ARGUS_BUILD_UID:-2133}
      - ARGUS_BUILD_GID=${ARGUS_BUILD_GID:-2015}
    ports:
      - "${PROMETHEUS_PORT:-9090}:9090"
    volumes:
      - ../private/argus/metric/prometheus:/private/argus/metric/prometheus
      - ../private/argus/etc:/private/argus/etc
    networks:
      argus-sys-net:
        aliases:
          - prom.metric.argus.com

  # Sidecar that shares the Prometheus data directory with master/prometheus.
  prometheus-targets-updater:
    image: ${PROM_UPDATER_IMAGE_TAG:-argus-metric-prometheus-targets-updater-arm64:${PKG_VERSION}}
    platform: linux/arm64
    container_name: argus-prometheus-targets-updater
    restart: unless-stopped
    depends_on:
      - master
      - prometheus
    environment:
      - TZ=Asia/Shanghai
    volumes:
      - ../private/argus/metric/prometheus:/private/argus/metric/prometheus
    networks:
      argus-sys-net:
        aliases:
          - prom-updater.metric.argus.com

  grafana:
    image: ${GRAFANA_IMAGE_TAG:-argus-metric-grafana-arm64:${PKG_VERSION}}
    platform: linux/arm64
    container_name: argus-grafana
    restart: unless-stopped
    depends_on:
      - prometheus
    environment:
      - TZ=Asia/Shanghai
      - GRAFANA_BASE_PATH=/private/argus/metric/grafana
      - ARGUS_BUILD_UID=${ARGUS_BUILD_UID:-2133}
      - ARGUS_BUILD_GID=${ARGUS_BUILD_GID:-2015}
      - GF_SERVER_HTTP_PORT=3000
      - GF_LOG_LEVEL=warn
      - GF_LOG_MODE=console
      - GF_PATHS_PROVISIONING=/private/argus/metric/grafana/provisioning
      - GF_AUTH_ANONYMOUS_ENABLED=true
      - GF_AUTH_ANONYMOUS_ORG_ROLE=Viewer
    ports:
      - "${GRAFANA_PORT:-3000}:3000"
    volumes:
      - ../private/argus/metric/grafana:/private/argus/metric/grafana
      - ../private/argus/etc:/private/argus/etc
    networks:
      argus-sys-net:
        aliases:
          - grafana.metric.argus.com

  alertmanager:
    image: ${ALERT_IMAGE_TAG:-argus-alertmanager-arm64:${PKG_VERSION}}
    platform: linux/arm64
    container_name: argus-alertmanager
    restart: unless-stopped
    environment:
      - ARGUS_BUILD_UID=${ARGUS_BUILD_UID:-2133}
      - ARGUS_BUILD_GID=${ARGUS_BUILD_GID:-2015}
    ports:
      - "${ALERTMANAGER_PORT:-9093}:9093"
    volumes:
      - ../private/argus/etc:/private/argus/etc
      - ../private/argus/alert/alertmanager:/private/argus/alert/alertmanager
    networks:
      argus-sys-net:
        aliases:
          - alertmanager.alert.argus.com

  # The EXTERNAL_*_PORT values reuse the same WEB_PROXY_PORT_* variables that
  # web-proxy publishes on the host below.
  web-frontend:
    image: ${FRONT_IMAGE_TAG:-argus-web-frontend-arm64:${PKG_VERSION}}
    platform: linux/arm64
    container_name: argus-web-frontend
    restart: unless-stopped
    environment:
      - ARGUS_BUILD_UID=${ARGUS_BUILD_UID:-2133}
      - ARGUS_BUILD_GID=${ARGUS_BUILD_GID:-2015}
      - EXTERNAL_MASTER_PORT=${WEB_PROXY_PORT_8085:-8085}
      - EXTERNAL_ALERTMANAGER_PORT=${WEB_PROXY_PORT_8084:-8084}
      - EXTERNAL_GRAFANA_PORT=${WEB_PROXY_PORT_8081:-8081}
      - EXTERNAL_PROMETHEUS_PORT=${WEB_PROXY_PORT_8082:-8082}
    volumes:
      - ../private/argus/etc:/private/argus/etc
    networks:
      argus-sys-net:
        aliases:
          - web.argus.com

  web-proxy:
    image: ${WEB_PROXY_IMAGE_TAG:-argus-web-proxy-arm64:${PKG_VERSION}}
    platform: linux/arm64
    container_name: argus-web-proxy
    restart: unless-stopped
    depends_on:
      - master
      - grafana
      - prometheus
      - alertmanager
    environment:
      - ARGUS_BUILD_UID=${ARGUS_BUILD_UID:-2133}
      - ARGUS_BUILD_GID=${ARGUS_BUILD_GID:-2015}
    ports:
      - "${WEB_PROXY_PORT_8080:-8080}:8080"
      - "${WEB_PROXY_PORT_8081:-8081}:8081"
      - "${WEB_PROXY_PORT_8082:-8082}:8082"
      - "${WEB_PROXY_PORT_8083:-8083}:8083"
      - "${WEB_PROXY_PORT_8084:-8084}:8084"
      - "${WEB_PROXY_PORT_8085:-8085}:8085"
    volumes:
      - ../private/argus/etc:/private/argus/etc
    networks:
      argus-sys-net:
        aliases:
          - proxy.argus.com

View File

@ -0,0 +1,158 @@
# ARM Server 安装指南（server_arm）
本包用于在 ARM64 服务器上部署 Argus 的最小 server 端组件集:
- master
- Prometheus
- Prometheus targets-updater sidecar
- Grafana
- Alertmanager
- Web 前端 + Web Proxy
> 注意:本 ARM 版本 **不包含 Elasticsearch / Kibana / FTP / BIND9**,仅提供指标与告警能力。
---
## 1. 前置条件
- 目标机器为 ARM64（例如 aarch64 服务器）；
- 已安装 Docker 和 docker compose（v2，支持 `docker compose` 命令）；
- 已准备好 overlay 网络 `argus-sys-net`（如不需要，也可将 compose 改为 `bridge` 网络）。
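创建 overlay 网络示例（需先在本机执行 `docker swarm init`；由 docker compose 启动的普通容器只能接入带 `--attachable` 的 overlay 网络）：
```bash
docker network create --driver overlay --attachable argus-sys-net || true
```
如未使用 Swarm，也可以在 compose 中将 `networks: argus-sys-net` 改成本地 `bridge` 网络。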
---
## 2. 解压和加载镜像
假设你已经将 `server_arm_YYYYMMDD.tar.gz` 拷贝到目标 ARM 服务器:
```bash
tar -xzf server_arm_YYYYMMDD.tar.gz
cd server_arm/YYYYMMDD
```
加载镜像:
```bash
for f in images/*.tar.gz; do
gunzip -c "$f" | docker load
done
```
加载后可以用:
```bash
docker images | grep 'argus-.*arm64'
```
确认 master / prometheus / grafana / alertmanager / web 镜像已就绪。
---
## 3. 配置 .env
`compose/` 目录下有自动生成的 `.env.example`：
```bash
cd compose
cp .env.example .env
```
`.env` 中主要字段:
- `PKG_VERSION`:包版本(不建议修改)。
- 镜像 tag（默认使用构建时的 `*-arm64:${PKG_VERSION}`）：
- `MASTER_IMAGE_TAG`
- `PROM_IMAGE_TAG`
- `PROM_UPDATER_IMAGE_TAG`
- `GRAFANA_IMAGE_TAG`
- `ALERT_IMAGE_TAG`
- `FRONT_IMAGE_TAG`
- `WEB_PROXY_IMAGE_TAG`
- 端口:
- `MASTER_PORT`（默认 32300）
- `PROMETHEUS_PORT`（默认 9090）
- `GRAFANA_PORT`（默认 3000）
- `ALERTMANAGER_PORT`（默认 9093）
- `WEB_PROXY_PORT_8080` ~ `WEB_PROXY_PORT_8085`
- 其他:
- `ARGUS_BUILD_UID` / `ARGUS_BUILD_GID`(用于挂载目录的权限对齐)。
根据实际环境按需调整端口和 UID/GID。
---
## 4. 准备挂载目录
在 package 根目录(与 `compose/` 同级)下,已预创建所需的 `private/argus` 目录骨架:
- `private/argus/etc`
- `private/argus/master`
- `private/argus/metric/prometheus`（含 `data` / `rules` / `targets`）
- `private/argus/metric/grafana`(含 `data` / `logs` / `plugins` / `provisioning` 等)
可根据需要调整访问权限,例如:
```bash
cd ..
sudo chown -R "$(id -u):$(id -g)" private/argus
```
确保与 `.env` 中的 `ARGUS_BUILD_UID/GID` 一致。
---
## 5. 启动服务
在 `compose/` 目录执行：
```bash
docker compose --env-file .env up -d
```
检查服务状态:
```bash
docker compose ps
```
预期看到:
- `argus-master-sys`
- `argus-prometheus`
- `argus-prometheus-targets-updater`
- `argus-grafana`
- `argus-alertmanager`
- `argus-web-frontend`
- `argus-web-proxy`
---
## 6. 验证
1. Master：
   - `curl http://localhost:${MASTER_PORT}/readyz`
2. Prometheus：
   - 浏览器访问：`http://<host>:${PROMETHEUS_PORT}`
3. Grafana：
   - 浏览器访问：`http://<host>:${GRAFANA_PORT}`
4. Web 门户:
- 浏览器访问:`http://<host>:${WEB_PROXY_PORT_8080}`
确认这些页面能够正常打开。在没有任何节点注册时，Prometheus 中 `job="node"` 的目标可以为空，但基础 UI 应可访问。
---
## 7. 注意事项
- 本 ARM server 包仅包含 metrics + alert 相关组件，不提供日志链路（ES/Kibana），也不包含 DNS/FTP 相关服务；
- 若后续需要扩展 ES/Kibana，可在新的 spec 中单独设计 ARM 日志栈；
- 对于多节点/Swarm 部署,请结合 `src/sys/arm_swarm_tests` 中的 compose 与脚本进行扩展。

View File

@ -0,0 +1,111 @@
#!/usr/bin/env bash
set -euo pipefail

# Package root is the parent of the directory holding this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
PKG_ROOT="$ROOT_DIR"
ENV_EX="$PKG_ROOT/compose/.env.example"
ENV_OUT="$PKG_ROOT/compose/.env"

# Print an informational message on stdout, prefixed with a blue [CONFIG] tag.
info() {
  echo -e "\033[34m[CONFIG]\033[0m $*"
}

# Print an error message on stderr, prefixed with a red [ERROR] tag.
err() {
  echo -e "\033[31m[ERROR]\033[0m $*" >&2
}

# Check that every named command is available on PATH.
# Reports each missing command through err and returns non-zero if any is
# missing; all arguments are checked before returning.
require() {
  local missing=0 cmd
  for cmd in "$@"; do
    if ! command -v "$cmd" >/dev/null 2>&1; then
      err "缺少依赖: $cmd"
      missing=1
    fi
  done
  [[ $missing -eq 0 ]]
}
# Compose detection: accept the `docker compose` plugin (v2) first, otherwise
# a working standalone `docker-compose` (v1). Reports an error and exits the
# script with status 1 when neither responds to `version`.
require_compose() {
  docker compose version >/dev/null 2>&1 && return 0
  if command -v docker-compose >/dev/null 2>&1; then
    docker-compose version >/dev/null 2>&1 && return 0
  fi
  err "未检测到 Docker Compose请安装 docker compose v2 或 docker-compose v1"
  exit 1
}
# ---------------------------------------------------------------------------
# Main flow of config.sh: preflight checks, then generate compose/.env from
# compose/.env.example with freshly allocated host ports, the caller's
# UID/GID and the overlay network name, and record SWARM_MANAGER_ADDR in
# cluster-info.env.
# ---------------------------------------------------------------------------
require docker awk sed
require_compose
[[ -f "$ENV_EX" ]] || { err "缺少模板文件: $ENV_EX"; exit 1; }

# Disk-space check (simplified; default threshold 5120 MB).
#   $1 - path whose filesystem is inspected
#   $2 - required free space in MB (optional)
# Only reports through err; it never aborts the script.
check_disk() {
  local p="$1"
  local need="${2:-5120}"
  local free
  free=$(df -Pm "$p" 2>/dev/null | awk 'NR==2{print $4+0}')
  if [[ -z "$free" || "$free" -lt "$need" ]]; then
    err "磁盘空间不足: $p 剩余 ${free:-0}MB (<${need}MB),请清理后再继续"
  fi
}
check_disk "$PKG_ROOT" 5120 || true
check_disk "/var/lib/docker" 5120 || true

cp "$ENV_EX" "$ENV_OUT"

# Read SWARM_MANAGER_ADDR from the environment, or prompt for it.
SWARM_MANAGER_ADDR=${SWARM_MANAGER_ADDR:-}
if [[ -z "${SWARM_MANAGER_ADDR}" ]]; then
  read -rp "请输入本机管理地址 SWARM_MANAGER_ADDR: " SWARM_MANAGER_ADDR
fi
info "SWARM_MANAGER_ADDR=$SWARM_MANAGER_ADDR"

# The address must belong to a local interface. The address list is captured
# first and matched as a fixed string: piping straight into `grep -q` under
# pipefail can report a spurious failure when grep exits early (SIGPIPE), and
# a regex match would let "." in the address match any character.
local_addrs=$(ip -o addr | awk '{print $4}' | cut -d'/' -f1) || true
if [[ -z "$SWARM_MANAGER_ADDR" ]] \
  || ! grep -qxF -- "$SWARM_MANAGER_ADDR" <<<"$local_addrs"; then
  err "SWARM_MANAGER_ADDR 非本机地址: $SWARM_MANAGER_ADDR"
  exit 1
fi

# --- Port allocation -------------------------------------------------------
START_PORT="${START_PORT:-20000}"
cur=$START_PORT
info "开始分配服务端口（起始=${START_PORT}，避免系统占用与相互冲突）"

# Snapshot the listening TCP/UDP ports once instead of re-running ss for every
# candidate port. If ss is unavailable the snapshot is empty and every port is
# treated as free (same best-effort behaviour as before).
LISTENING_PORTS=$(ss -tulnH 2>/dev/null | awk '{print $5}' | sed 's/.*://g') || true

# Succeeds when port $1 appears in the listening-port snapshot.
is_port_used() {
  local p="$1"
  grep -qxF -- "$p" <<<"$LISTENING_PORTS"
}

declare -A PRESENT=() CHOSEN=() USED=()
ORDER=(MASTER_PORT PROMETHEUS_PORT GRAFANA_PORT ALERTMANAGER_PORT \
  WEB_PROXY_PORT_8080 WEB_PROXY_PORT_8081 WEB_PROXY_PORT_8082 \
  WEB_PROXY_PORT_8083 WEB_PROXY_PORT_8084 WEB_PROXY_PORT_8085)

# Mark the keys that actually exist in .env.example; only those get a port.
for key in "${ORDER[@]}"; do
  if grep -q "^${key}=" "$ENV_EX"; then
    PRESENT[$key]=1
  fi
done

# Print the first port >= $1 that is neither already chosen nor listening.
next_free() {
  local p="$1"
  while :; do
    if [[ -n "${USED[$p]:-}" ]] || is_port_used "$p"; then
      p=$((p + 1))
    else
      echo "$p"
      return
    fi
  done
}

for key in "${ORDER[@]}"; do
  if [[ -z "${PRESENT[$key]:-}" ]]; then
    continue
  fi
  p=$(next_free "$cur")
  CHOSEN[$key]="$p"
  USED[$p]=1
  cur=$((p + 1))
done
info "端口分配结果MASTER=${CHOSEN[MASTER_PORT]:-} PROM=${CHOSEN[PROMETHEUS_PORT]:-} GRAFANA=${CHOSEN[GRAFANA_PORT]:-} ALERT=${CHOSEN[ALERTMANAGER_PORT]:-} WEB_PROXY(8080..8085)=${CHOSEN[WEB_PROXY_PORT_8080]:-}/${CHOSEN[WEB_PROXY_PORT_8081]:-}/${CHOSEN[WEB_PROXY_PORT_8082]:-}/${CHOSEN[WEB_PROXY_PORT_8083]:-}/${CHOSEN[WEB_PROXY_PORT_8084]:-}/${CHOSEN[WEB_PROXY_PORT_8085]:-}"

# Write the de-duplicated ports back into compose/.env.
for key in "${ORDER[@]}"; do
  val="${CHOSEN[$key]:-}"
  if [[ -z "$val" ]]; then
    continue
  fi
  sed -i -E "s#^$key=.*#$key=${val}#" "$ENV_OUT"
done

# Add the overlay network name when the template does not define it.
grep -q '^ARGUS_OVERLAY_NET=' "$ENV_OUT" || echo 'ARGUS_OVERLAY_NET=argus-sys-net' >> "$ENV_OUT"

# Set ARGUS_BUILD_UID/GID to the invoking user's ids (replace or append).
RUID=$(id -u)
RGID=$(id -g)
if grep -q '^ARGUS_BUILD_UID=' "$ENV_OUT"; then
  sed -i -E "s#^ARGUS_BUILD_UID=.*#ARGUS_BUILD_UID=${RUID}#" "$ENV_OUT"
else
  echo "ARGUS_BUILD_UID=${RUID}" >> "$ENV_OUT"
fi
if grep -q '^ARGUS_BUILD_GID=' "$ENV_OUT"; then
  sed -i -E "s#^ARGUS_BUILD_GID=.*#ARGUS_BUILD_GID=${RGID}#" "$ENV_OUT"
else
  echo "ARGUS_BUILD_GID=${RGID}" >> "$ENV_OUT"
fi

# Record SWARM_MANAGER_ADDR in cluster-info.env (replace, append or create).
CI="$PKG_ROOT/cluster-info.env"
if [[ -f "$CI" ]]; then
  if grep -q '^SWARM_MANAGER_ADDR=' "$CI"; then
    sed -i -E "s#^SWARM_MANAGER_ADDR=.*#SWARM_MANAGER_ADDR=${SWARM_MANAGER_ADDR}#" "$CI"
  else
    echo "SWARM_MANAGER_ADDR=${SWARM_MANAGER_ADDR}" >> "$CI"
  fi
else
  echo "SWARM_MANAGER_ADDR=${SWARM_MANAGER_ADDR}" > "$CI"
fi
info "已生成 compose/.env 并更新 cluster-info.env 的 SWARM_MANAGER_ADDR。"
info "下一步可执行: scripts/install.sh"

View File

@ -0,0 +1,91 @@
#!/usr/bin/env bash
# Diagnostics collector for the ARM server stack: prepares the log directory
# and the two output files (details + detected errors) used by the rest of
# the script.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"

# Export every variable from compose/.env when the file exists.
ENV_FILE="$ROOT/compose/.env"
if [[ -f "$ENV_FILE" ]]; then
  set -a
  source "$ENV_FILE"
  set +a
fi

ts="$(date -u +%Y%m%d-%H%M%SZ)"

# Prefer <package>/logs; fall back to /tmp/argus-logs when it is not writable.
# The writability probe file is removed again so it does not linger in logs/.
LOG_DIR="$ROOT/logs"
mkdir -p "$LOG_DIR" || true
probe="$LOG_DIR/.w"
if ( : > "$probe" ) 2>/dev/null; then
  rm -f -- "$probe"
else
  LOG_DIR="/tmp/argus-logs"
  mkdir -p "$LOG_DIR" || true
fi

PROJECT="${COMPOSE_PROJECT_NAME:-argus-server-arm}"
DETAILS="$LOG_DIR/diagnose_arm_server_details_${ts}.log"
ERRORS="$LOG_DIR/diagnose_arm_server_error_${ts}.log"

# Start both output files empty.
: > "$DETAILS"
: > "$ERRORS"
# Append a timestamped line to the details log ($DETAILS).
logd() { echo "$(date '+%F %T') $*" >> "$DETAILS"; }

# Append a raw line to the error log ($ERRORS).
append_err() { echo "$*" >> "$ERRORS"; }

# Print the HTTP status code for URL $1, or exactly "000" when curl fails.
# curl's -w output already contains "000" on a failed transfer, so the
# fallback replaces the captured value instead of being appended to it
# (appending produced "000000"). --max-time bounds a hung endpoint.
http_code() {
  local code
  code=$(curl -s -o /dev/null -w "%{http_code}" --max-time 10 "$1" 2>/dev/null) || code=000
  echo "${code:-000}"
}

# Print the first five lines of the response body of URL $1 (best effort).
http_body_head() { curl -s --max-time 3 "$1" 2>/dev/null | sed -n '1,5p' || true; }

# Print the value of the Access-Control-Allow-Origin response header.
# All arguments are passed to curl. The header name is compared after
# tolower() so the match is case-insensitive on every awk implementation
# (IGNORECASE is honoured by gawk only).
header_val() {
  curl -s -D - -o /dev/null "$@" \
    | awk -F': ' 'tolower($1) == "access-control-allow-origin" { gsub("\r", "", $2); print $2 }'
}

# Write a section banner for $1 into the details log.
section() { local name="$1"; logd "===== [$name] ====="; }
# Collect diagnostics for one service.
#   $1 - logical service name (used as the tag in the error log)
#   $2 - container name
# Appends `docker ps` / `docker inspect` / the last 200 log lines to $DETAILS
# and every log line matching the error keywords to $ERRORS. When the
# container provides supervisorctl, its status and the supervisor log files
# are collected the same way. All docker calls are best-effort.
svc() {
  local svc_name="$1"
  local cname="$2"
  local err_re='\b(error|failed|fail|exception|panic|fatal|critical|unhealthy|permission denied|forbidden|refused|traceback|错误|失败)\b'

  section "$svc_name ($cname)"

  logd "docker ps:"
  docker ps -a --format '{{.Names}}\t{{.Status}}\t{{.Image}}' \
    | awk -v n="$cname" '$1==n' >> "$DETAILS" || true

  logd "docker inspect:"
  docker inspect -f '{{.State.Status}} rc={{.RestartCount}} started={{.State.StartedAt}}' "$cname" \
    >> "$DETAILS" 2>&1 || true

  logd "last 200 container logs:"
  docker logs --tail 200 "$cname" >> "$DETAILS" 2>&1 || true

  docker logs --tail 200 "$cname" 2>&1 \
    | grep -Ei "$err_re" \
    | sed "s/^/[${svc_name}][container] /" >> "$ERRORS" || true

  # Nothing more to do unless supervisorctl exists inside the container.
  if ! docker exec "$cname" sh -lc 'command -v supervisorctl >/dev/null 2>&1' >/dev/null 2>&1; then
    return 0
  fi

  logd "supervisorctl status:"
  docker exec "$cname" sh -lc 'supervisorctl status' >> "$DETAILS" 2>&1 || true

  local files
  files=$(docker exec "$cname" sh -lc 'ls /var/log/supervisor/*.log 2>/dev/null' || true)
  # $files is deliberately unquoted: one word per log file path.
  for f in $files; do
    logd "tail -n 80 $f:"
    docker exec "$cname" sh -lc "tail -n 80 $f 2>/dev/null || true" >> "$DETAILS" 2>&1 || true
    docker exec "$cname" sh -lc "tail -n 200 $f 2>/dev/null" 2>/dev/null \
      | grep -Ei "$err_re" \
      | sed "s/^/[${svc_name}][supervisor:$(basename "$f")] /" >> "$ERRORS" || true
  done
}
# Key services: logical name + container name.
svc master argus-master-sys
svc prometheus argus-prometheus
svc prom-updater argus-prometheus-targets-updater
svc grafana argus-grafana
svc alertmanager argus-alertmanager
svc web-frontend argus-web-frontend
svc web-proxy argus-web-proxy

# --- HTTP probes -------------------------------------------------------------
# Each endpoint is probed once; the result is logged here and reused by the
# SUMMARY section. The URLs are passed with plain quotes: inside $(...) a
# backslash-escaped quote is a literal character, so the previous \"...\"
# form handed curl a URL wrapped in quote characters and every probe logged
# in this section failed.
section HTTP
master_code=$(http_code "http://localhost:${MASTER_PORT:-32300}/readyz")
logd "Master: ${master_code}"
prom_code=$(http_code "http://localhost:${PROMETHEUS_PORT:-9090}/-/ready")
logd "Prometheus: ${prom_code}"
gfcode=$(http_code "http://localhost:${GRAFANA_PORT:-3000}/api/health")
logd "Grafana: ${gfcode}"
http_body_head "http://localhost:${GRAFANA_PORT:-3000}/api/health" >> "$DETAILS" 2>&1 || true
am_code=$(http_code "http://localhost:${ALERTMANAGER_PORT:-9093}/api/v2/status")
logd "Alertmanager: ${am_code}"
cors8084=$(header_val -H "Origin: http://localhost:${WEB_PROXY_PORT_8080:-8080}" "http://localhost:${WEB_PROXY_PORT_8084:-8084}/api/v2/status" || true)
cors8085=$(header_val -H "Origin: http://localhost:${WEB_PROXY_PORT_8080:-8080}" "http://localhost:${WEB_PROXY_PORT_8085:-8085}/api/v1/master/nodes" || true)
logd "Web-Proxy 8080: $(http_code "http://localhost:${WEB_PROXY_PORT_8080:-8080}/")"
logd "Web-Proxy 8083: $(http_code "http://localhost:${WEB_PROXY_PORT_8083:-8083}/")"
logd "Web-Proxy 8084 CORS: ${cors8084}"
logd "Web-Proxy 8085 CORS: ${cors8085}"

# --- System information ------------------------------------------------------
section SYSTEM
logd "uname -a:"
uname -a >> "$DETAILS"
logd "docker version:"
docker version --format '{{.Server.Version}}' >> "$DETAILS" 2>&1 || true
logd "compose ps (project=$PROJECT):"
(cd "$ROOT/compose" && docker compose -p "$PROJECT" --env-file "$ENV_FILE" -f docker-compose.yml ps) >> "$DETAILS" 2>&1 || true

# --- Summary: one error-log line per failed probe ----------------------------
section SUMMARY
if [[ "$master_code" != 200 ]]; then
  echo "[master][http] /readyz not 200" >> "$ERRORS"
fi
if [[ "$prom_code" != 200 ]]; then
  echo "[prometheus][http] /-/ready not 200" >> "$ERRORS"
fi
if [[ "$gfcode" != 200 ]]; then
  echo "[grafana][http] /api/health=$gfcode" >> "$ERRORS"
fi
if [[ "$am_code" != 200 ]]; then
  echo "[alertmanager][http] /api/v2/status not 200" >> "$ERRORS"
fi
if [[ -z "$cors8084" ]]; then
  echo "[web-proxy][cors] 8084 missing Access-Control-Allow-Origin" >> "$ERRORS"
fi
if [[ -z "$cors8085" ]]; then
  echo "[web-proxy][cors] 8085 missing Access-Control-Allow-Origin" >> "$ERRORS"
fi

# De-duplicate the error log in place.
sort -u -o "$ERRORS" "$ERRORS"
echo "Diagnostic details -> $DETAILS"
echo "Detected errors -> $ERRORS"

# Keep stable "latest" names next to the timestamped files (symlink, or a
# copy when symlinking fails). Skipped when logging fell back to /tmp.
if [[ "$LOG_DIR" == "$ROOT/logs" ]]; then
  ln -sfn "$(basename "$DETAILS")" "$ROOT/logs/diagnose_arm_server_details.log" 2>/dev/null || cp "$DETAILS" "$ROOT/logs/diagnose_arm_server_details.log" 2>/dev/null || true
  ln -sfn "$(basename "$ERRORS")" "$ROOT/logs/diagnose_arm_server_error.log" 2>/dev/null || cp "$ERRORS" "$ROOT/logs/diagnose_arm_server_error.log" 2>/dev/null || true
fi
exit 0

View File

@ -0,0 +1,130 @@
#!/usr/bin/env bash
set -euo pipefail

# Package root is the parent of the directory holding this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
PKG_ROOT="$ROOT_DIR"
ENV_FILE="$PKG_ROOT/compose/.env"
COMPOSE_FILE="$PKG_ROOT/compose/docker-compose.yml"

# Print an informational message on stdout, prefixed with a blue [INSTALL] tag.
info() {
  echo -e "\033[34m[INSTALL]\033[0m $*"
}

# Print an error message on stderr, prefixed with a red [ERROR] tag.
err() {
  echo -e "\033[31m[ERROR]\033[0m $*" >&2
}

# Check that every named command is available on PATH.
# Reports each missing command through err and returns non-zero if any is
# missing; all arguments are checked before returning.
require() {
  local missing=0 cmd
  for cmd in "$@"; do
    if ! command -v "$cmd" >/dev/null 2>&1; then
      err "缺少依赖: $cmd"
      missing=1
    fi
  done
  [[ $missing -eq 0 ]]
}
# Accept the `docker compose` plugin (v2) first, otherwise a working
# standalone `docker-compose` (v1). Reports an error and exits the script
# with status 1 when neither responds to `version`.
require_compose() {
  docker compose version >/dev/null 2>&1 && return 0
  if command -v docker-compose >/dev/null 2>&1; then
    docker-compose version >/dev/null 2>&1 && return 0
  fi
  err "未检测到 Docker Compose请安装 docker compose v2 或 docker-compose v1"
  exit 1
}
# ---------------------------------------------------------------------------
# Main flow of install.sh: preflight, Swarm + overlay network, cluster-info,
# image import, compose up, best-effort readiness wait, install report.
# ---------------------------------------------------------------------------
require docker curl jq awk sed tar gzip
require_compose
[[ -f "$ENV_FILE" ]] || { err "缺少 compose/.env请先运行 scripts/config.sh"; exit 1; }
info "使用环境文件: $ENV_FILE"
set -a; source "$ENV_FILE"; set +a

# require_compose accepts either Compose flavour, so run whichever one is
# actually present instead of always assuming the v2 plugin.
if docker compose version >/dev/null 2>&1; then
  COMPOSE_CMD=(docker compose)
else
  COMPOSE_CMD=(docker-compose)
fi

# Compatibility: when .env carries no SWARM_MANAGER_ADDR, read it from an
# existing cluster-info.env so the file is not rewritten with an empty value.
SMADDR="${SWARM_MANAGER_ADDR:-}"
CI_FILE="$PKG_ROOT/cluster-info.env"
if [[ -z "$SMADDR" && -f "$CI_FILE" ]]; then
  SMADDR=$(sed -n 's/^SWARM_MANAGER_ADDR=\(.*\)$/\1/p' "$CI_FILE" | head -n1)
fi
SWARM_MANAGER_ADDR="$SMADDR"

# Swarm init & overlay network.
# The node state is queried through --format rather than `docker info | grep -q`:
# under pipefail an early grep exit can make the pipeline report failure.
NET_NAME="${ARGUS_OVERLAY_NET:-argus-sys-net}"
swarm_state=$(docker info --format '{{.Swarm.LocalNodeState}}' 2>/dev/null) || swarm_state=""
if [[ "$swarm_state" != "active" ]]; then
  [[ -n "${SWARM_MANAGER_ADDR:-}" ]] || { err "SWARM_MANAGER_ADDR 未设置,请在 scripts/config.sh 中配置"; exit 1; }
  info "初始化 Swarm (--advertise-addr $SWARM_MANAGER_ADDR)"
  # Still best-effort, but the reason for a failure is no longer discarded.
  if ! swarm_out=$(docker swarm init --advertise-addr "$SWARM_MANAGER_ADDR" 2>&1); then
    err "docker swarm init failed: ${swarm_out}"
  fi
else
  info "Swarm 已激活"
fi
if ! docker network inspect "$NET_NAME" >/dev/null 2>&1; then
  info "创建 overlay 网络: $NET_NAME"
  docker network create -d overlay --attachable "$NET_NAME" >/dev/null
else
  info "overlay 网络已存在: $NET_NAME"
fi

# Write the Swarm join tokens to cluster-info.env for the ARM client package.
TOKEN_WORKER=$(docker swarm join-token -q worker 2>/dev/null || echo "")
TOKEN_MANAGER=$(docker swarm join-token -q manager 2>/dev/null || echo "")
CI_OUT="$PKG_ROOT/cluster-info.env"
info "写入 cluster-info.env (manager/token)"
{
  echo "SWARM_MANAGER_ADDR=${SWARM_MANAGER_ADDR:-}"
  echo "SWARM_JOIN_TOKEN_WORKER=${TOKEN_WORKER:-}"
  echo "SWARM_JOIN_TOKEN_MANAGER=${TOKEN_MANAGER:-}"
} > "$CI_OUT"

# Import images. Each archive is streamed straight into `docker load`, so no
# uncompressed copy is written to the temp directory (and none is left behind
# when a load fails). pipefail makes a gunzip failure abort the script.
IMAGES_DIR="$PKG_ROOT/images"
shopt -s nullglob
tars=("$IMAGES_DIR"/*.tar.gz)
shopt -u nullglob
if [[ ${#tars[@]} -eq 0 ]]; then
  err "images 目录为空,缺少镜像 tar.gz"
  exit 1
fi
total=${#tars[@]}
idx=0
for tgz in "${tars[@]}"; do
  idx=$((idx + 1))
  info "导入镜像 ($idx/$total): $(basename "$tgz")"
  gunzip -c "$tgz" | docker load >/dev/null
done

# Start the stack.
PROJECT="${COMPOSE_PROJECT_NAME:-argus-server-arm}"
info "启动服务栈 (docker compose -p $PROJECT up -d)"
"${COMPOSE_CMD[@]}" -p "$PROJECT" --env-file "$ENV_FILE" -f "$COMPOSE_FILE" up -d
"${COMPOSE_CMD[@]}" -p "$PROJECT" --env-file "$ENV_FILE" -f "$COMPOSE_FILE" ps

# Print the HTTP status code for URL $1, or exactly "000" when curl fails.
# curl's -w output already contains "000" on a failed transfer, so the
# fallback replaces the captured value rather than being appended to it.
code() {
  local c
  c=$(curl -4 -s -o /dev/null -w "%{http_code}" --max-time 5 "$1" 2>/dev/null) || c=000
  echo "${c:-000}"
}

# Succeeds when Grafana's /api/health body reports "database": "ok".
gf_ok() {
  local body
  body=$(curl -s --max-time 5 "http://127.0.0.1:${GRAFANA_PORT:-3000}/api/health" || true)
  grep -q '"database"\s*:\s*"ok"' <<<"$body"
}

# Simple readiness wait (best-effort; never blocks the installation).
RETRIES=${RETRIES:-60}
SLEEP=${SLEEP:-5}
ok=0
info "等待基础服务就绪 (<= $((RETRIES*SLEEP))s)"
for ((i = 1; i <= RETRIES; i++)); do
  e1=$(code "http://127.0.0.1:${MASTER_PORT:-32300}/readyz")
  e2=$(code "http://127.0.0.1:${PROMETHEUS_PORT:-9090}/-/ready")
  e3=$(code "http://127.0.0.1:${ALERTMANAGER_PORT:-9093}/api/v2/status")
  e4=000
  if gf_ok; then e4=200; fi
  info "[ready] t=$((i*SLEEP))s master=$e1 prom=$e2 graf=$e4 alert=$e3"
  ok=0
  for status in "$e1" "$e2" "$e3" "$e4"; do
    if [[ "$status" == 200 ]]; then
      ok=$((ok + 1))
    fi
  done
  if [[ $ok -ge 4 ]]; then
    break
  fi
  ok=0
  sleep "$SLEEP"
done
[[ $ok -ge 4 ]] || err "部分服务未就绪(可稍后执行 scripts/selfcheck.sh 进行复查)"

# Install report (ARM slim edition). Ports use the same defaults as the
# compose file so a key missing from .env cannot abort the script under
# `set -u` after the stack is already running.
ts=$(date +%Y%m%d-%H%M%S)
RPT="$PKG_ROOT/安装报告_ARM_${ts}.md"
{
  echo "# Argus ARM Server 安装报告 (${ts})"
  echo
  echo "## 端口映射"
  echo "- MASTER_PORT=${MASTER_PORT:-32300}"
  echo "- PROMETHEUS_PORT=${PROMETHEUS_PORT:-9090}"
  echo "- GRAFANA_PORT=${GRAFANA_PORT:-3000}"
  echo "- ALERTMANAGER_PORT=${ALERTMANAGER_PORT:-9093}"
  echo "- WEB_PROXY_PORT_8080=${WEB_PROXY_PORT_8080:-8080} ... 8085=${WEB_PROXY_PORT_8085:-8085}"
  echo
  echo "## 网络"
  echo "- NET=${NET_NAME}"
  echo "- ARGUS_OVERLAY_NET=${ARGUS_OVERLAY_NET:-argus-sys-net}"
  echo
  echo "## 健康检查(简要)"
  echo "- master/readyz=$(code "http://127.0.0.1:${MASTER_PORT:-32300}/readyz")"
  echo "- prometheus/ready=$(code "http://127.0.0.1:${PROMETHEUS_PORT:-9090}/-/ready")"
  echo "- grafana/api/health=$(code "http://127.0.0.1:${GRAFANA_PORT:-3000}/api/health")"
  echo "- alertmanager/api/v2/status=$(code "http://127.0.0.1:${ALERTMANAGER_PORT:-9093}/api/v2/status")"
} > "$RPT"
info "已生成报告: $RPT"

# Reload nginx inside web-proxy when its configuration validates (best-effort).
if docker exec argus-web-proxy nginx -t >/dev/null 2>&1; then
  docker exec argus-web-proxy nginx -s reload >/dev/null 2>&1 || true
fi
info "安装完成。可通过 scripts/status.sh 查看当前服务状态。"

View File

@ -0,0 +1,74 @@
#!/usr/bin/env bash
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"

# Print a progress message on stdout, prefixed with a blue [CHECK] tag.
log() { echo -e "\033[0;34m[CHECK]\033[0m $*"; }

# Print an error message on stderr, prefixed with a red [ERROR] tag.
err() { echo -e "\033[0;31m[ERROR]\033[0m $*" >&2; }

# Export every variable from compose/.env when the file exists.
ENV_FILE="$ROOT/compose/.env"
if [[ -f "$ENV_FILE" ]]; then
  set -a
  source "$ENV_FILE"
  set +a
fi

# Poll URL $1 until curl succeeds (curl -f treats HTTP errors as failure).
#   $2 - maximum number of attempts (default 120), 5 s apart.
# Returns 0 on the first success and 1 once all attempts are used up; there
# is no sleep after the final failed attempt.
wait_http() {
  local url="$1"
  local attempts=${2:-120}
  local i=1
  while ((i <= attempts)); do
    curl -fsS "$url" >/dev/null 2>&1 && return 0
    echo "[..] waiting $url ($i/$attempts)"
    if ((i < attempts)); then
      sleep 5
    fi
    i=$((i + 1))
  done
  return 1
}

# Print the HTTP status code for URL $1, or exactly "000" when curl fails.
# curl's -w output already contains "000" on a failed transfer, so the
# fallback replaces the captured value instead of being appended to it.
code_for() {
  local code
  code=$(curl -s -o /dev/null -w "%{http_code}" --max-time 10 "$1" 2>/dev/null) || code=000
  echo "${code:-000}"
}

# Print the value of the Access-Control-Allow-Origin response header.
# All arguments are passed to curl. The header name is compared after
# tolower() so the match is case-insensitive on every awk implementation
# (IGNORECASE is honoured by gawk only).
header_val() {
  curl -s -D - -o /dev/null "$@" \
    | awk -F': ' 'tolower($1) == "access-control-allow-origin" { gsub("\r", "", $2); print $2 }'
}
# ---------------------------------------------------------------------------
# Main flow of selfcheck.sh: probe the network and each service, write a JSON
# summary to logs/selfcheck_arm_server.json and exit 0 only when every check
# passed. Each JSON field reflects the outcome of its own check (master,
# prometheus and alertmanager used to be written as a constant `true`).
# ---------------------------------------------------------------------------
LOG_DIR="$ROOT/logs"
mkdir -p "$LOG_DIR" || true
OUT_JSON="$LOG_DIR/selfcheck_arm_server.json"
tmp=$(mktemp)
ok=1

log "checking network (${ARGUS_OVERLAY_NET:-argus-sys-net})"
net_ok=false
if docker network inspect "${ARGUS_OVERLAY_NET:-argus-sys-net}" >/dev/null 2>&1; then
  net_ok=true
fi
[[ "$net_ok" == true ]] || ok=0

log "checking Master"
master_ok=false
if [[ $(code_for "http://localhost:${MASTER_PORT:-32300}/readyz") == 200 ]]; then
  master_ok=true
fi
[[ "$master_ok" == true ]] || ok=0

log "checking Prometheus"
prom_ok=false
if wait_http "http://localhost:${PROMETHEUS_PORT:-9090}/-/ready" 60; then
  prom_ok=true
fi
[[ "$prom_ok" == true ]] || ok=0

log "checking Grafana"
gf_code=$(code_for "http://localhost:${GRAFANA_PORT:-3000}/api/health")
gf_ok=false
if [[ "$gf_code" == 200 ]]; then
  body=$(curl -sS "http://localhost:${GRAFANA_PORT:-3000}/api/health" || true)
  if grep -q '"database"\s*:\s*"ok"' <<<"$body"; then
    gf_ok=true
  fi
fi
[[ "$gf_ok" == true ]] || ok=0

log "checking Alertmanager"
am_ok=false
if wait_http "http://localhost:${ALERTMANAGER_PORT:-9093}/api/v2/status" 60; then
  am_ok=true
fi
[[ "$am_ok" == true ]] || ok=0

log "checking Web-Proxy (CORS)"
cors8084=$(header_val -H "Origin: http://localhost:${WEB_PROXY_PORT_8080:-8080}" "http://localhost:${WEB_PROXY_PORT_8084:-8084}/api/v2/status" || true)
cors8085=$(header_val -H "Origin: http://localhost:${WEB_PROXY_PORT_8080:-8080}" "http://localhost:${WEB_PROXY_PORT_8085:-8085}/api/v1/master/nodes" || true)
wp_ok=true
[[ -n "$cors8084" && -n "$cors8085" ]] || wp_ok=false
[[ "$wp_ok" == true ]] || ok=0

cat > "$tmp" <<JSON
{
  "overlay_net": $net_ok,
  "master_readyz": $master_ok,
  "prometheus": $prom_ok,
  "grafana": $gf_ok,
  "alertmanager": $am_ok,
  "web_proxy_cors": $wp_ok,
  "timestamp": "$(date -u +%Y-%m-%dT%H:%M:%SZ)"
}
JSON
mv "$tmp" "$OUT_JSON" 2>/dev/null || cp "$tmp" "$OUT_JSON"

if [[ "$ok" == 1 ]]; then
  log "selfcheck OK -> $OUT_JSON"
  exit 0
else
  err "selfcheck FAILED -> $OUT_JSON"
  exit 1
fi

View File

@ -0,0 +1,15 @@
#!/usr/bin/env bash
# Show `docker compose ps` for the Argus ARM server compose project.
set -euo pipefail

ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
PKG_ROOT="$ROOT_DIR"
ENV_FILE="$PKG_ROOT/compose/.env"
COMPOSE_FILE="$PKG_ROOT/compose/docker-compose.yml"

# Export every variable from compose/.env when the file exists, so that
# COMPOSE_PROJECT_NAME from the file is honoured below.
[[ ! -f "$ENV_FILE" ]] || { set -a; source "$ENV_FILE"; set +a; }

PROJECT="${COMPOSE_PROJECT_NAME:-argus-server-arm}"
compose_args=(-p "$PROJECT" --env-file "$ENV_FILE" -f "$COMPOSE_FILE")
docker compose "${compose_args[@]}" ps

View File

@ -0,0 +1,28 @@
#!/usr/bin/env bash
# Stop and remove the Argus ARM server compose project (best-effort: the
# script always exits 0, but a failed `down` is reported on stderr).
set -euo pipefail

ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
PKG_ROOT="$ROOT_DIR"
ENV_FILE="$PKG_ROOT/compose/.env"
COMPOSE_FILE="$PKG_ROOT/compose/docker-compose.yml"

# Export every variable from compose/.env when the file exists.
if [[ -f "$ENV_FILE" ]]; then
  set -a; source "$ENV_FILE"; set +a
fi
PROJECT="${COMPOSE_PROJECT_NAME:-argus-server-arm}"

# Print an error message on stderr, prefixed with a red [ERROR] tag.
err() { echo -e "\033[31m[ERROR]\033[0m $*" >&2; }

# Accept the `docker compose` plugin (v2) first, otherwise a working
# standalone `docker-compose` (v1); exit 1 when neither is usable.
require_compose() {
  if docker compose version >/dev/null 2>&1; then return 0; fi
  if command -v docker-compose >/dev/null 2>&1 && docker-compose version >/dev/null 2>&1; then return 0; fi
  err "未检测到 Docker Compose请安装 docker compose v2 或 docker-compose v1"; exit 1
}
require_compose

# Run whichever Compose flavour require_compose found, rather than always
# assuming the v2 plugin.
if docker compose version >/dev/null 2>&1; then
  COMPOSE_CMD=(docker compose)
else
  COMPOSE_CMD=(docker-compose)
fi

# Pass --env-file only when the file exists: Compose refuses to run with a
# missing env file, which would leave the stack up.
compose_args=(-p "$PROJECT")
if [[ -f "$ENV_FILE" ]]; then
  compose_args+=(--env-file "$ENV_FILE")
fi
compose_args+=(-f "$COMPOSE_FILE")

echo "[UNINSTALL] stopping compose (project=$PROJECT)"
if ! "${COMPOSE_CMD[@]}" "${compose_args[@]}" down --remove-orphans; then
  # Still best-effort, but do not hide the failure behind "done".
  err "compose down failed (project=$PROJECT); containers may still be running"
fi
echo "[UNINSTALL] done"

View File

@ -0,0 +1,116 @@
#!/usr/bin/env bash
set -euo pipefail

# x86 only: generate compose/.env for the ARM server package.
# - includes a disk-space check
# - allocates ports automatically to avoid clashing with local services
# - writes ARGUS_OVERLAY_NET and ARGUS_BUILD_UID/GID

# Package root is the parent of the directory holding this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
PKG_ROOT="$ROOT_DIR"
ENV_EX="$PKG_ROOT/compose/.env.example"
ENV_OUT="$PKG_ROOT/compose/.env"

# Print an informational message on stdout with a blue [CONFIG_X86] tag.
info() {
  echo -e "\033[34m[CONFIG_X86]\033[0m $*"
}

# Print an error message on stderr with a red [ERROR_X86] tag.
err() {
  echo -e "\033[31m[ERROR_X86]\033[0m $*" >&2
}

# Check that every named command is available on PATH.
# Reports each missing command through err and returns non-zero if any is
# missing; all arguments are checked before returning.
require() {
  local missing=0 cmd
  for cmd in "$@"; do
    if ! command -v "$cmd" >/dev/null 2>&1; then
      err "缺少依赖: $cmd"
      missing=1
    fi
  done
  [[ $missing -eq 0 ]]
}
# Compose detection: accept the `docker compose` plugin (v2) first, otherwise
# a working standalone `docker-compose` (v1). Reports an error and exits the
# script with status 1 when neither responds to `version`.
require_compose() {
  docker compose version >/dev/null 2>&1 && return 0
  if command -v docker-compose >/dev/null 2>&1; then
    docker-compose version >/dev/null 2>&1 && return 0
  fi
  err "未检测到 Docker Compose请安装 docker compose v2 或 docker-compose v1"
  exit 1
}
# ---------------------------------------------------------------------------
# Main flow of the x86 config script: preflight checks, then generate
# compose/.env from compose/.env.example with freshly allocated host ports,
# the caller's UID/GID and the overlay network name, and record
# SWARM_MANAGER_ADDR in cluster-info.env.
# ---------------------------------------------------------------------------
require docker awk sed
require_compose
[[ -f "$ENV_EX" ]] || { err "缺少模板文件: $ENV_EX"; exit 1; }

# Disk-space check (simplified; default threshold 5120 MB).
#   $1 - path whose filesystem is inspected
#   $2 - required free space in MB (optional)
# Only reports through err; it never aborts the script.
check_disk() {
  local p="$1"
  local need="${2:-5120}"
  local free
  free=$(df -Pm "$p" 2>/dev/null | awk 'NR==2{print $4+0}')
  if [[ -z "$free" || "$free" -lt "$need" ]]; then
    err "磁盘空间不足: $p 剩余 ${free:-0}MB (<${need}MB),请清理后再继续"
  fi
}
check_disk "$PKG_ROOT" 5120 || true
check_disk "/var/lib/docker" 5120 || true

cp "$ENV_EX" "$ENV_OUT"

# Read SWARM_MANAGER_ADDR from the environment, or prompt for it (same
# mechanism as the ARM server config script).
SWARM_MANAGER_ADDR=${SWARM_MANAGER_ADDR:-}
if [[ -z "${SWARM_MANAGER_ADDR}" ]]; then
  read -rp "请输入本机管理地址 SWARM_MANAGER_ADDR: " SWARM_MANAGER_ADDR
fi
info "SWARM_MANAGER_ADDR=$SWARM_MANAGER_ADDR"

# The address must belong to a local interface. The address list is captured
# first and matched as a fixed string: piping straight into `grep -q` under
# pipefail can report a spurious failure when grep exits early (SIGPIPE), and
# a regex match would let "." in the address match any character.
local_addrs=$(ip -o addr | awk '{print $4}' | cut -d'/' -f1) || true
if [[ -z "$SWARM_MANAGER_ADDR" ]] \
  || ! grep -qxF -- "$SWARM_MANAGER_ADDR" <<<"$local_addrs"; then
  err "SWARM_MANAGER_ADDR 非本机地址: $SWARM_MANAGER_ADDR"
  exit 1
fi

# --- Port allocation -------------------------------------------------------
START_PORT="${START_PORT:-20000}"
cur=$START_PORT
info "开始分配服务端口（起始=${START_PORT}，避免系统占用与相互冲突）"

# Snapshot the listening TCP/UDP ports once instead of re-running ss for every
# candidate port. If ss is unavailable the snapshot is empty and every port is
# treated as free (same best-effort behaviour as before).
LISTENING_PORTS=$(ss -tulnH 2>/dev/null | awk '{print $5}' | sed 's/.*://g') || true

# Succeeds when port $1 appears in the listening-port snapshot.
is_port_used() {
  local p="$1"
  grep -qxF -- "$p" <<<"$LISTENING_PORTS"
}

declare -A PRESENT=() CHOSEN=() USED=()
ORDER=(MASTER_PORT PROMETHEUS_PORT GRAFANA_PORT ALERTMANAGER_PORT \
  WEB_PROXY_PORT_8080 WEB_PROXY_PORT_8081 WEB_PROXY_PORT_8082 \
  WEB_PROXY_PORT_8083 WEB_PROXY_PORT_8084 WEB_PROXY_PORT_8085)

# Mark the keys that actually exist in .env.example; only those get a port.
for key in "${ORDER[@]}"; do
  if grep -q "^${key}=" "$ENV_EX"; then
    PRESENT[$key]=1
  fi
done

# Print the first port >= $1 that is neither already chosen nor listening.
next_free() {
  local p="$1"
  while :; do
    if [[ -n "${USED[$p]:-}" ]] || is_port_used "$p"; then
      p=$((p + 1))
    else
      echo "$p"
      return
    fi
  done
}

for key in "${ORDER[@]}"; do
  if [[ -z "${PRESENT[$key]:-}" ]]; then
    continue
  fi
  p=$(next_free "$cur")
  CHOSEN[$key]="$p"
  USED[$p]=1
  cur=$((p + 1))
done
info "端口分配结果MASTER=${CHOSEN[MASTER_PORT]:-} PROM=${CHOSEN[PROMETHEUS_PORT]:-} GRAFANA=${CHOSEN[GRAFANA_PORT]:-} ALERT=${CHOSEN[ALERTMANAGER_PORT]:-} WEB_PROXY(8080..8085)=${CHOSEN[WEB_PROXY_PORT_8080]:-}/${CHOSEN[WEB_PROXY_PORT_8081]:-}/${CHOSEN[WEB_PROXY_PORT_8082]:-}/${CHOSEN[WEB_PROXY_PORT_8083]:-}/${CHOSEN[WEB_PROXY_PORT_8084]:-}/${CHOSEN[WEB_PROXY_PORT_8085]:-}"

# Write the de-duplicated ports back into compose/.env.
for key in "${ORDER[@]}"; do
  val="${CHOSEN[$key]:-}"
  if [[ -z "$val" ]]; then
    continue
  fi
  sed -i -E "s#^$key=.*#$key=${val}#" "$ENV_OUT"
done

# Add the overlay network name when the template does not define it.
grep -q '^ARGUS_OVERLAY_NET=' "$ENV_OUT" || echo 'ARGUS_OVERLAY_NET=argus-sys-net' >> "$ENV_OUT"

# Set ARGUS_BUILD_UID/GID to the invoking user's ids (replace or append).
RUID=$(id -u)
RGID=$(id -g)
if grep -q '^ARGUS_BUILD_UID=' "$ENV_OUT"; then
  sed -i -E "s#^ARGUS_BUILD_UID=.*#ARGUS_BUILD_UID=${RUID}#" "$ENV_OUT"
else
  echo "ARGUS_BUILD_UID=${RUID}" >> "$ENV_OUT"
fi
if grep -q '^ARGUS_BUILD_GID=' "$ENV_OUT"; then
  sed -i -E "s#^ARGUS_BUILD_GID=.*#ARGUS_BUILD_GID=${RGID}#" "$ENV_OUT"
else
  echo "ARGUS_BUILD_GID=${RGID}" >> "$ENV_OUT"
fi

# Record SWARM_MANAGER_ADDR in cluster-info.env (replace, append or create).
CI="$PKG_ROOT/cluster-info.env"
if [[ -f "$CI" ]]; then
  if grep -q '^SWARM_MANAGER_ADDR=' "$CI"; then
    sed -i -E "s#^SWARM_MANAGER_ADDR=.*#SWARM_MANAGER_ADDR=${SWARM_MANAGER_ADDR}#" "$CI"
  else
    echo "SWARM_MANAGER_ADDR=${SWARM_MANAGER_ADDR}" >> "$CI"
  fi
else
  echo "SWARM_MANAGER_ADDR=${SWARM_MANAGER_ADDR}" > "$CI"
fi
info "已生成 compose/.env 并更新 cluster-info.env 的 SWARM_MANAGER_ADDR。"
info "下一步可执行: scripts_for_x86/install.sh"

View File

@ -0,0 +1,6 @@
#!/usr/bin/env bash
# Thin wrapper: forwards every argument to ../scripts/diagnose.sh, located
# relative to the directory that contains this file.
set -euo pipefail

self_dir="$(dirname "${BASH_SOURCE[0]}")"
SCRIPT_DIR="$(cd "$self_dir" && pwd)"
target="$SCRIPT_DIR/../scripts/diagnose.sh"

# exec replaces this shell, so the exit status is the target script's.
exec "$target" "$@"

View File

@ -0,0 +1,77 @@
#!/usr/bin/env bash
set -euo pipefail

# Install the ARM server package on an x86_64 host:
#   1) verify / install binfmt + QEMU (via tonistiigi/binfmt)
#   2) call the generic scripts/install.sh to start the services.

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
ROOT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"

# Print an informational message on stdout with a blue [INSTALL_X86] tag.
info() {
  echo -e "\033[34m[INSTALL_X86]\033[0m $*"
}

# Print an error message on stderr with a red [ERROR_X86] tag.
err() {
  echo -e "\033[31m[ERROR_X86]\033[0m $*" >&2
}

# Check that every named command is available on PATH.
# Reports each missing command through err and returns non-zero if any is
# missing; all arguments are checked before returning.
require() {
  local missing=0 cmd
  for cmd in "$@"; do
    if ! command -v "$cmd" >/dev/null 2>&1; then
      err "缺少依赖: $cmd"
      missing=1
    fi
  done
  [[ $missing -eq 0 ]]
}
# Print the first existing path among the arguments (nothing when none
# exists, e.g. when an unmatched glob was passed through literally).
_argus_first_existing() {
  local candidate
  for candidate in "$@"; do
    if [[ -e "$candidate" ]]; then
      printf '%s\n' "$candidate"
      return 0
    fi
  done
  return 0
}

# Stream the gzip-compressed image archive $1 into `docker load`, without
# writing an uncompressed temporary copy.
_argus_load_image_tgz() {
  gunzip -c "$1" | docker load >/dev/null
}

# Succeed when a linux/arm64 container started from image $1 reports aarch64.
_argus_arm64_runnable() {
  local out
  out=$(docker run --rm --platform=linux/arm64 "$1" uname -m 2>/dev/null) || return 1
  [[ "$out" == *aarch64* ]]
}

# Make sure this host can run linux/arm64 containers.
# On a non-x86 host nothing is installed and the function returns 0.
# On x86_64 it verifies ARM64 execution with a test container and, when that
# fails, registers QEMU handlers through the privileged tonistiigi/binfmt
# image, then verifies again. Images shipped under $ROOT_DIR/images are
# loaded first so the check also works offline.
# Exits the script with status 1 when binfmt installation or the final
# verification fails.
ensure_binfmt_arm64() {
  local arch
  arch="$(uname -m)"
  info "检查当前主机架构: ${arch}"
  if [[ "$arch" != "x86_64" && "$arch" != "amd64" ]]; then
    info "当前并非 x86_64（${arch}），通常应直接在 ARM 服务器上使用 scripts/install.sh。继续执行但不强制安装 binfmt。"
    return 0
  fi
  require docker

  # Prefer the ubuntu-22.04-arm-test image shipped with the package.
  # NOTE(review): assumes the archive contains an arm64 image tagged
  # ubuntu:22.04 - confirm against the packaging script.
  local TEST_IMG="ubuntu:22.04"
  local TEST_TAR=""
  local test_arch=""
  TEST_TAR=$(_argus_first_existing "$ROOT_DIR"/images/ubuntu-22.04-arm-test*.tar.gz)
  # Load the bundled image when the tag is absent OR points at a non-arm64
  # image: a pre-existing amd64 ubuntu:22.04 would otherwise be used for the
  # arm64 check, which cannot succeed on an offline host.
  test_arch=$(docker image inspect -f '{{.Architecture}}' "$TEST_IMG" 2>/dev/null) || test_arch=""
  if [[ -n "$TEST_TAR" && "$test_arch" != "arm64" ]]; then
    info "从安装包加载 ARM 测试基础镜像: $(basename "$TEST_TAR")"
    _argus_load_image_tgz "$TEST_TAR"
  fi

  info "验证是否已能运行 linux/arm64 容器..."
  if _argus_arm64_runnable "$TEST_IMG"; then
    info "检测到本机已支持 ARM64 容器运行 (uname -m = aarch64)"
    return 0
  fi

  info "未检测到 ARM64 运行能力,尝试通过 tonistiigi/binfmt 安装 binfmt/QEMU ..."
  # Prefer the tonistiigi/binfmt image from the offline package.
  local BINFMT_IMG="tonistiigi/binfmt:latest"
  local BINFMT_TAR=""
  BINFMT_TAR=$(_argus_first_existing "$ROOT_DIR"/images/tonistiigi-binfmt*.tar.gz)
  if [[ -n "$BINFMT_TAR" ]] && ! docker image inspect "$BINFMT_IMG" >/dev/null 2>&1; then
    info "从安装包加载 tonistiigi/binfmt 离线镜像: $(basename "$BINFMT_TAR")"
    _argus_load_image_tgz "$BINFMT_TAR"
  fi
  if ! docker run --privileged --rm "$BINFMT_IMG" --install all; then
    err "tonistiigi/binfmt 安装失败,请检查 Docker 权限或确认离线镜像是否完整。"
    exit 1
  fi

  info "再次验证 ARM64 容器可运行状态..."
  if ! _argus_arm64_runnable "$TEST_IMG"; then
    err "安装 binfmt/QEMU 后仍无法运行 linux/arm64 容器,请手工排查 binfmt_misc 配置。"
    exit 1
  fi
  info "ARM64 容器运行能力检查通过。"
}
# Prepare the x86 -> ARM runtime first (may exit the script on failure).
ensure_binfmt_arm64
# With the runtime ready, hand the rest of the deployment over to the generic
# install.sh; exec replaces this shell and forwards all arguments unchanged.
exec "$SCRIPT_DIR/../scripts/install.sh" "$@"

View File

@ -0,0 +1,6 @@
#!/usr/bin/env bash
# Thin wrapper: forwards every argument to ../scripts/selfcheck.sh, located
# relative to the directory that contains this file.
set -euo pipefail

self_dir="$(dirname "${BASH_SOURCE[0]}")"
SCRIPT_DIR="$(cd "$self_dir" && pwd)"
target="$SCRIPT_DIR/../scripts/selfcheck.sh"

# exec replaces this shell, so the exit status is the target script's.
exec "$target" "$@"

View File

@ -0,0 +1,6 @@
#!/usr/bin/env bash
# Thin wrapper: forwards every argument to ../scripts/status.sh, located
# relative to the directory that contains this file.
set -euo pipefail

self_dir="$(dirname "${BASH_SOURCE[0]}")"
SCRIPT_DIR="$(cd "$self_dir" && pwd)"
target="$SCRIPT_DIR/../scripts/status.sh"

# exec replaces this shell, so the exit status is the target script's.
exec "$target" "$@"

View File

@ -0,0 +1,6 @@
#!/usr/bin/env bash
# Thin wrapper: forwards every argument to ../scripts/uninstall.sh, located
# relative to the directory that contains this file.
set -euo pipefail

self_dir="$(dirname "${BASH_SOURCE[0]}")"
SCRIPT_DIR="$(cd "$self_dir" && pwd)"
target="$SCRIPT_DIR/../scripts/uninstall.sh"

# exec replaces this shell, so the exit status is the target script's.
exec "$target" "$@"

View File

@ -42,6 +42,11 @@ auto_assign_org = true
auto_assign_org_role = Viewer auto_assign_org_role = Viewer
verify_email_enabled = false verify_email_enabled = false
[auth.anonymous]
enabled = true
org_name = Main Org.
org_role = Viewer
[log] [log]
mode = console mode = console
level = info level = info