From 9858f4471e6cd4241cd05baa87cc30da251102b4 Mon Sep 17 00:00:00 2001 From: yuyr Date: Tue, 4 Nov 2025 11:37:27 +0800 Subject: [PATCH] =?UTF-8?q?[#37]=20server=20install=20=E5=A2=9E=E5=8A=A0?= =?UTF-8?q?=E9=87=8D=E8=AF=95=E8=87=AA=E6=A3=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../build/templates/docs/INSTALL_SERVER.md | 11 ++-- .../build/templates/docs/INSTALL_SERVER_zh.md | 13 ++-- .../templates/docs/TROUBLESHOOTING_zh.md | 5 +- .../build/templates/scripts/server-install.sh | 61 ++++++++++++++++++- 4 files changed, 77 insertions(+), 13 deletions(-) diff --git a/deployment/build/templates/docs/INSTALL_SERVER.md b/deployment/build/templates/docs/INSTALL_SERVER.md index e72f5c8..652fe04 100644 --- a/deployment/build/templates/docs/INSTALL_SERVER.md +++ b/deployment/build/templates/docs/INSTALL_SERVER.md @@ -7,8 +7,8 @@ ## Quick Start 1. Extract to a target dir, e.g. `/opt/argus-deploy/versions/` -2. `cd scripts && sudo ./server-prepare-dirs.sh` -3. `./server-install.sh` +2. `cd scripts && sudo ./server-prepare-dirs.sh` (recommended) +3. `./server-install.sh` (non‑root is supported: it will precreate minimal dirs and auto-fix Kibana/ES/Bind in containers) 4. `./server-status.sh` 5. `./server-selfcheck.sh` (on failure it auto-runs diagnose and writes logs under `logs/`) 6. `./server-uninstall.sh` to tear down @@ -25,7 +25,11 @@ - Writes `logs/selfcheck.json` as final summary ## OS Compatibility -- NixOS / non-xattr FS: containers run with `security_opt: ["label=disable"]` and `userns_mode: host`; ensure data dirs are pre-created via `sudo ./server-prepare-dirs.sh` and owned by runtime UID:GID (default 1000:1000). +- NixOS / non-xattr FS: containers run with `security_opt: ["label=disable"]` and `userns_mode: host`. +- If you cannot use sudo, the installer will: + - create minimal data dirs (incl. `private/argus/log/{elasticsearch,kibana}`) with permissive perms when possible; + - ensure inside containers: Kibana `data` → `/private/argus/log/kibana`, Elasticsearch `data` → `/private/argus/log/elasticsearch`, and Bind `rndc.key` is generated. + You can still run `sudo ./server-prepare-dirs.sh` later to normalize ownership. ## Files & Layout - `compose/` (docker-compose.yml, .env) @@ -45,4 +49,3 @@ Common issues: - Kibana 503: wait cold start or fix DNS so `es.log.argus.com` resolves - web‑proxy 504: check nginx `resolver` includes `172.31.0.2 127.0.0.11` - EACCES/locks: ensure `sudo ./server-prepare-dirs.sh` ran and ownership matches UID:GID - diff --git a/deployment/build/templates/docs/INSTALL_SERVER_zh.md b/deployment/build/templates/docs/INSTALL_SERVER_zh.md index 2f999a9..673193a 100644 --- a/deployment/build/templates/docs/INSTALL_SERVER_zh.md +++ b/deployment/build/templates/docs/INSTALL_SERVER_zh.md @@ -7,8 +7,8 @@ ## 快速开始 1. 解压到目标目录(例如 `/opt/argus-deploy/versions/`) -2. 进入 `scripts/`:`sudo ./server-prepare-dirs.sh` -3. 安装:`./server-install.sh` +2. 进入 `scripts/`:`sudo ./server-prepare-dirs.sh`(推荐) +3. 安装:`./server-install.sh`(支持普通用户:会自动创建最小目录并在容器内修复 Kibana/ES/Bind) 4. 状态:`./server-status.sh` 5. 自检:`./server-selfcheck.sh`(失败会自动采集诊断) 6. 卸载:`./server-uninstall.sh` @@ -19,10 +19,13 @@ - 输出自检结果到 `logs/selfcheck.json`。 ## 兼容说明(NixOS 等) -- 使用 `security_opt: ["label=disable"]` 与 `userns_mode: host`; -- 先运行 `sudo ./server-prepare-dirs.sh` 创建/授权目录为 `1000:1000`; +- 使用 `security_opt: ["label=disable"]` 与 `userns_mode: host`。 +- 若不能使用 sudo:安装器会创建最小目录(含 `private/argus/log/{elasticsearch,kibana}`),并在容器内完成: + - Kibana 的 `data` 软链到 `/private/argus/log/kibana` + - Elasticsearch 的 `data` 软链到 `/private/argus/log/elasticsearch` + - Bind 生成 `/etc/bind/rndc.key` + 安装后也可再执行 `sudo ./server-prepare-dirs.sh` 统一目录属主。 ## 故障排查(见下文 Troubleshooting_zh) - `./server-selfcheck.sh` → `logs/selfcheck.json` - `./server-diagnose.sh` → `logs/diagnose_error_*.log` / `logs/diagnose_details_*.log` - diff --git a/deployment/build/templates/docs/TROUBLESHOOTING_zh.md b/deployment/build/templates/docs/TROUBLESHOOTING_zh.md index e4d6b47..f03d158 100644 --- a/deployment/build/templates/docs/TROUBLESHOOTING_zh.md +++ b/deployment/build/templates/docs/TROUBLESHOOTING_zh.md @@ -11,5 +11,6 @@ Web‑Proxy:8083=200/302/403;8084/8085 需包含 CORS Kibana:确认可解析 `es.log.argus.com` -权限:先运行 `sudo ./server-prepare-dirs.sh` - +权限: +- 非 root 安装时,安装器已创建最小目录并在容器内修复 Kibana/ES/Bind; +- 如仍有 `EACCES`/锁文件报错,可再运行 `sudo ./server-prepare-dirs.sh` 统一目录属主。 diff --git a/deployment/build/templates/scripts/server-install.sh b/deployment/build/templates/scripts/server-install.sh index 0e60b17..365b02c 100755 --- a/deployment/build/templates/scripts/server-install.sh +++ b/deployment/build/templates/scripts/server-install.sh @@ -40,6 +40,8 @@ prepare_data_dirs() { # still ensure basic directories exist (no chown) mkdir -p \ "$PKG_ROOT/private/argus/etc" \ + "$PKG_ROOT/private/argus/log/elasticsearch" \ + "$PKG_ROOT/private/argus/log/kibana" \ "$PKG_ROOT/private/argus/metric/prometheus" \ "$PKG_ROOT/private/argus/metric/prometheus/data" \ "$PKG_ROOT/private/argus/metric/prometheus/rules" \ @@ -153,6 +155,37 @@ YAML (cd "$PKG_ROOT/compose" && "${COMPOSE[@]}" -p "$PROJECT_NAME" -f docker-compose.yml -f $(basename "$ov") up -d "${services[@]}") } +# Post bootstrap container-side fixes that do not require sudo on host. +post_bootstrap_fixes() { + # Kibana: ensure /usr/share/kibana/data is a symlink into mounted path to avoid EACCES + if docker ps --format '{{.Names}}' | grep -q '^argus-kibana-sys$'; then + docker exec argus-kibana-sys bash -lc ' + set -e + mkdir -p /private/argus/log/kibana && chmod 777 /private/argus/log/kibana || true + if [ -d /usr/share/kibana/data ] && [ ! -L /usr/share/kibana/data ]; then rm -rf /usr/share/kibana/data; fi + if [ ! -e /usr/share/kibana/data ]; then ln -s /private/argus/log/kibana /usr/share/kibana/data; fi + ' >/dev/null 2>&1 || true + fi + # Elasticsearch: ensure data path points to mounted path and is writable + if docker ps --format '{{.Names}}' | grep -q '^argus-es-sys$'; then + docker exec argus-es-sys bash -lc ' + set -e + mkdir -p /private/argus/log/elasticsearch && chmod 777 /private/argus/log/elasticsearch || true + if [ -d /usr/share/elasticsearch/data ] && [ ! -L /usr/share/elasticsearch/data ]; then rm -rf /usr/share/elasticsearch/data; fi + if [ ! -e /usr/share/elasticsearch/data ]; then ln -s /private/argus/log/elasticsearch /usr/share/elasticsearch/data; fi + ' >/dev/null 2>&1 || true + fi + # Bind9: ensure rndc.key exists + if docker ps --format '{{.Names}}' | grep -q '^argus-bind-sys$'; then + docker exec argus-bind-sys bash -lc ' + set -e + mkdir -p /etc/bind + if [ ! -f /etc/bind/rndc.key ]; then rndc-confgen -a -c /etc/bind/rndc.key; fi + chmod 644 /etc/bind/rndc.key || true + ' >/dev/null 2>&1 || true + fi +} + dns_bootstrap() { log "DNS bootstrap: initializing shared dns.conf and container resolv.conf" local etc_dir="$PKG_ROOT/private/argus/etc" @@ -206,8 +239,31 @@ dns_bootstrap() { } selfcheck() { - log "running selfcheck" - bash "$PKG_ROOT/scripts/server-selfcheck.sh" || { err "selfcheck failed"; exit 1; } + # Initial selfcheck with retries to absorb cold starts + local max_retries="${SELF_CHECK_RETRIES:-5}" # 重试次数(不含首次),默认 5 + local wait_seconds="${SELF_CHECK_WAIT_SECONDS:-30}" # 每次重试前等待秒数,默认 30s + + local attempt=0 + while :; do + attempt=$((attempt+1)) + if (( attempt == 1 )); then + log "running selfcheck (attempt ${attempt})" + else + log "running selfcheck (attempt ${attempt}/${max_retries}+1)" + fi + + if bash "$PKG_ROOT/scripts/server-selfcheck.sh"; then + return 0 + fi + + # failed + if (( attempt > max_retries )); then + err "selfcheck failed after ${attempt} attempt(s)" + exit 1 + fi + log "selfcheck not ready yet; retrying in ${wait_seconds}s..." + sleep "$wait_seconds" + done } main() { @@ -216,6 +272,7 @@ main() { prepare_data_dirs load_images bring_up + post_bootstrap_fixes dns_bootstrap selfcheck log "install completed. See logs in $PKG_ROOT/logs/"