diff --git a/build/build_images.sh b/build/build_images.sh index a88bf7f..d716e04 100755 --- a/build/build_images.sh +++ b/build/build_images.sh @@ -46,8 +46,19 @@ while [[ $# -gt 0 ]]; do done root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +. "$root/scripts/common/build_user.sh" + +declare -a build_args=() + +if [[ "$use_intranet" == true ]]; then + build_args+=("--build-arg" "USE_INTRANET=true") +fi + cd "$root" +load_build_user +build_args+=("--build-arg" "ARGUS_BUILD_UID=${ARGUS_BUILD_UID}" "--build-arg" "ARGUS_BUILD_GID=${ARGUS_BUILD_GID}") + master_root="$root/src/master" master_offline_tar="$master_root/offline_wheels.tar.gz" master_offline_dir="$master_root/offline_wheels" @@ -75,12 +86,12 @@ echo "=======================================" if [[ "$use_intranet" == true ]]; then echo "🌐 Mode: Intranet (Using internal mirror: 10.68.64.1)" - build_args="--build-arg USE_INTRANET=true" else echo "🌐 Mode: Public (Using default package sources)" - build_args="" fi +echo "👤 Build user UID:GID -> ${ARGUS_BUILD_UID}:${ARGUS_BUILD_GID}" + echo "📁 Build context: $root" echo "" @@ -88,13 +99,14 @@ build_image() { local image_name=$1 local dockerfile_path=$2 local tag=$3 - local extra_args=$4 + shift 3 + local extra_args=("$@") echo "🔄 Building $image_name image..." echo " Dockerfile: $dockerfile_path" echo " Tag: $tag" - if docker build $build_args $extra_args -f "$dockerfile_path" -t "$tag" .; then + if docker build "${build_args[@]}" "${extra_args[@]}" -f "$dockerfile_path" -t "$tag" .; then echo "✅ $image_name image built successfully" return 0 else @@ -106,7 +118,7 @@ build_image() { images_built=() build_failed=false -if build_image "Elasticsearch" "src/log/elasticsearch/build/Dockerfile" "argus-elasticsearch:latest" ""; then +if build_image "Elasticsearch" "src/log/elasticsearch/build/Dockerfile" "argus-elasticsearch:latest"; then images_built+=("argus-elasticsearch:latest") else build_failed=true @@ -114,7 +126,7 @@ fi echo "" -if build_image "Kibana" "src/log/kibana/build/Dockerfile" "argus-kibana:latest" ""; then +if build_image "Kibana" "src/log/kibana/build/Dockerfile" "argus-kibana:latest"; then images_built+=("argus-kibana:latest") else build_failed=true @@ -122,7 +134,7 @@ fi echo "" -if build_image "BIND9" "src/bind/build/Dockerfile" "argus-bind9:latest" ""; then +if build_image "BIND9" "src/bind/build/Dockerfile" "argus-bind9:latest"; then images_built+=("argus-bind9:latest") else build_failed=true diff --git a/configs/.gitignore b/configs/.gitignore new file mode 100644 index 0000000..2f80b1e --- /dev/null +++ b/configs/.gitignore @@ -0,0 +1,2 @@ +# Local overrides for build user/group settings +build_user.local.conf diff --git a/configs/build_user.conf b/configs/build_user.conf new file mode 100644 index 0000000..e4df5be --- /dev/null +++ b/configs/build_user.conf @@ -0,0 +1,6 @@ +# Default build-time UID/GID for Argus images +# Override by creating configs/build_user.local.conf with the same format. +# Syntax: KEY=VALUE, supports UID/GID only. Whitespace and lines starting with # are ignored. + +UID=2133 +GID=2015 diff --git a/doc/build-user-config.md b/doc/build-user-config.md new file mode 100644 index 0000000..8b809a4 --- /dev/null +++ b/doc/build-user-config.md @@ -0,0 +1,38 @@ +# Argus 镜像构建 UID/GID 配置说明 + +通过统一配置文件可以为 Kibana、Elasticsearch、Bind、Master 等容器指定运行账号,解决跨机器部署时 UID/GID 不一致导致的权限问题。 + +## 配置入口 + +- 默认配置存放在 `configs/build_user.conf`,内容示例: + + ```bash + UID=2133 + GID=2015 + ``` + +- 如果需要本地覆盖,可在 `configs/` 下新建 `build_user.local.conf`,字段与默认文件一致。该文件已列入 `.gitignore`,不会被意外提交。 +- 亦可在执行脚本前通过环境变量 `ARGUS_BUILD_UID` / `ARGUS_BUILD_GID` 强制指定值,优先级最高。 + +## 作用范围 + +- `build/build_images.sh` 在构建 log/bind/master 镜像时读取配置,并传递 `--build-arg ARGUS_BUILD_UID/GID`;控制台会输出当前使用的 UID/GID。 +- `src/master/scripts/build_images.sh` 同步使用配置,确保单独构建 master 镜像时行为一致。 +- 各镜像 Dockerfile 会根据传入的 UID/GID 调整容器内账号(如 `elasticsearch`、`kibana`、`bind`、`argus`),并以环境变量形式暴露运行时可见值。 +- Master 启动脚本会在执行 DNS 逻辑后,降权到配置的账号运行 `gunicorn`,确保写入 `/private/argus/**` 的文件具备正确属主。 +- Log 模块测试脚本 `01_bootstrap.sh` 会根据配置修正挂载目录属主,方便端到端测试在任意用户下运行。 + +## 使用建议 + +1. 初次克隆仓库后无需修改,默认 UID/GID 保持向后兼容。 +2. 如果在目标环境中使用新的账号(例如 `uid=4001,gid=4001`): + - 编辑 `configs/build_user.local.conf` 填入新值; + - 使用新账号登录,并确保其加入宿主机的 `docker` 组; + - 重新执行 `build/build_images.sh` 或相关模块的构建脚本。 +3. 切换配置后建议重新运行目标模块的端到端脚本(如 `src/log/tests/scripts/01_bootstrap.sh`、`src/master/tests/scripts/00_e2e_test.sh`、`src/agent/tests/scripts/00_e2e_test.sh`),验证 `/private/argus` 下文件属主是否为期望账号。 + +## 故障排查 + +- **镜像构建报错 `groupmod: GID already in use`**:说明所选 GID 已存在于基础镜像,建议换用未占用的值,或在自定义基础镜像中先移除冲突。 +- **容器内运行时报写权限不足**:检查宿主机挂载目录是否已经由目标 UID/GID 创建;必要时重新执行模块的 `01_bootstrap.sh` 之类的准备脚本。 +- **仍看到旧 UID/GID**:确认脚本执行时未继承旧缓存,可运行 `ARGUS_BUILD_UID=... ARGUS_BUILD_GID=... ./build/build_images.sh` 强制覆盖。 diff --git a/scripts/common/build_user.sh b/scripts/common/build_user.sh new file mode 100644 index 0000000..c8f5c08 --- /dev/null +++ b/scripts/common/build_user.sh @@ -0,0 +1,115 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Shared helper to load Argus build user/group configuration. +# Usage: +# source "${PROJECT_ROOT}/scripts/common/build_user.sh" +# load_build_user +# echo "$ARGUS_BUILD_UID:$ARGUS_BUILD_GID" + +ARGUS_BUILD_UID_DEFAULT=2133 +ARGUS_BUILD_GID_DEFAULT=2015 + +shopt -s extglob + +_ARGUS_BUILD_USER_LOADED="${_ARGUS_BUILD_USER_LOADED:-0}" + +_argus_build_user_script_dir() { + local dir + dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + echo "$dir" +} + +argus_project_root() { + local script_dir + script_dir="$(_argus_build_user_script_dir)" + (cd "$script_dir/../.." >/dev/null && pwd) +} + +_argus_trim() { + local value="$1" + value="${value##+([[:space:]])}" + value="${value%%+([[:space:]])}" + printf '%s' "$value" +} + +_argus_is_number() { + [[ "$1" =~ ^[0-9]+$ ]] +} + +load_build_user() { + if [[ "$_ARGUS_BUILD_USER_LOADED" == "1" ]]; then + return 0 + fi + + local project_root config_files config uid gid + project_root="$(argus_project_root)" + config_files=( + "$project_root/configs/build_user.local.conf" + "$project_root/configs/build_user.conf" + ) + + uid="$ARGUS_BUILD_UID_DEFAULT" + gid="$ARGUS_BUILD_GID_DEFAULT" + + for config in "${config_files[@]}"; do + if [[ -f "$config" ]]; then + while IFS= read -r raw_line || [[ -n "$raw_line" ]]; do + local line key value + line="${raw_line%%#*}" + line="$(_argus_trim "${line}")" + [[ -z "$line" ]] && continue + if [[ "$line" != *=* ]]; then + echo "[ARGUS build_user] Ignoring malformed line in $config: $raw_line" >&2 + continue + fi + key="${line%%=*}" + value="${line#*=}" + key="$(_argus_trim "$key")" + value="$(_argus_trim "$value")" + case "$key" in + UID) + uid="$value" + ;; + GID) + gid="$value" + ;; + *) + echo "[ARGUS build_user] Unknown key '$key' in $config" >&2 + ;; + esac + done < "$config" + break + fi + done + + if [[ -n "${ARGUS_BUILD_UID:-}" ]]; then + uid="$ARGUS_BUILD_UID" + fi + if [[ -n "${ARGUS_BUILD_GID:-}" ]]; then + gid="$ARGUS_BUILD_GID" + fi + + if ! _argus_is_number "$uid"; then + echo "[ARGUS build_user] Invalid UID '$uid'" >&2 + return 1 + fi + if ! _argus_is_number "$gid"; then + echo "[ARGUS build_user] Invalid GID '$gid'" >&2 + return 1 + fi + + export ARGUS_BUILD_UID="$uid" + export ARGUS_BUILD_GID="$gid" + _ARGUS_BUILD_USER_LOADED=1 +} + +argus_build_user_args() { + load_build_user + printf '%s' "--build-arg ARGUS_BUILD_UID=${ARGUS_BUILD_UID} --build-arg ARGUS_BUILD_GID=${ARGUS_BUILD_GID}" +} + +print_build_user() { + load_build_user + echo "ARGUS build user: UID=${ARGUS_BUILD_UID} GID=${ARGUS_BUILD_GID}" +} diff --git a/src/agent/scripts/build_binary.sh b/src/agent/scripts/build_binary.sh index 7dce519..7e5a720 100755 --- a/src/agent/scripts/build_binary.sh +++ b/src/agent/scripts/build_binary.sh @@ -49,7 +49,10 @@ run_docker_build() { USED_DOCKER=1 echo "[INFO] Building agent binary inside $AGENT_BUILD_IMAGE" >&2 - docker_env=("--rm" "-u" "$(id -u):$(id -g)" "-v" "$MODULE_ROOT:/workspace" "-w" "/workspace") + local host_uid host_gid + host_uid="$(id -u)" + host_gid="$(id -g)" + docker_env=("--rm" "-v" "$MODULE_ROOT:/workspace" "-w" "/workspace" "--env" "TARGET_UID=${host_uid}" "--env" "TARGET_GID=${host_gid}") pass_env_if_set() { local var="$1" @@ -69,7 +72,7 @@ run_docker_build() { pass_env_if_set https_proxy pass_env_if_set no_proxy - build_script=$(cat <<'INNER' +build_script=$(cat <<'INNER' set -euo pipefail cd /workspace apt-get update >/dev/null @@ -93,6 +96,10 @@ pyinstaller \ entry.py chmod +x dist/argus-agent +TARGET_UID="${TARGET_UID:-0}" +TARGET_GID="${TARGET_GID:-0}" +chown -R "$TARGET_UID:$TARGET_GID" dist build 2>/dev/null || true + python3 - <<'PY' from pathlib import Path from PyInstaller.archive.readers import CArchiveReader diff --git a/src/agent/tests/docker-compose.yml b/src/agent/tests/docker-compose.yml index 5703200..6fa9e4b 100644 --- a/src/agent/tests/docker-compose.yml +++ b/src/agent/tests/docker-compose.yml @@ -8,6 +8,9 @@ services: networks: default: ipv4_address: 172.28.0.2 + environment: + - "ARGUS_BUILD_UID=${ARGUS_BUILD_UID:-2133}" + - "ARGUS_BUILD_GID=${ARGUS_BUILD_GID:-2015}" master: image: argus-master:dev @@ -18,6 +21,8 @@ services: - OFFLINE_THRESHOLD_SECONDS=6 - ONLINE_THRESHOLD_SECONDS=2 - SCHEDULER_INTERVAL_SECONDS=1 + - "ARGUS_BUILD_UID=${ARGUS_BUILD_UID:-2133}" + - "ARGUS_BUILD_GID=${ARGUS_BUILD_GID:-2015}" ports: - "32300:3000" volumes: @@ -38,6 +43,8 @@ services: environment: - MASTER_ENDPOINT=http://master.argus.com:3000 - REPORT_INTERVAL_SECONDS=2 + - "ARGUS_BUILD_UID=${ARGUS_BUILD_UID:-2133}" + - "ARGUS_BUILD_GID=${ARGUS_BUILD_GID:-2015}" volumes: - ./private/argus/agent/dev-e2euser-e2einst-pod-0:/private/argus/agent/dev-e2euser-e2einst-pod-0 - ./private/argus/agent/dev-e2euser-e2einst-pod-0/health:/private/argus/agent/dev-e2euser-e2einst-pod-0/health diff --git a/src/agent/tests/scripts/02_up.sh b/src/agent/tests/scripts/02_up.sh index fcb4b09..2d9de25 100755 --- a/src/agent/tests/scripts/02_up.sh +++ b/src/agent/tests/scripts/02_up.sh @@ -3,8 +3,19 @@ set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" TEST_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" +REPO_ROOT="$(cd "$TEST_ROOT/../../.." && pwd)" TMP_ROOT="$TEST_ROOT/tmp" +ENV_FILE="$TEST_ROOT/.env" + +source "$REPO_ROOT/scripts/common/build_user.sh" +load_build_user +export ARGUS_BUILD_UID ARGUS_BUILD_GID + +cat > "$ENV_FILE" <&2 diff --git a/src/agent/tests/scripts/07_down.sh b/src/agent/tests/scripts/07_down.sh index db7e9db..b9674ee 100755 --- a/src/agent/tests/scripts/07_down.sh +++ b/src/agent/tests/scripts/07_down.sh @@ -3,6 +3,7 @@ set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" TEST_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" +ENV_FILE="$TEST_ROOT/.env" compose() { if docker compose version >/dev/null 2>&1; then @@ -18,7 +19,18 @@ pushd "$TEST_ROOT" >/dev/null compose down --remove-orphans popd >/dev/null -rm -rf "$TEST_ROOT/private" +if [[ -d "$TEST_ROOT/private" ]]; then + docker run --rm \ + -v "$TEST_ROOT/private:/target" \ + ubuntu:24.04 \ + chown -R "$(id -u):$(id -g)" /target >/dev/null 2>&1 || true + rm -rf "$TEST_ROOT/private" +fi + rm -rf "$TEST_ROOT/tmp" +if [[ -f "$ENV_FILE" ]]; then + rm -f "$ENV_FILE" +fi + echo "[INFO] Agent E2E environment cleaned up" diff --git a/src/agent/tests/scripts/agent_entrypoint.sh b/src/agent/tests/scripts/agent_entrypoint.sh index b46a213..1823605 100755 --- a/src/agent/tests/scripts/agent_entrypoint.sh +++ b/src/agent/tests/scripts/agent_entrypoint.sh @@ -5,11 +5,36 @@ LOG_PREFIX="[AGENT-ENTRYPOINT]" DNS_SCRIPT="/private/argus/etc/update-dns.sh" DNS_CONF="/private/argus/etc/dns.conf" TARGET_DOMAIN="master.argus.com" +AGENT_UID="${ARGUS_BUILD_UID:-2133}" +AGENT_GID="${ARGUS_BUILD_GID:-2015}" +AGENT_HOSTNAME="${HOSTNAME:-unknown}" +AGENT_DATA_DIR="/private/argus/agent/${AGENT_HOSTNAME}" +AGENT_HEALTH_DIR="${AGENT_DATA_DIR}/health" +RUNTIME_GROUP="argusagent" +RUNTIME_USER="argusagent" log() { echo "${LOG_PREFIX} $*" } +mkdir -p "$AGENT_DATA_DIR" "$AGENT_HEALTH_DIR" +chown -R "$AGENT_UID:$AGENT_GID" "$AGENT_DATA_DIR" "$AGENT_HEALTH_DIR" 2>/dev/null || true +chown -R "$AGENT_UID:$AGENT_GID" "/private/argus/etc" 2>/dev/null || true + +if ! getent group "$AGENT_GID" >/dev/null 2>&1; then + groupadd -g "$AGENT_GID" "$RUNTIME_GROUP" +else + RUNTIME_GROUP="$(getent group "$AGENT_GID" | cut -d: -f1)" +fi + +if ! getent passwd "$AGENT_UID" >/dev/null 2>&1; then + useradd -u "$AGENT_UID" -g "$AGENT_GID" -M -s /bin/bash "$RUNTIME_USER" +else + RUNTIME_USER="$(getent passwd "$AGENT_UID" | cut -d: -f1)" +fi + +log "运行用户: $RUNTIME_USER ($AGENT_UID:$AGENT_GID)" + # 中文提示:等待 bind 下发的 update-dns.sh 脚本 for _ in {1..30}; do if [[ -x "$DNS_SCRIPT" ]]; then @@ -51,4 +76,4 @@ for _ in {1..30}; do done log "启动 argus-agent" -exec /usr/local/bin/argus-agent +exec su -s /bin/bash -c /usr/local/bin/argus-agent "$RUNTIME_USER" diff --git a/src/bind/build/Dockerfile b/src/bind/build/Dockerfile index f743d86..c6293d3 100644 --- a/src/bind/build/Dockerfile +++ b/src/bind/build/Dockerfile @@ -6,6 +6,11 @@ ENV TZ=Asia/Shanghai # 设置构建参数 ARG USE_INTRANET=false +ARG ARGUS_BUILD_UID=2133 +ARG ARGUS_BUILD_GID=2015 + +ENV ARGUS_BUILD_UID=${ARGUS_BUILD_UID} \ + ARGUS_BUILD_GID=${ARGUS_BUILD_GID} # 配置内网 apt 源 (如果指定了内网选项) RUN if [ "$USE_INTRANET" = "true" ]; then \ @@ -29,6 +34,24 @@ RUN apt-get update && \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* +# 调整 bind 用户与用户组 ID 以匹配宿主机配置 +RUN set -eux; \ + current_gid="$(getent group bind | awk -F: '{print $3}')"; \ + if [ -z "$current_gid" ]; then \ + groupadd -g "${ARGUS_BUILD_GID}" bind; \ + elif [ "$current_gid" != "${ARGUS_BUILD_GID}" ]; then \ + groupmod -g "${ARGUS_BUILD_GID}" bind; \ + fi; \ + if id bind >/dev/null 2>&1; then \ + current_uid="$(id -u bind)"; \ + if [ "$current_uid" != "${ARGUS_BUILD_UID}" ]; then \ + usermod -u "${ARGUS_BUILD_UID}" bind; \ + fi; \ + else \ + useradd -m -u "${ARGUS_BUILD_UID}" -g "${ARGUS_BUILD_GID}" bind; \ + fi; \ + chown -R "${ARGUS_BUILD_UID}:${ARGUS_BUILD_GID}" /var/cache/bind /var/lib/bind + # 配置部署时使用的apt源 RUN if [ "$USE_INTRANET" = "true" ]; then \ echo "deb [trusted=yes] https://10.92.132.52/mirrors/ubuntu2204/ jammy main" > /etc/apt/sources.list; \ diff --git a/src/bind/build/argus_dns_sync.sh b/src/bind/build/argus_dns_sync.sh index 7cc8be4..cfa4adc 100644 --- a/src/bind/build/argus_dns_sync.sh +++ b/src/bind/build/argus_dns_sync.sh @@ -9,6 +9,9 @@ SLEEP_SECONDS=10 RELOAD_SCRIPT="/usr/local/bin/reload-bind9.sh" # 这里放你已有脚本的路径 mkdir -p "$(dirname "$LOCKFILE")" "$BACKUP_DIR" +BACKUP_UID="${ARGUS_BUILD_UID:-2133}" +BACKUP_GID="${ARGUS_BUILD_GID:-2015}" +chown -R "$BACKUP_UID:$BACKUP_GID" "$BACKUP_DIR" 2>/dev/null || true is_ipv4() { local ip="$1" @@ -33,6 +36,7 @@ upsert_record() { local changed=0 cp -a "$ZONE_DB" "$BACKUP_DIR/db.argus.com.$ts.bak" + chown "$BACKUP_UID:$BACKUP_GID" "$BACKUP_DIR/db.argus.com.$ts.bak" 2>/dev/null || true local cur_ip cur_ip="$(get_current_ip "$name" || true)" @@ -73,7 +77,7 @@ while true; do shopt -s nullglob NEED_RELOAD=0 - for f in "$WATCH_DIR"/*.argus.com; do + for f in "$WATCH_DIR"/*.argus.com; do base="$(basename "$f")" name="${base%.argus.com}" ip="$(grep -Eo '([0-9]{1,3}\.){3}[0-9]{1,3}' "$f" | tail -n1 || true)" diff --git a/src/bind/build/startup.sh b/src/bind/build/startup.sh index 964867f..66a2e5d 100644 --- a/src/bind/build/startup.sh +++ b/src/bind/build/startup.sh @@ -6,6 +6,8 @@ chmod 777 /private 2>/dev/null || true # Create persistent directories for BIND9 configs and DNS sync mkdir -p /private/argus/bind mkdir -p /private/argus/etc +chown bind:bind /private/argus 2>/dev/null || true +chown -R bind:bind /private/argus/bind /private/argus/etc # Copy configuration files to persistent storage if they don't exist if [ ! -f /private/argus/bind/named.conf.local ]; then diff --git a/src/log/elasticsearch/build/Dockerfile b/src/log/elasticsearch/build/Dockerfile index 9b80f84..7b05ac1 100644 --- a/src/log/elasticsearch/build/Dockerfile +++ b/src/log/elasticsearch/build/Dockerfile @@ -3,10 +3,29 @@ FROM docker.elastic.co/elasticsearch/elasticsearch:8.13.4 # 切换到 root 用户进行系统级安装 USER root -# 修改elasticsearch用户的UID和GID -RUN usermod -u 2133 elasticsearch && \ - groupmod -g 2015 elasticsearch && \ - chown -R elasticsearch:elasticsearch /usr/share/elasticsearch +ARG ARGUS_BUILD_UID=2133 +ARG ARGUS_BUILD_GID=2015 + +ENV ARGUS_BUILD_UID=${ARGUS_BUILD_UID} \ + ARGUS_BUILD_GID=${ARGUS_BUILD_GID} + +# 调整 elasticsearch 用户与用户组 ID 以匹配宿主机配置 +RUN set -eux; \ + current_gid="$(getent group elasticsearch | awk -F: '{print $3}')"; \ + if [ -z "$current_gid" ]; then \ + groupadd -g "${ARGUS_BUILD_GID}" elasticsearch; \ + elif [ "$current_gid" != "${ARGUS_BUILD_GID}" ]; then \ + groupmod -g "${ARGUS_BUILD_GID}" elasticsearch; \ + fi; \ + if id elasticsearch >/dev/null 2>&1; then \ + current_uid="$(id -u elasticsearch)"; \ + if [ "$current_uid" != "${ARGUS_BUILD_UID}" ]; then \ + usermod -u "${ARGUS_BUILD_UID}" elasticsearch; \ + fi; \ + else \ + useradd -m -u "${ARGUS_BUILD_UID}" -g "${ARGUS_BUILD_GID}" elasticsearch; \ + fi; \ + chown -R "${ARGUS_BUILD_UID}:${ARGUS_BUILD_GID}" /usr/share/elasticsearch # 设置构建参数 ARG USE_INTRANET=false diff --git a/src/log/kibana/build/Dockerfile b/src/log/kibana/build/Dockerfile index 211440d..a8b16d7 100644 --- a/src/log/kibana/build/Dockerfile +++ b/src/log/kibana/build/Dockerfile @@ -3,10 +3,29 @@ FROM docker.elastic.co/kibana/kibana:8.13.4 # 切换到 root 用户进行系统级安装 USER root -# 修改kibana用户的UID和GID -RUN usermod -u 2133 kibana && \ - groupmod -g 2015 kibana && \ - chown -R kibana:kibana /usr/share/kibana +ARG ARGUS_BUILD_UID=2133 +ARG ARGUS_BUILD_GID=2015 + +ENV ARGUS_BUILD_UID=${ARGUS_BUILD_UID} \ + ARGUS_BUILD_GID=${ARGUS_BUILD_GID} + +# 调整 kibana 用户与用户组 ID 以匹配宿主机配置 +RUN set -eux; \ + current_gid="$(getent group kibana | awk -F: '{print $3}')"; \ + if [ -z "$current_gid" ]; then \ + groupadd -g "${ARGUS_BUILD_GID}" kibana; \ + elif [ "$current_gid" != "${ARGUS_BUILD_GID}" ]; then \ + groupmod -g "${ARGUS_BUILD_GID}" kibana; \ + fi; \ + if id kibana >/dev/null 2>&1; then \ + current_uid="$(id -u kibana)"; \ + if [ "$current_uid" != "${ARGUS_BUILD_UID}" ]; then \ + usermod -u "${ARGUS_BUILD_UID}" kibana; \ + fi; \ + else \ + useradd -m -u "${ARGUS_BUILD_UID}" -g "${ARGUS_BUILD_GID}" kibana; \ + fi; \ + chown -R "${ARGUS_BUILD_UID}:${ARGUS_BUILD_GID}" /usr/share/kibana # 设置构建参数 ARG USE_INTRANET=false diff --git a/src/log/tests/scripts/01_bootstrap.sh b/src/log/tests/scripts/01_bootstrap.sh index ba3842b..e45fde3 100755 --- a/src/log/tests/scripts/01_bootstrap.sh +++ b/src/log/tests/scripts/01_bootstrap.sh @@ -1,6 +1,10 @@ #!/usr/bin/env bash set -euo pipefail root="$(cd "$(dirname "${BASH_SOURCE[0]}")/../" && pwd)" +project_root="$(cd "$root/../../.." && pwd)" + +source "$project_root/scripts/common/build_user.sh" +load_build_user # 创建新的private目录结构 (基于argus目录结构) echo "[INFO] Creating private directory structure for supervisor-based containers..." @@ -11,9 +15,9 @@ mkdir -p "$root/private/argus/etc/" # 设置数据目录权限(ES 和 Kibana 容器都使用 UID 1000) echo "[INFO] Setting permissions for data directories..." -sudo chown -R 2133:2015 "$root/private/argus/log/elasticsearch" 2>/dev/null || true -sudo chown -R 2133:2015 "$root/private/argus/log/kibana" 2>/dev/null || true -sudo chown -R 2133:2015 "$root/private/argus/etc" 2>/dev/null || true +sudo chown -R "${ARGUS_BUILD_UID}:${ARGUS_BUILD_GID}" "$root/private/argus/log/elasticsearch" 2>/dev/null || true +sudo chown -R "${ARGUS_BUILD_UID}:${ARGUS_BUILD_GID}" "$root/private/argus/log/kibana" 2>/dev/null || true +sudo chown -R "${ARGUS_BUILD_UID}:${ARGUS_BUILD_GID}" "$root/private/argus/etc" 2>/dev/null || true echo "[INFO] Supervisor-based containers will manage their own scripts and configurations" diff --git a/src/log/tests/scripts/03_send_test_host01.sh b/src/log/tests/scripts/03_send_test_host01.sh index 8889b06..2fe11b8 100755 --- a/src/log/tests/scripts/03_send_test_host01.sh +++ b/src/log/tests/scripts/03_send_test_host01.sh @@ -4,8 +4,22 @@ set -euo pipefail # 获取fluent-bit-host01容器名称 container_name="logging-mvp-fluent-bit-host01-1" -# 检查容器是否存在并运行 -if ! docker ps | grep -q "$container_name"; then +wait_for_container() { + local name="$1" + local attempts=30 + local delay=5 + local i + for ((i = 1; i <= attempts; i++)); do + if docker ps --format '{{.Names}}' | grep -Fx "$name" >/dev/null; then + return 0 + fi + echo "[INFO] 等待容器 $name 启动中... ($i/$attempts)" + sleep "$delay" + done + return 1 +} + +if ! wait_for_container "$container_name"; then echo "[ERROR] Fluent Bit容器 $container_name 未运行" exit 1 fi @@ -28,4 +42,4 @@ STACK" echo "[OK] 已通过docker exec写入测试日志到 host01 容器内:" echo " - /logs/train/train-demo.log" -echo " - /logs/infer/infer-demo.log" \ No newline at end of file +echo " - /logs/infer/infer-demo.log" diff --git a/src/log/tests/scripts/03_send_test_host02.sh b/src/log/tests/scripts/03_send_test_host02.sh index 039c0cc..d36ecf4 100755 --- a/src/log/tests/scripts/03_send_test_host02.sh +++ b/src/log/tests/scripts/03_send_test_host02.sh @@ -4,8 +4,22 @@ set -euo pipefail # 获取fluent-bit-host02容器名称 container_name="logging-mvp-fluent-bit-host02-1" -# 检查容器是否存在并运行 -if ! docker ps | grep -q "$container_name"; then +wait_for_container() { + local name="$1" + local attempts=30 + local delay=5 + local i + for ((i = 1; i <= attempts; i++)); do + if docker ps --format '{{.Names}}' | grep -Fx "$name" >/dev/null; then + return 0 + fi + echo "[INFO] 等待容器 $name 启动中... ($i/$attempts)" + sleep "$delay" + done + return 1 +} + +if ! wait_for_container "$container_name"; then echo "[ERROR] Fluent Bit容器 $container_name 未运行" exit 1 fi @@ -24,4 +38,4 @@ docker exec "$container_name" sh -c "printf '%s INFO [host02] inference complete echo "[OK] 已通过docker exec写入测试日志到 host02 容器内:" echo " - /logs/train/train-demo.log" -echo " - /logs/infer/infer-demo.log" \ No newline at end of file +echo " - /logs/infer/infer-demo.log" diff --git a/src/log/tests/scripts/e2e_test.sh b/src/log/tests/scripts/e2e_test.sh index c7748fe..fbe5197 100755 --- a/src/log/tests/scripts/e2e_test.sh +++ b/src/log/tests/scripts/e2e_test.sh @@ -19,7 +19,7 @@ get_log_count() { # 函数:等待服务就绪 wait_for_services() { echo "[INFO] Waiting for all services to be ready..." - local max_attempts=60 + local max_attempts=${SERVICE_WAIT_ATTEMPTS:-120} local attempt=1 while [ $attempt -le $max_attempts ]; do diff --git a/src/master/Dockerfile b/src/master/Dockerfile index 7e43fbe..bcc932d 100644 --- a/src/master/Dockerfile +++ b/src/master/Dockerfile @@ -5,6 +5,11 @@ SHELL ["/bin/bash", "-c"] ARG PIP_INDEX_URL= ARG USE_OFFLINE=0 ARG USE_INTRANET=false +ARG ARGUS_BUILD_UID=2133 +ARG ARGUS_BUILD_GID=2015 + +ENV ARGUS_BUILD_UID=${ARGUS_BUILD_UID} \ + ARGUS_BUILD_GID=${ARGUS_BUILD_GID} ENV PIP_NO_CACHE_DIR=1 \ PYTHONUNBUFFERED=1 \ @@ -52,6 +57,18 @@ RUN if [[ "$USE_INTRANET" == "true" ]]; then \ RUN mkdir -p /var/log/supervisor +RUN set -eux; \ + if getent group argus >/dev/null; then \ + groupmod -g "${ARGUS_BUILD_GID}" argus; \ + else \ + groupadd -g "${ARGUS_BUILD_GID}" argus; \ + fi; \ + if id argus >/dev/null 2>&1; then \ + usermod -u "${ARGUS_BUILD_UID}" -g "${ARGUS_BUILD_GID}" argus; \ + else \ + useradd -m -u "${ARGUS_BUILD_UID}" -g "${ARGUS_BUILD_GID}" -s /bin/bash argus; \ + fi + COPY ./src/master/build/supervisord.conf /etc/supervisor/conf.d/supervisord.conf COPY ./src/master/build/start-master.sh /usr/local/bin/start-master.sh COPY ./src/master/build/dns-monitor.sh /usr/local/bin/dns-monitor.sh diff --git a/src/master/build/start-master.sh b/src/master/build/start-master.sh index ec57f3d..deeb211 100755 --- a/src/master/build/start-master.sh +++ b/src/master/build/start-master.sh @@ -5,8 +5,17 @@ set -euo pipefail DNS_DIR="/private/argus/etc" DNS_SCRIPT="${DNS_DIR}/update-dns.sh" MASTER_DOMAIN_FILE="${DNS_DIR}/master.argus.com" +RUNTIME_USER="${ARGUS_RUNTIME_USER:-argus}" +RUNTIME_UID="${ARGUS_BUILD_UID:-2133}" +RUNTIME_GID="${ARGUS_BUILD_GID:-2015}" +MASTER_DATA_DIR="/private/argus/master" +METRIC_DIR="/private/argus/metric/prometheus" mkdir -p "$DNS_DIR" +chown -R "$RUNTIME_UID:$RUNTIME_GID" "$DNS_DIR" 2>/dev/null || true +mkdir -p "$MASTER_DATA_DIR" +mkdir -p "$METRIC_DIR" +chown -R "$RUNTIME_UID:$RUNTIME_GID" "$MASTER_DATA_DIR" "$METRIC_DIR" 2>/dev/null || true if [[ -x "$DNS_SCRIPT" ]]; then echo "[INFO] Running update-dns.sh before master starts" @@ -21,6 +30,7 @@ MASTER_IP=$(ifconfig | grep -A 1 eth0 | grep inet | awk '{print $2}' || true) if [[ -n "${MASTER_IP}" ]]; then echo "current IP: ${MASTER_IP}" echo "${MASTER_IP}" > "$MASTER_DOMAIN_FILE" + chown "$RUNTIME_UID:$RUNTIME_GID" "$MASTER_DOMAIN_FILE" 2>/dev/null || true else echo "[WARN] Failed to detect master IP via ifconfig" fi @@ -35,4 +45,15 @@ else EXTRA_ARRAY=() fi -exec gunicorn --bind "$BIND_ADDR" --workers "$WORKERS" "${EXTRA_ARRAY[@]}" "app:create_app()" +command=(gunicorn --bind "$BIND_ADDR" --workers "$WORKERS") +if [[ ${#EXTRA_ARRAY[@]} -gt 0 ]]; then + command+=("${EXTRA_ARRAY[@]}") +fi +command+=("app:create_app()") + +if command -v runuser >/dev/null 2>&1; then + exec runuser -u "$RUNTIME_USER" -- "${command[@]}" +else + printf -v _cmd_str '%q ' "${command[@]}" + exec su -s /bin/bash -m "$RUNTIME_USER" -c "exec ${_cmd_str}" +fi diff --git a/src/master/scripts/build_images.sh b/src/master/scripts/build_images.sh index 7bd5992..1f2da4b 100755 --- a/src/master/scripts/build_images.sh +++ b/src/master/scripts/build_images.sh @@ -20,6 +20,10 @@ DOCKERFILE="src/master/Dockerfile" BUILD_ARGS=() OFFLINE_MODE=0 +source "$PROJECT_ROOT/scripts/common/build_user.sh" +load_build_user +BUILD_ARGS+=("--build-arg" "ARGUS_BUILD_UID=${ARGUS_BUILD_UID}" "--build-arg" "ARGUS_BUILD_GID=${ARGUS_BUILD_GID}") + cd "$PROJECT_ROOT" while [[ "$#" -gt 0 ]]; do