[#40] log目录供宿主机其他程序可写

This commit is contained in:
yuyr 2025-11-12 15:06:37 +08:00
parent df1f519355
commit 06131a268a
4 changed files with 39 additions and 6 deletions

View File

@ -79,3 +79,9 @@ if [[ ${#missing[@]} -gt 0 ]]; then
err "以下变量必须在 compose/.env 中填写:${missing[*]}(已保留你现有的内容,不会被覆盖)"; exit 1; fi err "以下变量必须在 compose/.env 中填写:${missing[*]}(已保留你现有的内容,不会被覆盖)"; exit 1; fi
info "已生成 compose/.env可执行 scripts/install.sh" info "已生成 compose/.env可执行 scripts/install.sh"
# 准备并赋权宿主日志目录(幂等,便于安装前人工检查/预创建)
mkdir -p "$PKG_ROOT/logs/train" "$PKG_ROOT/logs/infer"
chmod 1777 "$PKG_ROOT/logs/train" "$PKG_ROOT/logs/infer" || true
info "日志目录权限(期待 1777,含粘滞位):"
stat -c '%a %U:%G %n' "$PKG_ROOT/logs/train" "$PKG_ROOT/logs/infer" 2>/dev/null || true

View File

@ -45,13 +45,20 @@ IMG_TGZ=$(ls -1 "$PKG_ROOT"/images/argus-sys-metric-test-node-bundle-gpu-*.tar.g
info "导入 GPU bundle 镜像: $(basename "$IMG_TGZ")" info "导入 GPU bundle 镜像: $(basename "$IMG_TGZ")"
tmp=$(mktemp); gunzip -c "$IMG_TGZ" > "$tmp"; docker load -i "$tmp" >/dev/null; rm -f "$tmp" tmp=$(mktemp); gunzip -c "$IMG_TGZ" > "$tmp"; docker load -i "$tmp" >/dev/null; rm -f "$tmp"
# 确保日志目录存在(宿主侧,用于映射 /logs/infer 与 /logs/train) # 确保日志目录存在(宿主侧,用于映射 /logs/infer 与 /logs/train),并赋权 1777(粘滞位)
mkdir -p "$PKG_ROOT/logs/infer" "$PKG_ROOT/logs/train" mkdir -p "$PKG_ROOT/logs/infer" "$PKG_ROOT/logs/train"
info "日志目录已准备: logs/infer logs/train" chmod 1777 "$PKG_ROOT/logs/infer" "$PKG_ROOT/logs/train" || true
info "日志目录已准备并赋权 1777: logs/infer logs/train"
stat -c '%a %U:%G %n' "$PKG_ROOT/logs/infer" "$PKG_ROOT/logs/train" 2>/dev/null || true
# 启动 compose 并跟踪日志 # 启动 compose 并跟踪日志
info "启动 GPU 节点 (docker compose up -d)" info "启动 GPU 节点 (docker compose up -d)"
docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" up -d docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" up -d
docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" ps docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" ps
# 再次校准宿主日志目录权限,避免容器内脚本对 bind mount 权限回退
chmod 1777 "$PKG_ROOT/logs/infer" "$PKG_ROOT/logs/train" || true
stat -c '%a %U:%G %n' "$PKG_ROOT/logs/infer" "$PKG_ROOT/logs/train" 2>/dev/null || true
info "跟踪节点容器日志(按 Ctrl+C 退出)" info "跟踪节点容器日志(按 Ctrl+C 退出)"
docker logs -f argus-metric-gpu-node-swarm || true docker logs -f argus-metric-gpu-node-swarm || true

View File

@ -77,7 +77,20 @@ cp -r /tmp/flb/etc/* /etc/fluent-bit/
# Create logs/buffers dirs # Create logs/buffers dirs
mkdir -p /logs/train /logs/infer /buffers mkdir -p /logs/train /logs/infer /buffers
chmod 755 /logs/train /logs/infer /buffers
# 控制日志目录权限:默认对宿主 bind mount 目录采用 1777,可由环境变量关闭
: "${ARGUS_LOGS_WORLD_WRITABLE:=1}"
if [[ "${ARGUS_LOGS_WORLD_WRITABLE}" == "1" ]]; then
chmod 1777 /logs/train /logs/infer || true
else
chmod 755 /logs/train /logs/infer || true
fi
# 缓冲目录仅供进程使用,不对外开放写入
chmod 770 /buffers || true
# 目录属主设置为 fluent-bit(不影响 1777 粘滞位)
chown -R fluent-bit:fluent-bit /logs /buffers 2>/dev/null || true
# Wait for Elasticsearch via bash /dev/tcp to avoid curl dependency # Wait for Elasticsearch via bash /dev/tcp to avoid curl dependency
echo "[INFO] Waiting for Elasticsearch to be ready (tcp ${ES_HOST}:${ES_PORT})..." echo "[INFO] Waiting for Elasticsearch to be ready (tcp ${ES_HOST}:${ES_PORT})..."

View File

@ -171,9 +171,16 @@ fi
# 创建日志和缓冲区目录 # 创建日志和缓冲区目录
log_info "Creating log and buffer directories..." log_info "Creating log and buffer directories..."
mkdir -p /logs/train /logs/infer /buffers mkdir -p /logs/train /logs/infer /buffers
chmod 755 /logs/train /logs/infer # 对共享日志目录采用 1777(含粘滞位),便于宿主任意账号创建文件/目录
chmod 770 /buffers if [[ "${ARGUS_LOGS_WORLD_WRITABLE:-1}" == "1" ]]; then
chown -R fluent-bit:fluent-bit /logs /buffers chmod 1777 /logs/train /logs/infer || true
else
chmod 755 /logs/train /logs/infer || true
fi
# 缓冲目录限进程使用
chmod 770 /buffers || true
# 目录属主设置,不影响 1777 粘滞位
chown -R fluent-bit:fluent-bit /logs /buffers 2>/dev/null || true
# 启动 Fluent Bit # 启动 Fluent Bit
log_info "Starting Fluent Bit with configuration from /etc/fluent-bit/" log_info "Starting Fluent Bit with configuration from /etc/fluent-bit/"