yuyr 34cb239bf4 完成H20服务器部署及重启测试 (#51)
当前部署情况
- h1: 部署server & client
- h2: 部署client
- 部署2025-11-25
- 部署目录:  /home2/argus/server  ,  /home2/argus/client
- 部署使用账号:argus

网络拓扑:
- h1 作为docker swarm manager
- h2 作为worker加入docker swarm
- docker swarm 上创建overlay network

访问方式:
- 通过ssh到h1服务器,端口转发 20006-20011 端口到笔记本本地;
- 门户网址:http://localhost:20006/dashboard

部署截图:
![image.png](/attachments/86c1a7af-dacc-4ba7-a182-f7cefd4e6427)
![image.png](/attachments/06f20852-771c-4264-b031-e6acd0f6ea1c)
![image.png](/attachments/091ab5a8-95bf-466f-a394-3255dcb49735)

注意事项:
- server各容器使用域名作为overlay network上alias别名,实现域名访问,当前版本禁用bind作为域名解析,原因是容器重启后IP变化场景bind机制复杂且不稳定。
- client 构建是内置安装包,容器启动时执行安装流程,后续重启容器跳过安装步骤。
- UID/GID:部署使用 argus账号 uid=2133, gid=2015。

Reviewed-on: #51
Reviewed-by: sundapeng <sundp@mail.zgclab.edu.cn>
Reviewed-by: xuxt <xuxt@zgclab.edu.cn>
Reviewed-by: huhy <husteryezi@163.com>
2025-11-25 15:54:29 +08:00

526 lines
16 KiB
Bash
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
set -e
# 颜色定义
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
# 日志函数
log_info() {
echo -e "${BLUE}[INFO]${NC} $1"
}
log_success() {
echo -e "${GREEN}[SUCCESS]${NC} $1"
}
log_warning() {
echo -e "${YELLOW}[WARNING]${NC} $1"
}
log_error() {
echo -e "${RED}[ERROR]${NC} $1"
}
# 显示帮助信息
show_help() {
echo "AIOps All-in-One 打包脚本"
echo
echo "用法: $0 [选项]"
echo
echo "选项:"
echo " --force 强制重新打包,即使版本已存在"
echo " --help 显示此帮助信息"
echo
echo "示例:"
echo " $0 # 正常打包,跳过已存在的版本"
echo " $0 --force # 强制重新打包"
echo
}
# 解析命令行参数
FORCE_PACKAGE=false
if [[ "$1" == "--force" ]]; then
FORCE_PACKAGE=true
log_info "强制重新打包模式"
elif [[ "$1" == "--help" || "$1" == "-h" ]]; then
show_help
exit 0
fi
# 获取当前目录和版本
CURRENT_DIR=$(pwd)
VERSION=$(cat config/VERSION 2>/dev/null || echo "1.0.0")
ARTIFACT_DIR="artifact/$VERSION"
log_info "开始打包 AIOps All-in-One 安装包 v$VERSION"
# 若强制打包且目录已存在,先清理旧产物以避免同一版本下残留多个 tar.gz 导致校验混乱
if [[ "$FORCE_PACKAGE" == "true" && -d "$ARTIFACT_DIR" ]]; then
log_info "--force: 清理旧的 $ARTIFACT_DIR 下的 tar 与元数据"
rm -rf "$ARTIFACT_DIR"
fi
# 检查必要文件
log_info "检查必要文件..."
if [[ ! -f "config/VERSION" ]]; then
log_error "VERSION 文件不存在"
exit 1
fi
if [[ ! -f "config/checklist" ]]; then
log_error "checklist 文件不存在"
exit 1
fi
# 检查是否已存在该版本
if [[ -d "$ARTIFACT_DIR" && "$FORCE_PACKAGE" == "false" ]]; then
log_info "检查版本 $VERSION 是否已存在..."
# 检查 version.json 是否存在
if [[ -f "$ARTIFACT_DIR/version.json" ]]; then
log_info "找到已存在的版本信息文件"
# 检查是否所有组件文件都存在
missing_files=0
existing_components=0
# 解析已存在的 version.json 来检查文件
if command -v jq &> /dev/null; then
# 使用 jq 解析
while IFS= read -r component; do
existing_components=$((existing_components + 1))
# 查找对应的 tar 文件
found_file=false
for file in "$ARTIFACT_DIR/${component}-"*.tar.gz; do
if [[ -f "$file" ]]; then
found_file=true
break
fi
done
if [[ "$found_file" == "false" ]]; then
missing_files=$((missing_files + 1))
log_warning " 缺少文件: $component"
fi
done < <(jq -r '.artifact_list | keys[]' "$ARTIFACT_DIR/version.json" 2>/dev/null)
else
# 简单的文件检查
for file in "$ARTIFACT_DIR"/*.tar.gz; do
if [[ -f "$file" ]]; then
existing_components=$((existing_components + 1))
fi
done
fi
# 如果所有文件都存在,则跳过打包
if [[ $missing_files -eq 0 && $existing_components -gt 0 ]]; then
log_success "版本 $VERSION 已完整打包,跳过重复打包"
echo
echo "现有文件:"
ls -la "$ARTIFACT_DIR"
echo
echo "如需强制重新打包,请删除目录: rm -rf $ARTIFACT_DIR"
echo "或使用: ./package.sh --force"
exit 0
else
log_warning "版本 $VERSION 存在但不完整,将重新打包"
log_info " 现有组件: $existing_components"
log_info " 缺少文件: $missing_files"
fi
else
log_warning "版本目录存在但缺少 version.json将重新打包"
fi
fi
# 创建 artifact 目录(清理后重建)
mkdir -p "$ARTIFACT_DIR"
log_info "创建输出目录: $ARTIFACT_DIR"
# 创建临时文件存储数据
TEMP_DIR=$(mktemp -d)
COMPONENTS_FILE="$TEMP_DIR/components.txt"
VERSIONS_FILE="$TEMP_DIR/versions.txt"
DEPENDENCIES_FILE="$TEMP_DIR/dependencies.txt"
INSTALL_ORDER_FILE="$TEMP_DIR/install_order.txt"
CHECKSUMS_FILE="$TEMP_DIR/checksums.txt"
ARTIFACT_LIST_FILE="$TEMP_DIR/artifact_list.txt"
# 解析 checklist 文件
log_info "解析组件清单..."
line_num=0
component_count=0
while IFS= read -r line; do
[[ -z "$line" || "$line" =~ ^[[:space:]]*# ]] && continue
line_num=$((line_num + 1))
# 解析行: 组件名 目录路径 版本 [依赖组件] [安装顺序]
read -r component component_path version dep_component order <<< "$line"
if [[ -z "$component" || -z "$component_path" || -z "$version" ]]; then
log_warning "跳过无效行 $line_num: $line"
continue
fi
# 存储组件信息
echo "$component" >> "$COMPONENTS_FILE"
echo "$component:$version" >> "$VERSIONS_FILE"
echo "$component:$component_path" >> "$TEMP_DIR/component_paths.txt"
if [[ -n "$dep_component" && "$dep_component" != "$component" ]]; then
echo "$component:$dep_component" >> "$DEPENDENCIES_FILE"
fi
if [[ -n "$order" && "$order" =~ ^[0-9]+$ ]]; then
echo "$order:$component" >> "$INSTALL_ORDER_FILE"
else
# 如果没有指定顺序,按解析顺序分配
echo "$line_num:$component" >> "$INSTALL_ORDER_FILE"
fi
component_count=$((component_count + 1))
log_info " - $component v$version"
done < config/checklist
if [[ $component_count -eq 0 ]]; then
log_error "没有找到有效的组件"
rm -rf "$TEMP_DIR"
exit 1
fi
log_success "找到 $component_count 个组件"
# 检查组件目录是否存在
log_info "检查组件目录..."
missing_components=()
while IFS= read -r component; do
# 获取组件路径
component_path=$(grep "^$component:" "$TEMP_DIR/component_paths.txt" | cut -d':' -f2-)
if [[ -z "$component_path" ]]; then
log_error "未找到组件 $component 的路径配置"
log_info "请检查 component_paths.txt 文件或添加路径配置"
exit 1
fi
if [[ ! -d "$component_path" ]]; then
missing_components+=("$component:$component_path")
fi
done < "$COMPONENTS_FILE"
if [[ ${#missing_components[@]} -gt 0 ]]; then
log_error "以下组件目录不存在:"
for component_path in "${missing_components[@]}"; do
echo " - $component_path"
done
rm -rf "$TEMP_DIR"
exit 1
fi
# 额外校验:阻止将 Git LFS 指针文件打进安装包
# 仅检查各组件目录下的 bin/ 内文件(常见为二进制或 .deb/.tar.gz 制品)
is_lfs_pointer() {
local f="$1"
# 读取首行判断是否为 LFS pointer无需依赖 file 命令)
head -n1 "$f" 2>/dev/null | grep -q '^version https://git-lfs.github.com/spec/v1$'
}
log_info "检查组件二进制是否已从 LFS 拉取..."
while IFS= read -r component; do
component_path=$(grep "^$component:" "$TEMP_DIR/component_paths.txt" | cut -d':' -f2-)
bin_dir="$component_path/bin"
[[ -d "$bin_dir" ]] || continue
while IFS= read -r f; do
# 只检查常见可执行/包后缀;无后缀的也检查
case "$f" in
*.sh) continue;;
*) :;;
esac
if is_lfs_pointer "$f"; then
log_error "检测到 Git LFS 指针文件: $f"
log_error "请在仓库根目录执行: git lfs fetch --all && git lfs checkout"
log_error "或确保 CI 在打包前已还原 LFS 大文件。"
rm -rf "$TEMP_DIR"
exit 1
fi
done < <(find "$bin_dir" -maxdepth 1 -type f 2>/dev/null | sort)
done < "$COMPONENTS_FILE"
log_success "LFS 校验通过:未发现指针文件"
# 打包各个组件
log_info "开始打包组件..."
while IFS= read -r component; do
# 获取组件版本和路径
version=$(grep "^$component:" "$VERSIONS_FILE" | cut -d':' -f2)
component_path=$(grep "^$component:" "$TEMP_DIR/component_paths.txt" | cut -d':' -f2-)
if [[ -z "$component_path" ]]; then
log_error "未找到组件 $component 的路径配置"
log_info "请检查 component_paths.txt 文件或添加路径配置"
exit 1
fi
log_info "打包 $component v$version..."
log_info " 组件路径: $component_path"
# 进入组件目录
cd "$component_path"
# 组件内二次防御:若包脚本缺失 LFS 校验,这里再次阻断
if [[ -d bin ]]; then
for f in bin/*; do
[[ -f "$f" ]] || continue
if head -n1 "$f" 2>/dev/null | grep -q '^version https://git-lfs.github.com/spec/v1$'; then
log_error "组件 $component 含 LFS 指针文件: $f"
log_error "请执行: git lfs fetch --all && git lfs checkout"
cd "$CURRENT_DIR"; rm -rf "$TEMP_DIR"; exit 1
fi
done
fi
# 检查组件是否有 package.sh
if [[ ! -f "package.sh" ]]; then
log_error "$component 缺少 package.sh 文件"
cd "$CURRENT_DIR"
rm -rf "$TEMP_DIR"
exit 1
fi
# 清理组件目录内历史 tar 包,避免 find 误选旧文件
rm -f ./*.tar.gz 2>/dev/null || true
# 执行组件的打包脚本
if ./package.sh; then
# 查找生成的 tar 包
tar_file=$(ls -1t ./*.tar.gz 2>/dev/null | head -1)
if [[ -n "$tar_file" ]]; then
# 移动到 artifact 目录
mv "$tar_file" "$CURRENT_DIR/$ARTIFACT_DIR/"
tar_filename=$(basename "$tar_file")
# 计算校验和
checksum=$(sha256sum "$CURRENT_DIR/$ARTIFACT_DIR/$tar_filename" | cut -d' ' -f1)
echo "$component:sha256:$checksum" >> "$CHECKSUMS_FILE"
echo "$component:$version" >> "$ARTIFACT_LIST_FILE"
# 将完整的文件名存储到安装顺序文件中
echo "$tar_filename" >> "$TEMP_DIR/install_order_files.txt"
log_success " $component 打包完成: $tar_filename"
else
log_error "$component 打包失败,未找到生成的 tar 包"
cd "$CURRENT_DIR"
rm -rf "$TEMP_DIR"
exit 1
fi
else
log_error "$component 打包失败"
cd "$CURRENT_DIR"
rm -rf "$TEMP_DIR"
exit 1
fi
# 返回主目录
cd "$CURRENT_DIR"
done < "$COMPONENTS_FILE"
# 生成 version.json
log_info "生成版本信息文件..."
version_json="$ARTIFACT_DIR/version.json"
# 构建依赖关系 JSON
deps_json=""
if [[ -f "$DEPENDENCIES_FILE" ]]; then
first=true
while IFS= read -r line; do
component=$(echo "$line" | cut -d':' -f1)
dep=$(echo "$line" | cut -d':' -f2)
if [[ "$first" == "true" ]]; then
deps_json="\"$component\":[\"$dep\"]"
first=false
else
deps_json="$deps_json,\"$component\":[\"$dep\"]"
fi
done < "$DEPENDENCIES_FILE"
fi
# 构建安装顺序数组
order_array=""
if [[ -f "$TEMP_DIR/install_order_files.txt" ]]; then
first=true
while IFS= read -r filename; do
if [[ "$first" == "true" ]]; then
order_array="\"$filename\""
first=false
else
order_array="$order_array,\"$filename\""
fi
done < "$TEMP_DIR/install_order_files.txt"
fi
# 构建 artifact_list JSON
artifact_json=""
if [[ -f "$ARTIFACT_LIST_FILE" ]]; then
first=true
while IFS= read -r line; do
component=$(echo "$line" | cut -d':' -f1)
version=$(echo "$line" | cut -d':' -f2)
if [[ "$first" == "true" ]]; then
artifact_json="\"$component\":\"$version\""
first=false
else
artifact_json="$artifact_json,\"$component\":\"$version\""
fi
done < "$ARTIFACT_LIST_FILE"
fi
# 构建 checksums JSON
checksums_json=""
if [[ -f "$CHECKSUMS_FILE" ]]; then
first=true
while IFS= read -r line; do
component=$(echo "$line" | cut -d':' -f1)
checksum=$(echo "$line" | cut -d':' -f2-)
if [[ "$first" == "true" ]]; then
checksums_json="\"$component\":\"$checksum\""
first=false
else
checksums_json="$checksums_json,\"$component\":\"$checksum\""
fi
done < "$CHECKSUMS_FILE"
fi
# 生成完整的 version.json
cat > "$version_json" << EOF
{
"version": "$VERSION",
"build_time": "$(date -u +%Y-%m-%dT%H:%M:%SZ)",
"artifact_list": {
$artifact_json
},
"checksums": {
$checksums_json
},
"dependencies": {
$deps_json
},
"install_order": [
$order_array
]
}
EOF
log_success "版本信息文件生成完成: $version_json"
# 复制`安装`脚本到 artifact 目录
log_info "复制安装脚本..."
if [[ -f "scripts/install_artifact.sh" ]]; then
cp "scripts/install_artifact.sh" "$ARTIFACT_DIR/install.sh"
chmod +x "$ARTIFACT_DIR/install.sh"
log_success "安装脚本复制完成: $ARTIFACT_DIR/install.sh"
else
log_warning "scripts/install_artifact.sh 文件不存在"
fi
# 复制`卸载`脚本到 artifact 目录
log_info "复制卸载脚本..."
if [[ -f "scripts/uninstall_artifact.sh" ]]; then
cp "scripts/uninstall_artifact.sh" "$ARTIFACT_DIR/uninstall.sh"
chmod +x "$ARTIFACT_DIR/uninstall.sh"
log_success "卸载脚本复制完成: $ARTIFACT_DIR/uninstall.sh"
else
log_warning "scripts/uninstall_artifact.sh 文件不存在"
fi
# 复制`健康检查`脚本到 artifact 目录
log_info "复制健康检查脚本..."
if [[ -f "scripts/check_health.sh" ]]; then
cp "scripts/check_health.sh" "$ARTIFACT_DIR/check_health.sh"
chmod +x "$ARTIFACT_DIR/check_health.sh"
log_success "健康检查脚本复制完成: $ARTIFACT_DIR/check_health.sh"
else
log_warning "scripts/check_health.sh 文件不存在"
fi
# 复制`DNS 同步`脚本到 artifact 目录
log_info "复制 DNS 同步脚本..."
if [[ -f "scripts/sync_dns.sh" ]]; then
cp "scripts/sync_dns.sh" "$ARTIFACT_DIR/sync_dns.sh"
chmod +x "$ARTIFACT_DIR/sync_dns.sh"
log_success "DNS 同步脚本复制完成: $ARTIFACT_DIR/sync_dns.sh"
else
log_warning "scripts/sync_dns.sh 文件不存在"
fi
# 复制`版本校验`脚本到 artifact 目录
log_info "复制版本校验脚本..."
if [[ -f "scripts/check_version.sh" ]]; then
cp "scripts/check_version.sh" "$ARTIFACT_DIR/check_version.sh"
chmod +x "$ARTIFACT_DIR/check_version.sh"
log_success "版本校验脚本复制完成: $ARTIFACT_DIR/check_version.sh"
else
log_warning "scripts/check_version.sh 文件不存在"
fi
# 复制`自动重启`脚本到 artifact 目录
log_info "复制自动重启脚本..."
if [[ -f "scripts/restart_unhealthy.sh" ]]; then
cp "scripts/restart_unhealthy.sh" "$ARTIFACT_DIR/restart_unhealthy.sh"
chmod +x "$ARTIFACT_DIR/restart_unhealthy.sh"
log_success "自动重启脚本复制完成: $ARTIFACT_DIR/restart_unhealthy.sh"
else
log_warning "scripts/restart_unhealthy.sh 文件不存在"
fi
# 复制配置文件到 artifact 目录
log_info "复制配置文件..."
if [[ -f "config/config.env" ]]; then
cp "config/config.env" "$ARTIFACT_DIR/"
log_success "配置文件复制完成: $ARTIFACT_DIR/config.env"
else
log_warning "config 目录不存在,跳过配置文件复制"
fi
# DNS 配置文件不需要复制到版本目录,直接从 FTP 服务器根目录获取
# 复制 deps 目录到 artifact 目录
log_info "复制系统依赖包..."
if [[ -d "deps" ]]; then
cp -r "deps" "$ARTIFACT_DIR/"
log_success "系统依赖包复制完成: $ARTIFACT_DIR/deps"
# 显示deps目录内容
log_info " 依赖包列表:"
find "$ARTIFACT_DIR/deps" -name "*.tar.gz" -exec basename {} \; | while read dep_file; do
log_info " - $dep_file"
done
else
log_warning "deps 目录不存在,跳过依赖包复制"
fi
# 显示打包结果
log_success "打包完成!"
echo
echo "版本: $VERSION"
echo "输出目录: $ARTIFACT_DIR"
echo "包含组件:"
if [[ -f "$ARTIFACT_LIST_FILE" ]]; then
while IFS= read -r line; do
component=$(echo "$line" | cut -d':' -f1)
version=$(echo "$line" | cut -d':' -f2)
echo " - $component v$version"
done < "$ARTIFACT_LIST_FILE"
fi
echo
echo "文件列表:"
ls -la "$ARTIFACT_DIR"
echo
# 清理临时文件
rm -rf "$TEMP_DIR"