feat: 新增GPU测试节点argus-metric-test-gpu-node,测试安装包安装流程及与agent/master组件之间数据准确性;
refs #20
This commit is contained in:
parent
e8a543e1d1
commit
4965a25ea3
@ -4,6 +4,15 @@
|
|||||||
|
|
||||||
set -e
|
set -e
|
||||||
|
|
||||||
|
# Single-instance guard: skip this run when a previous invocation is still
# alive, so overlapping cron triggers never run the health check twice.
PIDFILE="/var/run/check_health.pid"
if [ -f "$PIDFILE" ]; then
    existing_pid="$(cat "$PIDFILE" 2>/dev/null || true)"
    case "$existing_pid" in
        ''|*[!0-9]*)
            # Empty or non-numeric content: treat the file as stale and take
            # over instead of handing garbage to kill.
            ;;
        *)
            # Require a strictly positive PID: `kill -0 0` targets the caller's
            # own process group and always succeeds, which would make every
            # future run believe another instance is active.
            if [ "$existing_pid" -gt 0 ] && kill -0 "$existing_pid" 2>/dev/null; then
                echo "健康检查脚本已在运行中,跳过本次执行" >&2
                exit 0
            fi
            ;;
    esac
fi
echo "$$" > "$PIDFILE"
# Single quotes defer expansion until the trap fires; the inner double quotes
# keep the path as one word.
trap 'rm -f "$PIDFILE"' EXIT
|
||||||
|
|
||||||
# 获取脚本所在目录
|
# 获取脚本所在目录
|
||||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
HEALTH_LOG_FILE="$SCRIPT_DIR/.health_log"
|
HEALTH_LOG_FILE="$SCRIPT_DIR/.health_log"
|
||||||
|
|||||||
@ -200,22 +200,22 @@ parse_version_info() {
|
|||||||
VERSION=$(grep '"version"' "$VERSION_FILE_PATH" | sed 's/.*"version": *"\([^"]*\)".*/\1/')
|
VERSION=$(grep '"version"' "$VERSION_FILE_PATH" | sed 's/.*"version": *"\([^"]*\)".*/\1/')
|
||||||
BUILD_TIME=$(grep '"build_time"' "$VERSION_FILE_PATH" | sed 's/.*"build_time": *"\([^"]*\)".*/\1/')
|
BUILD_TIME=$(grep '"build_time"' "$VERSION_FILE_PATH" | sed 's/.*"build_time": *"\([^"]*\)".*/\1/')
|
||||||
|
|
||||||
# 解析 artifact_list
|
# 解析 artifact_list(跳过字段名本身)
|
||||||
grep -A 100 '"artifact_list"' "$VERSION_FILE_PATH" | grep -E '^\s*"[^"]+":\s*"[^"]+"' | while read line; do
|
grep -A 100 '"artifact_list"' "$VERSION_FILE_PATH" | grep -v '"artifact_list"' | grep -E '^\s*"[^"]+":\s*"[^"]+"' | while read line; do
|
||||||
component=$(echo "$line" | sed 's/.*"\([^"]*\)":\s*"[^"]*".*/\1/')
|
component=$(echo "$line" | sed 's/.*"\([^"]*\)":\s*"[^"]*".*/\1/')
|
||||||
version=$(echo "$line" | sed 's/.*"[^"]*":\s*"\([^"]*\)".*/\1/')
|
version=$(echo "$line" | sed 's/.*"[^"]*":\s*"\([^"]*\)".*/\1/')
|
||||||
echo "$component:$version" >> "$TEMP_DIR/components.txt"
|
echo "$component:$version" >> "$TEMP_DIR/components.txt"
|
||||||
done
|
done
|
||||||
|
|
||||||
# 解析 checksums
|
# 解析 checksums(跳过字段名本身)
|
||||||
grep -A 100 '"checksums"' "$VERSION_FILE_PATH" | grep -E '^\s*"[^"]+":\s*"[^"]+"' | while read line; do
|
grep -A 100 '"checksums"' "$VERSION_FILE_PATH" | grep -v '"checksums"' | grep -E '^\s*"[^"]+":\s*"[^"]+"' | while read line; do
|
||||||
component=$(echo "$line" | sed 's/.*"\([^"]*\)":\s*"[^"]*".*/\1/')
|
component=$(echo "$line" | sed 's/.*"\([^"]*\)":\s*"[^"]*".*/\1/')
|
||||||
checksum=$(echo "$line" | sed 's/.*"[^"]*":\s*"\([^"]*\)".*/\1/')
|
checksum=$(echo "$line" | sed 's/.*"[^"]*":\s*"\([^"]*\)".*/\1/')
|
||||||
echo "$component:$checksum" >> "$TEMP_DIR/checksums.txt"
|
echo "$component:$checksum" >> "$TEMP_DIR/checksums.txt"
|
||||||
done
|
done
|
||||||
|
|
||||||
# 解析 install_order
|
# 解析 install_order(跳过字段名本身,只取数组元素)
|
||||||
grep -A 100 '"install_order"' "$VERSION_FILE_PATH" | grep -E '^\s*"[^"]+"' | while read line; do
|
grep -A 100 '"install_order"' "$VERSION_FILE_PATH" | grep -v '"install_order"' | grep -E '^\s*"[^"]+"' | while read line; do
|
||||||
component=$(echo "$line" | sed 's/.*"\([^"]*\)".*/\1/')
|
component=$(echo "$line" | sed 's/.*"\([^"]*\)".*/\1/')
|
||||||
echo "$component" >> "$TEMP_DIR/install_order.txt"
|
echo "$component" >> "$TEMP_DIR/install_order.txt"
|
||||||
done
|
done
|
||||||
@ -317,6 +317,30 @@ create_install_dirs() {
|
|||||||
log_success "安装目录创建完成: $INSTALL_DIR"
|
log_success "安装目录创建完成: $INSTALL_DIR"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Map the host OS release to the name of the matching dependency directory.
#
# Arguments:
#   $1 - optional path to an os-release style file (default: /etc/os-release)
# Outputs:
#   stdout: "ubuntu20" for VERSION_ID 20.04, otherwise "ubuntu22" (22.04 and
#           every unrecognised release fall back to the ubuntu22 package set)
#   stderr: diagnostics only, so `$(get_system_version)` captures just the name
# Returns:
#   0 on success, 1 when the os-release file does not exist
get_system_version() {
    local os_release_file="${1:-/etc/os-release}"

    if [[ ! -f "$os_release_file" ]]; then
        log_error "无法检测操作系统版本" >&2
        return 1
    fi

    # Source inside a subshell: os-release defines generic names such as
    # VERSION, NAME and ID, which would otherwise overwrite same-named
    # variables of the calling script when this function is called directly.
    local version_id
    version_id="$(
        source "$os_release_file" >/dev/null
        printf '%s' "${VERSION_ID:-}"
    )"

    case "$version_id" in
        20.04)
            echo "ubuntu20"
            ;;
        22.04)
            echo "ubuntu22"
            ;;
        *)
            log_warning "未识别的Ubuntu版本: $version_id,尝试使用ubuntu22" >&2
            echo "ubuntu22"
            ;;
    esac
}
|
||||||
|
|
||||||
# 安装系统依赖包
|
# 安装系统依赖包
|
||||||
install_system_deps() {
|
install_system_deps() {
|
||||||
log_info "检查系统依赖包..."
|
log_info "检查系统依赖包..."
|
||||||
@ -330,21 +354,43 @@ install_system_deps() {
|
|||||||
return 0
|
return 0
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# 检查是否有tar.gz文件
|
# 获取系统版本对应的依赖目录
|
||||||
|
local system_version=$(get_system_version)
|
||||||
|
local version_deps_dir="$deps_dir/$system_version"
|
||||||
|
|
||||||
|
log_info "检测到系统版本: $system_version"
|
||||||
|
|
||||||
|
# 检查版本特定的依赖目录是否存在
|
||||||
|
if [[ ! -d "$version_deps_dir" ]]; then
|
||||||
|
log_warning "未找到 $system_version 版本的依赖目录: $version_deps_dir"
|
||||||
|
# 回退到旧的逻辑,检查根deps目录
|
||||||
local deps_count=$(find "$deps_dir" -name "*.tar.gz" | wc -l)
|
local deps_count=$(find "$deps_dir" -name "*.tar.gz" | wc -l)
|
||||||
if [[ $deps_count -eq 0 ]]; then
|
if [[ $deps_count -eq 0 ]]; then
|
||||||
log_info "deps 目录中没有 tar.gz 文件,跳过系统依赖包安装"
|
log_info "deps 目录中没有 tar.gz 文件,跳过系统依赖包安装"
|
||||||
return 0
|
return 0
|
||||||
fi
|
fi
|
||||||
|
version_deps_dir="$deps_dir"
|
||||||
|
else
|
||||||
|
# 检查版本目录中是否有tar.gz文件
|
||||||
|
local deps_count=$(find "$version_deps_dir" -name "*.tar.gz" | wc -l)
|
||||||
|
if [[ $deps_count -eq 0 ]]; then
|
||||||
|
log_info "$system_version 版本目录中没有 tar.gz 文件,跳过系统依赖包安装"
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
log_info "找到 $deps_count 个系统依赖包,开始安装..."
|
log_info "找到 $system_version 版本的依赖包,开始安装..."
|
||||||
|
|
||||||
# 创建临时目录用于解压依赖包
|
# 创建临时目录用于解压依赖包
|
||||||
local deps_temp_dir="$TEMP_DIR/deps"
|
local deps_temp_dir="${TEMP_DIR:-/tmp}/deps"
|
||||||
mkdir -p "$deps_temp_dir"
|
mkdir -p "$deps_temp_dir"
|
||||||
|
|
||||||
|
# 定义要检查的核心依赖
|
||||||
|
local CORE_DEPS=(jq cron curl)
|
||||||
|
local FAILED_DEPS=()
|
||||||
|
|
||||||
# 处理每个tar.gz文件
|
# 处理每个tar.gz文件
|
||||||
find "$deps_dir" -name "*.tar.gz" | while read tar_file; do
|
find "$version_deps_dir" -name "*.tar.gz" | while read tar_file; do
|
||||||
local tar_basename=$(basename "$tar_file")
|
local tar_basename=$(basename "$tar_file")
|
||||||
local extract_name="${tar_basename%.tar.gz}"
|
local extract_name="${tar_basename%.tar.gz}"
|
||||||
|
|
||||||
@ -362,40 +408,61 @@ install_system_deps() {
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
# 进入解压目录,查找deb包
|
# 进入解压目录,查找deb包
|
||||||
cd "$extract_dir"
|
cd "$extract_dir" || continue
|
||||||
local deb_count=$(find . -name "*.deb" | wc -l)
|
local deb_files=(*.deb)
|
||||||
|
if [[ ${#deb_files[@]} -gt 0 ]]; then
|
||||||
|
log_info " 找到 ${#deb_files[@]} 个 deb 包,开始安装..."
|
||||||
|
|
||||||
if [[ $deb_count -gt 0 ]]; then
|
for deb in "${deb_files[@]}"; do
|
||||||
log_info " 找到 $deb_count 个 deb 包,开始安装..."
|
local pkg_name
|
||||||
|
pkg_name=$(dpkg-deb -f "$deb" Package 2>/dev/null)
|
||||||
|
|
||||||
# 1. 先尝试安装所有deb包
|
# 如果已安装,则跳过
|
||||||
log_info " 第1步:批量安装deb包..."
|
if dpkg -s "$pkg_name" &>/dev/null; then
|
||||||
if dpkg -i *.deb 2>/dev/null; then
|
log_success " $pkg_name 已安装,跳过"
|
||||||
log_success " 所有deb包安装成功"
|
continue
|
||||||
|
fi
|
||||||
|
|
||||||
|
# 尝试安装
|
||||||
|
log_info " 安装 $pkg_name..."
|
||||||
|
if DEBIAN_FRONTEND=noninteractive dpkg -i "$deb" &>/dev/null; then
|
||||||
|
log_success " $pkg_name 安装成功"
|
||||||
else
|
else
|
||||||
log_warning " 部分deb包安装失败,可能存在依赖问题"
|
log_warning " $pkg_name 安装失败,尝试修复依赖..."
|
||||||
|
if DEBIAN_FRONTEND=noninteractive apt-get install -f -y &>/dev/null; then
|
||||||
# 2. 使用apt-get修复依赖
|
if dpkg -s "$pkg_name" &>/dev/null; then
|
||||||
log_info " 第2步:修复依赖关系..."
|
log_success " $pkg_name 修复安装成功"
|
||||||
if apt-get install -f -y; then
|
|
||||||
log_success " 依赖关系修复完成"
|
|
||||||
else
|
else
|
||||||
log_error " 依赖关系修复失败"
|
log_error " $pkg_name 仍未安装成功"
|
||||||
# 继续处理其他包,不退出
|
FAILED_DEPS+=("$pkg_name")
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
log_error " $pkg_name 自动修复失败"
|
||||||
|
FAILED_DEPS+=("$pkg_name")
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
done
|
||||||
else
|
else
|
||||||
log_info " $tar_basename 中没有找到deb包,跳过"
|
log_info " $tar_basename 中没有找到deb包,跳过"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# 返回到依赖临时目录
|
# 返回到依赖临时目录
|
||||||
cd "$deps_temp_dir"
|
cd "$deps_temp_dir" || continue
|
||||||
done
|
done
|
||||||
|
|
||||||
# 检查并启动 cron 服务
|
# 检查并启动 cron 服务
|
||||||
start_cron_service
|
start_cron_service
|
||||||
|
|
||||||
log_success "系统依赖包安装完成"
|
# 总结安装结果
|
||||||
|
if [[ ${#FAILED_DEPS[@]} -gt 0 ]]; then
|
||||||
|
log_error "以下系统依赖未能成功安装,安装终止,请手动安装后重试:"
|
||||||
|
for f in "${FAILED_DEPS[@]}"; do
|
||||||
|
echo " - $f"
|
||||||
|
done
|
||||||
|
exit 1
|
||||||
|
else
|
||||||
|
log_success "系统依赖包安装完成,全部就绪"
|
||||||
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
# 启动 cron 服务
|
# 启动 cron 服务
|
||||||
@ -637,6 +704,18 @@ EOF
|
|||||||
log_success "安装记录已创建: $install_record_file"
|
log_success "安装记录已创建: $install_record_file"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Report whether a crontab snapshot already mentions a given task.
#
# Arguments:
#   $1 - literal text identifying the task (e.g. a script file name)
#   $2 - path to a file holding crontab contents
# Returns:
#   0 when some line of the file contains the text, 1 otherwise (including
#   when the file cannot be read)
check_cron_task_exists() {
    local task_pattern="$1"
    local temp_cron="$2"

    # -F matches the text literally, so the "." in names like check_health.sh
    # is not treated as a regex wildcard; "--" keeps a pattern that starts
    # with "-" from being parsed as a grep option.
    if grep -qF -- "$task_pattern" "$temp_cron"; then
        return 0
    fi
    return 1
}
|
||||||
|
|
||||||
# 设置健康检查定时任务
|
# 设置健康检查定时任务
|
||||||
setup_health_check_cron() {
|
setup_health_check_cron() {
|
||||||
log_info "设置健康检查定时任务..."
|
log_info "设置健康检查定时任务..."
|
||||||
@ -661,7 +740,7 @@ setup_health_check_cron() {
|
|||||||
crontab -l 2>/dev/null > "$temp_cron" || touch "$temp_cron"
|
crontab -l 2>/dev/null > "$temp_cron" || touch "$temp_cron"
|
||||||
|
|
||||||
# 检查并删除旧的健康检查任务
|
# 检查并删除旧的健康检查任务
|
||||||
if grep -q "check_health.sh" "$temp_cron"; then
|
if check_cron_task_exists "check_health.sh" "$temp_cron"; then
|
||||||
log_info "发现旧的健康检查定时任务,正在更新..."
|
log_info "发现旧的健康检查定时任务,正在更新..."
|
||||||
# 删除所有包含check_health.sh的行
|
# 删除所有包含check_health.sh的行
|
||||||
grep -v "check_health.sh" "$temp_cron" > "$temp_cron.new"
|
grep -v "check_health.sh" "$temp_cron" > "$temp_cron.new"
|
||||||
@ -716,7 +795,7 @@ setup_dns_sync_cron() {
|
|||||||
crontab -l 2>/dev/null > "$temp_cron" || touch "$temp_cron"
|
crontab -l 2>/dev/null > "$temp_cron" || touch "$temp_cron"
|
||||||
|
|
||||||
# 检查并删除旧的 DNS 同步任务
|
# 检查并删除旧的 DNS 同步任务
|
||||||
if grep -q "sync_dns.sh" "$temp_cron"; then
|
if check_cron_task_exists "sync_dns.sh" "$temp_cron"; then
|
||||||
log_info "发现旧的 DNS 同步定时任务,正在更新..."
|
log_info "发现旧的 DNS 同步定时任务,正在更新..."
|
||||||
# 删除所有包含sync_dns.sh的行
|
# 删除所有包含sync_dns.sh的行
|
||||||
grep -v "sync_dns.sh" "$temp_cron" > "$temp_cron.new"
|
grep -v "sync_dns.sh" "$temp_cron" > "$temp_cron.new"
|
||||||
@ -724,16 +803,15 @@ setup_dns_sync_cron() {
|
|||||||
log_info "旧的 DNS 同步定时任务已删除"
|
log_info "旧的 DNS 同步定时任务已删除"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# 添加新的定时任务(每30秒执行一次)
|
# 添加新的定时任务(每1分钟执行一次)
|
||||||
# 直接使用版本目录中的 DNS 同步脚本
|
# 直接使用版本目录中的 DNS 同步脚本
|
||||||
echo "# Argus-Metrics DNS 同步定时任务" >> "$temp_cron"
|
echo "# Argus-Metrics DNS 同步定时任务" >> "$temp_cron"
|
||||||
echo "* * * * * $sync_dns_script >> $INSTALL_DIR/.dns_sync.log 2>&1" >> "$temp_cron"
|
echo "* * * * * $sync_dns_script >> $INSTALL_DIR/.dns_sync.log 2>&1" >> "$temp_cron"
|
||||||
echo "* * * * * sleep 30; $sync_dns_script >> $INSTALL_DIR/.dns_sync.log 2>&1" >> "$temp_cron"
|
|
||||||
|
|
||||||
# 安装新的crontab
|
# 安装新的crontab
|
||||||
if crontab "$temp_cron"; then
|
if crontab "$temp_cron"; then
|
||||||
log_success "DNS 同步定时任务设置成功"
|
log_success "DNS 同步定时任务设置成功"
|
||||||
log_info " 执行频率: 每30秒"
|
log_info " 执行频率: 每1分钟"
|
||||||
log_info " 日志文件: $INSTALL_DIR/.dns_sync.log"
|
log_info " 日志文件: $INSTALL_DIR/.dns_sync.log"
|
||||||
log_info " 查看定时任务: crontab -l"
|
log_info " 查看定时任务: crontab -l"
|
||||||
log_info " 删除定时任务: crontab -e"
|
log_info " 删除定时任务: crontab -e"
|
||||||
@ -771,7 +849,7 @@ setup_version_check_cron() {
|
|||||||
crontab -l > "$temp_cron" 2>/dev/null || touch "$temp_cron"
|
crontab -l > "$temp_cron" 2>/dev/null || touch "$temp_cron"
|
||||||
|
|
||||||
# 检查是否已存在版本校验定时任务
|
# 检查是否已存在版本校验定时任务
|
||||||
if grep -q "check_version.sh" "$temp_cron"; then
|
if check_cron_task_exists "check_version.sh" "$temp_cron"; then
|
||||||
log_info "发现旧的版本校验定时任务,正在更新..."
|
log_info "发现旧的版本校验定时任务,正在更新..."
|
||||||
# 删除所有包含check_version.sh的行
|
# 删除所有包含check_version.sh的行
|
||||||
grep -v "check_version.sh" "$temp_cron" > "$temp_cron.new"
|
grep -v "check_version.sh" "$temp_cron" > "$temp_cron.new"
|
||||||
@ -824,7 +902,7 @@ setup_restart_cron() {
|
|||||||
crontab -l > "$temp_cron" 2>/dev/null || touch "$temp_cron"
|
crontab -l > "$temp_cron" 2>/dev/null || touch "$temp_cron"
|
||||||
|
|
||||||
# 检查是否已存在自动重启定时任务
|
# 检查是否已存在自动重启定时任务
|
||||||
if grep -q "restart_unhealthy.sh" "$temp_cron"; then
|
if check_cron_task_exists "restart_unhealthy.sh" "$temp_cron"; then
|
||||||
log_info "发现旧的自动重启定时任务,正在更新..."
|
log_info "发现旧的自动重启定时任务,正在更新..."
|
||||||
# 删除所有包含restart_unhealthy.sh的行
|
# 删除所有包含restart_unhealthy.sh的行
|
||||||
grep -v "restart_unhealthy.sh" "$temp_cron" > "$temp_cron.new"
|
grep -v "restart_unhealthy.sh" "$temp_cron" > "$temp_cron.new"
|
||||||
@ -885,9 +963,9 @@ main() {
|
|||||||
check_system
|
check_system
|
||||||
find_version_file
|
find_version_file
|
||||||
create_install_dirs
|
create_install_dirs
|
||||||
|
install_system_deps
|
||||||
parse_version_info
|
parse_version_info
|
||||||
verify_checksums
|
verify_checksums
|
||||||
install_system_deps
|
|
||||||
install_components
|
install_components
|
||||||
copy_config_files
|
copy_config_files
|
||||||
create_install_record
|
create_install_record
|
||||||
@ -895,6 +973,20 @@ main() {
|
|||||||
setup_dns_sync_cron
|
setup_dns_sync_cron
|
||||||
setup_version_check_cron
|
setup_version_check_cron
|
||||||
setup_restart_cron
|
setup_restart_cron
|
||||||
|
|
||||||
|
# 注释掉立即执行健康检查,避免与cron任务重复执行
|
||||||
|
# log_info "立即执行一次健康检查..."
|
||||||
|
# local check_health_script="$INSTALL_DIR/check_health.sh"
|
||||||
|
# if [[ -f "$check_health_script" ]]; then
|
||||||
|
# if "$check_health_script" >> "$INSTALL_DIR/.health_check.log" 2>&1; then
|
||||||
|
# log_success "健康检查执行完成"
|
||||||
|
# else
|
||||||
|
# log_warning "健康检查执行失败,请检查日志: $INSTALL_DIR/.health_check.log"
|
||||||
|
# fi
|
||||||
|
# else
|
||||||
|
# log_warning "健康检查脚本不存在: $check_health_script"
|
||||||
|
# fi
|
||||||
|
|
||||||
show_install_info
|
show_install_info
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -29,26 +29,68 @@ log_error() {
|
|||||||
show_help() {
|
show_help() {
|
||||||
echo "Argus-Metric Artifact 发布脚本"
|
echo "Argus-Metric Artifact 发布脚本"
|
||||||
echo
|
echo
|
||||||
echo "用法: $0 <版本号>"
|
echo "用法: $0 <版本号> [选项]"
|
||||||
echo
|
echo
|
||||||
echo "参数:"
|
echo "参数:"
|
||||||
echo " <版本号> 要发布的版本号,对应 artifact 目录中的版本"
|
echo " <版本号> 要发布的版本号,对应 artifact 目录中的版本"
|
||||||
echo
|
echo
|
||||||
|
echo "选项:"
|
||||||
|
echo " --output-dir <路径> 指定输出目录 (默认: /private/argus/ftp/share/)"
|
||||||
|
echo " --owner <uid:gid> 指定文件所有者 (默认: 2133:2015)"
|
||||||
|
echo " -h, --help 显示此帮助信息"
|
||||||
|
echo
|
||||||
echo "示例:"
|
echo "示例:"
|
||||||
echo " $0 1.20.0 # 发布 1.20.0 版本"
|
echo " $0 1.20.0 # 使用默认配置发布"
|
||||||
|
echo " $0 1.20.0 --output-dir /tmp/publish # 指定输出目录"
|
||||||
|
echo " $0 1.20.0 --owner 1000:1000 # 指定文件所有者"
|
||||||
|
echo " $0 1.20.0 --output-dir /srv/ftp --owner root:root # 同时指定两者"
|
||||||
echo
|
echo
|
||||||
}
|
}
|
||||||
|
|
||||||
# 检查参数
|
# 默认配置
|
||||||
if [[ $# -ne 1 ]]; then
|
DEFAULT_PUBLISH_DIR="/private/argus/ftp/share/"
|
||||||
|
DEFAULT_OWNER="2133:2015"
|
||||||
|
|
||||||
|
# 解析参数
|
||||||
|
VERSION=""
|
||||||
|
PUBLISH_DIR="$DEFAULT_PUBLISH_DIR"
|
||||||
|
OWNER="$DEFAULT_OWNER"
|
||||||
|
|
||||||
|
# Parse the command line: one positional <version> plus optional flags, in any
# order. Results land in VERSION, PUBLISH_DIR and OWNER.
while [[ $# -gt 0 ]]; do
    case "$1" in
        -h|--help)
            show_help
            exit 0
            ;;
        --output-dir|--owner)
            # Both options take a value. If the option is the last argument,
            # `shift 2` fails without consuming anything, so the loop would
            # spin forever (or the script would die silently under `set -e`).
            if [[ $# -lt 2 || -z "$2" ]]; then
                log_error "选项 $1 缺少参数值"
                show_help
                exit 1
            fi
            if [[ "$1" == "--output-dir" ]]; then
                PUBLISH_DIR="$2"
            else
                OWNER="$2"
            fi
            shift 2
            ;;
        -*)
            # An unrecognised option must not be silently taken as the version.
            log_error "未知参数: $1"
            show_help
            exit 1
            ;;
        *)
            # The first bare word is the version; any further one is an error.
            if [[ -z "$VERSION" ]]; then
                VERSION="$1"
                shift
            else
                log_error "未知参数: $1"
                show_help
                exit 1
            fi
            ;;
    esac
done
|
||||||
|
|
||||||
|
# 检查版本号是否提供
|
||||||
|
if [[ -z "$VERSION" ]]; then
|
||||||
log_error "请提供版本号参数"
|
log_error "请提供版本号参数"
|
||||||
show_help
|
show_help
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
VERSION="$1"
|
|
||||||
ARTIFACT_DIR="artifact/$VERSION"
|
ARTIFACT_DIR="artifact/$VERSION"
|
||||||
PUBLISH_DIR="/Users/sundapeng/Project/nlp/aiops/client-plugins/all-in-one/publish/"
|
|
||||||
|
|
||||||
# 检查版本目录是否存在
|
# 检查版本目录是否存在
|
||||||
if [[ ! -d "$ARTIFACT_DIR" ]]; then
|
if [[ ! -d "$ARTIFACT_DIR" ]]; then
|
||||||
@ -57,10 +99,12 @@ if [[ ! -d "$ARTIFACT_DIR" ]]; then
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
log_info "开始发布版本: $VERSION"
|
log_info "开始发布版本: $VERSION"
|
||||||
|
log_info "输出目录: $PUBLISH_DIR"
|
||||||
|
log_info "文件所有者: $OWNER"
|
||||||
|
|
||||||
# 确保发布目录存在
|
# 确保发布目录存在
|
||||||
log_info "确保发布目录存在: $PUBLISH_DIR"
|
log_info "确保发布目录存在: $PUBLISH_DIR"
|
||||||
mkdir -p "$PUBLISH_DIR"
|
sudo mkdir -p "$PUBLISH_DIR"
|
||||||
|
|
||||||
# 创建临时目录用于打包
|
# 创建临时目录用于打包
|
||||||
TEMP_PACKAGE_DIR="/tmp/argus-metric-package-$$"
|
TEMP_PACKAGE_DIR="/tmp/argus-metric-package-$$"
|
||||||
@ -164,20 +208,26 @@ fi
|
|||||||
TAR_NAME="argus-metric_$(echo $VERSION | tr '.' '_').tar.gz"
|
TAR_NAME="argus-metric_$(echo $VERSION | tr '.' '_').tar.gz"
|
||||||
log_info "创建发布包: $TAR_NAME"
|
log_info "创建发布包: $TAR_NAME"
|
||||||
cd "$TEMP_PACKAGE_DIR"
|
cd "$TEMP_PACKAGE_DIR"
|
||||||
tar -czf "$PUBLISH_DIR/$TAR_NAME" *
|
sudo tar -czf "$PUBLISH_DIR/$TAR_NAME" *
|
||||||
cd - > /dev/null
|
cd - > /dev/null
|
||||||
|
|
||||||
|
# 设置文件所有者
|
||||||
|
log_info "设置文件所有者为: $OWNER"
|
||||||
|
sudo chown "$OWNER" "$PUBLISH_DIR/$TAR_NAME"
|
||||||
|
|
||||||
# 清理临时目录
|
# 清理临时目录
|
||||||
rm -rf "$TEMP_PACKAGE_DIR"
|
rm -rf "$TEMP_PACKAGE_DIR"
|
||||||
|
|
||||||
# 更新 LATEST_VERSION 文件
|
# 更新 LATEST_VERSION 文件
|
||||||
log_info "更新 LATEST_VERSION 文件..."
|
log_info "更新 LATEST_VERSION 文件..."
|
||||||
echo "$VERSION" > "$PUBLISH_DIR/LATEST_VERSION"
|
echo "$VERSION" | sudo tee "$PUBLISH_DIR/LATEST_VERSION" > /dev/null
|
||||||
|
sudo chown "$OWNER" "$PUBLISH_DIR/LATEST_VERSION"
|
||||||
|
|
||||||
# 复制 DNS 配置文件到发布目录根目录(直接从 config 目录复制)
|
# 复制 DNS 配置文件到发布目录根目录(直接从 config 目录复制)
|
||||||
if [[ -f "config/dns.conf" ]]; then
|
if [[ -f "config/dns.conf" ]]; then
|
||||||
log_info "复制 DNS 配置文件到发布目录根目录..."
|
log_info "复制 DNS 配置文件到发布目录根目录..."
|
||||||
cp "config/dns.conf" "$PUBLISH_DIR/"
|
sudo cp "config/dns.conf" "$PUBLISH_DIR/"
|
||||||
|
sudo chown "$OWNER" "$PUBLISH_DIR/dns.conf"
|
||||||
log_success "DNS 配置文件复制完成: $PUBLISH_DIR/dns.conf"
|
log_success "DNS 配置文件复制完成: $PUBLISH_DIR/dns.conf"
|
||||||
else
|
else
|
||||||
log_warning "未找到 config/dns.conf 文件,跳过 DNS 配置文件复制"
|
log_warning "未找到 config/dns.conf 文件,跳过 DNS 配置文件复制"
|
||||||
@ -186,7 +236,8 @@ fi
|
|||||||
# 复制 setup.sh 到发布目录
|
# 复制 setup.sh 到发布目录
|
||||||
if [[ -f "scripts/setup.sh" ]]; then
|
if [[ -f "scripts/setup.sh" ]]; then
|
||||||
log_info "复制 setup.sh 到发布目录..."
|
log_info "复制 setup.sh 到发布目录..."
|
||||||
cp "scripts/setup.sh" "$PUBLISH_DIR/"
|
sudo cp "scripts/setup.sh" "$PUBLISH_DIR/"
|
||||||
|
sudo chown "$OWNER" "$PUBLISH_DIR/setup.sh"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# 显示发布结果
|
# 显示发布结果
|
||||||
|
|||||||
@ -2,6 +2,15 @@
|
|||||||
|
|
||||||
# 此脚本会检查各组件的健康状态,并重启不健康的组件
|
# 此脚本会检查各组件的健康状态,并重启不健康的组件
|
||||||
|
|
||||||
|
# Single-instance guard: skip this run when a previous invocation is still
# alive, so overlapping cron triggers never restart components concurrently.
PIDFILE="/var/run/restart_unhealthy.pid"
if [ -f "$PIDFILE" ]; then
    existing_pid="$(cat "$PIDFILE" 2>/dev/null || true)"
    case "$existing_pid" in
        ''|*[!0-9]*)
            # Empty or non-numeric content: treat the file as stale and take
            # over instead of handing garbage to kill.
            ;;
        *)
            # Require a strictly positive PID: `kill -0 0` targets the caller's
            # own process group and always succeeds, which would make every
            # future run believe another instance is active.
            if [ "$existing_pid" -gt 0 ] && kill -0 "$existing_pid" 2>/dev/null; then
                echo "自动重启脚本已在运行中,跳过本次执行" >&2
                exit 0
            fi
            ;;
    esac
fi
echo "$$" > "$PIDFILE"
# Single quotes defer expansion until the trap fires; the inner double quotes
# keep the path as one word.
trap 'rm -f "$PIDFILE"' EXIT
|
||||||
|
|
||||||
# 获取脚本所在目录
|
# 获取脚本所在目录
|
||||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
INSTALL_RECORD_FILE="$SCRIPT_DIR/.install_record"
|
INSTALL_RECORD_FILE="$SCRIPT_DIR/.install_record"
|
||||||
|
|||||||
@ -1,257 +1,143 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
# DNS 同步脚本
|
|
||||||
# 比较 FTP 根目录的 dns.conf 和本地的 dns.conf,如果有变化则同步到 /etc/resolv.conf
|
|
||||||
|
|
||||||
set -e
|
set -e
|
||||||
|
|
||||||
# 颜色定义
|
# 颜色
|
||||||
RED='\033[0;31m'
|
RED='\033[0;31m'; GREEN='\033[0;32m'; YELLOW='\033[1;33m'; BLUE='\033[0;34m'; NC='\033[0m'
|
||||||
GREEN='\033[0;32m'
|
|
||||||
YELLOW='\033[1;33m'
|
|
||||||
BLUE='\033[0;34m'
|
|
||||||
NC='\033[0m' # No Color
|
|
||||||
|
|
||||||
# 日志函数 - 输出到 stderr 避免影响函数返回值
|
# 日志函数
|
||||||
log_info() {
|
log_info() { echo -e "${BLUE}[INFO]${NC} $1" >&2; }
|
||||||
echo -e "${BLUE}[INFO]${NC} $1" >&2
|
log_success() { echo -e "${GREEN}[SUCCESS]${NC} $1" >&2; }
|
||||||
}
|
log_warning() { echo -e "${YELLOW}[WARNING]${NC} $1" >&2; }
|
||||||
|
log_error() { echo -e "${RED}[ERROR]${NC} $1" >&2; }
|
||||||
|
|
||||||
log_success() {
|
|
||||||
echo -e "${GREEN}[SUCCESS]${NC} $1" >&2
|
|
||||||
}
|
|
||||||
|
|
||||||
log_warning() {
|
|
||||||
echo -e "${YELLOW}[WARNING]${NC} $1" >&2
|
|
||||||
}
|
|
||||||
|
|
||||||
log_error() {
|
|
||||||
echo -e "${RED}[ERROR]${NC} $1" >&2
|
|
||||||
}
|
|
||||||
|
|
||||||
# 获取脚本所在目录
|
|
||||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
LOCAL_DNS_CONF="/opt/argus-metric/dns.conf"
|
LOCAL_DNS_CONF="/opt/argus-metric/dns.conf"
|
||||||
REMOTE_DNS_CONF_URL=""
|
|
||||||
RESOLV_CONF="/etc/resolv.conf"
|
RESOLV_CONF="/etc/resolv.conf"
|
||||||
|
ALT_RESOLV_CONF="/run/resolv.conf"
|
||||||
LOG_FILE="/opt/argus-metric/.dns_sync.log"
|
LOG_FILE="/opt/argus-metric/.dns_sync.log"
|
||||||
|
REMOTE_DNS_CONF_URL=""
|
||||||
|
|
||||||
# 从环境变量或配置文件获取 FTP 服务器信息
|
# 获取 FTP 配置
|
||||||
get_ftp_config() {
|
get_ftp_config() {
|
||||||
# 优先从环境变量获取配置
|
|
||||||
log_info "获取 FTP 配置信息..."
|
log_info "获取 FTP 配置信息..."
|
||||||
|
|
||||||
# 如果环境变量中没有设置,则尝试从配置文件读取
|
|
||||||
if [[ -z "$FTP_SERVER" || -z "$FTP_USER" || -z "$FTP_PASSWORD" ]]; then
|
if [[ -z "$FTP_SERVER" || -z "$FTP_USER" || -z "$FTP_PASSWORD" ]]; then
|
||||||
local config_file="$SCRIPT_DIR/config.env"
|
[[ -f "$SCRIPT_DIR/config.env" ]] && source "$SCRIPT_DIR/config.env"
|
||||||
if [[ -f "$config_file" ]]; then
|
|
||||||
log_info "从配置文件读取 FTP 配置: $config_file"
|
|
||||||
source "$config_file"
|
|
||||||
fi
|
fi
|
||||||
else
|
|
||||||
log_info "使用环境变量中的 FTP 配置"
|
|
||||||
fi
|
|
||||||
|
|
||||||
# 设置默认值(如果环境变量和配置文件都没有设置)
|
|
||||||
FTP_SERVER="${FTP_SERVER:-localhost}"
|
FTP_SERVER="${FTP_SERVER:-localhost}"
|
||||||
FTP_USER="${FTP_USER:-ftpuser}"
|
FTP_USER="${FTP_USER:-ftpuser}"
|
||||||
FTP_PASSWORD="${FTP_PASSWORD:-ZGClab1234!}"
|
FTP_PASSWORD="${FTP_PASSWORD:-ZGClab1234!}"
|
||||||
|
|
||||||
# 构建远程 DNS 配置文件 URL
|
|
||||||
REMOTE_DNS_CONF_URL="ftp://${FTP_USER}:${FTP_PASSWORD}@${FTP_SERVER}/dns.conf"
|
REMOTE_DNS_CONF_URL="ftp://${FTP_USER}:${FTP_PASSWORD}@${FTP_SERVER}/dns.conf"
|
||||||
|
|
||||||
log_info "FTP 配置来源: ${FTP_CONFIG_SOURCE:-环境变量/配置文件}"
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# 下载远程 DNS 配置文件
|
# 下载远程 dns.conf
|
||||||
download_remote_dns_conf() {
|
download_remote_dns_conf() {
|
||||||
local temp_file="/tmp/dns.conf.remote.$$"
|
local tmp="/tmp/dns.remote.$$"
|
||||||
|
|
||||||
log_info "从 FTP 服务器下载 DNS 配置文件..."
|
|
||||||
log_info "远程地址: $REMOTE_DNS_CONF_URL"
|
|
||||||
log_info "FTP 服务器: $FTP_SERVER"
|
|
||||||
log_info "FTP 用户: $FTP_USER"
|
|
||||||
|
|
||||||
# 先测试 FTP 连接
|
|
||||||
log_info "测试 FTP 连接..."
|
log_info "测试 FTP 连接..."
|
||||||
if curl -u "${FTP_USER}:${FTP_PASSWORD}" -sfI "ftp://${FTP_SERVER}/" >/dev/null 2>&1; then
|
if ! curl -u "${FTP_USER}:${FTP_PASSWORD}" -sfI "ftp://${FTP_SERVER}/" >/dev/null; then
|
||||||
log_success "FTP 服务器连接成功"
|
log_error "无法连接到 FTP 服务器: $FTP_SERVER"; return 1
|
||||||
else
|
|
||||||
log_error "无法连接到 FTP 服务器: $FTP_SERVER"
|
|
||||||
log_error "请检查:"
|
|
||||||
log_error " 1. FTP 服务器是否运行"
|
|
||||||
log_error " 2. 网络连接是否正常"
|
|
||||||
log_error " 3. 服务器地址是否正确"
|
|
||||||
return 1
|
|
||||||
fi
|
fi
|
||||||
|
if ! curl -u "${FTP_USER}:${FTP_PASSWORD}" -sf "ftp://${FTP_SERVER}/dns.conf" -o "$tmp" 2>/dev/null; then
|
||||||
# 测试 dns.conf 文件是否存在
|
log_error "下载 dns.conf 失败"; rm -f "$tmp"; return 1
|
||||||
log_info "检查远程 dns.conf 文件是否存在..."
|
|
||||||
if curl -u "${FTP_USER}:${FTP_PASSWORD}" -sfI "ftp://${FTP_SERVER}/dns.conf" >/dev/null 2>&1; then
|
|
||||||
log_success "远程 dns.conf 文件存在"
|
|
||||||
else
|
|
||||||
log_error "远程 dns.conf 文件不存在或无法访问"
|
|
||||||
log_error "请检查 FTP 服务器根目录下是否有 dns.conf 文件"
|
|
||||||
return 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
# 尝试下载文件
|
|
||||||
log_info "开始下载 dns.conf 文件..."
|
|
||||||
if curl -u "${FTP_USER}:${FTP_PASSWORD}" -sf "ftp://${FTP_SERVER}/dns.conf" -o "$temp_file" 2>/dev/null; then
|
|
||||||
log_success "远程 DNS 配置文件下载成功"
|
|
||||||
echo "$temp_file"
|
|
||||||
else
|
|
||||||
log_error "下载 dns.conf 文件失败"
|
|
||||||
log_error "尝试手动测试命令:"
|
|
||||||
log_error " curl -u ${FTP_USER}:${FTP_PASSWORD} ftp://${FTP_SERVER}/dns.conf"
|
|
||||||
rm -f "$temp_file"
|
|
||||||
return 1
|
|
||||||
fi
|
fi
|
||||||
|
echo "$tmp"
|
||||||
}
|
}
|
||||||
|
|
||||||
# 比较两个文件是否相同
|
# 文件比较
|
||||||
compare_files() {
|
compare_files() { diff -q "$1" "$2" >/dev/null 2>&1; }
|
||||||
local file1="$1"
|
|
||||||
local file2="$2"
|
|
||||||
|
|
||||||
if [[ ! -f "$file1" || ! -f "$file2" ]]; then
|
# 从 dns.conf 提取有效 IP
|
||||||
return 1
|
get_dns_ips() {
|
||||||
fi
|
grep -Eo '^[0-9]{1,3}(\.[0-9]{1,3}){3}$' "$1" | sort -u
|
||||||
|
|
||||||
# 使用 diff 比较文件内容
|
|
||||||
if diff -q "$file1" "$file2" >/dev/null 2>&1; then
|
|
||||||
return 0 # 文件相同
|
|
||||||
else
|
|
||||||
return 1 # 文件不同
|
|
||||||
fi
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# 将 DNS 配置添加到 /etc/resolv.conf 开头
|
# 安全更新 resolv.conf(保留符号链接)
|
||||||
update_resolv_conf() {
|
update_resolv_conf() {
|
||||||
local dns_conf_file="$1"
|
local dns_conf="$1"
|
||||||
|
local dns_ips
|
||||||
|
mapfile -t dns_ips < <(get_dns_ips "$dns_conf")
|
||||||
|
[[ ${#dns_ips[@]} -eq 0 ]] && { log_warning "未检测到有效 DNS"; return; }
|
||||||
|
|
||||||
log_info "更新 /etc/resolv.conf 文件..."
|
local target_file="$RESOLV_CONF"
|
||||||
|
if [[ ! -w "$RESOLV_CONF" ]]; then
|
||||||
|
log_warning "/etc/resolv.conf 不可写,使用兜底路径 $ALT_RESOLV_CONF"
|
||||||
|
target_file="$ALT_RESOLV_CONF"
|
||||||
|
fi
|
||||||
|
|
||||||
# 创建临时文件
|
local temp="/tmp/resolv.new.$$"
|
||||||
local temp_resolv="/tmp/resolv.conf.$$"
|
cp "$target_file" "${target_file}.backup.$(date +%Y%m%d_%H%M%S)" 2>/dev/null || true
|
||||||
|
log_info "更新 DNS 配置文件: $target_file"
|
||||||
|
|
||||||
# 将 dns.conf 内容转换为 nameserver 添加到临时文件开头
|
# 写入新的 nameserver 行
|
||||||
while IFS= read -r line; do
|
for ip in "${dns_ips[@]}"; do
|
||||||
# 跳过空行和注释
|
echo "nameserver $ip"
|
||||||
[[ -z "$line" || "$line" =~ ^[[:space:]]*# ]] && continue
|
done >"$temp"
|
||||||
# 验证 IP 格式
|
|
||||||
if [[ "$line" =~ ^[0-9]{1,3}(\.[0-9]{1,3}){3}$ ]]; then
|
# 追加原内容(去掉重复 nameserver)
|
||||||
echo "nameserver $line" >> "$temp_resolv"
|
grep -v '^nameserver' "$target_file" >>"$temp" 2>/dev/null || true
|
||||||
log_info "添加 DNS 到临时文件: $line"
|
awk '!a[$0]++' "$temp" >"${temp}.uniq"
|
||||||
|
|
||||||
|
# ⚙️ 使用 cat 原地覆盖,避免 mv 引发 “设备忙”
|
||||||
|
if cat "${temp}.uniq" >"$target_file" 2>/dev/null; then
|
||||||
|
chmod 644 "$target_file"
|
||||||
|
log_success "DNS 更新完成: ${dns_ips[*]}"
|
||||||
else
|
else
|
||||||
log_warning "跳过无效 DNS: $line"
|
log_error "无法写入 $target_file,可能被系统锁定"
|
||||||
fi
|
|
||||||
done < "$dns_conf_file"
|
|
||||||
|
|
||||||
# 将原 resolv.conf 内容追加到临时文件后面
|
|
||||||
if [[ -f "$RESOLV_CONF" ]]; then
|
|
||||||
cat "$RESOLV_CONF" >> "$temp_resolv"
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# 判断是否是 root
|
rm -f "$temp" "${temp}.uniq"
|
||||||
if [[ $(id -u) -eq 0 ]]; then
|
|
||||||
# root 直接写入
|
|
||||||
tee "$RESOLV_CONF" < "$temp_resolv" >/dev/null
|
|
||||||
chmod 644 "$RESOLV_CONF"
|
|
||||||
else
|
|
||||||
# 非 root 尝试使用 sudo
|
|
||||||
if command -v sudo >/dev/null 2>&1; then
|
|
||||||
sudo tee "$RESOLV_CONF" < "$temp_resolv" >/dev/null
|
|
||||||
sudo chmod 644 "$RESOLV_CONF"
|
|
||||||
else
|
|
||||||
log_error "非 root 用户且系统未安装 sudo,无法更新 /etc/resolv.conf"
|
|
||||||
rm -f "$temp_resolv"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
# 清理临时文件
|
|
||||||
rm -f "$temp_resolv"
|
|
||||||
|
|
||||||
log_success "/etc/resolv.conf 已更新"
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# 记录同步日志
|
# 检查 resolv.conf 是否包含 dns.conf 内容
|
||||||
log_sync() {
|
ensure_dns_in_resolv() {
|
||||||
local message="$1"
|
local dns_conf="$1"
|
||||||
local timestamp=$(date '+%Y-%m-%d %H:%M:%S')
|
local dns_ips
|
||||||
echo "[$timestamp] $message" >> "$LOG_FILE"
|
mapfile -t dns_ips < <(get_dns_ips "$dns_conf")
|
||||||
|
[[ ${#dns_ips[@]} -eq 0 ]] && return
|
||||||
|
|
||||||
|
for ip in "${dns_ips[@]}"; do
|
||||||
|
if ! grep -q "nameserver $ip" "$RESOLV_CONF" 2>/dev/null; then
|
||||||
|
log_warning "检测到 /etc/resolv.conf 缺少 $ip,执行兜底修复"
|
||||||
|
update_resolv_conf "$dns_conf"
|
||||||
|
return
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
log_info "/etc/resolv.conf 已包含所有 DNS"
|
||||||
}
|
}
|
||||||
|
|
||||||
# 主函数
|
log_sync() { echo "[$(date '+%F %T')] $1" >>"$LOG_FILE"; }
|
||||||
|
|
||||||
main() {
|
main() {
|
||||||
log_info "开始 DNS 同步检查..."
|
log_info "开始 DNS 同步检查..."
|
||||||
log_sync "DNS 同步检查开始"
|
mkdir -p /opt/argus-metric
|
||||||
|
|
||||||
# 确保系统目录存在
|
|
||||||
mkdir -p "/opt/argus-metric"
|
|
||||||
|
|
||||||
# 获取 FTP 配置
|
|
||||||
get_ftp_config
|
get_ftp_config
|
||||||
|
local remote_file
|
||||||
# 检查本地 DNS 配置文件是否存在
|
if ! remote_file=$(download_remote_dns_conf); then
|
||||||
if [[ ! -f "$LOCAL_DNS_CONF" ]]; then
|
log_error "下载失败"; log_sync "同步失败"; exit 1
|
||||||
log_warning "本地 DNS 配置文件不存在: $LOCAL_DNS_CONF"
|
|
||||||
log_warning "将下载远程配置文件并更新系统 DNS 设置"
|
|
||||||
|
|
||||||
# 下载远程配置文件
|
|
||||||
if remote_file=$(download_remote_dns_conf); then
|
|
||||||
# 复制到本地
|
|
||||||
cp "$remote_file" "$LOCAL_DNS_CONF"
|
|
||||||
log_success "远程 DNS 配置文件已保存到本地"
|
|
||||||
|
|
||||||
# 更新 resolv.conf
|
|
||||||
update_resolv_conf "$LOCAL_DNS_CONF"
|
|
||||||
log_sync "首次同步完成,DNS 配置已更新"
|
|
||||||
|
|
||||||
# 清理临时文件
|
|
||||||
rm -f "$remote_file"
|
|
||||||
else
|
|
||||||
log_error "无法下载远程 DNS 配置文件,同步失败"
|
|
||||||
log_sync "同步失败:无法下载远程配置文件"
|
|
||||||
exit 1
|
|
||||||
fi
|
fi
|
||||||
else
|
|
||||||
log_info "本地 DNS 配置文件存在: $LOCAL_DNS_CONF"
|
|
||||||
|
|
||||||
# 下载远程配置文件进行比较
|
if [[ ! -f "$LOCAL_DNS_CONF" ]]; then
|
||||||
if remote_file=$(download_remote_dns_conf); then
|
log_info "本地 dns.conf 不存在,初始化..."
|
||||||
# 比较文件
|
cp "$remote_file" "$LOCAL_DNS_CONF"
|
||||||
if compare_files "$LOCAL_DNS_CONF" "$remote_file"; then
|
update_resolv_conf "$LOCAL_DNS_CONF"
|
||||||
log_info "DNS 配置文件无变化,无需更新"
|
log_sync "首次同步完成"
|
||||||
log_sync "DNS 配置文件无变化"
|
else
|
||||||
else
|
if compare_files "$LOCAL_DNS_CONF" "$remote_file"; then
|
||||||
log_info "检测到 DNS 配置文件有变化,开始同步..."
|
log_info "dns.conf 无变化"
|
||||||
log_sync "检测到 DNS 配置文件变化,开始同步"
|
ensure_dns_in_resolv "$LOCAL_DNS_CONF"
|
||||||
|
log_sync "dns.conf 无变化,执行兜底检查"
|
||||||
# 更新本地配置文件
|
else
|
||||||
|
log_info "检测到 DNS 配置更新"
|
||||||
cp "$remote_file" "$LOCAL_DNS_CONF"
|
cp "$remote_file" "$LOCAL_DNS_CONF"
|
||||||
log_success "本地 DNS 配置文件已更新"
|
|
||||||
|
|
||||||
# 更新 resolv.conf
|
|
||||||
update_resolv_conf "$LOCAL_DNS_CONF"
|
update_resolv_conf "$LOCAL_DNS_CONF"
|
||||||
log_sync "DNS 配置同步完成"
|
log_sync "DNS 配置同步完成"
|
||||||
fi
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
# 清理临时文件
|
|
||||||
rm -f "$remote_file"
|
rm -f "$remote_file"
|
||||||
else
|
log_success "DNS 同步流程完成"
|
||||||
log_error "无法下载远程 DNS 配置文件,跳过本次同步"
|
|
||||||
log_sync "同步失败:无法下载远程配置文件"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
log_success "DNS 同步检查完成"
|
|
||||||
log_sync "DNS 同步检查完成"
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# 脚本入口
|
|
||||||
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
|
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
|
||||||
main "$@"
|
main "$@"
|
||||||
fi
|
fi
|
||||||
|
|||||||
39
src/metric/tests/client-test-gpu-node/build/Dockerfile
Normal file
39
src/metric/tests/client-test-gpu-node/build/Dockerfile
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
# GPU test node image, based on the official NVIDIA CUDA runtime image.
FROM nvidia/cuda:12.2.2-runtime-ubuntu22.04

# Keep apt non-interactive during the build and select the container time zone.
ENV DEBIAN_FRONTEND=noninteractive \
    TZ=Asia/Shanghai

# Base tooling. The apt package index is removed in the same layer.
RUN apt-get update -qq \
 && apt-get install -y -qq \
      ca-certificates \
      curl \
      gnupg2 \
      software-properties-common \
      tzdata \
      wget \
 && rm -rf /var/lib/apt/lists/*

# Apply the time zone chosen through TZ.
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone

WORKDIR /app

# Startup wrapper: reports GPU availability at run time, then execs the
# command it was given. The heredoc delimiter is quoted so that "$@" is
# written to the file literally, with no build-time expansion.
COPY <<'EOF' /app/start.sh
#!/bin/bash
echo "检查GPU环境..."
if command -v nvidia-smi &> /dev/null; then
    nvidia-smi
    echo "GPU环境正常"
else
    echo "警告: nvidia-smi 命令不可用,请确保容器运行时启用了GPU支持"
fi
exec "$@"
EOF

RUN chmod +x /app/start.sh

CMD ["/app/start.sh", "/bin/bash"]
|
||||||
@ -142,3 +142,44 @@ services:
|
|||||||
max-size: "10m"
|
max-size: "10m"
|
||||||
max-file: "3"
|
max-file: "3"
|
||||||
|
|
||||||
|
# GPU test node: built from ./client-test-gpu-node/build and kept alive with
# `sleep infinity` so the test scripts can `docker exec` into it.
test-gpu-node:
  build:
    context: ./client-test-gpu-node/build
    dockerfile: Dockerfile
  image: argus-metric-test-gpu-node:latest
  container_name: argus-metric-test-gpu-node
  hostname: test-metric-gpu-node-001
  restart: unless-stopped
  privileged: true
  # GPU access is requested both through the nvidia runtime and through a
  # device reservation.
  runtime: nvidia
  deploy:
    resources:
      reservations:
        devices:
          - driver: nvidia
            count: all
            capabilities: [gpu]
  depends_on: [ftp, prometheus]
  environment:
    TZ: "Asia/Shanghai"
    DEBIAN_FRONTEND: "noninteractive"
    NVIDIA_VISIBLE_DEVICES: "all"
    NVIDIA_DRIVER_CAPABILITIES: "compute,utility"
    GPU_MODE: "gpu"
  volumes:
    - ${DATA_ROOT:-/private}/argus/agent:/private/argus/agent
    - /etc/localtime:/etc/localtime:ro
    - /etc/timezone:/etc/timezone:ro
  command: sleep infinity
  networks:
    default:
      ipv4_address: 172.30.0.51
  logging:
    driver: "json-file"
    options:
      max-size: "10m"
      max-file: "3"
|
||||||
|
|
||||||
|
|||||||
@ -21,6 +21,7 @@ docker ps | grep argus-ftp
|
|||||||
docker ps | grep argus-prometheus
|
docker ps | grep argus-prometheus
|
||||||
docker ps | grep argus-grafana
|
docker ps | grep argus-grafana
|
||||||
docker ps | grep argus-metric-test-node
|
docker ps | grep argus-metric-test-node
|
||||||
|
docker ps | grep argus-metric-test-gpu-node
|
||||||
|
|
||||||
echo "[01] 所有服务已启动"
|
echo "[01] 基础服务已启动"
|
||||||
|
|
||||||
|
|||||||
@ -20,7 +20,7 @@ else
|
|||||||
echo "[02] 默认路径: $OUTPUT_DIR"
|
echo "[02] 默认路径: $OUTPUT_DIR"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
OWNER="${FTP_UID:-2133}:${FTP_GID:-2015}"
|
OWNER="${ARGUS_BUILD_UID:-2133}:${ARGUS_BUILD_GID:-2015}"
|
||||||
|
|
||||||
cd "$PLUGIN_DIR"
|
cd "$PLUGIN_DIR"
|
||||||
|
|
||||||
|
|||||||
33
src/metric/tests/scripts/04_test_gpu_node_install.sh
Executable file
33
src/metric/tests/scripts/04_test_gpu_node_install.sh
Executable file
@ -0,0 +1,33 @@
|
|||||||
|
#!/bin/bash
# Step 04: install the metric client inside the GPU test container
# (argus-metric-test-gpu-node) by downloading setup.sh from the FTP server
# and running it there.
#
# Environment (all optional):
#   FTP_SERVER    FTP host as seen from the container (default 172.30.0.40)
#   FTP_USER      FTP user name                       (default ftpuser)
#   FTP_PASSWORD  FTP password                        (test-only default)
#   FTP_PORT      FTP port                            (default 21)
set -euo pipefail

FTP_SERVER="${FTP_SERVER:-172.30.0.40}"
FTP_USER="${FTP_USER:-ftpuser}"
# Default credential for the local test stack only; override it elsewhere.
FTP_PASSWORD="${FTP_PASSWORD:-ZGClab1234!}"
FTP_PORT="${FTP_PORT:-21}"

FTP_HOST="${FTP_SERVER}"

echo "[04] 进入测试节点执行安装..."
echo "[04] 使用 FTP 地址: ${FTP_HOST}:${FTP_PORT}"

# The inner script is single-quoted and receives its values as positional
# parameters, so credentials containing spaces or shell metacharacters are
# passed through verbatim instead of being re-parsed by the inner shell.
docker exec argus-metric-test-gpu-node bash -c '
set -e

ftp_host=$1
ftp_port=$2
ftp_user=$3
ftp_password=$4

if ! command -v curl &>/dev/null; then
    echo "[04] curl 未安装,正在安装..."
    apt-get update && apt-get install -y curl
fi

cd /tmp
echo "[04] 下载 setup.sh..."
curl -u "${ftp_user}:${ftp_password}" "ftp://${ftp_host}:${ftp_port}/setup.sh" -o setup.sh

echo "[04] 执行安装..."
chmod +x setup.sh
bash setup.sh --server "${ftp_host}" --user "${ftp_user}" --password "${ftp_password}" --port "${ftp_port}"

echo "[04] 安装完成"
' bash "$FTP_HOST" "$FTP_PORT" "$FTP_USER" "$FTP_PASSWORD"

echo "[04] 完成"
|
||||||
@ -44,6 +44,7 @@ BUILD_DIRS=(
|
|||||||
"../prometheus/build"
|
"../prometheus/build"
|
||||||
"../grafana/build"
|
"../grafana/build"
|
||||||
"client-test-node/build"
|
"client-test-node/build"
|
||||||
|
"client-test-gpu-node/build"
|
||||||
)
|
)
|
||||||
|
|
||||||
for dir in "${BUILD_DIRS[@]}"; do
|
for dir in "${BUILD_DIRS[@]}"; do
|
||||||
@ -87,7 +88,7 @@ echo "2. 准备 Docker 镜像..."
|
|||||||
|
|
||||||
# 检查镜像是否存在
|
# 检查镜像是否存在
|
||||||
IMAGE_CACHE_DIR="$TEST_DIR/images-cache"
|
IMAGE_CACHE_DIR="$TEST_DIR/images-cache"
|
||||||
IMAGES=("argus-metric-ftp:latest" "argus-metric-prometheus:latest" "argus-metric-grafana:latest" "argus-metric-test-node:latest")
|
IMAGES=("argus-metric-ftp:latest" "argus-metric-prometheus:latest" "argus-metric-grafana:latest" "argus-metric-test-node:latest" "argus-metric-test-gpu-node:latest")
|
||||||
all_images_exist=true
|
all_images_exist=true
|
||||||
|
|
||||||
for image in "${IMAGES[@]}"; do
|
for image in "${IMAGES[@]}"; do
|
||||||
@ -126,6 +127,9 @@ else
|
|||||||
"argus-metric-test-node:latest")
|
"argus-metric-test-node:latest")
|
||||||
cache_file="${IMAGE_CACHE_DIR}/argus-test-node.tar"
|
cache_file="${IMAGE_CACHE_DIR}/argus-test-node.tar"
|
||||||
;;
|
;;
|
||||||
|
"argus-metric-test-gpu-node:latest")
|
||||||
|
cache_file="${IMAGE_CACHE_DIR}/argus-test-gpu-node.tar"
|
||||||
|
;;
|
||||||
esac
|
esac
|
||||||
|
|
||||||
if [ -f "$cache_file" ]; then
|
if [ -f "$cache_file" ]; then
|
||||||
@ -174,6 +178,9 @@ else
|
|||||||
"argus-metric-test-node:latest")
|
"argus-metric-test-node:latest")
|
||||||
docker save -o "${IMAGE_CACHE_DIR}/argus-test-node.tar" "$image" && echo " 已保存: argus-test-node.tar"
|
docker save -o "${IMAGE_CACHE_DIR}/argus-test-node.tar" "$image" && echo " 已保存: argus-test-node.tar"
|
||||||
;;
|
;;
|
||||||
|
"argus-metric-test-gpu-node:latest")
|
||||||
|
docker save -o "${IMAGE_CACHE_DIR}/argus-test-gpu-node.tar" "$image" && echo " 已保存: argus-test-gpu-node.tar"
|
||||||
|
;;
|
||||||
esac
|
esac
|
||||||
done
|
done
|
||||||
echo "镜像已保存到: $IMAGE_CACHE_DIR/"
|
echo "镜像已保存到: $IMAGE_CACHE_DIR/"
|
||||||
@ -185,40 +192,12 @@ else
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
echo ""
|
echo ""
|
||||||
echo "3. 启动服务..."
|
echo "3. 启动基础服务..."
|
||||||
cd "$TEST_DIR"
|
cd "$TEST_DIR"
|
||||||
docker compose up -d
|
# Start the services explicitly by name, including the GPU test node
|
||||||
|
docker compose up -d ftp prometheus grafana test-node test-gpu-node
|
||||||
|
|
||||||
echo ""
|
echo ""
|
||||||
echo "4. 等待服务启动..."
|
echo "4. 等待服务启动..."
|
||||||
sleep 5
|
sleep 5
|
||||||
|
|
||||||
echo ""
|
|
||||||
echo "5. 检查服务状态..."
|
|
||||||
cd "$TEST_DIR"
|
|
||||||
docker compose ps
|
|
||||||
|
|
||||||
echo ""
|
|
||||||
echo "=========================================="
|
|
||||||
echo " 服务启动完成!"
|
|
||||||
echo "=========================================="
|
|
||||||
echo ""
|
|
||||||
echo "服务访问地址:"
|
|
||||||
echo " - FTP: ftp://localhost:${FTP_PORT:-21}"
|
|
||||||
echo " 用户名: ftpuser"
|
|
||||||
echo " 密码: ${FTP_PASSWORD:-ZGClab1234!}"
|
|
||||||
echo ""
|
|
||||||
echo " - Prometheus: http://localhost:${PROMETHEUS_PORT:-9090}"
|
|
||||||
echo ""
|
|
||||||
echo " - Grafana: http://localhost:${GRAFANA_PORT:-3000}"
|
|
||||||
echo " 用户名: admin"
|
|
||||||
echo " 密码: admin"
|
|
||||||
echo ""
|
|
||||||
echo "常用命令:"
|
|
||||||
echo " 查看日志: docker compose logs -f [service]"
|
|
||||||
echo " 停止服务: docker compose stop"
|
|
||||||
echo " 重启服务: docker compose restart"
|
|
||||||
echo " 停止并删除: docker compose down"
|
|
||||||
echo " 停止并删除卷: docker compose down -v"
|
|
||||||
echo ""
|
|
||||||
|
|
||||||
|
|||||||
@ -24,7 +24,8 @@ declare -A IMAGES=(
|
|||||||
["argus-metric-ftp:latest"]="argus-ftp.tar"
|
["argus-metric-ftp:latest"]="argus-ftp.tar"
|
||||||
["argus-metric-prometheus:latest"]="argus-prometheus.tar"
|
["argus-metric-prometheus:latest"]="argus-prometheus.tar"
|
||||||
["argus-metric-grafana:latest"]="argus-grafana.tar"
|
["argus-metric-grafana:latest"]="argus-grafana.tar"
|
||||||
["ubuntu:22.04"]="test-node.tar"
|
["argus-metric-test-node:latest"]="argus-test-node.tar"
|
||||||
|
["argus-metric-test-gpu-node:latest"]="argus-test-gpu-node.tar"
|
||||||
)
|
)
|
||||||
|
|
||||||
# 检查镜像是否存在并保存
|
# 检查镜像是否存在并保存
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user