From cb659f581bcfabf41e44db88d7f8037ebc6eaf6d Mon Sep 17 00:00:00 2001 From: "sundapeng.sdp" Date: Mon, 29 Sep 2025 10:03:56 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E8=AE=BE=E7=BD=AE=E5=85=A8=E5=B1=80?= =?UTF-8?q?=E7=8E=AF=E5=A2=83=E5=8F=98=E9=87=8F=EF=BC=9B=E9=9B=86=E6=88=90?= =?UTF-8?q?fluent-bit=E5=AE=89=E8=A3=85=E5=8C=85=EF=BC=9B=E8=AE=BE?= =?UTF-8?q?=E7=BD=AE=E7=BB=84=E4=BB=B6=E5=81=A5=E5=BA=B7=E6=A3=80=E6=9F=A5?= =?UTF-8?q?=E7=9B=AE=E5=BD=95=EF=BC=9B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit refs #11 --- .../all-in-one/scripts/check_health.sh | 4 +- .../all-in-one/scripts/install_artifact.sh | 51 ++++-------- .../all-in-one/scripts/setup.sh | 78 ++++++++++++------- 3 files changed, 65 insertions(+), 68 deletions(-) diff --git a/src/metric/client-plugins/all-in-one/scripts/check_health.sh b/src/metric/client-plugins/all-in-one/scripts/check_health.sh index a6ab23a..991cc9f 100755 --- a/src/metric/client-plugins/all-in-one/scripts/check_health.sh +++ b/src/metric/client-plugins/all-in-one/scripts/check_health.sh @@ -77,13 +77,13 @@ get_utc_timestamp() { # 获取主机名 get_hostname() { - hostname + echo "${HOSTNAME:-$(hostname)}" } # 创建健康状态目录 create_health_dir() { local hostname=$(get_hostname) - local health_dir="/private/argus/agent/health/$hostname" + local health_dir="/private/argus/agent/$hostname/health" if [[ ! -d "$health_dir" ]]; then log_info "创建健康状态目录: $health_dir" diff --git a/src/metric/client-plugins/all-in-one/scripts/install_artifact.sh b/src/metric/client-plugins/all-in-one/scripts/install_artifact.sh index d2a3ea9..b1acd59 100755 --- a/src/metric/client-plugins/all-in-one/scripts/install_artifact.sh +++ b/src/metric/client-plugins/all-in-one/scripts/install_artifact.sh @@ -544,8 +544,8 @@ EOF # 根据组件名查找进程,使用多种方法确保能找到PID case "$component" in - "node-exporter-installer") - # 尝试多种方式查找node_exporter进程 + "node-exporter") + # 查找node_exporter进程 component_pid=$(pgrep -f "node_exporter" | head -1) if [[ -z "$component_pid" ]]; then component_pid=$(pgrep -f "node-exporter" | head -1) @@ -554,8 +554,8 @@ EOF component_pid=$(ps aux | grep -v grep | grep "node_exporter" | awk '{print $2}' | head -1) fi ;; - "dcgm-exporter-installer") - # 尝试多种方式查找dcgm-exporter进程 + "dcgm-exporter") + # 查找dcgm-exporter进程 component_pid=$(pgrep -f "dcgm-exporter" | head -1) if [[ -z "$component_pid" ]]; then component_pid=$(pgrep -f "dcgm_exporter" | head -1) @@ -564,6 +564,16 @@ EOF component_pid=$(ps aux | grep -v grep | grep "dcgm-exporter" | awk '{print $2}' | head -1) fi ;; + "fluent-bit") + # 查找fluent-bit进程 + component_pid=$(pgrep -f "fluent-bit" | head -1) + if [[ -z "$component_pid" ]]; then + component_pid=$(pgrep -f "fluent_bit" | head -1) + fi + if [[ -z "$component_pid" ]]; then + component_pid=$(ps aux | grep -v grep | grep "fluent-bit" | awk '{print $2}' | head -1) + fi + ;; esac # 记录找到的PID信息 @@ -652,43 +662,12 @@ setup_health_check_cron() { # 清理临时文件 rm -f "$temp_cron" - # 立即执行一次健康检查 - log_info "执行首次健康检查..." - if "$check_health_script"; then - log_success "首次健康检查完成" - else - log_warning "首次健康检查失败,但定时任务已设置" - fi + log_info "健康检查通过crontab自动执行" } # 显示安装信息 show_install_info() { log_success "Argus-Metrics All-in-One 安装完成!" - echo - echo "安装信息:" - echo " 版本: $VERSION" - echo " 构建时间: $BUILD_TIME" - echo " 安装目录: $INSTALL_DIR" - echo - echo "已安装组件:" - if [[ -f "$TEMP_DIR/components.txt" ]]; then - while IFS= read -r line; do - component=$(echo "$line" | cut -d':' -f1) - version=$(echo "$line" | cut -d':' -f2) - echo " - $component v$version" - done < "$TEMP_DIR/components.txt" - fi - echo - echo "访问地址:" - echo " Node Exporter: http://localhost:9100" - echo " DCGM Exporter: http://localhost:9400" - echo - echo "健康检查:" - echo " 安装记录: .install_record" - echo " 健康日志: .health_log" - echo " 定时任务日志: .health_cron.log" - echo " 查看定时任务: crontab -l" - echo } cleanup() { diff --git a/src/metric/client-plugins/all-in-one/scripts/setup.sh b/src/metric/client-plugins/all-in-one/scripts/setup.sh index b311dc9..dc95d7d 100755 --- a/src/metric/client-plugins/all-in-one/scripts/setup.sh +++ b/src/metric/client-plugins/all-in-one/scripts/setup.sh @@ -2,6 +2,12 @@ set -e +# 加载配置文件(仅在解压后的目录中可用) +load_config() { + # setup.sh 脚本不需要配置文件,FTP参数通过命令行参数或环境变量提供 + log_info "setup.sh 脚本使用命令行参数或环境变量获取FTP配置" +} + # 颜色定义 RED='\033[0;31m' GREEN='\033[0;32m' @@ -122,6 +128,7 @@ get_latest_version() { # 解析参数 ARGUS_VERSION="" # 使用不同的变量名避免与系统VERSION冲突 ACTION="install" +FORCE_INSTALL=false while [[ $# -gt 0 ]]; do case $1 in @@ -165,6 +172,10 @@ while [[ $# -gt 0 ]]; do ACTION="status" shift ;; + --force) + FORCE_INSTALL=true + shift + ;; --help) echo "Argus Metric FTP在线安装脚本" echo @@ -179,6 +190,7 @@ while [[ $# -gt 0 ]]; do echo " --version VERSION 指定版本 (默认: 自动获取最新版本)" echo " --port PORT FTP端口 (默认: 21)" echo " --install-dir DIR 安装目录 (默认: /opt/argus-metric)" + echo " --force 强制重新安装 (即使相同版本)" echo " --uninstall 卸载 (自动确认)" echo " --rollback 回滚到上一个备份版本" echo " --backup-list 列出所有备份版本" @@ -202,6 +214,9 @@ while [[ $# -gt 0 ]]; do echo " # 指定版本安装" echo " sudo sh setup.sh --server 10.211.55.4 --user ftpuser --password admin1234 --version 1.30.0" echo " " + echo " # 强制重新安装" + echo " sudo sh setup.sh --server 10.211.55.4 --user ftpuser --password admin1234 --force" + echo " " echo " # 卸载" echo " sudo sh setup.sh --server 10.211.55.4 --user ftpuser --password admin1234 --uninstall" exit 0 @@ -289,6 +304,9 @@ backup_current_version() { return 0 fi + # 确保备份目录存在 + mkdir -p "$BACKUPS_DIR" + local backup_name="$current_version" local backup_path="$BACKUPS_DIR/$backup_name" @@ -300,36 +318,23 @@ backup_current_version() { rm -rf "$backup_path" fi - # 复制当前版本目录 - if cp -r "$CURRENT_LINK" "$backup_path"; then + # 复制当前版本目录(跟随软链接复制实际内容) + if cp -rL "$CURRENT_LINK" "$backup_path"; then log_success "版本备份完成: $backup_name" - - # 清理旧备份,只保留最近3个 - cleanup_old_backups + else log_error "版本备份失败" exit 1 fi } -# 清理旧备份 -cleanup_old_backups() { - log_info "清理旧版本备份..." - - # 获取备份目录列表,按时间排序,保留最近3个 - local backup_count=$(ls -1 "$BACKUPS_DIR" 2>/dev/null | wc -l) - if [[ $backup_count -gt 3 ]]; then - local to_remove=$((backup_count - 3)) - ls -1t "$BACKUPS_DIR" | tail -n $to_remove | while read backup; do - log_info "删除旧备份: $backup" - rm -rf "$BACKUPS_DIR/$backup" - done - fi -} - # 回滚到备份版本 rollback_to_backup() { local backup_name="$1" + + # 确保备份目录存在 + mkdir -p "$BACKUPS_DIR" + local backup_path="$BACKUPS_DIR/$backup_name" if [[ ! -d "$backup_path" ]]; then @@ -504,25 +509,33 @@ install_argus_metric() { log_info "开始安装 Argus Metric v$ARGUS_VERSION..." log_info "安装目录: $INSTALL_DIR" + + create_install_directories # 检查是否已安装 local is_upgrade=false if check_installed; then local current_version=$(get_current_version) if [[ "$current_version" == "$ARGUS_VERSION" ]]; then - log_info "版本 v$ARGUS_VERSION 已安装,无需重复安装" - return 0 + if [[ "$FORCE_INSTALL" == true ]]; then + log_info "检测到相同版本 v$ARGUS_VERSION,使用了 --force 参数,可强制重新安装" + is_upgrade=true + # 备份当前版本 + backup_current_version + else + log_info "版本 v$ARGUS_VERSION 已安装,无需重复安装" + log_info "如需强制重新安装,请使用 --force 参数" + return 0 + fi + else + log_info "检测到版本升级: v$current_version -> v$ARGUS_VERSION" + is_upgrade=true + + # 备份当前版本 + backup_current_version fi - log_info "检测到版本升级: v$current_version -> v$ARGUS_VERSION" - is_upgrade=true - - # 备份当前版本 - backup_current_version fi - # 创建安装目录结构 - create_install_directories - # 创建临时目录 mkdir -p "$TEMP_DIR" cd "$TEMP_DIR" @@ -614,6 +627,8 @@ install_argus_metric() { # 如果是升级失败,尝试回滚 if [[ "$is_upgrade" == true ]]; then log_warning "升级失败,尝试回滚到之前版本..." + # 确保备份目录存在 + mkdir -p "$BACKUPS_DIR" local latest_backup=$(ls -1t "$BACKUPS_DIR" 2>/dev/null | head -n 1) if [[ -n "$latest_backup" ]]; then rollback_to_backup "$latest_backup" @@ -799,6 +814,9 @@ rollback_version() { exit 1 fi + # 确保备份目录存在 + mkdir -p "$BACKUPS_DIR" + # 获取最新的备份 local latest_backup=$(ls -1t "$BACKUPS_DIR" 2>/dev/null | head -n 1) if [[ -z "$latest_backup" ]]; then