feat: 基于算力平台的Prometheus镜像改造，supervisor自启应用；调整Prometheus.yml结构；

refs #9
feat: FTP服务器离线安装及配置；数据采集客户端支持一键部署、版本校验、组件健康检查、失败回滚等功能；
2025-09-23 17:37:59 +08:00 · 2025-09-23 17:37:59 +08:00 · 2025-09-23 17:37:59 +08:00 · 2025-09-23 17:37:59 +08:00 · 2025-09-22 16:39:38 +08:00
76 changed files with 6900 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1 @@
+.idea/
--- a/build/build_images.sh
+++ b/build/build_images.sh
@ -0,0 +1,138 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# Print the command-line usage text for the image build tool to stdout.
+show_help() {
+    cat <<USAGE
+ARGUS Unified Build System - Image Build Tool
+
+Usage: $0 [OPTIONS]
+
+Options:
+  --intranet    Use intranet mirror for Ubuntu 22.04 packages
+  -h, --help    Show this help message
+
+Examples:
+  $0                # Build with default sources
+  $0 --intranet     # Build with intranet mirror
+
+USAGE
+}
+
+# 解析命令行参数
+use_intranet=false
+
+while [[ $# -gt 0 ]]; do
+    case $1 in
+        --intranet)
+            use_intranet=true
+            shift
+            ;;
+        -h|--help)
+            show_help
+            exit 0
+            ;;
+        *)
+            echo "Unknown option: $1"
+            show_help
+            exit 1
+            ;;
+    esac
+done
+
+# 获取项目根目录
+root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+cd "$root"
+
+echo "======================================="
+echo "ARGUS Unified Build System"
+echo "======================================="
+
+if [[ "$use_intranet" == true ]]; then
+    echo "🌐 Mode: Intranet (Using internal mirror: 10.68.64.1)"
+    build_args="--build-arg USE_INTRANET=true"
+else
+    echo "🌐 Mode: Public (Using default package sources)"
+    build_args=""
+fi
+
+echo "📁 Build context: $root"
+echo ""
+
+# 构建镜像的函数
+build_image() {
+    local image_name=$1
+    local dockerfile_path=$2
+    local tag=$3
+
+    echo "🔄 Building $image_name image..."
+    echo "   Dockerfile: $dockerfile_path"
+    echo "   Tag: $tag"
+
+    if docker build $build_args -f "$dockerfile_path" -t "$tag" .; then
+        echo "✅ $image_name image built successfully"
+        return 0
+    else
+        echo "❌ Failed to build $image_name image"
+        return 1
+    fi
+}
+
+# Build all images in a fixed order, remembering which ones succeeded
+images_built=()
+build_failed=false
+
+# One entry per image: "display name|Dockerfile path|image tag"
+build_plan=(
+    "Elasticsearch|src/log/elasticsearch/build/Dockerfile|argus-elasticsearch:latest"
+    "Kibana|src/log/kibana/build/Dockerfile|argus-kibana:latest"
+    "BIND9|src/bind/build/Dockerfile|argus-bind9:latest"
+)
+
+for plan_entry in "${build_plan[@]}"; do
+    IFS='|' read -r plan_name plan_dockerfile plan_tag <<<"$plan_entry"
+    if build_image "$plan_name" "$plan_dockerfile" "$plan_tag"; then
+        images_built+=("$plan_tag")
+    else
+        build_failed=true
+    fi
+    echo ""
+done
+
+printf '%s\n' \
+    "=======================================" \
+    "📦 Build Summary" \
+    "======================================="
+
+if (( ${#images_built[@]} > 0 )); then
+    echo "✅ Successfully built images:"
+    printf '   • %s\n' "${images_built[@]}"
+fi
+
+# Any failed build makes the whole run fail, after listing what did succeed
+if [[ "$build_failed" == true ]]; then
+    echo ""
+    echo "❌ Some images failed to build. Please check the errors above."
+    exit 1
+fi
+
+if [[ "$use_intranet" == true ]]; then
+    echo ""
+    echo "🌐 Built with intranet mirror configuration"
+fi
+
+cat <<'NEXT_STEPS'
+
+🚀 Next steps:
+   cd src/log && ./scripts/save_images.sh     # Export log images
+   cd src/bind && ./scripts/save_images.sh    # Export bind images
+   cd src/log/tests && ./scripts/02_up.sh     # Start log services
+
+NEXT_STEPS
--- a/build/save_images.sh
+++ b/build/save_images.sh
@ -0,0 +1,222 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# Print the command-line usage text for the image export tool to stdout.
+show_help() {
+    cat <<USAGE
+ARGUS Unified Build System - Image Export Tool
+
+Usage: $0 [OPTIONS]
+
+Options:
+  --compress    Compress exported images with gzip
+  -h, --help    Show this help message
+
+Examples:
+  $0                # Export all images without compression
+  $0 --compress     # Export all images with gzip compression
+
+USAGE
+}
+
+# 解析命令行参数
+use_compression=false
+
+while [[ $# -gt 0 ]]; do
+    case $1 in
+        --compress)
+            use_compression=true
+            shift
+            ;;
+        -h|--help)
+            show_help
+            exit 0
+            ;;
+        *)
+            echo "Unknown option: $1"
+            show_help
+            exit 1
+            ;;
+    esac
+done
+
+# 获取项目根目录
+root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+cd "$root"
+
+# 创建镜像输出目录
+images_dir="$root/images"
+mkdir -p "$images_dir"
+
+echo "======================================="
+echo "ARGUS Unified Build System - Image Export"
+echo "======================================="
+echo ""
+
+if [[ "$use_compression" == true ]]; then
+    echo "🗜️  Mode: With gzip compression"
+else
+    echo "📦 Mode: No compression"
+fi
+
+echo "📁 Output directory: $images_dir"
+echo ""
+
+# 定义镜像列表
+declare -A images=(
+    ["argus-elasticsearch:latest"]="argus-elasticsearch-latest.tar"
+    ["argus-kibana:latest"]="argus-kibana-latest.tar"
+    ["argus-bind9:latest"]="argus-bind9-latest.tar"
+)
+
+#######################################
+# Report whether a local Docker image with the given reference exists.
+# Arguments:
+#   $1 - image reference in "repository:tag" form
+# Outputs:
+#   a found / not-found line on stdout
+# Returns:
+#   0 if the image is listed by "docker images", 1 otherwise
+#######################################
+check_image() {
+    local image_name="$1"
+    local known_images
+
+    # Capture the listing first: piping straight into "grep -q" under
+    # "set -o pipefail" can report a false miss when grep exits early and
+    # the producer is killed by SIGPIPE.
+    known_images="$(docker images --format "{{.Repository}}:{{.Tag}}")" || known_images=""
+
+    # -F -x: compare whole lines literally, so "." in a name is not a wildcard
+    if grep -Fxq -- "$image_name" <<<"$known_images"; then
+        echo "✅ Image found: $image_name"
+        return 0
+    else
+        echo "❌ Image not found: $image_name"
+        return 1
+    fi
+}
+
+# 函数：显示镜像信息
+show_image_info() {
+    local image_name="$1"
+    echo "📋 Image info for $image_name:"
+    docker images "$image_name" --format "   Size: {{.Size}}, Created: {{.CreatedSince}}, ID: {{.ID}}"
+}
+
+# 函数：保存镜像
+save_image() {
+    local image_name="$1"
+    local output_file="$2"
+    local output_path="$images_dir/$output_file"
+
+    echo "🔄 Saving $image_name to $output_file..."
+
+    # 删除旧的镜像文件（如果存在）
+    if [[ -f "$output_path" ]]; then
+        echo "   Removing existing file: $output_file"
+        rm "$output_path"
+    fi
+
+    if [[ "$use_compression" == true && -f "$output_path.gz" ]]; then
+        echo "   Removing existing compressed file: $output_file.gz"
+        rm "$output_path.gz"
+    fi
+
+    # 保存镜像
+    docker save "$image_name" -o "$output_path"
+
+    if [[ "$use_compression" == true ]]; then
+        echo "   Compressing with gzip..."
+        gzip "$output_path"
+        output_path="$output_path.gz"
+        output_file="$output_file.gz"
+    fi
+
+    # 检查文件大小
+    local file_size=$(du -h "$output_path" | cut -f1)
+    echo "✅ Saved successfully: $output_file ($file_size)"
+}
+
+echo "🔍 Checking for ARGUS images..."
+echo ""
+
+# Sort the configured images into those present locally and those missing
+available_images=()
+missing_images=()
+
+for image_name in "${!images[@]}"; do
+    if ! check_image "$image_name"; then
+        missing_images+=("$image_name")
+    else
+        show_image_info "$image_name"
+        available_images+=("$image_name")
+    fi
+    echo ""
+done
+
+# Nothing to export: point the user at the build script and stop
+if (( ${#available_images[@]} == 0 )); then
+    echo "❌ No ARGUS images found to export."
+    echo ""
+    echo "🔧 Please build the images first with:"
+    echo "   ./build/build_images.sh"
+    exit 1
+fi
+
+# Missing images are reported but do not stop the export
+if (( ${#missing_images[@]} > 0 )); then
+    echo "⚠️  Missing images (will be skipped):"
+    printf '   • %s\n' "${missing_images[@]}"
+    echo ""
+fi
+
+echo "💾 Starting image export process..."
+echo ""
+
+# Archives gain a ".gz" suffix when compression is enabled
+archive_suffix=""
+if [[ "$use_compression" == true ]]; then
+    archive_suffix=".gz"
+fi
+
+# Export every available image and remember the resulting file names
+exported_files=()
+for image_name in "${available_images[@]}"; do
+    output_file="${images[$image_name]}"
+    save_image "$image_name" "$output_file"
+    exported_files+=("${output_file}${archive_suffix}")
+    echo ""
+done
+
+printf '%s\n' \
+    "=======================================" \
+    "📦 Export Summary" \
+    "======================================="
+
+# List each exported file with its size and accumulate the byte total
+echo "📁 Exported files in $images_dir:"
+total_size=0
+for file in "${exported_files[@]}"; do
+    full_path="$images_dir/$file"
+    [[ -f "$full_path" ]] || continue
+    size=$(du -h "$full_path" | cut -f1)
+    size_bytes=$(du -b "$full_path" | cut -f1)
+    total_size=$((total_size + size_bytes))
+    echo "   ✅ $file ($size)"
+done
+
+# Print the total only when at least one byte was exported
+if (( total_size > 0 )); then
+    total_size_human=$(numfmt --to=iec --suffix=B "$total_size")
+    echo ""
+    echo "📊 Total size: $total_size_human"
+fi
+
+echo ""
+echo "🚀 Usage instructions:"
+echo "   To load these images on another system:"
+
+# One load command per exported file; compressed archives need gunzip first
+for file in "${exported_files[@]}"; do
+    [[ -f "$images_dir/$file" ]] || continue
+    if [[ "$use_compression" == true ]]; then
+        echo "     gunzip $file && docker load -i ${file%.gz}"
+    else
+        echo "     docker load -i $file"
+    fi
+done
+
+echo ""
+echo "✅ Image export completed successfully!"
+echo ""
--- a/src/bind/.gitignore
+++ b/src/bind/.gitignore
@ -0,0 +1,2 @@
+
+images/
--- a/src/bind/build/Dockerfile
+++ b/src/bind/build/Dockerfile
@ -0,0 +1,66 @@
+# BIND9 image: Ubuntu 22.04 with bind9 and supervisor, started through startup.sh.
+# COPY paths are relative to the repository root, which must be the build context.
+FROM ubuntu:22.04
+
+# Set timezone and avoid interactive prompts
+ENV DEBIAN_FRONTEND=noninteractive
+ENV TZ=Asia/Shanghai
+
+# Build argument: pass USE_INTRANET=true to build against the intranet apt mirror
+ARG USE_INTRANET=false
+
+# Configure the intranet apt source (only when USE_INTRANET=true).
+# The original sources.list is kept as sources.list.bak. The apt config written
+# here disables HTTPS peer/host verification; nothing later in this Dockerfile
+# removes it, so it stays in the final image.
+RUN if [ "$USE_INTRANET" = "true" ]; then \
+        echo "Configuring intranet apt sources..." && \
+        cp /etc/apt/sources.list /etc/apt/sources.list.bak && \
+        echo "deb [trusted=yes] http://10.68.64.1/ubuntu2204/ jammy main" > /etc/apt/sources.list && \
+        echo 'Acquire::https::Verify-Peer "false";' > /etc/apt/apt.conf.d/99disable-ssl-check && \
+        echo 'Acquire::https::Verify-Host "false";' >> /etc/apt/apt.conf.d/99disable-ssl-check; \
+    fi
+
+# Update package list and install required packages
+RUN apt-get update && \
+    apt-get install -y \
+    bind9 \
+    bind9utils \
+    bind9-doc \
+    supervisor \
+    net-tools \
+    inetutils-ping \
+    vim \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+# Configure the apt source used at deployment time (intranet builds only);
+# this replaces the build-time mirror written above.
+RUN if [ "$USE_INTRANET" = "true" ]; then \
+	echo "deb [trusted=yes] https://10.92.132.52/mirrors/ubuntu2204/ jammy main" > /etc/apt/sources.list; \
+    fi
+
+# Create supervisor configuration directory
+RUN mkdir -p /etc/supervisor/conf.d
+
+# Copy supervisor configuration
+COPY src/bind/build/supervisord.conf /etc/supervisor/conf.d/supervisord.conf
+
+# Copy BIND9 configuration files
+COPY src/bind/build/named.conf.local /etc/bind/named.conf.local
+COPY src/bind/build/db.argus.com /etc/bind/db.argus.com
+
+# Copy startup and reload scripts
+COPY src/bind/build/startup.sh /usr/local/bin/startup.sh
+COPY src/bind/build/reload-bind9.sh /usr/local/bin/reload-bind9.sh
+COPY src/bind/build/argus_dns_sync.sh /usr/local/bin/argus_dns_sync.sh
+COPY src/bind/build/update-dns.sh /usr/local/bin/update-dns.sh
+
+# Make scripts executable
+RUN chmod +x /usr/local/bin/startup.sh /usr/local/bin/reload-bind9.sh  /usr/local/bin/argus_dns_sync.sh /usr/local/bin/update-dns.sh
+
+# Set proper ownership for BIND9 files
+RUN chown bind:bind /etc/bind/named.conf.local /etc/bind/db.argus.com
+
+# Expose DNS port
+EXPOSE 53/tcp 53/udp
+
+# Use root user as requested
+USER root
+
+# Start with startup script
+CMD ["/usr/local/bin/startup.sh"]
--- a/src/bind/build/argus_dns_sync.sh
+++ b/src/bind/build/argus_dns_sync.sh
@ -0,0 +1,100 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+WATCH_DIR="/private/argus/etc"
+ZONE_DB="/private/argus/bind/db.argus.com"
+LOCKFILE="/var/lock/argus_dns_sync.lock"
+BACKUP_DIR="/private/argus/bind/.backup"
+SLEEP_SECONDS=10
+RELOAD_SCRIPT="/usr/local/bin/reload-bind9.sh"   # 这里放你已有脚本的路径
+
+mkdir -p "$(dirname "$LOCKFILE")" "$BACKUP_DIR"
+
+#######################################
+# Check whether a string is a dotted-quad IPv4 address.
+# Arguments:
+#   $1 - candidate address
+# Returns:
+#   0 if $1 is four decimal octets in 0..255 without leading zeros, 1 otherwise
+#######################################
+is_ipv4() {
+  local ip="$1"
+  local octet
+  local -a octets=()
+
+  [[ "$ip" =~ ^([0-9]{1,3}\.){3}[0-9]{1,3}$ ]] || return 1
+  IFS='.' read -r -a octets <<<"$ip"
+  for octet in "${octets[@]}"; do
+    # A leading zero is ambiguous (bash arithmetic reads it as octal, so "08"
+    # is an error and "010" means 8); reject such octets outright.
+    [[ "$octet" == 0 || "$octet" != 0* ]] || return 1
+    (( octet <= 255 )) || return 1
+  done
+  return 0
+}
+
+get_current_ip() {
+  local name="$1"
+  sed -n -E "s/^${name}[[:space:]]+IN[[:space:]]+A[[:space:]]+([0-9.]+)[[:space:]]*$/\1/p" "$ZONE_DB" | head -n1
+}
+
+upsert_record() {
+  local name="$1"
+  local new_ip="$2"
+  local ts
+  ts="$(date +%Y%m%d-%H%M%S)"
+  local changed=0
+
+  cp -a "$ZONE_DB" "$BACKUP_DIR/db.argus.com.$ts.bak"
+
+  local cur_ip
+  cur_ip="$(get_current_ip "$name" || true)"
+
+  if [[ -z "$cur_ip" ]]; then
+    # Ensure the file ends with a newline before adding new record
+    if [[ -s "$ZONE_DB" ]] && [[ $(tail -c1 "$ZONE_DB" | wc -l) -eq 0 ]]; then
+      echo "" >> "$ZONE_DB"
+    fi
+    printf "%-20s IN A %s\n" "$name" "$new_ip" >> "$ZONE_DB"
+    echo "[ADD] ${name} -> ${new_ip}"
+    changed=1
+  elif [[ "$cur_ip" != "$new_ip" ]]; then
+    awk -v n="$name" -v ip="$new_ip" '
+      {
+        if ($1==n && $2=="IN" && $3=="A") {
+          printf "%-20s IN A %s\n", n, ip
+        } else {
+          print
+        }
+      }
+    ' "$ZONE_DB" > "${ZONE_DB}.tmp" && mv "${ZONE_DB}.tmp" "$ZONE_DB"
+    echo "[UPDATE] ${name}: ${cur_ip} -> ${new_ip}"
+    changed=1
+  else
+    echo "[SKIP] ${name} unchanged (${new_ip})"
+  fi
+
+  return $changed
+}
+
+# Poll forever: each round takes the lock, scans WATCH_DIR and upserts one
+# record per "<name>.argus.com" file, then sleeps SLEEP_SECONDS.
+while :; do
+  exec 9>"$LOCKFILE"
+  if ! flock -n 9; then
+    echo "[INFO] 已有同步任务在运行，跳过本轮"
+  else
+    # With nullglob an empty watch directory yields zero loop iterations
+    shopt -s nullglob
+    NEED_RELOAD=0
+
+    for f in "$WATCH_DIR"/*.argus.com; do
+      base="$(basename "$f")"
+      name="${base%.argus.com}"
+      # The last IPv4-looking token in the file is taken as the address
+      ip="$(grep -Eo '([0-9]{1,3}\.){3}[0-9]{1,3}' "$f" | tail -n1 || true)"
+
+      if [[ -n "$ip" ]] && is_ipv4 "$ip"; then
+        # A zero status from upsert_record marks the zone for reload
+        if upsert_record "$name" "$ip"; then
+          NEED_RELOAD=1
+        fi
+      else
+        echo "[WARN] $f 未找到有效 IPv4，跳过"
+      fi
+    done
+
+    if (( NEED_RELOAD == 1 )); then
+      echo "[INFO] 检测到 db.argus.com 变更，执行 reload-bind9.sh"
+      bash "$RELOAD_SCRIPT"
+    fi
+
+    flock -u 9
+  fi
+
+  sleep "$SLEEP_SECONDS"
+done
+
--- a/src/bind/build/db.argus.com
+++ b/src/bind/build/db.argus.com
@ -0,0 +1,16 @@
+$TTL    604800
+@       IN      SOA     ns1.argus.com. admin.argus.com. (
+                              2         ; Serial
+                         604800         ; Refresh
+                          86400         ; Retry
+                        2419200         ; Expire
+                         604800 )       ; Negative Cache TTL
+
+; 定义 DNS 服务器
+@       IN      NS      ns1.argus.com.
+
+; 定义 ns1 主机
+ns1     IN      A       127.0.0.1
+
+; 定义 web 指向 12.4.5.6
+web     IN      A       12.4.5.6
--- a/src/bind/build/dns-monitor.sh
+++ b/src/bind/build/dns-monitor.sh
@ -0,0 +1,68 @@
+#!/bin/bash
+
+# DNS监控脚本 - 每10秒检查dns.conf是否有变化
+# 如果有变化则执行update-dns.sh脚本
+
+DNS_CONF="/private/argus/etc/dns.conf"
+DNS_BACKUP="/tmp/dns.conf.backup"
+UPDATE_SCRIPT="/private/argus/etc/update-dns.sh"
+LOG_FILE="/var/log/supervisor/dns-monitor.log"
+
+# 确保日志文件存在
+touch "$LOG_FILE"
+
+log_message() {
+    echo "$(date '+%Y-%m-%d %H:%M:%S') [DNS-Monitor] $1" >> "$LOG_FILE"
+}
+
+log_message "DNS监控脚本启动"
+
+while true; do
+    if [ -f "$DNS_CONF" ]; then
+        if [ -f "$DNS_BACKUP" ]; then
+            # 比较文件内容
+            if ! cmp -s "$DNS_CONF" "$DNS_BACKUP"; then
+                log_message "检测到DNS配置变化"
+
+                # 更新备份文件
+                cp "$DNS_CONF" "$DNS_BACKUP"
+
+                # 执行更新脚本
+                if [ -x "$UPDATE_SCRIPT" ]; then
+                    log_message "执行DNS更新脚本: $UPDATE_SCRIPT"
+                    "$UPDATE_SCRIPT" >> "$LOG_FILE" 2>&1
+                    if [ $? -eq 0 ]; then
+                        log_message "DNS更新脚本执行成功"
+                    else
+                        log_message "DNS更新脚本执行失败"
+                    fi
+                else
+                    log_message "警告: 更新脚本不存在或不可执行: $UPDATE_SCRIPT"
+                fi
+            fi
+        else
+
+            # 第一次检测到配置文件，执行更新脚本
+            if [ -x "$UPDATE_SCRIPT" ]; then
+                log_message "执行DNS更新脚本: $UPDATE_SCRIPT"
+                "$UPDATE_SCRIPT" >> "$LOG_FILE" 2>&1
+                if [ $? -eq 0 ]; then
+                    log_message "DNS更新脚本执行成功"
+
+		    # 第一次运行，创建备份并执行更新
+		    cp "$DNS_CONF" "$DNS_BACKUP"
+		    log_message "创建DNS配置备份文件"
+
+                else
+                    log_message "DNS更新脚本执行失败"
+                fi
+            else
+                log_message "警告: 更新脚本不存在或不可执行: $UPDATE_SCRIPT"
+            fi
+        fi
+    else
+        log_message "警告: DNS配置文件不存在: $DNS_CONF"
+    fi
+
+    sleep 10
+done
--- a/src/bind/build/named.conf.local
+++ b/src/bind/build/named.conf.local
@ -0,0 +1,4 @@
+zone "argus.com" {
+    type master;
+    file "/etc/bind/db.argus.com";
+};
--- a/src/bind/build/reload-bind9.sh
+++ b/src/bind/build/reload-bind9.sh
@ -0,0 +1,27 @@
+#!/bin/bash
+# Validate named.conf.local and the argus.com zone file, then restart the
+# bind9 program through supervisorctl. Exits 1 on any failure.
+
+echo "Reloading BIND9 configuration..."
+
+# Refuse to restart with a broken configuration
+echo "Checking named.conf.local syntax..."
+named-checkconf /etc/bind/named.conf.local || {
+    echo "ERROR: named.conf.local has syntax errors!"
+    exit 1
+}
+
+echo "Checking zone file syntax..."
+named-checkzone argus.com /etc/bind/db.argus.com || {
+    echo "ERROR: db.argus.com has syntax errors!"
+    exit 1
+}
+
+# The "reload" is implemented as a supervisor-managed restart
+echo "Reloading BIND9 service..."
+if ! supervisorctl restart bind9; then
+    echo "ERROR: Failed to reload BIND9!"
+    exit 1
+fi
+
+echo "BIND9 reloaded successfully!"
--- a/src/bind/build/startup.sh
+++ b/src/bind/build/startup.sh
@ -0,0 +1,40 @@
+#!/bin/bash
+
+# Set /private permissions to 777 as requested
+chmod 777 /private 2>/dev/null || true
+
+# Create persistent directories for BIND9 configs and DNS sync
+mkdir -p /private/argus/bind
+mkdir -p /private/argus/etc
+
+# Copy configuration files to persistent storage if they don't exist
+if [ ! -f /private/argus/bind/named.conf.local ]; then
+    cp /etc/bind/named.conf.local /private/argus/bind/named.conf.local
+fi
+
+if [ ! -f /private/argus/bind/db.argus.com ]; then
+    cp /etc/bind/db.argus.com /private/argus/bind/db.argus.com
+fi
+
+# Copy update-dns.sh to /private/argus/etc/
+cp /usr/local/bin/update-dns.sh /private/argus/etc/update-dns.sh
+chown bind:bind /private/argus/etc/update-dns.sh
+chmod a+x /private/argus/etc/update-dns.sh
+
+# Create symlinks to use persistent configs
+ln -sf /private/argus/bind/named.conf.local /etc/bind/named.conf.local
+ln -sf /private/argus/bind/db.argus.com /etc/bind/db.argus.com
+
+# Set proper ownership
+chown bind:bind /private/argus/bind/named.conf.local /private/argus/bind/db.argus.com
+
+# 记录容器ip地址更新到dns.conf
+IP=`ifconfig | grep -A 1 eth0 | grep inet | awk '{print $2}'`
+echo current IP: ${IP}
+echo ${IP} > /private/argus/etc/dns.conf
+
+# Create supervisor log directory
+mkdir -p /var/log/supervisor
+
+# Start supervisor
+exec /usr/bin/supervisord -c /etc/supervisor/conf.d/supervisord.conf
--- a/src/bind/build/supervisord.conf
+++ b/src/bind/build/supervisord.conf
@ -0,0 +1,37 @@
+[unix_http_server]
+file=/var/run/supervisor.sock
+chmod=0700
+
+[supervisord]
+nodaemon=true
+user=root
+logfile=/var/log/supervisor/supervisord.log
+pidfile=/var/run/supervisord.pid
+
+[rpcinterface:supervisor]
+supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface
+
+[supervisorctl]
+serverurl=unix:///var/run/supervisor.sock
+
+[program:bind9]
+command=/usr/sbin/named -g -c /etc/bind/named.conf -u bind
+user=bind
+autostart=true
+autorestart=true
+stderr_logfile=/var/log/supervisor/bind9.err.log
+stdout_logfile=/var/log/supervisor/bind9.out.log
+priority=10
+
+[program:argus-dns-sync]
+command=/usr/local/bin/argus_dns_sync.sh
+autostart=true
+autorestart=true
+startsecs=3
+stopsignal=TERM
+user=root
+stdout_logfile=/var/log/argus_dns_sync.out.log
+stderr_logfile=/var/log/argus_dns_sync.err.log
+; 根据环境调整环境变量（可选）
+; environment=RNDC_RELOAD="yes"
+
--- a/src/bind/build/update-dns.sh
+++ b/src/bind/build/update-dns.sh
@ -0,0 +1,31 @@
+#!/bin/sh
+# update-dns.sh
+# 从 /private/argus/etc/dns.conf 读取 IP，写入 /etc/resolv.conf
+
+DNS_CONF="/private/argus/etc/dns.conf"
+RESOLV_CONF="/etc/resolv.conf"
+
+# 检查配置文件是否存在
+if [ ! -f "$DNS_CONF" ]; then
+  echo "配置文件不存在: $DNS_CONF" >&2
+  exit 1
+fi
+
+# 生成 resolv.conf 内容
+{
+  while IFS= read -r ip; do
+    # 跳过空行和注释
+    case "$ip" in
+      \#*) continue ;;
+      "") continue ;;
+    esac
+    echo "nameserver $ip"
+  done < "$DNS_CONF"
+} > "$RESOLV_CONF".tmp
+
+# 替换写入 /etc/resolv.conf
+cat "$RESOLV_CONF".tmp > "$RESOLV_CONF"
+rm -f "$RESOLV_CONF".tmp
+
+echo "已更新 $RESOLV_CONF"
+
--- a/src/bind/tests/docker-compose.yml
+++ b/src/bind/tests/docker-compose.yml
@ -0,0 +1,16 @@
+services:
+  bind9:
+    image: argus-bind9:latest
+    container_name: argus-bind9-test
+    ports:
+      - "53:53/tcp"
+      - "53:53/udp"
+    volumes:
+      - ./private:/private
+    restart: unless-stopped
+    networks:
+      - bind-test-network
+
+networks:
+  bind-test-network:
+    driver: bridge
--- a/src/bind/tests/scripts/00_e2e_test.sh
+++ b/src/bind/tests/scripts/00_e2e_test.sh
@ -0,0 +1,115 @@
+#!/bin/bash
+
+# End-to-end test for BIND9 DNS server
+# This script runs all tests in sequence to validate the complete functionality
+# Usage: ./00_e2e_test.sh
+
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+echo "=========================================="
+echo "BIND9 DNS Server End-to-End Test Suite"
+echo "=========================================="
+
+# Track test results
+total_tests=0
+passed_tests=0
+failed_tests=0
+
+#######################################
+# Run one test script and record the outcome.
+# Globals:
+#   SCRIPT_DIR (read); total_tests, passed_tests, failed_tests (written)
+# Arguments:
+#   $1 - step label shown in the output
+#   $2 - script file name inside SCRIPT_DIR
+#   $3 - short description of the step
+# Returns:
+#   0 if the script exists and exits 0, 1 otherwise
+#######################################
+run_test_step() {
+    local step_name="$1"
+    local script_name="$2"
+    local description="$3"
+    local script_path="$SCRIPT_DIR/$script_name"
+
+    echo ""
+    echo "[$step_name] $description"
+    printf '=%.0s' {1..50}
+    printf '\n'
+
+    # Plain assignments instead of "((var++))": the post-increment form returns
+    # status 1 when the old value is 0, which aborts the script under "set -e"
+    # unless the caller happens to append "|| true".
+    total_tests=$((total_tests + 1))
+
+    if [ ! -f "$script_path" ]; then
+        echo "✗ Test script not found: $script_name"
+        failed_tests=$((failed_tests + 1))
+        return 1
+    fi
+
+    # Make sure script is executable
+    chmod +x "$script_path"
+
+    # Run the test
+    echo "Executing: $script_path"
+    if "$script_path"; then
+        echo "✓ $step_name completed successfully"
+        passed_tests=$((passed_tests + 1))
+        return 0
+    else
+        echo "✗ $step_name failed"
+        failed_tests=$((failed_tests + 1))
+        return 1
+    fi
+}
+
+# Cleanup any previous test environment (but preserve the Docker image)
+echo ""
+echo "[SETUP] Cleaning up any previous test environment..."
+if [ -f "$SCRIPT_DIR/05_cleanup.sh" ]; then
+    chmod +x "$SCRIPT_DIR/05_cleanup.sh"
+    "$SCRIPT_DIR/05_cleanup.sh" || true
+fi
+
+echo ""
+echo "Starting BIND9 DNS server end-to-end test sequence..."
+
+# Test sequence; "|| true" keeps going after a failed step so that every step
+# is attempted and counted
+run_test_step "TEST-01" "01_start_container.sh" "Start BIND9 container" || true
+
+run_test_step "TEST-02" "02_dig_test.sh" "Initial DNS resolution test" || true
+
+run_test_step "TEST-03" "03_reload_test.sh" "Configuration reload with IP modification" || true
+
+run_test_step "TEST-03.5" "03.5_dns_sync_test.sh" "DNS auto-sync functionality test" || true
+
+run_test_step "TEST-04" "04_persistence_test.sh" "Configuration persistence after restart" || true
+
+# Final cleanup (but preserve logs for review); counted like any other step
+echo ""
+echo "[CLEANUP] Cleaning up test environment..."
+run_test_step "CLEANUP" "05_cleanup.sh" "Clean up containers and networks" || true
+
+# Test summary
+echo ""
+echo "=========================================="
+echo "TEST SUMMARY"
+echo "=========================================="
+echo "Total tests: $total_tests"
+echo "Passed: $passed_tests"
+echo "Failed: $failed_tests"
+
+if [ "$failed_tests" -eq 0 ]; then
+    echo ""
+    echo "✅ ALL TESTS PASSED!"
+    echo ""
+    echo "BIND9 DNS server functionality validated:"
+    echo "  ✓ Container startup and basic functionality"
+    echo "  ✓ DNS resolution for configured domains"
+    echo "  ✓ Configuration modification and reload"
+    echo "  ✓ DNS auto-sync from IP files"
+    echo "  ✓ Configuration persistence across restarts"
+    echo "  ✓ Cleanup and resource management"
+    echo ""
+    echo "The BIND9 DNS server is ready for production use."
+    exit 0
+else
+    echo ""
+    echo "❌ SOME TESTS FAILED!"
+    echo ""
+    echo "Please review the test output above to identify and fix issues."
+    echo "You may need to:"
+    echo "  - Check Docker installation and permissions"
+    echo "  - Verify network connectivity"
+    echo "  - Review BIND9 configuration files"
+    echo "  - Check system resources and port availability"
+    exit 1
+fi
--- a/src/bind/tests/scripts/01_start_container.sh
+++ b/src/bind/tests/scripts/01_start_container.sh
@ -0,0 +1,38 @@
+#!/bin/bash
+
+# Bring up the BIND9 test container from the compose file in the parent
+# directory of this script.
+# Usage: ./01_start_container.sh
+
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+TEST_DIR="$(dirname "$SCRIPT_DIR")"
+
+cd "$TEST_DIR"
+
+echo "Starting BIND9 test container..."
+
+# The bind-mounted ./private directory must exist and be world-writable
+mkdir -p private
+chmod 777 private
+
+# Launch in the background
+docker compose up -d
+
+echo "Waiting for container to be ready..."
+sleep 5
+
+# Treat any "Up" in the compose status listing as a running container
+if ! docker compose ps | grep -q "Up"; then
+    echo "✗ Failed to start container"
+    docker compose logs
+    exit 1
+fi
+
+echo "✓ Container started successfully"
+echo "Container status:"
+docker compose ps
+
+echo ""
+echo "BIND9 test environment is ready!"
+echo "DNS server listening on localhost:53"
--- a/src/bind/tests/scripts/02_dig_test.sh
+++ b/src/bind/tests/scripts/02_dig_test.sh
@ -0,0 +1,72 @@
+#!/bin/bash
+
+# Test DNS resolution using dig
+# Usage: ./02_dig_test.sh
+
+set -e
+
+# Run docker compose from the directory above this script, so the check below
+# does not depend on the caller's working directory.
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+TEST_DIR="$(dirname "$SCRIPT_DIR")"
+cd "$TEST_DIR"
+
+echo "Testing DNS resolution with dig..."
+
+#######################################
+# Query localhost for <hostname>.argus.com and compare the answer.
+# Arguments:
+#   $1 - host label (without the ".argus.com" suffix)
+#   $2 - expected IPv4 address
+#   $3 - description printed with the test
+# Returns:
+#   0 if dig succeeds and its short answer equals $2, 1 otherwise
+#######################################
+test_dns_query() {
+    local hostname="$1"
+    local expected_ip="$2"
+    local description="$3"
+    local result
+
+    echo ""
+    echo "Testing: $description"
+    echo "Query: $hostname.argus.com"
+    echo "Expected IP: $expected_ip"
+
+    # Check dig's exit status directly: on a timeout dig prints a message on
+    # stdout, which a "|| echo QUERY_FAILED" fallback would turn into a bogus
+    # multi-line result.
+    if ! result=$(dig @localhost "${hostname}.argus.com" A +short 2>/dev/null); then
+        echo "✗ DNS query failed"
+        return 1
+    fi
+
+    if [ "$result" = "$expected_ip" ]; then
+        echo "✓ DNS query successful: $result"
+        return 0
+    else
+        echo "✗ DNS query returned unexpected result: $result"
+        return 1
+    fi
+}
+
+# Check if dig is available
+if ! command -v dig &> /dev/null; then
+    echo "Installing dig (dnsutils)..."
+    apt-get update && apt-get install -y dnsutils
+fi
+
+# Check if container is running
+if ! docker compose ps | grep -q "Up"; then
+    echo "Error: BIND9 container is not running"
+    echo "Please start the container first with: ./01_start_container.sh"
+    exit 1
+fi
+
+echo "=== DNS Resolution Tests ==="
+
+# Test cases based on current configuration.
+# The counter uses plain assignment: "((failed_tests++))" returns status 1 when
+# the old value is 0, which under "set -e" would abort the script on the first
+# failure, before the summary is printed.
+failed_tests=0
+
+# Test ns1.argus.com -> 127.0.0.1
+if ! test_dns_query "ns1" "127.0.0.1" "Name server resolution"; then
+    failed_tests=$((failed_tests + 1))
+fi
+
+# Test web.argus.com -> 12.4.5.6
+if ! test_dns_query "web" "12.4.5.6" "Web server resolution"; then
+    failed_tests=$((failed_tests + 1))
+fi
+
+echo ""
+echo "=== Test Summary ==="
+if [ "$failed_tests" -eq 0 ]; then
+    echo "✓ All DNS tests passed!"
+    exit 0
+else
+    echo "✗ $failed_tests test(s) failed"
+    exit 1
+fi
--- a/src/bind/tests/scripts/03.5_dns_sync_test.sh
+++ b/src/bind/tests/scripts/03.5_dns_sync_test.sh
@ -0,0 +1,256 @@
+#!/bin/bash
+
+# Test DNS auto-sync functionality using argus_dns_sync.sh
+# This test validates the automatic DNS record updates from IP files
+# Usage: ./03.5_dns_sync_test.sh
+
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+TEST_DIR="$(dirname "$SCRIPT_DIR")"
+
+echo "=== DNS Auto-Sync Functionality Test ==="
+
+# Check if container is running
+if ! docker compose ps | grep -q "Up"; then
+    echo "Error: BIND9 container is not running"
+    echo "Please start the container first with: ./01_start_container.sh"
+    exit 1
+fi
+
+# Check if dig is available
+if ! command -v dig &> /dev/null; then
+    echo "Installing dig (dnsutils)..."
+    apt-get update && apt-get install -y dnsutils
+fi
+
+# Function to test DNS query
+test_dns_query() {
+    local hostname="$1"
+    local expected_ip="$2"
+    local description="$3"
+
+    echo "Testing: $description"
+    echo "Query: $hostname.argus.com -> Expected: $expected_ip"
+
+    # Wait a moment for DNS cache
+    sleep 2
+
+    result=$(dig @localhost $hostname.argus.com A +short 2>/dev/null || echo "QUERY_FAILED")
+
+    if [ "$result" = "$expected_ip" ]; then
+        echo "✓ $result"
+        return 0
+    else
+        echo "✗ Got: $result, Expected: $expected_ip"
+        return 1
+    fi
+}
+
+# Function to wait for sync to complete
+wait_for_sync() {
+    local timeout=15
+    local elapsed=0
+    echo "Waiting for DNS sync to complete (max ${timeout}s)..."
+
+    while [ $elapsed -lt $timeout ]; do
+        if docker compose exec bind9 test -f /var/lock/argus_dns_sync.lock; then
+            echo "Sync process is running..."
+        else
+            echo "Sync completed"
+            sleep 2  # Extra wait for DNS propagation
+            return 0
+        fi
+        sleep 2
+        elapsed=$((elapsed + 2))
+    done
+
+    echo "Warning: Sync may still be running after ${timeout}s"
+    return 0
+}
+
+echo ""
+echo "Step 1: Preparing test environment..."
+
+# Ensure required directories exist
+docker compose exec bind9 mkdir -p /private/argus/etc
+docker compose exec bind9 mkdir -p /private/argus/bind/.backup
+
+# Backup original configuration if it exists
+docker compose exec bind9 test -f /private/argus/bind/db.argus.com && \
+    docker compose exec bind9 cp /private/argus/bind/db.argus.com /private/argus/bind/db.argus.com.backup.test || true
+
+# Ensure initial configuration is available (may already be symlinked)
+docker compose exec bind9 test -f /private/argus/bind/db.argus.com || \
+    docker compose exec bind9 cp /etc/bind/db.argus.com /private/argus/bind/db.argus.com
+
+echo "✓ Test environment prepared"
+
+echo ""
+echo "Step 2: Testing initial DNS configuration..."
+
+# Get current IP for web.argus.com (may have been changed by previous tests)
+current_web_ip=$(dig @localhost web.argus.com A +short 2>/dev/null || echo "UNKNOWN")
+echo "Current web.argus.com IP: $current_web_ip"
+
+# Test that DNS is working (regardless of specific IP)
+if [ "$current_web_ip" = "UNKNOWN" ] || [ -z "$current_web_ip" ]; then
+    echo "DNS resolution not working for web.argus.com"
+    exit 1
+fi
+
+echo "✓ DNS resolution is working"
+
+echo ""
+echo "Step 3: Creating IP files for auto-sync..."
+
+# Create test IP files in the watch directory
+echo "Creating test1.argus.com with IP 10.0.0.100"
+docker compose exec bind9 bash -c 'echo "10.0.0.100" > /private/argus/etc/test1.argus.com'
+
+echo "Creating test2.argus.com with IP 10.0.0.200"
+docker compose exec bind9 bash -c 'echo "test2 service running on 10.0.0.200" > /private/argus/etc/test2.argus.com'
+
+echo "Creating api.argus.com with IP 192.168.1.50"
+docker compose exec bind9 bash -c 'echo "API server: 192.168.1.50 port 8080" > /private/argus/etc/api.argus.com'
+
+echo "✓ IP files created"
+
+echo ""
+echo "Step 4: Checking DNS sync process..."
+
+# Check if DNS sync process is already running (via supervisord)
+if docker compose exec bind9 pgrep -f argus_dns_sync.sh > /dev/null; then
+    echo "✓ DNS sync process already running (via supervisord)"
+else
+    echo "Starting DNS sync process manually..."
+    # Start the DNS sync process in background if not running
+    docker compose exec -d bind9 /usr/local/bin/argus_dns_sync.sh
+    echo "✓ DNS sync process started manually"
+fi
+
+# Wait for first sync cycle
+wait_for_sync
+
+echo ""
+echo "Step 5: Testing auto-synced DNS records..."
+
+failed_tests=0
+
+# Test new DNS records created by auto-sync
+if ! test_dns_query "test1" "10.0.0.100" "Auto-synced test1.argus.com"; then
+    ((failed_tests++))
+fi
+
+if ! test_dns_query "test2" "10.0.0.200" "Auto-synced test2.argus.com"; then
+    ((failed_tests++))
+fi
+
+if ! test_dns_query "api" "192.168.1.50" "Auto-synced api.argus.com"; then
+    ((failed_tests++))
+fi
+
+# Verify original records still work (use current IP from earlier)
+if ! test_dns_query "web" "$current_web_ip" "Original web.argus.com still working"; then
+    ((failed_tests++))
+fi
+
+if ! test_dns_query "ns1" "127.0.0.1" "Original ns1.argus.com still working"; then
+    ((failed_tests++))
+fi
+
+echo ""
+echo "Step 6: Testing IP update functionality..."
+
+# Update an existing IP file
+echo "Updating test1.argus.com IP from 10.0.0.100 to 10.0.0.150"
+docker compose exec bind9 bash -c 'echo "10.0.0.150" > /private/argus/etc/test1.argus.com'
+
+# Wait for sync
+wait_for_sync
+
+# Test updated record
+if ! test_dns_query "test1" "10.0.0.150" "Updated test1.argus.com IP"; then
+    ((failed_tests++))
+fi
+
+echo ""
+echo "Step 7: Testing invalid IP handling..."
+
+# Create file with invalid IP
+echo "Creating invalid.argus.com with invalid IP"
+docker compose exec bind9 bash -c 'echo "this is not an IP address" > /private/argus/etc/invalid.argus.com'
+
+# Wait for sync (should skip invalid IP)
+wait_for_sync
+
+# Verify invalid record was not added (should fail to resolve)
+result=$(dig @localhost invalid.argus.com A +short 2>/dev/null || echo "NO_RESULT")
+if [ "$result" = "NO_RESULT" ] || [ -z "$result" ]; then
+    echo "✓ Invalid IP correctly ignored"
+else
+    echo "✗ Invalid IP was processed: $result"
+    ((failed_tests++))
+fi
+
+echo ""
+echo "Step 8: Verifying backup functionality..."
+
+# Check if backups were created
+backup_count=$(docker compose exec bind9 ls -1 /private/argus/bind/.backup/ | wc -l || echo "0")
+if [ "$backup_count" -gt 0 ]; then
+    echo "✓ Configuration backups created ($backup_count files)"
+    # Show latest backup
+    docker compose exec bind9 ls -la /private/argus/bind/.backup/ | tail -1
+else
+    echo "✗ No backup files found"
+    ((failed_tests++))
+fi
+
+echo ""
+echo "Step 9: Cleanup..."
+
+# Note: We don't stop the DNS sync process since it's managed by supervisord
+echo "Note: DNS sync process will continue running (managed by supervisord)"
+
+# Clean up test files
+docker compose exec bind9 rm -f /private/argus/etc/test1.argus.com
+docker compose exec bind9 rm -f /private/argus/etc/test2.argus.com
+docker compose exec bind9 rm -f /private/argus/etc/api.argus.com
+docker compose exec bind9 rm -f /private/argus/etc/invalid.argus.com
+
+# Restore original configuration if backup exists
+docker compose exec bind9 test -f /private/argus/bind/db.argus.com.backup.test && \
+    docker compose exec bind9 cp /private/argus/bind/db.argus.com.backup.test /private/argus/bind/db.argus.com && \
+    docker compose exec bind9 rm /private/argus/bind/db.argus.com.backup.test || true
+
+# Reload original configuration
+docker compose exec bind9 /usr/local/bin/reload-bind9.sh
+
+echo "✓ Cleanup completed"
+
+echo ""
+echo "=== DNS Auto-Sync Test Summary ==="
+if [ $failed_tests -eq 0 ]; then
+    echo "✅ All DNS auto-sync tests passed!"
+    echo ""
+    echo "Validated functionality:"
+    echo "  ✓ Automatic DNS record creation from IP files"
+    echo "  ✓ IP address extraction from various file formats"
+    echo "  ✓ Dynamic DNS record updates"
+    echo "  ✓ Invalid IP address handling"
+    echo "  ✓ Configuration backup mechanism"
+    echo "  ✓ Preservation of existing DNS records"
+    echo ""
+    echo "The DNS auto-sync functionality is working correctly!"
+    exit 0
+else
+    echo "❌ $failed_tests DNS auto-sync test(s) failed!"
+    echo ""
+    echo "Please check:"
+    echo "  - argus_dns_sync.sh script configuration"
+    echo "  - File permissions in /private/argus/etc/"
+    echo "  - BIND9 reload functionality"
+    echo "  - Network connectivity and DNS resolution"
+    exit 1
+fi
--- a/src/bind/tests/scripts/03_reload_test.sh
+++ b/src/bind/tests/scripts/03_reload_test.sh
@ -0,0 +1,112 @@
+#!/bin/bash
+
+# Test DNS configuration reload with IP modification
+# Usage: ./03_reload_test.sh
+
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+TEST_DIR="$(dirname "$SCRIPT_DIR")"
+
+echo "=== DNS Configuration Reload Test ==="
+
+# Check if container is running
+if ! docker compose ps | grep -q "Up"; then
+    echo "Error: BIND9 container is not running"
+    echo "Please start the container first with: ./01_start_container.sh"
+    exit 1
+fi
+
+# Check if dig is available
+if ! command -v dig &> /dev/null; then
+    echo "Installing dig (dnsutils)..."
+    apt-get update && apt-get install -y dnsutils
+fi
+
+# Function to test DNS query
+test_dns_query() {
+    local hostname="$1"
+    local expected_ip="$2"
+    local description="$3"
+
+    echo "Testing: $description"
+    echo "Query: $hostname.argus.com -> Expected: $expected_ip"
+
+    result=$(dig @localhost $hostname.argus.com A +short 2>/dev/null || echo "QUERY_FAILED")
+
+    if [ "$result" = "$expected_ip" ]; then
+        echo "✓ $result"
+        return 0
+    else
+        echo "✗ Got: $result, Expected: $expected_ip"
+        return 1
+    fi
+}
+
+echo ""
+echo "Step 1: Testing initial DNS configuration..."
+
+# Test initial configuration
+if ! test_dns_query "web" "12.4.5.6" "Initial web.argus.com resolution"; then
+    echo "Initial DNS test failed"
+    exit 1
+fi
+
+echo ""
+echo "Step 2: Modifying DNS configuration..."
+
+# Backup original configuration
+cp "$TEST_DIR/private/argus/bind/db.argus.com" "$TEST_DIR/private/argus/bind/db.argus.com.backup" 2>/dev/null || true
+
+# Create new configuration with modified IP
+DB_FILE="$TEST_DIR/private/argus/bind/db.argus.com"
+
+# Check if persistent config exists, if not use from container
+if [ ! -f "$DB_FILE" ]; then
+    echo "Persistent config not found, copying from container..."
+    docker compose exec bind9 cp /etc/bind/db.argus.com /private/argus/bind/db.argus.com
+    docker compose exec bind9 chown bind:bind /private/argus/bind/db.argus.com
+fi
+
+# Modify the IP address (12.4.5.6 -> 192.168.1.100)
+sed -i 's/12\.4\.5\.6/192.168.1.100/g' "$DB_FILE"
+
+# Increment serial number for DNS cache invalidation
+current_serial=$(grep -o "2[[:space:]]*;" "$DB_FILE" | grep -o "2")
+new_serial=$((current_serial + 1))
+sed -i "s/2[[:space:]]*;/${new_serial}         ;/" "$DB_FILE"
+
+echo "Modified configuration:"
+echo "- Changed web.argus.com IP: 12.4.5.6 -> 192.168.1.100"
+echo "- Updated serial number: $current_serial -> $new_serial"
+
+echo ""
+echo "Step 3: Reloading BIND9 configuration..."
+
+# Reload BIND9 configuration
+docker compose exec bind9 /usr/local/bin/reload-bind9.sh
+
+echo "Configuration reloaded"
+
+# Wait a moment for changes to take effect
+sleep 3
+
+echo ""
+echo "Step 4: Testing modified DNS configuration..."
+
+# Test modified configuration
+if ! test_dns_query "web" "192.168.1.100" "Modified web.argus.com resolution"; then
+    echo "Modified DNS test failed"
+    exit 1
+fi
+
+# Also verify ns1 still works
+if ! test_dns_query "ns1" "127.0.0.1" "ns1.argus.com still working"; then
+    echo "ns1 DNS test failed after reload"
+    exit 1
+fi
+
+echo ""
+echo "✓ DNS configuration reload test completed successfully!"
+echo "✓ IP address changed from 12.4.5.6 to 192.168.1.100"
+echo "✓ Configuration persisted and reloaded correctly"
--- a/src/bind/tests/scripts/04_persistence_test.sh
+++ b/src/bind/tests/scripts/04_persistence_test.sh
@ -0,0 +1,115 @@
+#!/bin/bash
+
+# Test configuration persistence after container restart
+# Usage: ./04_persistence_test.sh
+
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+TEST_DIR="$(dirname "$SCRIPT_DIR")"
+
+echo "=== Configuration Persistence Test ==="
+
+# Check if dig is available
+if ! command -v dig &> /dev/null; then
+    echo "Installing dig (dnsutils)..."
+    apt-get update && apt-get install -y dnsutils
+fi
+
+# Function to test DNS query
+test_dns_query() {
+    local hostname="$1"
+    local expected_ip="$2"
+    local description="$3"
+
+    echo "Testing: $description"
+    echo "Query: $hostname.argus.com -> Expected: $expected_ip"
+
+    result=$(dig @localhost $hostname.argus.com A +short 2>/dev/null || echo "QUERY_FAILED")
+
+    if [ "$result" = "$expected_ip" ]; then
+        echo "✓ $result"
+        return 0
+    else
+        echo "✗ Got: $result, Expected: $expected_ip"
+        return 1
+    fi
+}
+
+echo ""
+echo "Step 1: Stopping current container..."
+
+# Stop the container
+docker compose down
+
+echo "Container stopped"
+
+echo ""
+echo "Step 2: Verifying persistent configuration exists..."
+
+# Check if modified configuration exists
+DB_FILE="$TEST_DIR/private/argus/bind/db.argus.com"
+
+if [ ! -f "$DB_FILE" ]; then
+    echo "✗ Persistent configuration file not found: $DB_FILE"
+    exit 1
+fi
+
+# Check if the modified IP is in the configuration
+if grep -q "192.168.1.100" "$DB_FILE"; then
+    echo "✓ Modified IP (192.168.1.100) found in persistent configuration"
+else
+    echo "✗ Modified IP not found in persistent configuration"
+    echo "Configuration content:"
+    cat "$DB_FILE"
+    exit 1
+fi
+
+echo ""
+echo "Step 3: Restarting container with persistent configuration..."
+
+# Start the container again
+docker compose up -d
+
+echo "Waiting for container to be ready..."
+sleep 5
+
+# Check if container is running
+if ! docker compose ps | grep -q "Up"; then
+    echo "✗ Failed to restart container"
+    docker compose logs
+    exit 1
+fi
+
+echo "✓ Container restarted successfully"
+
+echo ""
+echo "Step 4: Testing DNS resolution after restart..."
+
+# Wait a bit more for DNS to be fully ready
+sleep 5
+
+# Test that the modified configuration is still active
+if ! test_dns_query "web" "192.168.1.100" "Persistent web.argus.com resolution"; then
+    echo "✗ Persistent configuration test failed"
+    exit 1
+fi
+
+# Also verify ns1 still works
+if ! test_dns_query "ns1" "127.0.0.1" "ns1.argus.com still working"; then
+    echo "✗ ns1 DNS test failed after restart"
+    exit 1
+fi
+
+echo ""
+echo "Step 5: Verifying configuration files are linked correctly..."
+
+# Check that the persistent files are properly linked
+echo "Checking file links in container:"
+docker compose exec bind9 ls -la /etc/bind/named.conf.local /etc/bind/db.argus.com
+
+echo ""
+echo "✓ Configuration persistence test completed successfully!"
+echo "✓ Modified IP (192.168.1.100) persisted after container restart"
+echo "✓ Configuration files properly linked to persistent storage"
+echo "✓ DNS resolution working correctly with persisted configuration"
--- a/src/bind/tests/scripts/05_cleanup.sh
+++ b/src/bind/tests/scripts/05_cleanup.sh
@ -0,0 +1,87 @@
+#!/bin/bash
+
+# Clean up test environment and containers
+# Usage: ./05_cleanup.sh [--full]
+
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+TEST_DIR="$(dirname "$SCRIPT_DIR")"
+
+# Parse command line arguments
+FULL_CLEANUP=true
+while [[ $# -gt 0 ]]; do
+    case $1 in
+        --full)
+            FULL_CLEANUP=true
+            shift
+            ;;
+        *)
+            echo "Unknown option: $1"
+            echo "Usage: $0 [--full]"
+            echo "  --full: Also remove persistent data "
+            exit 1
+            ;;
+    esac
+done
+
+cd "$TEST_DIR"
+
+echo "=== Cleaning up BIND9 test environment ==="
+
+echo ""
+echo "Step 1: Stopping and removing containers..."
+
+# Stop and remove containers
+docker compose down -v
+
+echo "✓ Containers stopped and removed"
+
+echo ""
+echo "Step 2: Removing Docker networks..."
+
+# Clean up networks
+docker network prune -f > /dev/null 2>&1 || true
+
+echo "✓ Docker networks cleaned"
+
+if [ "$FULL_CLEANUP" = true ]; then
+    echo ""
+    echo "Step 3: Removing persistent data..."
+
+    # Remove persistent data directory
+    if [ -d "private" ]; then
+        rm -rf private
+        echo "✓ Persistent data directory removed"
+    else
+        echo "✓ No persistent data directory found"
+    fi
+
+else
+    echo ""
+    echo "Step 3: Preserving persistent data and Docker image..."
+    echo "✓ Persistent data preserved in: private/"
+    echo "✓ Docker image 'argus-bind9:latest' preserved"
+    echo ""
+    echo "To perform full cleanup including persistent data and image, run:"
+    echo "  $0 --full"
+fi
+
+echo ""
+echo "=== Cleanup Summary ==="
+echo "✓ Containers stopped and removed"
+echo "✓ Docker networks cleaned"
+
+if [ "$FULL_CLEANUP" = true ]; then
+    echo "✓ Persistent data removed"
+    echo ""
+    echo "Full cleanup completed! Test environment completely removed."
+else
+    echo "✓ Persistent data preserved"
+    echo "✓ Docker image preserved"
+    echo ""
+    echo "Basic cleanup completed! Run './01_start_container.sh' to restart testing."
+fi
+
+echo ""
+echo "Test environment cleanup finished."
--- a/src/log/.gitignore
+++ b/src/log/.gitignore
@ -0,0 +1,5 @@
+
+private/
+
+
+images/
--- a/src/log/README.md
+++ b/src/log/README.md
@ -0,0 +1,8 @@
+
+测试log模块开发
+
+elasticsearch: 部署镜像构建及启动脚本（解决账号问题、挂载目录、使用supervisor守护）
+kibana: 镜像构建
+fluent-bit: 安装包，脚本准备， 交付给大鹏统一组织客户端侧安装流程
+init: EK初始化脚本：数据视图创建脚本等
+
--- a/src/log/elasticsearch/build/Dockerfile
+++ b/src/log/elasticsearch/build/Dockerfile
@ -0,0 +1,56 @@
+FROM docker.elastic.co/elasticsearch/elasticsearch:8.13.4
+
+# 切换到 root 用户进行系统级安装
+USER root
+
+# 修改elasticsearch用户的UID和GID
+RUN usermod -u 2133 elasticsearch && \
+    groupmod -g 2015 elasticsearch && \
+    chown -R elasticsearch:elasticsearch /usr/share/elasticsearch
+
+# Build argument: set to "true" to build against the intranet apt mirror
+ARG USE_INTRANET=false
+
+# Configure the intranet apt source (only when the intranet option is given).
+# The original sources.list is saved as sources.list.bak first.
+# NOTE(review): the mirror entry is marked [trusted=yes] and HTTPS peer/host
+# verification is switched off for apt, and that apt.conf.d file stays in the
+# final image — confirm this is acceptable for deployment.
+# NOTE(review): the mirror path serves "jammy" (Ubuntu 22.04) packages —
+# confirm this matches the release of the base image.
+RUN if [ "$USE_INTRANET" = "true" ]; then \
+        echo "Configuring intranet apt sources..." && \
+        cp /etc/apt/sources.list /etc/apt/sources.list.bak && \
+        echo "deb [trusted=yes] http://10.68.64.1/ubuntu2204/ jammy main" > /etc/apt/sources.list && \
+        echo 'Acquire::https::Verify-Peer "false";' > /etc/apt/apt.conf.d/99disable-ssl-check && \
+        echo 'Acquire::https::Verify-Host "false";' >> /etc/apt/apt.conf.d/99disable-ssl-check; \
+    fi
+
+# Install supervisor, net-tools, ping and vim, then drop the apt caches
+RUN apt-get update && \
+    apt-get install -y supervisor net-tools inetutils-ping vim && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# Configure the apt source used at deployment time (replaces the build-time
+# mirror written above)
+RUN if [ "$USE_INTRANET" = "true" ]; then \
+	echo "deb [trusted=yes] https://10.92.132.52/mirrors/ubuntu2204/ jammy main" > /etc/apt/sources.list; \
+    fi
+
+# 创建 supervisor 日志目录
+RUN mkdir -p /var/log/supervisor
+
+
+# 复制 supervisor 配置文件
+COPY src/log/elasticsearch/build/supervisord.conf /etc/supervisor/conf.d/supervisord.conf
+
+# 复制启动脚本
+COPY src/log/elasticsearch/build/start-es-supervised.sh /usr/local/bin/start-es-supervised.sh
+RUN chmod +x /usr/local/bin/start-es-supervised.sh
+
+# 复制DNS监控脚本
+COPY src/log/elasticsearch/build/dns-monitor.sh /usr/local/bin/dns-monitor.sh
+RUN chmod +x /usr/local/bin/dns-monitor.sh
+
+# 保持 root 用户，由 supervisor 管理用户切换
+USER root
+
+# 暴露端口
+EXPOSE 9200 9300
+
+# 使用 supervisor 作为入口点
+CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]
--- a/src/log/elasticsearch/build/dns-monitor.sh
+++ b/src/log/elasticsearch/build/dns-monitor.sh
@ -0,0 +1 @@
+../../../bind/build/dns-monitor.sh
--- a/src/log/elasticsearch/build/start-es-supervised.sh
+++ b/src/log/elasticsearch/build/start-es-supervised.sh
@ -0,0 +1,32 @@
+#!/bin/bash
+set -euo pipefail
+
+echo "[INFO] Starting Elasticsearch under supervisor..."
+
+# 创建数据目录并设置权限（如果不存在）
+mkdir -p /private/argus/log/elasticsearch
+
+# 创建软链接到Elasticsearch预期的数据目录
+if [ -L /usr/share/elasticsearch/data ]; then
+    rm /usr/share/elasticsearch/data
+elif [ -d /usr/share/elasticsearch/data ]; then
+    rm -rf /usr/share/elasticsearch/data
+fi
+
+ln -sf /private/argus/log/elasticsearch /usr/share/elasticsearch/data
+
+# 记录容器ip地址
+DOMAIN=es.log.argus.com
+IP=`ifconfig | grep -A 1 eth0 | grep inet | awk '{print $2}'`
+echo current IP: ${IP}
+echo ${IP} > /private/argus/etc/${DOMAIN}
+
+echo "[INFO] Data directory linked: /usr/share/elasticsearch/data -> /private/argus/log/elasticsearch"
+
+# 设置环境变量（ES配置通过docker-compose传递）
+export ES_JAVA_OPTS="${ES_JAVA_OPTS:-"-Xms512m -Xmx512m"}"
+
+echo "[INFO] Starting Elasticsearch process..."
+
+# 启动原始的Elasticsearch entrypoint
+exec /usr/local/bin/docker-entrypoint.sh elasticsearch
--- a/src/log/elasticsearch/build/supervisord.conf
+++ b/src/log/elasticsearch/build/supervisord.conf
@ -0,0 +1,39 @@
+[supervisord]
+nodaemon=true
+logfile=/var/log/supervisor/supervisord.log
+pidfile=/var/run/supervisord.pid
+user=root
+
+[program:elasticsearch]
+command=/usr/local/bin/start-es-supervised.sh
+user=elasticsearch
+stdout_logfile=/var/log/supervisor/elasticsearch.log
+stderr_logfile=/var/log/supervisor/elasticsearch_error.log
+autorestart=true
+startretries=3
+startsecs=30
+stopwaitsecs=30
+killasgroup=true
+stopasgroup=true
+
+[program:dns-monitor]
+command=/usr/local/bin/dns-monitor.sh
+user=root
+stdout_logfile=/var/log/supervisor/dns-monitor.log
+stderr_logfile=/var/log/supervisor/dns-monitor_error.log
+autorestart=true
+startretries=3
+startsecs=5
+stopwaitsecs=10
+killasgroup=true
+stopasgroup=true
+
+[unix_http_server]
+file=/var/run/supervisor.sock
+chmod=0700
+
+[supervisorctl]
+serverurl=unix:///var/run/supervisor.sock
+
+[rpcinterface:supervisor]
+supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface
--- a/src/log/fluent-bit/build/etc/fluent-bit.conf
+++ b/src/log/fluent-bit/build/etc/fluent-bit.conf
@ -0,0 +1,37 @@
+[SERVICE]
+    Daemon       Off
+    Parsers_File parsers.conf
+    HTTP_Server  On
+    HTTP_Listen  0.0.0.0
+    HTTP_Port    2020
+    storage.path /buffers
+    storage.sync normal
+    storage.checksum on
+    storage.backlog.mem_limit 128M
+    # Note: Hot Reload is not enabled by default in this image; restart the container after changing the configuration.
+
+@INCLUDE inputs.d/*.conf
+
+[FILTER]
+    Name   parser
+    Match  app.*
+    Key_Name log
+    Parser timestamp_parser
+    Reserve_Data On
+    Preserve_Key On
+    Unescape_Key On
+
+[FILTER]
+    Name   record_modifier
+    Match  *
+    Record cluster  ${CLUSTER}
+    Record rack     ${RACK}
+    Record host     ${HOSTNAME}
+
+[FILTER]
+    Name   lua
+    Match  app.*
+    script inject_labels.lua
+    call   add_labels
+
+@INCLUDE outputs.d/*.conf
--- a/src/log/fluent-bit/build/etc/inject_labels.lua
+++ b/src/log/fluent-bit/build/etc/inject_labels.lua
@ -0,0 +1,15 @@
+-- Lua filter callback: fills in job/user/model/gpu labels and a "role" field.
+-- For each label the environment variable wins, then the value already on the
+-- record, then a fixed fallback. The role is derived from record["log_path"].
+-- Returns 1, the unchanged timestamp, and the updated record.
+function add_labels(tag, ts, record)
+  -- { record key, environment variable, fallback value }
+  local label_specs = {
+    { "job_id", "FB_JOB_ID", "unknown" },
+    { "user",   "FB_USER",   "unknown" },
+    { "model",  "FB_MODEL",  "unknown" },
+    { "gpu_id", "FB_GPU_ID", "na" },
+  }
+  for _, spec in ipairs(label_specs) do
+    local key, env_name, fallback = spec[1], spec[2], spec[3]
+    record[key] = os.getenv(env_name) or record[key] or fallback
+  end
+
+  -- The source path decides the role; anything else keeps an existing role
+  -- or falls back to "app".
+  local path = record["log_path"] or ""
+  local role
+  if string.find(path, "/logs/infer/") then
+    role = "infer"
+  elseif string.find(path, "/logs/train/") then
+    role = "train"
+  else
+    role = record["role"] or "app"
+  end
+  record["role"] = role
+
+  return 1, ts, record
+end
--- a/src/log/fluent-bit/build/etc/inputs.d/10-train.conf
+++ b/src/log/fluent-bit/build/etc/inputs.d/10-train.conf
@ -0,0 +1,10 @@
+[INPUT]
+    Name              tail
+    Path              /logs/train/*.log
+    Tag               app.train
+    Path_Key          log_path
+    Refresh_Interval  5
+    DB                /buffers/train.db
+    Skip_Long_Lines   On
+    storage.type      filesystem
+    multiline.parser  python,go,java
--- a/src/log/fluent-bit/build/etc/inputs.d/20-infer.conf
+++ b/src/log/fluent-bit/build/etc/inputs.d/20-infer.conf
@ -0,0 +1,10 @@
+[INPUT]
+    Name              tail
+    Path              /logs/infer/*.log
+    Tag               app.infer
+    Path_Key          log_path
+    Refresh_Interval  5
+    DB                /buffers/infer.db
+    Skip_Long_Lines   On
+    storage.type      filesystem
+    multiline.parser  python,go,java
--- a/src/log/fluent-bit/build/etc/outputs.d/10-es.conf
+++ b/src/log/fluent-bit/build/etc/outputs.d/10-es.conf
@ -0,0 +1,24 @@
+# 重要：使用 Logstash_Format + Logstash_Prefix，生成 train-*/infer-* 索引
+[OUTPUT]
+    Name                es
+    Match               app.train
+    Host                ${ES_HOST}
+    Port                ${ES_PORT}
+    Logstash_Format     On
+    Logstash_Prefix     train
+    Replace_Dots        On
+    Generate_ID         On
+    Retry_Limit         False
+    Suppress_Type_Name  On
+
+[OUTPUT]
+    Name                es
+    Match               app.infer
+    Host                ${ES_HOST}
+    Port                ${ES_PORT}
+    Logstash_Format     On
+    Logstash_Prefix     infer
+    Replace_Dots        On
+    Generate_ID         On
+    Retry_Limit         False
+    Suppress_Type_Name  On
--- a/src/log/fluent-bit/build/etc/parsers.conf
+++ b/src/log/fluent-bit/build/etc/parsers.conf
@ -0,0 +1,27 @@
+[MULTILINE_PARSER]
+    Name   python
+    Type   regex
+    Flush  2
+    Rule   "start_state"  "/^\d{4}-\d{2}-\d{2}[\sT]/"  "cont"
+    Rule   "cont"         "/^\s+|^Traceback|^\tat\s+/" "cont"
+
+[MULTILINE_PARSER]
+    Name   go
+    Type   regex
+    Flush  2
+    Rule   "start_state"  "/^[0-9]{4}\/[0-9]{2}\/[0-9]{2}/" "cont"
+    Rule   "cont"         "/^\s+|^\t/" "cont"
+
+# Multiline parser for Java-style logs: a line starting with
+# "YYYY-MM-DD HH:MM:SS" opens a record; continuation lines are appended.
+# NOTE(review): in the "cont" rule, "^\t..." is a tab followed by ANY three
+# characters because the dots are unescaped. If the intent was to match the
+# literal "... N more" lines of a stack trace it would need "\.\.\." — confirm.
+[MULTILINE_PARSER]
+    Name   java
+    Type   regex
+    Flush  2
+    Rule   "start_state"  "/^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}/" "cont"
+    Rule   "cont"         "/^\s+at\s+|^\t.../" "cont"
+
+[PARSER]
+    Name   timestamp_parser
+    Format regex
+    Regex  ^(?<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})\s+(?<level>\w+)\s+(?<message>.*)$
+    Time_Key    timestamp
+    Time_Format %Y-%m-%d %H:%M:%S
--- a/src/log/fluent-bit/build/packages/fluent-bit_3.1.9_amd64.deb
+++ b/src/log/fluent-bit/build/packages/fluent-bit_3.1.9_amd64.deb
--- a/src/log/fluent-bit/build/start-fluent-bit.sh
+++ b/src/log/fluent-bit/build/start-fluent-bit.sh
@ -0,0 +1,47 @@
+#!/bin/bash
+set -euo pipefail
+
+echo "[INFO] Starting Fluent Bit setup in Ubuntu container..."
+
+# 安装必要的工具
+echo "[INFO] Installing required packages..."
+export DEBIAN_FRONTEND=noninteractive
+apt-get update -qq
+apt-get install -y -qq curl
+
+# 解压bundle到/tmp
+echo "[INFO] Extracting fluent-bit bundle..."
+cp -r /private/etc /tmp
+cp -r /private/packages /tmp
+cd /tmp
+
+# 安装 Fluent Bit 从 deb 包
+echo "[INFO] Installing Fluent Bit from deb package..."
+dpkg -i /tmp/packages/fluent-bit_3.1.9_amd64.deb || true
+apt-get install -f -y -qq  # 解决依赖问题
+
+# 验证 Fluent Bit 可以运行
+echo "[INFO] Fluent Bit version:"
+/opt/fluent-bit/bin/fluent-bit --version
+
+# 创建配置目录
+mkdir -p /etc/fluent-bit
+cp -r /tmp/etc/* /etc/fluent-bit/
+
+# 创建日志和缓冲区目录
+mkdir -p /logs/train /logs/infer /buffers
+chmod 755 /logs/train /logs/infer /buffers
+
+# 等待 Elasticsearch 就绪
+echo "[INFO] Waiting for Elasticsearch to be ready..."
+while ! curl -fs http://${ES_HOST}:${ES_PORT}/_cluster/health >/dev/null 2>&1; do
+    echo "  Waiting for ES at ${ES_HOST}:${ES_PORT}..."
+    sleep 5
+done
+echo "[INFO] Elasticsearch is ready"
+
+# 启动 Fluent Bit
+echo "[INFO] Starting Fluent Bit with configuration from /etc/fluent-bit/"
+echo "[INFO] Command: /opt/fluent-bit/bin/fluent-bit --config=/etc/fluent-bit/fluent-bit.conf"
+exec /opt/fluent-bit/bin/fluent-bit \
+    --config=/etc/fluent-bit/fluent-bit.conf
--- a/src/log/kibana/build/Dockerfile
+++ b/src/log/kibana/build/Dockerfile
@ -0,0 +1,60 @@
+FROM docker.elastic.co/kibana/kibana:8.13.4
+
+# 切换到 root 用户进行系统级安装
+USER root
+
+# 修改kibana用户的UID和GID
+RUN usermod -u 2133 kibana && \
+    groupmod -g 2015 kibana && \
+    chown -R kibana:kibana /usr/share/kibana
+
+# 设置构建参数
+ARG USE_INTRANET=false
+
+# 配置内网 apt 源 (如果指定了内网选项)
+RUN if [ "$USE_INTRANET" = "true" ]; then \
+        echo "Configuring intranet apt sources..." && \
+        cp /etc/apt/sources.list /etc/apt/sources.list.bak && \
+        echo "deb [trusted=yes]  http://10.68.64.1/ubuntu2204/ jammy main" > /etc/apt/sources.list && \
+        echo 'Acquire::https::Verify-Peer "false";' > /etc/apt/apt.conf.d/99disable-ssl-check && \
+        echo 'Acquire::https::Verify-Host "false";' >> /etc/apt/apt.conf.d/99disable-ssl-check; \
+    fi
+
+# 安装 supervisor, net-tools, vim
+RUN apt-get update && \
+    apt-get install -y supervisor net-tools inetutils-ping vim && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# 配置部署时使用的apt源
+RUN if [ "$USE_INTRANET" = "true" ]; then \
+        echo "deb [trusted=yes] https://10.92.132.52/mirrors/ubuntu2204/ jammy main" > /etc/apt/sources.list; \
+    fi
+
+# 创建 supervisor 日志目录
+RUN mkdir -p /var/log/supervisor
+
+
+# 复制 supervisor 配置文件
+COPY src/log/kibana/build/supervisord.conf /etc/supervisor/conf.d/supervisord.conf
+
+# 复制启动脚本
+COPY src/log/kibana/build/start-kibana-supervised.sh /usr/local/bin/start-kibana-supervised.sh
+COPY src/log/kibana/build/kibana-post-start.sh /usr/local/bin/kibana-post-start.sh
+RUN chmod +x /usr/local/bin/start-kibana-supervised.sh /usr/local/bin/kibana-post-start.sh
+
+# 复制DNS监控脚本
+COPY src/log/kibana/build/dns-monitor.sh /usr/local/bin/dns-monitor.sh
+RUN chmod +x /usr/local/bin/dns-monitor.sh
+
+# Kibana needs to use the /root/.config/puppeteer path
+# NOTE(review): mode 777 makes /root writable by every user in the image —
+# confirm that a narrower grant is not sufficient.
+RUN chmod 777 /root
+
+# 保持 root 用户，由 supervisor 管理用户切换
+USER root
+
+# 暴露端口
+EXPOSE 5601
+
+# 使用 supervisor 作为入口点
+CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]
--- a/src/log/kibana/build/dns-monitor.sh
+++ b/src/log/kibana/build/dns-monitor.sh
@ -0,0 +1 @@
+../../../bind/build/dns-monitor.sh
--- a/src/log/kibana/build/kibana-post-start.sh
+++ b/src/log/kibana/build/kibana-post-start.sh
@ -0,0 +1,146 @@
+#!/bin/bash
+set -euo pipefail
+
+ES_HOST="${ELASTICSEARCH_HOSTS:-http://es:9200}"
+KB_HOST="http://localhost:5601"
+
+echo "[INFO] Starting Kibana post-start configuration..."
+
+# 等待 Elasticsearch 可用
+wait_for_elasticsearch() {
+    echo "[INFO] Waiting for Elasticsearch..."
+    local max_attempts=60
+    local attempt=1
+
+    while [ $attempt -le $max_attempts ]; do
+        if curl -fs "$ES_HOST/_cluster/health" >/dev/null 2>&1; then
+            echo "[OK] Elasticsearch is available"
+            return 0
+        fi
+        echo "    Waiting for ES... ($attempt/$max_attempts)"
+        sleep 5
+        ((attempt++))
+    done
+
+    echo "[ERROR] Elasticsearch timeout"
+    return 1
+}
+
+# 等待 Kibana 可用
+wait_for_kibana() {
+    echo "[INFO] Waiting for Kibana..."
+    local max_attempts=120
+    local attempt=1
+
+    while [ $attempt -le $max_attempts ]; do
+        if curl -fs "$KB_HOST/api/status" >/dev/null 2>&1; then
+            local status=$(curl -s "$KB_HOST/api/status" | grep -o '"level":"available"' || echo "")
+            if [ -n "$status" ]; then
+                echo "[OK] Kibana is available"
+                return 0
+            fi
+            echo "    Waiting for Kibana... ($attempt/$max_attempts, status: $status)"
+        else
+            echo "    Waiting for Kibana... ($attempt/$max_attempts, connection failed)"
+        fi
+        sleep 5
+        ((attempt++))
+    done
+
+    echo "[ERROR] Kibana timeout"
+    return 1
+}
+
+# 幂等设置索引副本数为0
+fix_replicas_idempotent() {
+    echo "[INFO] Checking and fixing index replicas..."
+
+    # 获取所有 train-* 和 infer-* 索引
+    local indices=$(curl -s "$ES_HOST/_cat/indices/train-*,infer-*?h=index" 2>/dev/null || echo "")
+
+    if [ -z "$indices" ]; then
+        echo "[INFO] No train-*/infer-* indices found, skipping replica adjustment"
+        return 0
+    fi
+
+    for idx in $indices; do
+        # 检查当前副本数
+        local current_replicas=$(curl -s "$ES_HOST/$idx/_settings" | grep -o '"number_of_replicas":"[^"]*"' | cut -d'"' -f4 || echo "")
+
+        if [ "$current_replicas" != "0" ]; then
+            echo "[INFO] Setting replicas to 0 for index: $idx (current: $current_replicas)"
+            curl -fsS -X PUT "$ES_HOST/$idx/_settings" \
+                -H 'Content-Type: application/json' \
+                -d '{"index":{"number_of_replicas":0}}' >/dev/null || {
+                echo "[WARN] Failed to set replicas for $idx"
+                continue
+            }
+            echo "[OK] Updated replicas for $idx"
+        else
+            echo "[INFO] Index $idx already has 0 replicas, skipping"
+        fi
+    done
+}
+
+# 幂等创建数据视图
+create_data_views_idempotent() {
+    echo "[INFO] Checking and creating data views..."
+
+    # 检查是否存在匹配的索引
+    local train_indices=$(curl -s "$ES_HOST/_cat/indices/train-*?h=index" 2>/dev/null | wc -l || echo "0")
+    local infer_indices=$(curl -s "$ES_HOST/_cat/indices/infer-*?h=index" 2>/dev/null | wc -l || echo "0")
+
+    # 创建 train 数据视图
+    if [ "$train_indices" -gt 0 ]; then
+        # 检查数据视图是否已存在
+        local train_exists=$(curl -s "$KB_HOST/api/data_views" -H 'kbn-xsrf: true' 2>/dev/null | grep '"title":"train-\*"' | wc -l )
+
+        if [ "$train_exists" -eq 0 ]; then
+            echo "[INFO] Creating data view for train-* indices"
+            curl -fsS -X POST "$KB_HOST/api/data_views/data_view" \
+                -H 'kbn-xsrf: true' \
+                -H 'Content-Type: application/json' \
+                -d '{"data_view":{"name":"train","title":"train-*","timeFieldName":"@timestamp"}}' \
+                >/dev/null && echo "[OK] Created train data view" || echo "[WARN] Failed to create train data view"
+        else
+            echo "[INFO] Train data view already exists, skipping"
+        fi
+    else
+        echo "[INFO] No train-* indices found, skipping train data view creation"
+    fi
+
+    # 创建 infer 数据视图
+    if [ "$infer_indices" -gt 0 ]; then
+        # 检查数据视图是否已存在
+        local infer_exists=$(curl -s "$KB_HOST/api/data_views" -H 'kbn-xsrf: true' 2>/dev/null | grep '"title":"infer-\*"' | wc -l )
+
+        if [ "$infer_exists" -eq 0 ]; then
+            echo "[INFO] Creating data view for infer-* indices"
+            curl -fsS -X POST "$KB_HOST/api/data_views/data_view" \
+                -H 'kbn-xsrf: true' \
+                -H 'Content-Type: application/json' \
+                -d '{"data_view":{"name":"infer","title":"infer-*","timeFieldName":"@timestamp"}}' \
+                >/dev/null && echo "[OK] Created infer data view" || echo "[WARN] Failed to create infer data view"
+        else
+            echo "[INFO] Infer data view already exists, skipping"
+        fi
+    else
+        echo "[INFO] No infer-* indices found, skipping infer data view creation"
+    fi
+}
+
+# 主逻辑
+main() {
+    # 等待服务可用
+    wait_for_elasticsearch || exit 1
+    wait_for_kibana || exit 1
+
+    # 执行幂等配置
+    fix_replicas_idempotent
+    create_data_views_idempotent
+
+    echo "[INFO] Kibana post-start configuration completed"
+}
+
+# 运行主逻辑
+main
--- a/src/log/kibana/build/start-kibana-supervised.sh
+++ b/src/log/kibana/build/start-kibana-supervised.sh
@ -0,0 +1,37 @@
+#!/bin/bash
+set -euo pipefail
+
+echo "[INFO] Starting Kibana under supervisor..."
+
+mkdir -p /private/argus/log/kibana
+
+# 创建软链接到Kibana预期的数据目录
+if [ -L /usr/share/kibana/data ]; then
+    rm /usr/share/kibana/data
+elif [ -d /usr/share/kibana/data ]; then
+    rm -rf /usr/share/kibana/data
+fi
+
+ln -sf /private/argus/log/kibana /usr/share/kibana/data
+
+echo "[INFO] Data directory linked: /usr/share/kibana/data -> /private/argus/log/kibana"
+
+# 记录容器ip地址
+DOMAIN=kibana.log.argus.com
+IP=`ifconfig | grep -A 1 eth0 | grep inet | awk '{print $2}'`
+echo current IP: ${IP}
+echo ${IP} > /private/argus/etc/${DOMAIN}
+
+# 设置环境变量
+export ELASTICSEARCH_HOSTS="${ELASTICSEARCH_HOSTS:-"http://es:9200"}"
+
+echo "[INFO] Connecting to Elasticsearch at: $ELASTICSEARCH_HOSTS"
+
+# 启动后台配置任务
+echo "[INFO] Starting background post-start configuration..."
+/usr/local/bin/kibana-post-start.sh &
+
+echo "[INFO] Starting Kibana process..."
+
+# 启动原始的Kibana entrypoint
+exec /usr/local/bin/kibana-docker
--- a/src/log/kibana/build/supervisord.conf
+++ b/src/log/kibana/build/supervisord.conf
@ -0,0 +1,39 @@
+; Supervisor configuration for the Kibana image: one supervisord instance
+; managing the Kibana process and a DNS monitor script.
+[supervisord]
+; Stay in the foreground instead of daemonizing.
+nodaemon=true
+logfile=/var/log/supervisor/supervisord.log
+pidfile=/var/run/supervisord.pid
+user=root
+
+; Kibana itself, started through the wrapper script as the kibana user.
+[program:kibana]
+command=/usr/local/bin/start-kibana-supervised.sh
+user=kibana
+stdout_logfile=/var/log/supervisor/kibana.log
+stderr_logfile=/var/log/supervisor/kibana_error.log
+autorestart=true
+startretries=3
+; The process must stay up this many seconds to count as successfully started.
+startsecs=30
+; Grace period after the stop signal before supervisord sends SIGKILL.
+stopwaitsecs=30
+; Signal the whole process group so children of the wrapper are stopped too.
+killasgroup=true
+stopasgroup=true
+
+; DNS monitor script, run as root.
+; NOTE(review): presumably root is needed to rewrite /etc/resolv.conf - confirm against dns-monitor.sh.
+[program:dns-monitor]
+command=/usr/local/bin/dns-monitor.sh
+user=root
+stdout_logfile=/var/log/supervisor/dns-monitor.log
+stderr_logfile=/var/log/supervisor/dns-monitor_error.log
+autorestart=true
+startretries=3
+startsecs=5
+stopwaitsecs=10
+killasgroup=true
+stopasgroup=true
+
+; Control socket used by supervisorctl (owner-only access).
+[unix_http_server]
+file=/var/run/supervisor.sock
+chmod=0700
+
+[supervisorctl]
+serverurl=unix:///var/run/supervisor.sock
+
+; RPC interface required for supervisorctl to talk to supervisord.
+[rpcinterface:supervisor]
+supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface
--- a/src/log/tests/docker-compose.yml
+++ b/src/log/tests/docker-compose.yml
@ -0,0 +1,85 @@
+# Compose stack for the log-module tests: Elasticsearch, Kibana, two Fluent Bit
+# "hosts" and a bind9 DNS server. ES, Kibana and bind9 share ./private/argus.
+version: "3.8"
+services:
+  # Elasticsearch built from ../elasticsearch/build; single node, security off.
+  es:
+    build:
+      context: ../elasticsearch/build
+      dockerfile: Dockerfile
+    image: argus-elasticsearch:latest
+    environment:
+      - discovery.type=single-node
+      - xpack.security.enabled=false
+      - ES_JAVA_OPTS=-Xms512m -Xmx512m
+    volumes:
+      - ./private/argus/:/private/argus/
+    ports: ["9200:9200"]
+    # Other services wait on this check via depends_on/service_healthy.
+    healthcheck:
+      test: ["CMD-SHELL", "curl -fs http://localhost:9200 >/dev/null || exit 1"]
+      interval: 10s
+      timeout: 5s
+      retries: 30
+
+  # Kibana built from ../kibana/build.
+  # NOTE(review): ELASTICSEARCH_HOSTS uses es.log.argus.com, which is not a
+  # compose service name - presumably resolved through the bind9 service and
+  # the records under /private/argus/etc; confirm.
+  kibana:
+    build:
+      context: ../kibana/build
+      dockerfile: Dockerfile
+    image: argus-kibana:latest
+    environment:
+      - ELASTICSEARCH_HOSTS=http://es.log.argus.com:9200
+    volumes:
+      - ./private/argus/:/private/argus/
+    ports: ["5601:5601"]
+    depends_on:
+      es:
+        condition: service_healthy
+
+  # Simulated log-producing host 01: a plain Ubuntu container that runs the
+  # start script from the mounted ../fluent-bit/build directory.
+  fluent-bit-host01:
+    image: ubuntu:22.04
+    environment:
+      - CLUSTER=local
+      - RACK=dev
+      - HOSTNAME=host01
+      - ES_HOST=es
+      - ES_PORT=9200
+    volumes:
+      - ../fluent-bit/build:/private/
+    ports: ["2020:2020"]
+    depends_on:
+      es:
+        condition: service_healthy
+    command: /private/start-fluent-bit.sh
+    healthcheck:
+      test: ["CMD-SHELL", "curl -fs http://localhost:2020/api/v2/metrics >/dev/null || exit 1"]
+      interval: 15s
+      timeout: 10s
+      retries: 30
+
+  # Simulated log-producing host 02; same setup, published on host port 2021.
+  fluent-bit-host02:
+    image: ubuntu:22.04
+    environment:
+      - CLUSTER=local
+      - RACK=dev
+      - HOSTNAME=host02
+      - ES_HOST=es
+      - ES_PORT=9200
+    volumes:
+      - ../fluent-bit/build:/private/
+    ports: ["2021:2020"]
+    depends_on:
+      es:
+        condition: service_healthy
+    command: /private/start-fluent-bit.sh
+    healthcheck:
+      test: ["CMD-SHELL", "curl -fs http://localhost:2020/api/v2/metrics >/dev/null || exit 1"]
+      interval: 15s
+      timeout: 10s
+      retries: 30
+
+  # DNS server using a prebuilt image (no build section here); publishes port
+  # 53 on the host for both TCP and UDP.
+  # NOTE(review): this block is indented two spaces deeper than its siblings;
+  # valid YAML, but inconsistent.
+  bind9:
+      image: argus-bind9:latest
+      ports:
+        - "53:53/tcp"
+        - "53:53/udp"
+      volumes:
+        - ./private/argus:/private/argus/
+      restart: unless-stopped
+
--- a/src/log/tests/scripts/01_bootstrap.sh
+++ b/src/log/tests/scripts/01_bootstrap.sh
@ -0,0 +1,30 @@
+#!/usr/bin/env bash
+# Bootstrap for the log-stack tests: creates the ./private/argus directory tree
+# that the containers mount, hands it to the container user, and warns when the
+# Fluent Bit files the compose stack needs are missing.
+set -euo pipefail
+root="$(cd "$(dirname "${BASH_SOURCE[0]}")/../" && pwd)"
+
+# Owner applied to the shared directories.
+# NOTE(review): the earlier comment claimed UID 1000, but the code has always
+# used 2133:2015 - presumably the UID/GID baked into the images; confirm
+# against the Dockerfiles.
+readonly data_owner="2133:2015"
+
+data_dirs=(
+    "$root/private/argus/log/elasticsearch"
+    "$root/private/argus/log/kibana"
+    "$root/private/argus/etc"
+)
+
+# Create the private directory structure (mirrors the argus layout).
+echo "[INFO] Creating private directory structure for supervisor-based containers..."
+mkdir -p "${data_dirs[@]}"
+
+# Hand the data directories to the container user. Failure is tolerated (sudo
+# may be unavailable), but it is reported instead of silently ignored because
+# the containers may then be unable to write to the volume.
+echo "[INFO] Setting permissions for data directories..."
+for dir in "${data_dirs[@]}"; do
+    if ! sudo chown -R "$data_owner" "$dir" 2>/dev/null; then
+        echo "[WARN] Failed to chown $dir to $data_owner" >&2
+    fi
+done
+
+echo "[INFO] Supervisor-based containers will manage their own scripts and configurations"
+
+# Check that the Fluent Bit files exist.
+# NOTE(review): the bundle location is kept as originally checked; confirm
+# whether it should also live under fluent-bit/build/.
+if [[ ! -f "$root/../fluent-bit/fluent-bit-bundle.tar.gz" ]]; then
+    echo "[WARN] fluent-bit/fluent-bit-bundle.tar.gz 不存在，请确保已创建该文件"
+fi
+
+# docker-compose.yml mounts ../fluent-bit/build as /private and runs
+# /private/start-fluent-bit.sh, so the start script must live in build/.
+if [[ ! -f "$root/../fluent-bit/build/start-fluent-bit.sh" ]]; then
+    echo "[WARN] fluent-bit/build/start-fluent-bit.sh 不存在，请确保已创建该启动脚本"
+fi
+
+echo "[OK] 初始化完成: private/argus/log/{elasticsearch,kibana}"
+echo "[INFO] Fluent-bit files should be in fluent-bit/ directory"
--- a/src/log/tests/scripts/02_up.sh
+++ b/src/log/tests/scripts/02_up.sh
@ -0,0 +1,10 @@
+#!/usr/bin/env bash
+# Starts the logging test stack (compose project "logging-mvp") in the background.
+set -euo pipefail
+cd "$(dirname "$0")/.."
+
+# Prefer the `docker compose` plugin; fall back to the standalone
+# docker-compose binary, and give up if neither is available.
+compose=(docker compose)
+if ! "${compose[@]}" version >/dev/null 2>&1; then
+  if ! command -v docker-compose >/dev/null 2>&1; then
+    echo "需要 Docker Compose，请安装后重试" >&2
+    exit 1
+  fi
+  compose=(docker-compose)
+fi
+
+"${compose[@]}" -p logging-mvp up -d --remove-orphans
+echo "[OK] 服务已启动：ES http://localhost:9200  Kibana http://localhost:5601  Fluent-Bit host01 http://localhost:2020  Fluent-Bit host02 http://localhost:2021"
--- a/src/log/tests/scripts/03_send_test_host01.sh
+++ b/src/log/tests/scripts/03_send_test_host01.sh
@ -0,0 +1,31 @@
+#!/usr/bin/env bash
+# Writes sample training and inference log lines into the fluent-bit-host01
+# container so the log pipeline has data to ship.
+set -euo pipefail
+
+# Name of the fluent-bit-host01 container (compose project "logging-mvp").
+container_name="logging-mvp-fluent-bit-host01-1"
+
+# Verify the container is running. `docker inspect` matches the exact name;
+# the former `docker ps | grep -q` matched substrings and, under pipefail,
+# could fail spuriously when grep exited before docker finished writing.
+container_running=$(docker inspect --type container -f '{{.State.Running}}' "$container_name" 2>/dev/null || true)
+if [[ "$container_running" != "true" ]]; then
+    echo "[ERROR] Fluent Bit容器 $container_name 未运行"
+    exit 1
+fi
+
+# Appends "<timestamp> <message>" to a file inside the container.
+# Arguments: $1 - file path in the container, $2 - message text.
+# The timestamp is taken inside the container; message and path are passed as
+# positional arguments so no extra quoting layer is needed.
+append_log_line() {
+    local file=$1
+    local message=$2
+    docker exec "$container_name" sh -c \
+        'printf "%s %s\n" "$(date "+%F %T")" "$1" >> "$2"' sh "$message" "$file"
+}
+
+# Create the log directories.
+docker exec "$container_name" mkdir -p /logs/train /logs/infer
+
+# Training logs (host01).
+append_log_line /logs/train/train-demo.log "INFO [host01] training step=1 loss=1.23 model=bert"
+append_log_line /logs/train/train-demo.log "INFO [host01] training step=2 loss=1.15 model=bert"
+
+# Inference logs (host01): one error line followed by a multi-line stack trace.
+append_log_line /logs/infer/infer-demo.log "ERROR [host01] inference failed on batch=1"
+docker exec "$container_name" sh -c "cat <<'STACK' >> /logs/infer/infer-demo.log
+Traceback (most recent call last):
+  File \"inference.py\", line 15, in <module>
+    raise RuntimeError(\"CUDA out of memory on host01\")
+RuntimeError: CUDA out of memory on host01
+STACK"
+
+echo "[OK] 已通过docker exec写入测试日志到 host01 容器内："
+echo " - /logs/train/train-demo.log"
+echo " - /logs/infer/infer-demo.log"
--- a/src/log/tests/scripts/03_send_test_host02.sh
+++ b/src/log/tests/scripts/03_send_test_host02.sh
@ -0,0 +1,27 @@
+#!/usr/bin/env bash
+# Writes sample training and inference log lines into the fluent-bit-host02
+# container so the log pipeline has data to ship.
+set -euo pipefail
+
+# Name of the fluent-bit-host02 container (compose project "logging-mvp").
+container_name="logging-mvp-fluent-bit-host02-1"
+
+# Verify the container is running. `docker inspect` matches the exact name;
+# the former `docker ps | grep -q` matched substrings and, under pipefail,
+# could fail spuriously when grep exited before docker finished writing.
+container_running=$(docker inspect --type container -f '{{.State.Running}}' "$container_name" 2>/dev/null || true)
+if [[ "$container_running" != "true" ]]; then
+    echo "[ERROR] Fluent Bit容器 $container_name 未运行"
+    exit 1
+fi
+
+# Appends "<timestamp> <message>" to a file inside the container.
+# Arguments: $1 - file path in the container, $2 - message text.
+# The timestamp is taken inside the container; message and path are passed as
+# positional arguments so no extra quoting layer is needed.
+append_log_line() {
+    local file=$1
+    local message=$2
+    docker exec "$container_name" sh -c \
+        'printf "%s %s\n" "$(date "+%F %T")" "$1" >> "$2"' sh "$message" "$file"
+}
+
+# Create the log directories.
+docker exec "$container_name" mkdir -p /logs/train /logs/infer
+
+# Training logs (host02).
+append_log_line /logs/train/train-demo.log "INFO [host02] training step=1 loss=1.45 model=gpt"
+append_log_line /logs/train/train-demo.log "INFO [host02] training step=2 loss=1.38 model=gpt"
+append_log_line /logs/train/train-demo.log "INFO [host02] training step=3 loss=1.32 model=gpt"
+
+# Inference logs (host02).
+append_log_line /logs/infer/infer-demo.log "WARN [host02] inference slow on batch=5 latency=2.3s"
+append_log_line /logs/infer/infer-demo.log "INFO [host02] inference completed batch=6 latency=0.8s"
+
+echo "[OK] 已通过docker exec写入测试日志到 host02 容器内："
+echo " - /logs/train/train-demo.log"
+echo " - /logs/infer/infer-demo.log"
--- a/src/log/tests/scripts/04_query_es.sh
+++ b/src/log/tests/scripts/04_query_es.sh
@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+# Lists the log indices in Elasticsearch and prints the document count of the
+# train-* and infer-* index patterns.
+#
+# Environment:
+#   ES  Elasticsearch base URL; defaults to http://localhost:9200.
+set -euo pipefail
+ES="${ES:-http://localhost:9200}"
+
+# Prints "<pattern> 计数：<count>" followed by a blank line.
+# Arguments: $1 - index pattern (e.g. train-*).
+# A failing request propagates through pipefail/errexit and aborts the script.
+print_count() {
+    local pattern=$1
+    printf '%s 计数：' "$pattern"
+    curl -fsS "$ES/$pattern/_count" | sed -E 's/.*"count":([0-9]+).*/\1/'
+    echo
+}
+
+echo "[i] 查询 ES 端点：$ES"
+# grep -E replaces the obsolescent egrep; "no matching index" is not an error.
+curl -fsS "$ES/_cat/indices?v" | grep -E 'train-|infer-|logstash' || true
+print_count 'train-*'
+print_count 'infer-*'
--- a/src/log/tests/scripts/05_down.sh
+++ b/src/log/tests/scripts/05_down.sh
@ -0,0 +1,21 @@
+#!/usr/bin/env bash
+# Stops the logging test stack (compose project "logging-mvp") and removes the
+# ./private data directory created by the bootstrap script.
+set -euo pipefail
+
+# Resolve the tests directory once, as an absolute path. The script used to
+# run `cd "$(dirname "$0")/.."` twice; with a relative $0 the second cd was
+# resolved from the new working directory and could climb one level too far,
+# so the cleanup looked for "private" in the wrong place.
+tests_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+cd "$tests_dir"
+
+# Prefer the `docker compose` plugin; fall back to standalone docker-compose.
+compose_cmd="docker compose"
+if ! $compose_cmd version >/dev/null 2>&1; then
+  if command -v docker-compose >/dev/null 2>&1; then compose_cmd="docker-compose"; else
+    echo "需要 Docker Compose，请安装后重试" >&2; exit 1; fi
+fi
+$compose_cmd -p logging-mvp down
+echo "[OK] 已停止所有容器"
+
+# Clean up the private directory and everything in it.
+echo "[INFO] 清理private目录内容..."
+if [ -d "$tests_dir/private" ]; then
+    # ${tests_dir:?} aborts rather than expanding to "/private" if it is ever empty.
+    rm -rf -- "${tests_dir:?}/private"
+    echo "[OK] 已清理private目录"
+else
+    echo "[INFO] private目录不存在，无需清理"
+fi
--- a/src/log/tests/scripts/06_dns_test.sh
+++ b/src/log/tests/scripts/06_dns_test.sh
@ -0,0 +1,208 @@
+#!/usr/bin/env bash
+# DNS monitor test: drives the stack through a dns.conf change and checks that
+# the containers' resolv.conf follows it.
+set -euo pipefail
+
+banner_rule="======================================="
+printf '%s\n' "$banner_rule" "ARGUS DNS监控功能测试" "$banner_rule" ""
+
+# Start time in epoch seconds; used for the duration in the final summary.
+test_start_time=$(date +%s)
+
+# Prints a step header.
+# Arguments: $1 - step identifier, $2 - step description.
+show_step() {
+    printf '\n%s\n%s\n' "🔄 Step $1: $2" "----------------------------------------"
+}
+
+# Reports the outcome of the command that ran immediately before the call.
+# Arguments: $1 - step label.
+# Exits the script with status 1 when that command failed.
+verify_step() {
+    # Must be the first statement: captures the caller's last exit status.
+    local status=$?
+    if (( status != 0 )); then
+        echo "❌ $1 - FAILED"
+        exit 1
+    fi
+    echo "✅ $1 - SUCCESS"
+}
+
+# Polls Elasticsearch and Kibana until both respond.
+# Returns 0 once both are reachable, 1 after 60 attempts spaced 5 seconds apart.
+wait_for_services() {
+    echo "[INFO] Waiting for services to be ready..."
+    local max_attempts=60
+    local attempt
+
+    for (( attempt = 1; attempt <= max_attempts; attempt++ )); do
+        if curl -fs http://localhost:9200/_cluster/health >/dev/null 2>&1; then
+            if curl -fs http://localhost:5601/api/status >/dev/null 2>&1; then
+                echo "[OK] Services are ready!"
+                return 0
+            fi
+        fi
+        echo "    Waiting for services... ($attempt/$max_attempts)"
+        sleep 5
+    done
+
+    echo "[ERROR] Services not ready after $max_attempts attempts"
+    return 1
+}
+
+# Checks that a container's /etc/resolv.conf lists the expected nameserver.
+# Arguments: $1 - container name, $2 - expected DNS server address.
+# Returns:   0 when a "nameserver <address>" entry is present, 1 otherwise
+#            (also when the file cannot be read; its content is then empty).
+check_resolv_conf() {
+    local service_name=$1
+    local expected_dns=$2
+    local resolv_content
+
+    echo "[INFO] 检查 $service_name 容器的 /etc/resolv.conf..."
+
+    resolv_content=$(docker exec "${service_name}" cat /etc/resolv.conf 2>/dev/null || echo "")
+
+    # Compare whole fields instead of grepping: with grep the dots of the
+    # address act as regex wildcards and "8.8.8.8" also matches "8.8.8.80".
+    if awk -v dns="$expected_dns" \
+        '$1 == "nameserver" && $2 == dns { found = 1 } END { exit !found }' \
+        <<< "$resolv_content"; then
+        echo "✅ $service_name resolv.conf contains nameserver $expected_dns"
+        return 0
+    else
+        echo "❌ $service_name resolv.conf does not contain nameserver $expected_dns"
+        echo "实际内容:"
+        echo "$resolv_content"
+        return 1
+    fi
+}
+
+# Shows the tail of a container's DNS monitor log.
+# Arguments: $1 - container name.
+# Returns:   0 when the log has content, 1 when it is empty or unreadable.
+check_dns_monitor_logs() {
+    local service_name=$1
+    local dns_logs
+
+    echo "[INFO] 检查 $service_name 的DNS监控日志..."
+
+    # A failing docker exec is folded into "empty log".
+    dns_logs=$(docker exec "$service_name" tail -n 20 /var/log/supervisor/dns-monitor.log 2>/dev/null || echo "")
+    if [ -z "$dns_logs" ]; then
+        echo "❌ $service_name DNS监控日志为空或不存在"
+        return 1
+    fi
+
+    echo "✅ $service_name DNS监控日志存在"
+    echo "最近的日志:"
+    echo "$dns_logs"
+    return 0
+}
+
+# Makes sure the directory that holds dns.conf exists (relative to the
+# current working directory).
+ensure_directories() {
+    printf '%s\n' "[INFO] 确保目录结构存在..."
+    mkdir -p ./private/argus/etc/
+    printf '%s\n' "✅ 目录结构准备完成（注：使用真实的update-dns.sh脚本）"
+}
+
+# ---------------------------------------------------------------------------
+# DNS monitor test flow
+# ---------------------------------------------------------------------------
+
+# Every path below (./scripts, ./private) is relative to the tests directory,
+# so switch there regardless of where the script was invoked from.
+cd "$(dirname "${BASH_SOURCE[0]}")/.."
+
+readonly DNS_CONF=./private/argus/etc/dns.conf
+readonly ES_CONTAINER=logging-mvp-es-1
+readonly KIBANA_CONTAINER=logging-mvp-kibana-1
+
+# Runs a command and reports its outcome through verify_step.
+# Arguments: $1 - step label, remaining arguments - the command to run.
+# With `set -e`, a bare failing command terminates the script before
+# verify_step can print its FAILED line; running the command on the left of
+# `||` keeps the script alive long enough to report the failure.
+run_step() {
+    local label=$1
+    shift
+    "$@" || verify_step "$label"   # failure: $? is the command's status -> FAILED, exit 1
+    verify_step "$label"           # success: $? is 0 here -> SUCCESS
+}
+
+# Overwrites dns.conf with a single nameserver address ($1).
+write_dns_conf() {
+    printf '%s\n' "$1" > "$DNS_CONF"
+}
+
+# Writes the initial dns.conf (8.8.8.8 only) and confirms it.
+create_initial_dns_conf() {
+    write_dns_conf "8.8.8.8" || return 1
+    echo "✅ 初始dns.conf文件创建成功 (8.8.8.8)"
+}
+
+# Prints the final resolv.conf of both containers and the final dns.conf.
+# Returns non-zero as soon as one of them cannot be read.
+show_final_dns_state() {
+    echo "[INFO] 最终验证elasticsearch容器的resolv.conf..."
+    docker exec "$ES_CONTAINER" cat /etc/resolv.conf || return 1
+
+    echo "[INFO] 最终验证kibana容器的resolv.conf..."
+    docker exec "$KIBANA_CONTAINER" cat /etc/resolv.conf || return 1
+
+    echo "[INFO] 最终dns.conf内容:"
+    cat "$DNS_CONF" || return 1
+}
+
+show_step "1" "Bootstrap - Initialize environment"
+run_step "Bootstrap" ./scripts/01_bootstrap.sh
+
+# Make sure the directory structure exists.
+ensure_directories
+
+show_step "2" "Startup - Start all services"
+run_step "Service startup" ./scripts/02_up.sh
+
+# Wait until the services are fully ready.
+wait_for_services || exit 1
+
+show_step "3" "Create initial DNS configuration"
+# Initial DNS configuration: a single address.
+echo "[INFO] 创建初始的dns.conf文件 (8.8.8.8)..."
+run_step "Initial DNS configuration creation" create_initial_dns_conf
+
+# Give the DNS monitor time to notice the configuration file.
+echo "[INFO] 等待DNS监控检测并处理初始配置..."
+sleep 15
+
+show_step "4" "Verify initial DNS configuration processing"
+# Both containers must have DNS monitor log output ...
+run_step "Elasticsearch DNS monitor logs" check_dns_monitor_logs "$ES_CONTAINER"
+run_step "Kibana DNS monitor logs" check_dns_monitor_logs "$KIBANA_CONTAINER"
+
+# ... and their resolv.conf must contain the new DNS server.
+run_step "Elasticsearch resolv.conf initial check" check_resolv_conf "$ES_CONTAINER" "8.8.8.8"
+run_step "Kibana resolv.conf initial check" check_resolv_conf "$KIBANA_CONTAINER" "8.8.8.8"
+
+show_step "5" "Modify DNS configuration and test auto-update"
+# Switch the DNS configuration to a different address.
+echo "[INFO] 修改dns.conf文件，改为1.1.1.1..."
+write_dns_conf "1.1.1.1"
+echo "✅ dns.conf文件更新成功，改为1.1.1.1"
+
+# Give the DNS monitor time to notice the change.
+echo "[INFO] 等待DNS监控检测配置变化并执行更新..."
+sleep 15
+
+show_step "6" "Verify DNS configuration auto-update"
+# The monitor logs should now show that the change was detected.
+echo "[INFO] 检查DNS监控是否检测到配置变化..."
+
+# Log output is informational only, hence `|| true`.
+echo "[INFO] 检查elasticsearch容器的DNS监控日志（最近30行）..."
+docker exec "$ES_CONTAINER" tail -n 30 /var/log/supervisor/dns-monitor.log || true
+
+echo "[INFO] 检查kibana容器的DNS监控日志（最近30行）..."
+docker exec "$KIBANA_CONTAINER" tail -n 30 /var/log/supervisor/dns-monitor.log || true
+
+# The new DNS server must have reached resolv.conf.
+run_step "Elasticsearch resolv.conf after update" check_resolv_conf "$ES_CONTAINER" "1.1.1.1"
+run_step "Kibana resolv.conf after update" check_resolv_conf "$KIBANA_CONTAINER" "1.1.1.1"
+
+show_step "7" "Final verification - Check DNS configuration"
+run_step "Final DNS configuration verification" show_final_dns_state
+
+show_step "8" "Cleanup - Stop all services"
+run_step "Service cleanup" ./scripts/05_down.sh
+
+# Remove the test file (a no-op when the cleanup script already removed the
+# private directory). update-dns.sh is left alone because it is the real script.
+rm -f "$DNS_CONF"
+
+# Total test duration.
+test_end_time=$(date +%s)
+total_time=$((test_end_time - test_start_time))
+
+echo ""
+echo "======================================="
+echo "🎉 DNS监控功能测试完成!"
+echo "======================================="
+echo "📊 测试总结:"
+echo "   • 总耗时: ${total_time}秒"
+echo "   • 初始DNS配置: 8.8.8.8"
+echo "   • 更新DNS配置: 1.1.1.1"
+echo "   • DNS监控脚本正常工作"
+echo "   • 容器resolv.conf自动覆盖更新成功"
+echo ""
+echo "✅ DNS自动更新功能测试通过!"
+echo ""
--- a/src/log/tests/scripts/e2e_test.sh
+++ b/src/log/tests/scripts/e2e_test.sh
@ -0,0 +1,169 @@
+#!/usr/bin/env bash
+# End-to-end test of the log stack: bootstrap, start, send test logs, verify
+# them in Elasticsearch, check service health and tear everything down.
+set -euo pipefail
+
+banner_rule="======================================="
+printf '%s\n' "$banner_rule" "ARGUS Log System End-to-End Test" "$banner_rule" ""
+
+# Start time in epoch seconds; used for the duration in the final summary.
+test_start_time=$(date +%s)
+
+# Prints the combined document count of the train-* and infer-* indices.
+# A pattern whose count cannot be fetched or parsed contributes 0.
+get_log_count() {
+    local pattern response
+    local total=0
+
+    for pattern in 'train-*' 'infer-*'; do
+        response=$(curl -s "http://localhost:9200/${pattern}/_count" 2>/dev/null || true)
+        # Extract the first "count":<n> directly. The former
+        # `curl | grep | cut || echo 0` appended the fallback "0" to whatever
+        # the pipeline had already printed, which could produce a two-line
+        # value and break the arithmetic below.
+        if [[ "$response" =~ \"count\":([0-9]+) ]]; then
+            total=$(( total + ${BASH_REMATCH[1]} ))
+        fi
+    done
+
+    echo "$total"
+}
+
+# Polls Elasticsearch, Kibana and both Fluent Bit instances until all respond.
+# Returns 0 once every endpoint is reachable, 1 after 60 attempts spaced
+# 5 seconds apart.
+wait_for_services() {
+    echo "[INFO] Waiting for all services to be ready..."
+    local max_attempts=60
+    local attempt endpoint all_ready
+    local -a endpoints=(
+        http://localhost:9200/_cluster/health
+        http://localhost:5601/api/status
+        http://localhost:2020/api/v2/metrics
+        http://localhost:2021/api/v2/metrics
+    )
+
+    for (( attempt = 1; attempt <= max_attempts; attempt++ )); do
+        all_ready=true
+        # Stop probing at the first endpoint that is not up yet.
+        for endpoint in "${endpoints[@]}"; do
+            if ! curl -fs "$endpoint" >/dev/null 2>&1; then
+                all_ready=false
+                break
+            fi
+        done
+
+        if [[ "$all_ready" == true ]]; then
+            echo "[OK] All services are ready!"
+            return 0
+        fi
+        echo "    Waiting for services... ($attempt/$max_attempts)"
+        sleep 5
+    done
+
+    echo "[ERROR] Services not ready after $max_attempts attempts"
+    return 1
+}
+
+# Prints a step header.
+# Arguments: $1 - step identifier, $2 - step description.
+show_step() {
+    printf '\n%s\n%s\n' "🔄 Step $1: $2" "----------------------------------------"
+}
+
+# Reports the outcome of the command that ran immediately before the call.
+# Arguments: $1 - step label.
+# Exits the script with status 1 when that command failed.
+verify_step() {
+    # Must be the first statement: captures the caller's last exit status.
+    local status=$?
+    if (( status != 0 )); then
+        echo "❌ $1 - FAILED"
+        exit 1
+    fi
+    echo "✅ $1 - SUCCESS"
+}
+
+# ---------------------------------------------------------------------------
+# End-to-end test flow
+# ---------------------------------------------------------------------------
+
+# The step scripts are addressed as ./scripts/..., so switch to the tests
+# directory regardless of where this script was invoked from.
+cd "$(dirname "${BASH_SOURCE[0]}")/.."
+
+# Runs a command and reports its outcome through verify_step.
+# Arguments: $1 - step label, remaining arguments - the command to run.
+# With `set -e`, a bare failing command terminates the script before
+# verify_step can print its FAILED line; running the command on the left of
+# `||` keeps the script alive long enough to report the failure.
+run_step() {
+    local label=$1
+    shift
+    "$@" || verify_step "$label"   # failure: $? is the command's status -> FAILED, exit 1
+    verify_step "$label"           # success: $? is 0 here -> SUCCESS
+}
+
+# Prints the trailing number of the first "fluentbit_uptime" line returned by
+# the Fluent Bit metrics endpoint on the given host port, or 0 when it cannot
+# be determined.
+# Arguments: $1 - host port.
+fluent_bit_uptime() {
+    local port=$1
+    curl -s "http://localhost:${port}/api/v2/metrics" \
+        | awk '/fluentbit_uptime/ && !seen++ { if (match($0, /[0-9]+$/)) value = substr($0, RSTART, RLENGTH) }
+               END { print (value == "" ? 0 : value) }' \
+        || true
+}
+
+# Checks Elasticsearch, Kibana and Fluent Bit.
+# Sets the globals es_health and kb_status (used by the final summary).
+# Returns 1 when the Elasticsearch cluster is neither green nor yellow; Kibana
+# and Fluent Bit problems are reported as warnings only.
+check_service_health() {
+    local healthy=true
+    echo "[INFO] Checking service health..."
+
+    # Elasticsearch cluster status; an unreachable cluster becomes "unknown"
+    # instead of silently aborting the script through errexit/pipefail.
+    es_health=$(curl -s "http://localhost:9200/_cluster/health" | grep -o '"status":"[^"]*"' | cut -d'"' -f4 || true)
+    es_health=${es_health:-unknown}
+    if [ "$es_health" = "green" ] || [ "$es_health" = "yellow" ]; then
+        echo "✅ Elasticsearch health: $es_health"
+    else
+        echo "❌ Elasticsearch health: $es_health"
+        healthy=false
+    fi
+
+    # Kibana status.
+    if curl -fs "http://localhost:5601/api/status" >/dev/null 2>&1; then
+        kb_status="available"
+        echo "✅ Kibana status: $kb_status"
+    else
+        kb_status="unavailable"
+        echo "⚠️  Kibana status: $kb_status"
+    fi
+
+    # Fluent Bit uptime metrics.
+    local fb_host01_uptime fb_host02_uptime
+    fb_host01_uptime=$(fluent_bit_uptime 2020)
+    fb_host02_uptime=$(fluent_bit_uptime 2021)
+    if [ "$fb_host01_uptime" -gt 0 ] && [ "$fb_host02_uptime" -gt 0 ]; then
+        echo "✅ Fluent-Bit services: host01 uptime=${fb_host01_uptime}s, host02 uptime=${fb_host02_uptime}s"
+    else
+        echo "⚠️  Fluent-Bit services: host01 uptime=${fb_host01_uptime}s, host02 uptime=${fb_host02_uptime}s"
+    fi
+
+    [[ "$healthy" == true ]]
+}
+
+show_step "1" "Bootstrap - Initialize environment"
+run_step "Bootstrap" ./scripts/01_bootstrap.sh
+
+show_step "2" "Startup - Start all services"
+run_step "Service startup" ./scripts/02_up.sh
+
+# Wait until the services are fully ready.
+wait_for_services || exit 1
+
+# Log count before any test data is sent.
+initial_count=$(get_log_count)
+echo "[INFO] Initial log count: $initial_count"
+
+show_step "3a" "Send test data - Host01"
+run_step "Test data sending (host01)" ./scripts/03_send_test_host01.sh
+
+show_step "3b" "Send test data - Host02"
+run_step "Test data sending (host02)" ./scripts/03_send_test_host02.sh
+
+# Give the pipeline time to process the data.
+echo "[INFO] Waiting for data to be processed..."
+sleep 10
+
+show_step "4" "Verify data - Query Elasticsearch"
+run_step "Data verification" ./scripts/04_query_es.sh
+
+# Log count after the test data was sent.
+final_count=$(get_log_count)
+echo "[INFO] Final log count: $final_count"
+
+# The count must have increased.
+if [ "$final_count" -gt "$initial_count" ]; then
+    added_logs=$((final_count - initial_count))
+    echo "✅ Log count verification - SUCCESS: Added $added_logs logs (from $initial_count to $final_count)"
+else
+    echo "❌ Log count verification - FAILED: Expected count to increase, but got $initial_count -> $final_count"
+    exit 1
+fi
+
+# A minimum number of log entries is expected overall.
+expected_min_logs=4
+if [ "$final_count" -ge "$expected_min_logs" ]; then
+    echo "✅ Minimum log threshold - SUCCESS: $final_count logs (>= $expected_min_logs expected)"
+else
+    echo "❌ Minimum log threshold - FAILED: Only $final_count logs (>= $expected_min_logs expected)"
+    exit 1
+fi
+
+# Service health; previously this step reported SUCCESS even after printing
+# a failed Elasticsearch health line.
+show_step "Health" "Check service health"
+run_step "Service health check" check_service_health
+
+show_step "5" "Cleanup - Stop all services"
+run_step "Service cleanup" ./scripts/05_down.sh
+
+# Total test duration.
+test_end_time=$(date +%s)
+total_time=$((test_end_time - test_start_time))
+
+# NOTE(review): the "DNS resolv" summary line below is printed unconditionally;
+# nothing in this script performs a DNS check - confirm whether it should stay.
+echo ""
+echo "======================================="
+echo "🎉 END-TO-END TEST COMPLETED SUCCESSFULLY!"
+echo "======================================="
+echo "📊 Test Summary:"
+echo "   • Initial logs: $initial_count"
+echo "   • Final logs: $final_count"
+echo "   • Added logs: $added_logs"
+echo "   • Total time: ${total_time}s"
+echo "   • ES health: $es_health"
+echo "   • Kibana status: $kb_status"
+echo "   • DNS resolv: ✅ Passed (ES domain verified)"
+echo "   • All services started and stopped successfully"
+echo ""
+echo "✅ The ARGUS log system is working correctly!"
+echo ""
--- a/src/metric/.gitignore
+++ b/src/metric/.gitignore
@ -0,0 +1,6 @@
+/prometheus/data/
+/client-plugins/dcgm-exporter-installer/
+/client-plugins/demo-all-in-one/artifact/
+/client-plugins/demo-all-in-one/publish/
+/client-plugins/demo-all-in-one/checklist
+/client-plugins/demo-all-in-one/VERSION
--- a/src/metric/client-plugins/demo-all-in-one/.VERSION.example
+++ b/src/metric/client-plugins/demo-all-in-one/.VERSION.example
@ -0,0 +1 @@
+1.29.0
--- a/src/metric/client-plugins/demo-all-in-one/.checklist.example
+++ b/src/metric/client-plugins/demo-all-in-one/.checklist.example
@ -0,0 +1,3 @@
+# 组件名称 目录路径 版本号 [依赖组件] [安装顺序]
+dcgm-exporter-installer /Users/sundapeng/Project/nlp/aiops/client-plugins/dcgm-exporter-installer 1.1.0
+node-exporter-installer /Users/sundapeng/Project/nlp/aiops/client-plugins/node-exporter-installer 1.1.0
--- a/src/metric/client-plugins/demo-all-in-one/README.md
+++ b/src/metric/client-plugins/demo-all-in-one/README.md
@ -0,0 +1,61 @@
+# 客户侧组件安装包构建、发布流程
+
+## 第一步：配置版本和组件
+
+首先准备配置文件。`checklist` 和 `VERSION` 已被 `.gitignore` 忽略，请复制示例文件（不要重命名，以免删除仓库中的模板）：
+
+1. 把 `.checklist.example` 复制为 `checklist`，并把其中的目录路径改成本机的实际路径
+2. 把 `.VERSION.example` 复制为 `VERSION`
+
+### checklist 文件格式
+```
+# 组件名称 目录路径 版本号 [依赖组件] [安装顺序]
+dcgm-exporter-installer /path/to/dcgm-exporter-installer 1.1.0
+node-exporter-installer /path/to/node-exporter-installer 1.1.0
+```
+
+### VERSION 文件
+设置需要发布的版本号，比如 `1.29.0`
+
+> 建议用 `version-manager.sh` 来管理版本
+
+## 第二步：构建安装包
+
+直接跑脚本：
+```bash
+./package_artifact.sh
+```
+
+构建产物会放在 `artifact/` 目录下，每个版本对应一个子目录。
+
+如果版本已经存在了，想要覆盖重新构建：
+```bash
+./package_artifact.sh --force
+```
+
+构建完可以手工测试安装包。
+
+## 第三步：发布安装包
+
+用这个脚本发布：
+```bash
+./publish_artifact.sh
+```
+
+发布后的内容在 `publish/` 目录里，包含：
+- 压缩版本的安装包
+- 一键安装的bash脚本
+
+## 第四步：部署到FTP服务器（详见 FTP 搭建）
+
+把发布的内容上传到FTP服务器，客户端就可以通过一键命令安装：
+
+```bash
+curl -u user:passwd ftp://server_ip/setup.sh -o setup.sh
+
+chmod +x setup.sh
+
+sudo ./setup.sh --server server_ip --user user --password passwd
+```
+
+这样客户就能直接从FTP服务器下载并安装组件了。
--- a/src/metric/client-plugins/demo-all-in-one/check_health.sh
+++ b/src/metric/client-plugins/demo-all-in-one/check_health.sh
@ -0,0 +1,204 @@
+#!/bin/bash
+
+# Overall health check script: runs the health check of every installed
+# component and appends the aggregated result to the .health_log file.
+
+set -e
+
+# Directory containing this script; the log and install-record files live next to it.
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+HEALTH_LOG_FILE="$SCRIPT_DIR/.health_log"
+INSTALL_RECORD_FILE="$SCRIPT_DIR/.install_record"
+
+# ANSI color escape sequences used by the log helpers
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+NC='\033[0m' # No Color
+
+# Log helpers. They write to stderr so that stdout carries only the JSON result.
+# Arguments: $1 - message (backslash escapes are expanded, as with echo -e).
+log_info() {
+    printf '%b\n' "${BLUE}[INFO]${NC} $1" >&2
+}
+
+log_success() {
+    printf '%b\n' "${GREEN}[SUCCESS]${NC} $1" >&2
+}
+
+log_warning() {
+    printf '%b\n' "${YELLOW}[WARNING]${NC} $1" >&2
+}
+
+log_error() {
+    printf '%b\n' "${RED}[ERROR]${NC} $1" >&2
+}
+
+# Prints an "unhealth" JSON record for a component.
+# Arguments: $1 - component name, $2 - reason text.
+# Backslashes and double quotes are escaped so the record stays valid JSON.
+_unhealth_json() {
+    local name=$1
+    local reason=$2
+    name=${name//\\/\\\\}
+    name=${name//\"/\\\"}
+    reason=${reason//\\/\\\\}
+    reason=${reason//\"/\\\"}
+    printf '{"name": "%s", "status": "unhealth", "reason": "%s"}\n' "$name" "$reason"
+}
+
+# Checks the health of a single component.
+# Arguments: $1 - component name, $2 - path of the component's health script.
+# Outputs:   the component's JSON result on stdout (log messages go to stderr);
+#            a generated "unhealth" record when the script is missing, is not
+#            executable, or fails without printing anything.
+# Returns:   0 when the health script succeeds, 1 otherwise.
+check_component() {
+    local component_name="$1"
+    local check_script_path="$2"
+
+    log_info "检查 $component_name 健康状态..."
+
+    if [[ ! -f "$check_script_path" ]]; then
+        log_error "健康检查脚本不存在: $check_script_path"
+        _unhealth_json "$component_name" "健康检查脚本不存在: $check_script_path"
+        return 1
+    fi
+
+    if [[ ! -x "$check_script_path" ]]; then
+        log_error "健康检查脚本无执行权限: $check_script_path"
+        _unhealth_json "$component_name" "健康检查脚本无执行权限: $check_script_path"
+        return 1
+    fi
+
+    # Run the health script and capture its stdout; its stderr is discarded.
+    local result
+    local status=0
+    result=$("$check_script_path" 2>/dev/null) || status=$?
+
+    if (( status == 0 )); then
+        log_success "$component_name 健康检查通过"
+        echo "$result"
+        return 0
+    fi
+
+    log_warning "$component_name 健康检查失败"
+    if [[ -n "$result" ]]; then
+        echo "$result"
+    else
+        # A failing script that printed nothing used to yield an empty entry,
+        # which broke the aggregated JSON; report the failure explicitly.
+        _unhealth_json "$component_name" "健康检查脚本执行失败且无输出 (exit code: $status)"
+    fi
+    return 1
+}
+
+# Prints the current local time as "YYYY-MM-DD HH:MM:SS".
+get_timestamp() {
+    date '+%F %T'
+}
+
+# Reads the component install directories from the install record file.
+# Arguments: $1 - path of the install record (JSON) file.
+# Outputs:   one "<component>:<install_dir>" line per component on stdout.
+# Returns:   0 on success, 1 when the file is missing or nothing can be extracted.
+read_install_record() {
+    local install_record_file="$1"
+
+    if [[ ! -f "$install_record_file" ]]; then
+        log_error "安装记录文件不存在: $install_record_file"
+        return 1
+    fi
+
+    if ! command -v jq &> /dev/null; then
+        # Without jq, fall back to a line-based scan for "install_dir" entries.
+        log_warning "jq 命令不可用，尝试简单文本解析"
+
+        local install_dir_re='"install_dir":[[:space:]]*"([^"]+)"'
+        local components=()
+        while IFS= read -r line; do
+            if [[ "$line" =~ $install_dir_re ]]; then
+                local install_dir="${BASH_REMATCH[1]}"
+                # In this mode the component name is the last path element.
+                local component_name=$(basename "$install_dir")
+                components+=("$component_name:$install_dir")
+            fi
+        done < "$install_record_file"
+
+        if [[ ${#components[@]} -eq 0 ]]; then
+            log_error "无法从安装记录文件中提取组件信息"
+            return 1
+        fi
+        printf '%s\n' "${components[@]}"
+        return 0
+    fi
+
+    # With jq: emit "<key>:<install_dir>" for every entry under .components.
+    local components_json
+    if ! components_json=$(jq -r '.components | to_entries[] | "\(.key):\(.value.install_dir)"' "$install_record_file" 2>/dev/null); then
+        log_error "无法解析安装记录文件 JSON 格式: $install_record_file"
+        return 1
+    fi
+    echo "$components_json"
+    return 0
+}
+
+# Main routine: checks every component listed in the install record, appends
+# the aggregated JSON result to the health log and prints it on stdout.
+# Exits 0 when all components are healthy, 1 otherwise (also when the install
+# record cannot be read).
+main() {
+    echo "==========================================" >&2
+    echo "    整体健康检查脚本" >&2
+    echo "==========================================" >&2
+    echo >&2
+
+    # Start time of the health check.
+    local start_time
+    start_time=$(get_timestamp)
+    log_info "健康检查开始时间: $start_time"
+
+    # Component list from the install record.
+    log_info "从安装记录文件读取组件信息: $INSTALL_RECORD_FILE"
+    local components_info
+    if ! components_info=$(read_install_record "$INSTALL_RECORD_FILE"); then
+        log_error "无法读取安装记录文件，健康检查终止"
+        exit 1
+    fi
+
+    local overall_status="health"
+    local components_json=""
+    local newline=$'\n'
+    # Loop variables are declared local so they no longer leak into the
+    # global scope.
+    local component_info component_name install_dir result
+
+    # Check the components one by one.
+    while IFS= read -r component_info; do
+        [[ -n "$component_info" ]] || continue
+        IFS=':' read -r component_name install_dir <<< "$component_info"
+
+        if ! result=$(check_component "$component_name" "$install_dir/check_health.sh"); then
+            overall_status="unhealth"
+        fi
+
+        # An empty result would leave a dangling comma in the JSON array.
+        if [[ -z "$result" ]]; then
+            log_warning "$component_name 健康检查未返回结果，已从 JSON 中省略"
+            continue
+        fi
+        # Join the entries with ",<newline>" and no trailing comma.
+        components_json+="${components_json:+,${newline}}${result}"
+    done <<< "$components_info"
+
+    # End time of the health check.
+    local end_time
+    end_time=$(get_timestamp)
+    log_info "健康检查结束时间: $end_time"
+
+    # Assemble the complete health check result as JSON.
+    local health_check_result
+    health_check_result=$(cat << EOF
+{
+  "start_time": "$start_time",
+  "end_time": "$end_time",
+  "overall_status": "$overall_status",
+  "components": [
+$components_json
+  ]
+}
+EOF
+)
+
+    # Append the result to the health log file.
+    log_info "将健康检查结果写入日志文件: $HEALTH_LOG_FILE"
+    echo "$health_check_result" >> "$HEALTH_LOG_FILE"
+
+    # JSON result on stdout.
+    echo "$health_check_result"
+
+    # Human-readable summary on stderr.
+    echo >&2
+    echo "==========================================" >&2
+    echo "    健康检查总结" >&2
+    echo "==========================================" >&2
+    echo "开始时间: $start_time" >&2
+    echo "结束时间: $end_time" >&2
+    echo "整体状态: $overall_status" >&2
+    echo "日志文件: $HEALTH_LOG_FILE" >&2
+    echo >&2
+
+    if [[ "$overall_status" == "health" ]]; then
+        log_success "所有组件健康检查通过！"
+        exit 0
+    else
+        log_error "部分组件健康检查失败，请查看上述详细信息"
+        exit 1
+    fi
+}
+
+# Script entry point: run main only when executed directly, not when sourced.
+[[ "${BASH_SOURCE[0]}" != "${0}" ]] || main "$@"
--- a/src/metric/client-plugins/demo-all-in-one/deps/cron-offline.tar.gz
+++ b/src/metric/client-plugins/demo-all-in-one/deps/cron-offline.tar.gz
--- a/src/metric/client-plugins/demo-all-in-one/install_artifact.sh
+++ b/src/metric/client-plugins/demo-all-in-one/install_artifact.sh
@ -0,0 +1,683 @@
+#!/bin/bash
+
+set -e
+
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+NC='\033[0m'
+
+log_info() {
+    # Print $1 on stdout behind a blue [INFO] tag.
+    local tag="${BLUE}[INFO]${NC}"
+    echo -e "${tag} $1"
+}
+
+log_success() {
+    # Print $1 on stdout behind a green [SUCCESS] tag.
+    local tag="${GREEN}[SUCCESS]${NC}"
+    echo -e "${tag} $1"
+}
+
+log_warning() {
+    # Print $1 on stdout behind a yellow [WARNING] tag.
+    local tag="${YELLOW}[WARNING]${NC}"
+    echo -e "${tag} $1"
+}
+
+log_error() {
+    # Print $1 on stdout behind a red [ERROR] tag.
+    local tag="${RED}[ERROR]${NC}"
+    echo -e "${tag} $1"
+}
+
+# 配置变量
+INSTALL_DIR="${1:-$(pwd)}"  # 使用第一个参数作为安装目录，如果没有参数则使用当前目录
+TEMP_DIR="/tmp/metrics-install-$$"
+VERSION_FILE="version.json"
+
+check_root() {
+    if [[ $EUID -ne 0 ]]; then
+        log_error "此脚本需要 root 权限运行"
+        log_info "请使用: sudo $0 [安装目录]"
+        log_info "如果不指定安装目录，将使用当前目录: $(pwd)"
+        exit 1
+    fi
+}
+
+# 检查系统要求
+check_system() {
+    log_info "检查系统要求..."
+    
+    # 检查操作系统
+    if [[ ! -f /etc/os-release ]]; then
+        log_error "无法检测操作系统版本"
+        exit 1
+    fi
+    
+    source /etc/os-release
+    log_info "检测到操作系统: $NAME $VERSION"
+    
+    # 检查系统架构
+    arch=$(uname -m)
+    log_info "系统架构: $arch"
+    
+    # 检查磁盘空间
+    available_space=$(df / | awk 'NR==2 {print $4}')
+    if [[ $available_space -lt 10485760 ]]; then  # 10GB in KB
+        log_warning "可用磁盘空间不足 10GB，当前可用: $(($available_space / 1024 / 1024))GB"
+    fi
+    
+    # 检查内存
+    total_mem=$(free -m | awk 'NR==2{print $2}')
+    if [[ $total_mem -lt 4096 ]]; then  # 4GB
+        log_warning "系统内存不足 4GB，当前: ${total_mem}MB"
+    fi
+}
+
+# 查找版本文件
+find_version_file() {
+    log_info "查找版本信息文件..."
+    
+    # 在当前目录查找
+    if [[ -f "$VERSION_FILE" ]]; then
+        VERSION_FILE_PATH="$VERSION_FILE"
+        log_success "找到版本文件: $VERSION_FILE"
+        return 0
+    fi
+    
+    # 在 artifact 目录查找
+    for version_dir in artifact/*/; do
+        if [[ -f "${version_dir}${VERSION_FILE}" ]]; then
+            VERSION_FILE_PATH="${version_dir}${VERSION_FILE}"
+            log_success "找到版本文件: $VERSION_FILE_PATH"
+            return 0
+        fi
+    done
+    
+    log_error "未找到版本信息文件 $VERSION_FILE"
+    exit 1
+}
+
+# 解析版本信息
+parse_version_info() {
+    log_info "解析版本信息..."
+    
+    if [[ ! -f "$VERSION_FILE_PATH" ]]; then
+        log_error "版本文件不存在: $VERSION_FILE_PATH"
+        exit 1
+    fi
+    
+    # 使用 jq 解析 JSON（如果可用）
+    if command -v jq &> /dev/null; then
+        # 验证JSON文件格式
+        if ! jq empty "$VERSION_FILE_PATH" 2>/dev/null; then
+            log_error "JSON文件格式错误，请检查 $VERSION_FILE_PATH"
+            exit 1
+        fi
+        
+        VERSION=$(jq -r '.version' "$VERSION_FILE_PATH")
+        BUILD_TIME=$(jq -r '.build_time' "$VERSION_FILE_PATH")
+        
+        # 解析 artifact_list
+        if jq -e '.artifact_list' "$VERSION_FILE_PATH" > /dev/null 2>&1; then
+            jq -r '.artifact_list | to_entries[] | "\(.key):\(.value)"' "$VERSION_FILE_PATH" > "$TEMP_DIR/components.txt"
+        else
+            log_error "version.json 中缺少 artifact_list 字段"
+            exit 1
+        fi
+        
+        # 解析 checksums
+        if jq -e '.checksums' "$VERSION_FILE_PATH" > /dev/null 2>&1; then
+            jq -r '.checksums | to_entries[] | "\(.key):\(.value)"' "$VERSION_FILE_PATH" > "$TEMP_DIR/checksums.txt"
+        else
+            log_error "version.json 中缺少 checksums 字段"
+            exit 1
+        fi
+        
+        # 解析 install_order（现在包含完整的文件名）
+        if jq -e '.install_order' "$VERSION_FILE_PATH" > /dev/null 2>&1; then
+            jq -r '.install_order[]' "$VERSION_FILE_PATH" > "$TEMP_DIR/install_order.txt"
+        else
+            log_error "version.json 中缺少 install_order 字段"
+            exit 1
+        fi
+        
+    else
+        log_warning "jq 未安装，使用简单的 JSON 解析"
+        # 简单的 JSON 解析
+        VERSION=$(grep '"version"' "$VERSION_FILE_PATH" | sed 's/.*"version": *"\([^"]*\)".*/\1/')
+        BUILD_TIME=$(grep '"build_time"' "$VERSION_FILE_PATH" | sed 's/.*"build_time": *"\([^"]*\)".*/\1/')
+        
+        # 解析 artifact_list
+        grep -A 100 '"artifact_list"' "$VERSION_FILE_PATH" | grep -E '^\s*"[^"]+":\s*"[^"]+"' | while read line; do
+            component=$(echo "$line" | sed 's/.*"\([^"]*\)":\s*"[^"]*".*/\1/')
+            version=$(echo "$line" | sed 's/.*"[^"]*":\s*"\([^"]*\)".*/\1/')
+            echo "$component:$version" >> "$TEMP_DIR/components.txt"
+        done
+        
+        # 解析 checksums
+        grep -A 100 '"checksums"' "$VERSION_FILE_PATH" | grep -E '^\s*"[^"]+":\s*"[^"]+"' | while read line; do
+            component=$(echo "$line" | sed 's/.*"\([^"]*\)":\s*"[^"]*".*/\1/')
+            checksum=$(echo "$line" | sed 's/.*"[^"]*":\s*"\([^"]*\)".*/\1/')
+            echo "$component:$checksum" >> "$TEMP_DIR/checksums.txt"
+        done
+        
+        # 解析 install_order
+        grep -A 100 '"install_order"' "$VERSION_FILE_PATH" | grep -E '^\s*"[^"]+"' | while read line; do
+            component=$(echo "$line" | sed 's/.*"\([^"]*\)".*/\1/')
+            echo "$component" >> "$TEMP_DIR/install_order.txt"
+        done
+        
+        # 验证解析结果
+        if [[ ! -f "$TEMP_DIR/components.txt" || ! -s "$TEMP_DIR/components.txt" ]]; then
+            log_error "无法解析 artifact_list，请检查 version.json 格式"
+            exit 1
+        fi
+        
+        if [[ ! -f "$TEMP_DIR/checksums.txt" || ! -s "$TEMP_DIR/checksums.txt" ]]; then
+            log_error "无法解析 checksums，请检查 version.json 格式"
+            exit 1
+        fi
+        
+        if [[ ! -f "$TEMP_DIR/install_order.txt" || ! -s "$TEMP_DIR/install_order.txt" ]]; then
+            log_error "无法解析 install_order，请检查 version.json 格式"
+            exit 1
+        fi
+    fi
+    
+    log_success "版本信息解析完成"
+    log_info "  版本: $VERSION"
+    log_info "  构建时间: $BUILD_TIME"
+    
+    component_count=0
+    if [[ -f "$TEMP_DIR/components.txt" ]]; then
+        component_count=$(wc -l < "$TEMP_DIR/components.txt")
+        log_info "  组件数量: $component_count"
+        log_info "  组件列表:"
+        while IFS= read -r line; do
+            component=$(echo "$line" | cut -d':' -f1)
+            version=$(echo "$line" | cut -d':' -f2)
+            log_info "    - $component v$version"
+        done < "$TEMP_DIR/components.txt"
+    else
+        log_error "components.txt 文件不存在"
+        exit 1
+    fi
+}
+
+# Verify artifact integrity.
+#
+# For every component:checksum line in $TEMP_DIR/checksums.txt, computes the
+# SHA-256 of the matching tarball next to the version manifest and compares it
+# with the expected "sha256:<hex>" value.
+# Exits with status 1 when a tarball is missing, a checksum differs, or no
+# checksum could be verified at all.
+verify_checksums() {
+    log_info "验证文件完整性..."
+
+    artifact_dir=$(dirname "$VERSION_FILE_PATH")
+    failed_verification=0
+
+    local verified_count=0
+    # <component>-YYYYMMDD-HHMMSS.tar.gz, the naming used by install_order
+    local stamp_re='^(.*)-[0-9]{8}-[0-9]{6}\.tar\.gz$'
+    local line component expected_checksum actual_file actual_checksum
+    local candidate file
+
+    if [[ -f "$TEMP_DIR/checksums.txt" ]]; then
+        while IFS= read -r line; do
+            if [[ -z "$line" ]]; then
+                continue
+            fi
+            component=${line%%:*}
+            expected_checksum=${line#*:}
+
+            # Prefer the exact file name recorded in install_order: the
+            # artifact directory may still hold older tarballs of the same
+            # component, and a bare glob would pick whichever sorts first.
+            actual_file=""
+            if [[ -f "$TEMP_DIR/install_order.txt" ]]; then
+                while IFS= read -r candidate; do
+                    if [[ "$candidate" =~ $stamp_re \
+                          && "${BASH_REMATCH[1]}" == "$component" \
+                          && -f "$artifact_dir/$candidate" ]]; then
+                        actual_file="$artifact_dir/$candidate"
+                        break
+                    fi
+                done < "$TEMP_DIR/install_order.txt"
+            fi
+
+            # Fall back to the first tarball whose name starts with the component
+            if [[ -z "$actual_file" ]]; then
+                for file in "$artifact_dir/${component}-"*.tar.gz; do
+                    if [[ -f "$file" ]]; then
+                        actual_file="$file"
+                        break
+                    fi
+                done
+            fi
+
+            if [[ -z "$actual_file" ]]; then
+                log_error "找不到组件文件: $component"
+                failed_verification=1
+                continue
+            fi
+
+            # Compute the actual checksum
+            actual_checksum="sha256:$(sha256sum "$actual_file" | cut -d' ' -f1)"
+
+            if [[ "$actual_checksum" == "$expected_checksum" ]]; then
+                log_success "  $component: 校验通过"
+                verified_count=$((verified_count + 1))
+            else
+                log_error "  $component: 校验失败"
+                log_error "    期望: $expected_checksum"
+                log_error "    实际: $actual_checksum"
+                failed_verification=1
+            fi
+        done < "$TEMP_DIR/checksums.txt"
+    fi
+
+    if [[ $failed_verification -eq 1 ]]; then
+        log_error "文件完整性验证失败"
+        exit 1
+    fi
+
+    # Never report success when nothing was actually checked.
+    if [[ $verified_count -eq 0 ]]; then
+        log_error "未找到任何校验和条目，无法验证文件完整性"
+        exit 1
+    fi
+
+    log_success "所有文件校验通过"
+}
+
+# 创建安装目录
+create_install_dirs() {
+    log_info "创建安装目录..."
+    
+    mkdir -p "$INSTALL_DIR"
+    mkdir -p "$TEMP_DIR"
+    
+    log_success "安装目录创建完成: $INSTALL_DIR"
+}
+
+# 安装系统依赖包
+install_system_deps() {
+    log_info "检查系统依赖包..."
+    
+    local script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+    local deps_dir="$script_dir/deps"
+    
+    # 检查deps目录是否存在
+    if [[ ! -d "$deps_dir" ]]; then
+        log_info "deps 目录不存在，跳过系统依赖包安装"
+        return 0
+    fi
+    
+    # 检查是否有tar.gz文件
+    local deps_count=$(find "$deps_dir" -name "*.tar.gz" | wc -l)
+    if [[ $deps_count -eq 0 ]]; then
+        log_info "deps 目录中没有 tar.gz 文件，跳过系统依赖包安装"
+        return 0
+    fi
+    
+    log_info "找到 $deps_count 个系统依赖包，开始安装..."
+    
+    # 创建临时目录用于解压依赖包
+    local deps_temp_dir="$TEMP_DIR/deps"
+    mkdir -p "$deps_temp_dir"
+    
+    # 处理每个tar.gz文件
+    find "$deps_dir" -name "*.tar.gz" | while read tar_file; do
+        local tar_basename=$(basename "$tar_file")
+        local extract_name="${tar_basename%.tar.gz}"
+        
+        log_info "处理依赖包: $tar_basename"
+        
+        # 解压到临时目录
+        local extract_dir="$deps_temp_dir/$extract_name"
+        mkdir -p "$extract_dir"
+        
+        if tar -xzf "$tar_file" -C "$extract_dir" 2>/dev/null; then
+            log_success "  $tar_basename 解压完成"
+        else
+            log_error "  $tar_basename 解压失败"
+            continue
+        fi
+        
+        # 进入解压目录，查找deb包
+        cd "$extract_dir"
+        local deb_count=$(find . -name "*.deb" | wc -l)
+        
+        if [[ $deb_count -gt 0 ]]; then
+            log_info "  找到 $deb_count 个 deb 包，开始安装..."
+            
+            # 1. 先尝试安装所有deb包
+            log_info "  第1步：批量安装deb包..."
+            if dpkg -i *.deb 2>/dev/null; then
+                log_success "  所有deb包安装成功"
+            else
+                log_warning "  部分deb包安装失败，可能存在依赖问题"
+                
+                # 2. 使用apt-get修复依赖
+                log_info "  第2步：修复依赖关系..."
+                if apt-get install -f -y; then
+                    log_success "  依赖关系修复完成"
+                else
+                    log_error "  依赖关系修复失败"
+                    # 继续处理其他包，不退出
+                fi
+            fi
+        else
+            log_info "  $tar_basename 中没有找到deb包，跳过"
+        fi
+        
+        # 返回到依赖临时目录
+        cd "$deps_temp_dir"
+    done
+    
+    # 检查并启动 cron 服务
+    start_cron_service
+    
+    log_success "系统依赖包安装完成"
+}
+
+# Start the cron daemon unless it is already running.
+#
+# Returns 0 when cron was already running or the start command succeeded,
+# 1 when /usr/sbin/cron is missing or both start attempts fail.
+start_cron_service() {
+    log_info "检查并启动 cron 服务..."
+
+    # Already running: nothing to do
+    if pgrep -x "cron" > /dev/null; then
+        log_success "cron 服务已在运行"
+        return 0
+    fi
+
+    # The binary must exist before it can be started
+    if ! [[ -f "/usr/sbin/cron" ]]; then
+        log_warning "cron 可执行文件不存在，跳过启动"
+        return 1
+    fi
+
+    log_info "启动 cron 服务..."
+    # Try `cron start` first, then a bare `cron`; fail only when both fail.
+    if ! /usr/sbin/cron start 2>/dev/null && ! /usr/sbin/cron 2>/dev/null; then
+        log_error "cron 服务启动失败"
+        return 1
+    fi
+    log_success "cron 服务启动成功"
+
+    # Give the daemon a moment, then confirm the process is present
+    sleep 2
+
+    if pgrep -x "cron" > /dev/null; then
+        log_success "cron 服务运行正常"
+    else
+        log_warning "cron 服务可能未正常启动"
+    fi
+}
+
+# 安装组件
+install_components() {
+    log_info "开始安装组件..."
+    
+    artifact_dir=$(dirname "$VERSION_FILE_PATH")
+    install_count=0
+    total_count=0
+    
+    if [[ -f "$TEMP_DIR/install_order.txt" ]]; then
+        total_count=$(wc -l < "$TEMP_DIR/install_order.txt")
+    fi
+    
+    if [[ -f "$TEMP_DIR/install_order.txt" ]]; then
+        while IFS= read -r filename; do
+            install_count=$((install_count + 1))
+            
+            # 从文件名中提取组件名（去掉时间戳后缀）
+            component=$(echo "$filename" | sed 's/-[0-9]\{8\}-[0-9]\{6\}\.tar\.gz$//')
+            
+            log_info "[$install_count/$total_count] 安装 $component..."
+            log_info "  文件名: $filename"
+            
+            # 直接使用完整的文件名
+            tar_file="$artifact_dir/$filename"
+            
+            if [[ ! -f "$tar_file" ]]; then
+                log_error "找不到组件文件: $filename"
+                log_info "  期望路径: $tar_file"
+                log_info "  当前目录: $(pwd)"
+                log_info "  目录内容:"
+                ls -la "$artifact_dir" | while read line; do
+                    log_info "    $line"
+                done
+                exit 1
+            fi
+            
+            log_info "  找到文件: $tar_file"
+            
+            # 解压到临时目录
+            component_temp_dir="$TEMP_DIR/$component"
+            mkdir -p "$component_temp_dir"
+            
+            if tar -xzf "$tar_file" -C "$component_temp_dir" 2>/dev/null; then
+                log_success "  $component 解压完成"
+            else
+                log_error "  $component 解压失败"
+                exit 1
+            fi
+            
+            # 查找解压后的目录
+            extracted_dir=""
+            for dir in "$component_temp_dir"/*; do
+                if [[ -d "$dir" ]]; then
+                    extracted_dir="$dir"
+                    break
+                fi
+            done
+            
+            if [[ -z "$extracted_dir" ]]; then
+                log_error "  $component 解压后未找到目录"
+                exit 1
+            fi
+            
+            # 执行安装脚本
+            if [[ -f "$extracted_dir/install.sh" ]]; then
+                log_info "  执行 $component 安装脚本..."
+                if (cd "$extracted_dir" && ./install.sh); then
+                    log_success "  $component 安装完成"
+                else
+                    log_error "  $component 安装失败"
+                    exit 1
+                fi
+            else
+                log_error "  $component 缺少 install.sh 文件"
+                exit 1
+            fi
+            
+            # 将解压后的目录移动到安装目录，保留组件目录
+            component_install_dir="$INSTALL_DIR/$component"
+            if [[ -d "$component_install_dir" ]]; then
+                log_info "  组件目录已存在，备份后更新: $component_install_dir"
+                mv "$component_install_dir" "${component_install_dir}.backup.$(date +%Y%m%d_%H%M%S)"
+            fi
+            mv "$extracted_dir" "$component_install_dir"
+            log_success "  组件目录已保存: $component_install_dir"
+            
+            # 清理临时文件
+            rm -rf "$component_temp_dir"
+        done < "$TEMP_DIR/install_order.txt"
+    fi
+    
+    log_success "所有组件安装完成"
+}
+
+# 创建安装记录
+create_install_record() {
+    log_info "创建安装记录..."
+    
+    # 等待一段时间确保所有进程都已启动
+    log_info "等待进程启动..."
+    sleep 3
+    
+    local install_time=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
+    local install_record_file=".install_record"
+    
+    # 创建 JSON 格式的安装记录
+    cat > "$install_record_file" << EOF
+{
+  "version": "$VERSION",
+  "build_time": "$BUILD_TIME",
+  "install_time": "$install_time",
+  "install_dir": "$INSTALL_DIR",
+  "install_pid": $$,
+  "components": {
+EOF
+
+    # 添加组件信息
+    local first_component=true
+    if [[ -f "$TEMP_DIR/components.txt" ]]; then
+        while IFS= read -r line; do
+            component=$(echo "$line" | cut -d':' -f1)
+            version=$(echo "$line" | cut -d':' -f2)
+            
+            # 获取组件的进程信息
+            local component_pid=""
+            
+            # 根据组件名查找进程，使用多种方法确保能找到PID
+            case "$component" in
+                "node-exporter-installer")
+                    # 尝试多种方式查找node_exporter进程
+                    component_pid=$(pgrep -f "node_exporter" | head -1)
+                    if [[ -z "$component_pid" ]]; then
+                        component_pid=$(pgrep -f "node-exporter" | head -1)
+                    fi
+                    if [[ -z "$component_pid" ]]; then
+                        component_pid=$(ps aux | grep -v grep | grep "node_exporter" | awk '{print $2}' | head -1)
+                    fi
+                    ;;
+                "dcgm-exporter-installer")
+                    # 尝试多种方式查找dcgm-exporter进程
+                    component_pid=$(pgrep -f "dcgm-exporter" | head -1)
+                    if [[ -z "$component_pid" ]]; then
+                        component_pid=$(pgrep -f "dcgm_exporter" | head -1)
+                    fi
+                    if [[ -z "$component_pid" ]]; then
+                        component_pid=$(ps aux | grep -v grep | grep "dcgm-exporter" | awk '{print $2}' | head -1)
+                    fi
+                    ;;
+            esac
+            
+            # 记录找到的PID信息
+            if [[ -n "$component_pid" ]]; then
+                log_info "  找到 $component 进程 PID: $component_pid"
+            else
+                log_warning "  未找到 $component 进程"
+            fi
+            
+            # 添加逗号分隔符
+            if [[ "$first_component" == "true" ]]; then
+                first_component=false
+            else
+                echo "," >> "$install_record_file"
+            fi
+            
+            # 添加组件信息
+            cat >> "$install_record_file" << EOF
+    "$component": {
+      "version": "$version",
+      "pid": "$component_pid",
+      "install_dir": "$INSTALL_DIR/$component"
+    }
+EOF
+        done < "$TEMP_DIR/components.txt"
+    fi
+    
+    # 结束 JSON
+    cat >> "$install_record_file" << EOF
+  }
+}
+EOF
+
+    log_success "安装记录已创建: $install_record_file"
+}
+
+# 设置健康检查定时任务
+setup_health_check_cron() {
+    log_info "设置健康检查定时任务..."
+    
+    # 直接使用当前安装目录，不依赖current软链接
+    # INSTALL_DIR 是 /opt/argus-metric/versions/1.34.0
+    local check_health_script="$INSTALL_DIR/check_health.sh"
+    
+    # 检查健康检查脚本是否存在
+    if [[ ! -f "$check_health_script" ]]; then
+        log_error "健康检查脚本不存在: $check_health_script"
+        return 1
+    fi
+    
+    # 确保脚本有执行权限
+    chmod +x "$check_health_script"
+    
+    # 创建临时crontab文件
+    local temp_cron="/tmp/crontab_$$"
+    
+    # 获取当前用户的crontab（如果存在）
+    crontab -l 2>/dev/null > "$temp_cron" || touch "$temp_cron"
+    
+    # 检查并删除旧的健康检查任务
+    if grep -q "check_health.sh" "$temp_cron"; then
+        log_info "发现旧的健康检查定时任务，正在更新..."
+        # 删除所有包含check_health.sh的行
+        grep -v "check_health.sh" "$temp_cron" > "$temp_cron.new"
+        mv "$temp_cron.new" "$temp_cron"
+        log_info "旧的健康检查定时任务已删除"
+    fi
+    
+    # 添加新的定时任务（每5分钟执行一次）
+    echo "# Argus-Metrics 健康检查定时任务" >> "$temp_cron"
+    echo "*/5 * * * * $check_health_script >> $INSTALL_DIR/.health_cron.log 2>&1" >> "$temp_cron"
+    
+    # 安装新的crontab
+    if crontab "$temp_cron"; then
+        log_success "健康检查定时任务设置成功"
+        log_info "  执行频率: 每5分钟"
+        log_info "  日志文件: $INSTALL_DIR/.health_cron.log"
+        log_info "  查看定时任务: crontab -l"
+        log_info "  删除定时任务: crontab -e"
+    else
+        log_error "健康检查定时任务设置失败"
+        rm -f "$temp_cron"
+        return 1
+    fi
+    
+    # 清理临时文件
+    rm -f "$temp_cron"
+    
+    # 立即执行一次健康检查
+    log_info "执行首次健康检查..."
+    if "$check_health_script"; then
+        log_success "首次健康检查完成"
+    else
+        log_warning "首次健康检查失败，但定时任务已设置"
+    fi
+}
+
+# Print the post-install summary: version, build time, install directory,
+# installed components, exporter endpoints and health-check file locations.
+show_install_info() {
+    log_success "Argus-Metrics All-in-One 安装完成！"
+    printf '%s\n' \
+        "" \
+        "安装信息:" \
+        "  版本: $VERSION" \
+        "  构建时间: $BUILD_TIME" \
+        "  安装目录: $INSTALL_DIR" \
+        "" \
+        "已安装组件:"
+
+    if [[ -f "$TEMP_DIR/components.txt" ]]; then
+        local entry name rest
+        while IFS= read -r entry; do
+            # First and second colon-separated fields of the entry
+            name=${entry%%:*}
+            rest=${entry#*:}
+            echo "  - $name v${rest%%:*}"
+        done < "$TEMP_DIR/components.txt"
+    fi
+
+    printf '%s\n' \
+        "" \
+        "访问地址:" \
+        "  Node Exporter: http://localhost:9100" \
+        "  DCGM Exporter: http://localhost:9400" \
+        "" \
+        "健康检查:" \
+        "  安装记录: .install_record" \
+        "  健康日志: .health_log" \
+        "  定时任务日志: .health_cron.log" \
+        "  查看定时任务: crontab -l" \
+        ""
+}
+
+cleanup() {
+    # Remove the scratch directory if it was created.
+    if ! [[ -d "$TEMP_DIR" ]]; then
+        return 0
+    fi
+    rm -rf "$TEMP_DIR"
+}
+
+# Run cleanup on every exit path of the script.
+trap cleanup EXIT
+
+# 主函数
+main() {
+    echo "=========================================="
+    echo "    Argus-Metrics All-in-One 安装脚本 v1.0"
+    echo "=========================================="
+    echo
+    log_info "安装目录: $INSTALL_DIR"
+    echo
+    
+    check_root
+    check_system
+    find_version_file
+    create_install_dirs
+    parse_version_info
+    verify_checksums
+    install_system_deps
+    install_components
+    create_install_record
+    setup_health_check_cron
+    show_install_info
+}
+
+# Run main only when executed directly, not when sourced.
+if [[ "${0}" == "${BASH_SOURCE[0]}" ]]; then
+    main "$@"
+fi
--- a/src/metric/client-plugins/demo-all-in-one/package_artifact.sh
+++ b/src/metric/client-plugins/demo-all-in-one/package_artifact.sh
@ -0,0 +1,433 @@
+#!/bin/bash
+
+set -e
+
+# 颜色定义
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+NC='\033[0m' # No Color
+
+# Logging helpers
+log_info() {
+    # Print $1 on stdout behind a blue [INFO] tag.
+    local tag="${BLUE}[INFO]${NC}"
+    echo -e "${tag} $1"
+}
+
+log_success() {
+    # Print $1 on stdout behind a green [SUCCESS] tag.
+    local tag="${GREEN}[SUCCESS]${NC}"
+    echo -e "${tag} $1"
+}
+
+log_warning() {
+    # Print $1 on stdout behind a yellow [WARNING] tag.
+    local tag="${YELLOW}[WARNING]${NC}"
+    echo -e "${tag} $1"
+}
+
+log_error() {
+    # Print $1 on stdout behind a red [ERROR] tag.
+    local tag="${RED}[ERROR]${NC}"
+    echo -e "${tag} $1"
+}
+
+# Print usage information for the packaging script.
+show_help() {
+    printf '%s\n' \
+        "AIOps All-in-One 打包脚本" \
+        "" \
+        "用法: $0 [选项]" \
+        "" \
+        "选项:" \
+        "  --force     强制重新打包，即使版本已存在" \
+        "  --help     显示此帮助信息" \
+        "" \
+        "示例:" \
+        "  $0              # 正常打包，跳过已存在的版本" \
+        "  $0 --force      # 强制重新打包" \
+        ""
+}
+
+# 解析命令行参数
+FORCE_PACKAGE=false
+if [[ "$1" == "--force" ]]; then
+    FORCE_PACKAGE=true
+    log_info "强制重新打包模式"
+elif [[ "$1" == "--help" || "$1" == "-h" ]]; then
+    show_help
+    exit 0
+fi
+
+# Resolve the working directory and the package version.
+CURRENT_DIR=$(pwd)
+
+# Both input files are mandatory, so check them before reading anything.
+log_info "检查必要文件..."
+if [[ ! -f "VERSION" ]]; then
+    log_error "VERSION 文件不存在"
+    exit 1
+fi
+
+if [[ ! -f "checklist" ]]; then
+    log_error "checklist 文件不存在"
+    exit 1
+fi
+
+# Use the first line of VERSION with all whitespace (including a CR from CRLF
+# files) removed. An empty value would turn ARTIFACT_DIR into "artifact/",
+# so it is rejected.
+IFS= read -r VERSION < VERSION || true
+VERSION=${VERSION//[[:space:]]/}
+if [[ -z "$VERSION" ]]; then
+    log_error "VERSION 文件为空"
+    exit 1
+fi
+ARTIFACT_DIR="artifact/$VERSION"
+
+log_info "开始打包 AIOps All-in-One 安装包 v$VERSION"
+
+# 检查是否已存在该版本
+if [[ -d "$ARTIFACT_DIR" && "$FORCE_PACKAGE" == "false" ]]; then
+    log_info "检查版本 $VERSION 是否已存在..."
+    
+    # 检查 version.json 是否存在
+    if [[ -f "$ARTIFACT_DIR/version.json" ]]; then
+        log_info "找到已存在的版本信息文件"
+        
+        # 检查是否所有组件文件都存在
+        missing_files=0
+        existing_components=0
+        
+        # 解析已存在的 version.json 来检查文件
+        if command -v jq &> /dev/null; then
+            # 使用 jq 解析
+            while IFS= read -r component; do
+                existing_components=$((existing_components + 1))
+                # 查找对应的 tar 文件
+                found_file=false
+                for file in "$ARTIFACT_DIR/${component}-"*.tar.gz; do
+                    if [[ -f "$file" ]]; then
+                        found_file=true
+                        break
+                    fi
+                done
+                if [[ "$found_file" == "false" ]]; then
+                    missing_files=$((missing_files + 1))
+                    log_warning "  缺少文件: $component"
+                fi
+            done < <(jq -r '.artifact_list | keys[]' "$ARTIFACT_DIR/version.json" 2>/dev/null)
+        else
+            # 简单的文件检查
+            for file in "$ARTIFACT_DIR"/*.tar.gz; do
+                if [[ -f "$file" ]]; then
+                    existing_components=$((existing_components + 1))
+                fi
+            done
+        fi
+        
+        # 如果所有文件都存在，则跳过打包
+        if [[ $missing_files -eq 0 && $existing_components -gt 0 ]]; then
+            log_success "版本 $VERSION 已完整打包，跳过重复打包"
+            echo
+            echo "现有文件:"
+            ls -la "$ARTIFACT_DIR"
+            echo
+            echo "如需强制重新打包，请删除目录: rm -rf $ARTIFACT_DIR"
+            echo "或使用: ./package.sh --force"
+            exit 0
+        else
+            log_warning "版本 $VERSION 存在但不完整，将重新打包"
+            log_info "  现有组件: $existing_components"
+            log_info "  缺少文件: $missing_files"
+        fi
+    else
+        log_warning "版本目录存在但缺少 version.json，将重新打包"
+    fi
+fi
+
+# Create the artifact output directory
+mkdir -p "$ARTIFACT_DIR"
+log_info "创建输出目录: $ARTIFACT_DIR"
+
+# Scratch files for the intermediate data. The trap removes the directory on
+# every exit path, including error exits that do not clean up explicitly.
+TEMP_DIR=$(mktemp -d)
+trap 'rm -rf "$TEMP_DIR"' EXIT
+COMPONENTS_FILE="$TEMP_DIR/components.txt"
+VERSIONS_FILE="$TEMP_DIR/versions.txt"
+DEPENDENCIES_FILE="$TEMP_DIR/dependencies.txt"
+INSTALL_ORDER_FILE="$TEMP_DIR/install_order.txt"
+CHECKSUMS_FILE="$TEMP_DIR/checksums.txt"
+ARTIFACT_LIST_FILE="$TEMP_DIR/artifact_list.txt"
+
+# Parse the checklist file
+log_info "解析组件清单..."
+line_num=0
+component_count=0
+
+# `|| [[ -n "$line" ]]` keeps a final line that has no trailing newline;
+# without it the last component would be dropped silently.
+while IFS= read -r line || [[ -n "$line" ]]; do
+    # Tolerate CRLF line endings
+    line=${line%$'\r'}
+    [[ -z "$line" || "$line" =~ ^[[:space:]]*# ]] && continue
+
+    line_num=$((line_num + 1))
+
+    # Line format: name  directory  version  [dependency]  [install order]
+    read -r component component_path version dep_component order <<< "$line"
+
+    if [[ -z "$component" || -z "$component_path" || -z "$version" ]]; then
+        log_warning "跳过无效行 $line_num: $line"
+        continue
+    fi
+
+    # Record the component
+    echo "$component" >> "$COMPONENTS_FILE"
+    echo "$component:$version" >> "$VERSIONS_FILE"
+    echo "$component:$component_path" >> "$TEMP_DIR/component_paths.txt"
+
+    if [[ -n "$dep_component" && "$dep_component" != "$component" ]]; then
+        echo "$component:$dep_component" >> "$DEPENDENCIES_FILE"
+    fi
+
+    if [[ -n "$order" && "$order" =~ ^[0-9]+$ ]]; then
+        echo "$order:$component" >> "$INSTALL_ORDER_FILE"
+    else
+        # No explicit order: fall back to the parse position
+        echo "$line_num:$component" >> "$INSTALL_ORDER_FILE"
+    fi
+
+    component_count=$((component_count + 1))
+    log_info "  - $component v$version"
+done < checklist
+
+if [[ $component_count -eq 0 ]]; then
+    log_error "没有找到有效的组件"
+    rm -rf "$TEMP_DIR"
+    exit 1
+fi
+
+log_success "找到 $component_count 个组件"
+
+# 检查组件目录是否存在
+log_info "检查组件目录..."
+missing_components=()
+
+while IFS= read -r component; do
+    # 获取组件路径
+    component_path=$(grep "^$component:" "$TEMP_DIR/component_paths.txt" | cut -d':' -f2-)
+    if [[ -z "$component_path" ]]; then
+        log_error "未找到组件 $component 的路径配置"
+        log_info "请检查 component_paths.txt 文件或添加路径配置"
+        exit 1
+    fi
+    
+    if [[ ! -d "$component_path" ]]; then
+        missing_components+=("$component:$component_path")
+    fi
+done < "$COMPONENTS_FILE"
+
+if [[ ${#missing_components[@]} -gt 0 ]]; then
+    log_error "以下组件目录不存在:"
+    for component_path in "${missing_components[@]}"; do
+        echo "  - $component_path"
+    done
+    rm -rf "$TEMP_DIR"
+    exit 1
+fi
+
+# 打包各个组件
+log_info "开始打包组件..."
+
+while IFS= read -r component; do
+    # 获取组件版本和路径
+    version=$(grep "^$component:" "$VERSIONS_FILE" | cut -d':' -f2)
+    component_path=$(grep "^$component:" "$TEMP_DIR/component_paths.txt" | cut -d':' -f2-)
+    if [[ -z "$component_path" ]]; then
+        log_error "未找到组件 $component 的路径配置"
+        log_info "请检查 component_paths.txt 文件或添加路径配置"
+        exit 1
+    fi
+    
+    log_info "打包 $component v$version..."
+    log_info "  组件路径: $component_path"
+    
+    # 进入组件目录
+    cd "$component_path"
+    
+    # 检查组件是否有 package.sh
+    if [[ ! -f "package.sh" ]]; then
+        log_error "$component 缺少 package.sh 文件"
+        cd "$CURRENT_DIR"
+        rm -rf "$TEMP_DIR"
+        exit 1
+    fi
+    
+    # 执行组件的打包脚本
+    if ./package.sh; then
+        # 查找生成的 tar 包
+        tar_file=$(find . -name "*.tar.gz" -type f | head -1)
+        if [[ -n "$tar_file" ]]; then
+            # 移动到 artifact 目录
+            mv "$tar_file" "$CURRENT_DIR/$ARTIFACT_DIR/"
+            tar_filename=$(basename "$tar_file")
+            
+            # 计算校验和
+            checksum=$(sha256sum "$CURRENT_DIR/$ARTIFACT_DIR/$tar_filename" | cut -d' ' -f1)
+            echo "$component:sha256:$checksum" >> "$CHECKSUMS_FILE"
+            echo "$component:$version" >> "$ARTIFACT_LIST_FILE"
+            
+            # 将完整的文件名存储到安装顺序文件中
+            echo "$tar_filename" >> "$TEMP_DIR/install_order_files.txt"
+            
+            log_success "  $component 打包完成: $tar_filename"
+        else
+            log_error "$component 打包失败，未找到生成的 tar 包"
+            cd "$CURRENT_DIR"
+            rm -rf "$TEMP_DIR"
+            exit 1
+        fi
+    else
+        log_error "$component 打包失败"
+        cd "$CURRENT_DIR"
+        rm -rf "$TEMP_DIR"
+        exit 1
+    fi
+    
+    # 返回主目录
+    cd "$CURRENT_DIR"
+done < "$COMPONENTS_FILE"
+
+# Generate version.json from the intermediate files.
+log_info "生成版本信息文件..."
+version_json="$ARTIFACT_DIR/version.json"
+
+# dependencies: "component":["dependency"] members, comma-joined
+deps_json=""
+if [[ -f "$DEPENDENCIES_FILE" ]]; then
+    sep=""
+    while IFS= read -r entry; do
+        name=${entry%%:*}
+        rest=${entry#*:}
+        deps_json+="${sep}\"${name}\":[\"${rest%%:*}\"]"
+        sep=","
+    done < "$DEPENDENCIES_FILE"
+fi
+
+# install_order: quoted tarball file names, comma-joined
+order_array=""
+if [[ -f "$TEMP_DIR/install_order_files.txt" ]]; then
+    sep=""
+    while IFS= read -r entry; do
+        order_array+="${sep}\"${entry}\""
+        sep=","
+    done < "$TEMP_DIR/install_order_files.txt"
+fi
+
+# artifact_list: "component":"version" members, comma-joined
+artifact_json=""
+if [[ -f "$ARTIFACT_LIST_FILE" ]]; then
+    sep=""
+    while IFS= read -r entry; do
+        name=${entry%%:*}
+        rest=${entry#*:}
+        artifact_json+="${sep}\"${name}\":\"${rest%%:*}\""
+        sep=","
+    done < "$ARTIFACT_LIST_FILE"
+fi
+
+# checksums: "component":"sha256:<hex>" members; the value is everything
+# after the first colon
+checksums_json=""
+if [[ -f "$CHECKSUMS_FILE" ]]; then
+    sep=""
+    while IFS= read -r entry; do
+        checksums_json+="${sep}\"${entry%%:*}\":\"${entry#*:}\""
+        sep=","
+    done < "$CHECKSUMS_FILE"
+fi
+
+# Write the complete version.json
+cat > "$version_json" << EOF
+{
+  "version": "$VERSION",
+  "build_time": "$(date -u +%Y-%m-%dT%H:%M:%SZ)",
+  "artifact_list": {
+    $artifact_json
+  },
+  "checksums": {
+    $checksums_json
+  },
+  "dependencies": {
+    $deps_json
+  },
+  "install_order": [
+    $order_array
+  ]
+}
+EOF
+
+log_success "版本信息文件生成完成: $version_json"
+
+# Copy one helper script into the artifact directory and make it executable.
+#   $1 - label used in the log messages
+#   $2 - source file in the current directory
+#   $3 - target file name inside $ARTIFACT_DIR
+# A missing source file only produces a warning.
+copy_support_script() {
+    local label="$1" source_name="$2" target_name="$3"
+
+    log_info "复制${label}脚本..."
+    if [[ ! -f "$source_name" ]]; then
+        log_warning "$source_name 文件不存在"
+        return 0
+    fi
+
+    cp "$source_name" "$ARTIFACT_DIR/$target_name"
+    chmod +x "$ARTIFACT_DIR/$target_name"
+    log_success "${label}脚本复制完成: $ARTIFACT_DIR/$target_name"
+}
+
+# Install, uninstall and health-check scripts
+copy_support_script "安装" "install_artifact.sh" "install.sh"
+
+copy_support_script "卸载" "uninstall_artifact.sh" "uninstall.sh"
+
+copy_support_script "健康检查" "check_health.sh" "check_health.sh"
+
+# Bundled system dependency archives
+log_info "复制系统依赖包..."
+if [[ ! -d "deps" ]]; then
+    log_warning "deps 目录不存在，跳过依赖包复制"
+else
+    cp -r "deps" "$ARTIFACT_DIR/"
+    log_success "系统依赖包复制完成: $ARTIFACT_DIR/deps"
+
+    # List the copied archives
+    log_info "  依赖包列表:"
+    find "$ARTIFACT_DIR/deps" -name "*.tar.gz" -exec basename {} \; | while read dep_file; do
+        log_info "    - $dep_file"
+    done
+fi
+
+# Print the packaging summary
+log_success "打包完成！"
+printf '%s\n' \
+    "" \
+    "版本: $VERSION" \
+    "输出目录: $ARTIFACT_DIR" \
+    "包含组件:"
+if [[ -f "$ARTIFACT_LIST_FILE" ]]; then
+    while IFS= read -r entry; do
+        # First and second colon-separated fields of the entry
+        name=${entry%%:*}
+        rest=${entry#*:}
+        echo "  - $name v${rest%%:*}"
+    done < "$ARTIFACT_LIST_FILE"
+fi
+printf '%s\n' "" "文件列表:"
+ls -la "$ARTIFACT_DIR"
+printf '\n'
+
+# Remove the scratch directory
+rm -rf "$TEMP_DIR"
--- a/src/metric/client-plugins/demo-all-in-one/publish_artifact.sh
+++ b/src/metric/client-plugins/demo-all-in-one/publish_artifact.sh
@ -0,0 +1,149 @@
+#!/bin/bash
+
+set -e
+
+# 颜色定义
+GREEN='\033[0;32m'
+BLUE='\033[0;34m'
+RED='\033[0;31m'
+NC='\033[0m' # No Color
+
+log_info() {
+    echo -e "${BLUE}[INFO]${NC} $1"
+}
+
+log_success() {
+    echo -e "${GREEN}[SUCCESS]${NC} $1"
+}
+
+log_error() {
+    echo -e "${RED}[ERROR]${NC} $1"
+}
+
+# Print usage information for the publish script.
+show_help() {
+    cat << EOF
+Argus-Metric Artifact 发布脚本
+
+用法: $0 <版本号>
+
+参数:
+  <版本号>    要发布的版本号，对应 artifact 目录中的版本
+
+示例:
+  $0 1.20.0    # 发布 1.20.0 版本
+
+EOF
+}
+
+# Require exactly one argument: the version to publish
+if [[ $# -ne 1 ]]; then
+    log_error "请提供版本号参数"
+    show_help
+    exit 1
+fi
+
+VERSION="$1"
+ARTIFACT_DIR="artifact/$VERSION"
+PUBLISH_DIR="/srv/ftp/share"
+
+# Abort when the artifact directory for this version does not exist
+# (the path is relative to the current working directory)
+if [[ ! -d "$ARTIFACT_DIR" ]]; then
+    log_error "版本目录不存在: $ARTIFACT_DIR"
+    exit 1
+fi
+
+log_info "开始发布版本: $VERSION"
+
+# Make sure the publish directory exists
+log_info "确保发布目录存在: $PUBLISH_DIR"
+mkdir -p "$PUBLISH_DIR"
+
+# 创建临时目录用于打包
+TEMP_PACKAGE_DIR="/tmp/argus-metric-package-$$"
+mkdir -p "$TEMP_PACKAGE_DIR"
+
+# 复制所有 tar.gz 文件到临时目录
+log_info "准备 artifact 文件..."
+tar_files=$(find "$ARTIFACT_DIR" -name "*.tar.gz" -type f)
+
+if [[ -z "$tar_files" ]]; then
+    log_error "在 $ARTIFACT_DIR 中未找到 tar.gz 文件"
+    exit 1
+fi
+
+for file in $tar_files; do
+    filename=$(basename "$file")
+    log_info "  准备: $filename"
+    cp "$file" "$TEMP_PACKAGE_DIR/"
+done
+
+# Copy the version metadata file, if the artifact has one
+if [[ -f "$ARTIFACT_DIR/version.json" ]]; then
+    log_info "复制版本信息文件..."
+    cp "$ARTIFACT_DIR/version.json" "$TEMP_PACKAGE_DIR/"
+fi
+
+# Copy the health-check script: prefer the copy inside the artifact directory,
+# fall back to the one in the current directory.
+# NOTE(review): the last branch relies on a log_warning helper being defined
+# earlier in this script - confirm it exists.
+if [[ -f "$ARTIFACT_DIR/check_health.sh" ]]; then
+    log_info "复制健康检查脚本..."
+    cp "$ARTIFACT_DIR/check_health.sh" "$TEMP_PACKAGE_DIR/"
+elif [[ -f "check_health.sh" ]]; then
+    log_info "复制健康检查脚本 (从当前目录)..."
+    cp "check_health.sh" "$TEMP_PACKAGE_DIR/"
+else
+    log_warning "未找到 check_health.sh 文件"
+fi
+
+# Copy the install script from the current directory, renamed to install.sh
+if [[ -f "install_artifact.sh" ]]; then
+    log_info "复制安装脚本..."
+    cp "install_artifact.sh" "$TEMP_PACKAGE_DIR/install.sh"
+fi
+
+# Copy the uninstall script from the current directory, renamed to uninstall.sh
+if [[ -f "uninstall_artifact.sh" ]]; then
+    log_info "复制卸载脚本..."
+    cp "uninstall_artifact.sh" "$TEMP_PACKAGE_DIR/uninstall.sh"
+fi
+
+# 创建tar包，使用新的命名规范
+TAR_NAME="argus-metric_$(echo $VERSION | tr '.' '_').tar.gz"
+log_info "创建发布包: $TAR_NAME"
+cd "$TEMP_PACKAGE_DIR"
+tar -czf "$PUBLISH_DIR/$TAR_NAME" *
+cd - > /dev/null
+
+# 清理临时目录
+rm -rf "$TEMP_PACKAGE_DIR"
+
+# 更新 LATEST_VERSION 文件
+log_info "更新 LATEST_VERSION 文件..."
+echo "$VERSION" > "$PUBLISH_DIR/LATEST_VERSION"
+
+# Copy setup.sh next to the release tarball so clients can download it
+if [[ -f "setup.sh" ]]; then
+    log_info "复制 setup.sh 到发布目录..."
+    cp "setup.sh" "$PUBLISH_DIR/"
+fi
+
+# Print the publish summary
+log_success "版本 $VERSION 发布完成！"
+echo
+echo "发布目录: $PUBLISH_DIR"
+echo "发布包: $PUBLISH_DIR/$TAR_NAME"
+echo "包大小: $(du -h "$PUBLISH_DIR/$TAR_NAME" | cut -f1)"
+echo "最新版本: $(cat "$PUBLISH_DIR/LATEST_VERSION")"
+echo
+echo "发布目录中的文件:"
+ls -la "$PUBLISH_DIR" | while IFS= read -r line; do
+    echo "  $line"
+done
+echo
+# Usage hints. setup.sh is a Bash script (it uses [[ ]] and arrays), so it is
+# invoked with bash rather than sh, and its FTP parameters are mandatory, so
+# every install example passes them.
+echo "使用方法:"
+echo "  1. 确保 /srv/ftp/share 目录可通过 FTP 访问"
+echo "  2. 用户首先下载安装脚本:"
+echo "     curl -u ftpuser:admin1234 ftp://10.211.55.4/setup.sh -o setup.sh"
+echo "  3. 然后执行安装 (自动获取最新版本):"
+echo "     sudo bash setup.sh --server 10.211.55.4 --user ftpuser --password admin1234"
+echo "  4. 或者指定版本安装:"
+echo "     sudo bash setup.sh --server 10.211.55.4 --user ftpuser --password admin1234 --version $VERSION"
+echo "  5. 或者指定不同的FTP服务器:"
+echo "     sudo bash setup.sh --server 192.168.1.100 --user myuser --password mypass"
--- a/src/metric/client-plugins/demo-all-in-one/setup.sh
+++ b/src/metric/client-plugins/demo-all-in-one/setup.sh
@ -0,0 +1,862 @@
+#!/bin/bash
+
+set -e
+
+# 颜色定义
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+NC='\033[0m' # No Color
+
+# Logging helpers. Each public function prints "$1" behind a colored level tag.
+_log_with_level() {
+    # $1 = color escape sequence, $2 = level tag, $3 = message
+    echo -e "${1}[${2}]${NC} ${3}"
+}
+
+log_info() {
+    _log_with_level "$BLUE" "INFO" "$1"
+}
+
+log_success() {
+    _log_with_level "$GREEN" "SUCCESS" "$1"
+}
+
+log_warning() {
+    _log_with_level "$YELLOW" "WARNING" "$1"
+}
+
+log_error() {
+    _log_with_level "$RED" "ERROR" "$1"
+}
+
+FTP_SERVER="${FTP_SERVER}"
+FTP_USER="${FTP_USER}"
+FTP_PASS="${FTP_PASS}"
+FTP_PORT="${FTP_PORT:-21}"
+BASE_URL=""                                  # FTP基础URL (将在check_ftp_params中设置)
+LATEST_VERSION_URL=""                        # 版本文件URL (将在check_ftp_params中设置)
+TEMP_DIR="/tmp/argus-metric-install-$$"
+
+# 安装目录配置
+DEFAULT_INSTALL_DIR="/opt/argus-metric"      # 默认安装目录
+INSTALL_DIR="${INSTALL_DIR:-$DEFAULT_INSTALL_DIR}"  # 可通过环境变量覆盖
+VERSIONS_DIR="$INSTALL_DIR/versions"         # 版本目录
+BACKUPS_DIR="$INSTALL_DIR/backups"           # 备份目录
+CURRENT_LINK="$INSTALL_DIR/current"          # 当前版本软链接
+LATEST_VERSION_FILE="$INSTALL_DIR/LATEST_VERSION"  # 当前版本记录文件
+
+# Verify that the mandatory FTP parameters are set and derive the URLs.
+# Globals:  FTP_SERVER, FTP_USER, FTP_PASS, FTP_PORT (read)
+#           BASE_URL, LATEST_VERSION_URL (written)
+# Exits with status 1 when FTP_SERVER, FTP_USER or FTP_PASS is empty.
+check_ftp_params() {
+    local missing_params=()
+
+    if [[ -z "$FTP_SERVER" ]]; then
+        missing_params+=("FTP_SERVER")
+    fi
+
+    if [[ -z "$FTP_USER" ]]; then
+        missing_params+=("FTP_USER")
+    fi
+
+    if [[ -z "$FTP_PASS" ]]; then
+        missing_params+=("FTP_PASS")
+    fi
+
+    if [[ ${#missing_params[@]} -gt 0 ]]; then
+        log_error "缺少必需的FTP参数: ${missing_params[*]}"
+        log_error "请通过以下方式之一设置FTP参数:"
+        log_error "  1. 命令行参数: --server <地址> --user <用户名> --password <密码>"
+        log_error "  2. 环境变量: FTP_SERVER=<地址> FTP_USER=<用户名> FTP_PASS=<密码>"
+        log_error ""
+        log_error "示例:"
+        # This script needs Bash ([[ ]], arrays), so the examples call bash,
+        # not sh. Variables are given after `sudo` because sudo's default
+        # env_reset drops variables that are set in front of it.
+        log_error "  sudo bash setup.sh --server 10.211.55.4 --user ftpuser --password admin1234"
+        log_error "  sudo FTP_SERVER=10.211.55.4 FTP_USER=ftpuser FTP_PASS=admin1234 bash setup.sh"
+        exit 1
+    fi
+
+    # Derive BASE_URL and LATEST_VERSION_URL from the validated parameters
+    BASE_URL="ftp://${FTP_SERVER}:${FTP_PORT}"
+    LATEST_VERSION_URL="$BASE_URL/LATEST_VERSION"
+
+    log_info "FTP配置:"
+    log_info "  服务器: $FTP_SERVER:$FTP_PORT"
+    log_info "  用户: $FTP_USER"
+}
+
+# 获取最新版本号的函数
+get_latest_version() {
+    log_info "获取最新版本信息..." >&2
+    log_info "尝试从URL获取: $LATEST_VERSION_URL" >&2
+    
+    # 先测试FTP连接
+    log_info "测试FTP连接..." >&2
+    if ! curl -u "${FTP_USER}:${FTP_PASS}" -sfI "$LATEST_VERSION_URL" >/dev/null 2>&1; then
+        log_error "无法连接到FTP服务器或文件不存在" >&2
+        log_error "URL: $LATEST_VERSION_URL" >&2
+        log_error "请检查:" >&2
+        log_error "  1. FTP服务器是否运行: $FTP_SERVER:$FTP_PORT" >&2
+        log_error "  2. 用户名密码是否正确: $FTP_USER" >&2
+        log_error "  3. LATEST_VERSION文件是否存在" >&2
+        log_error "手动测试命令: curl -u ${FTP_USER}:${FTP_PASS} ftp://${FTP_SERVER}/LATEST_VERSION" >&2
+        exit 1
+    fi
+    
+    # 获取文件内容
+    if ! LATEST_VERSION=$(curl -u "${FTP_USER}:${FTP_PASS}" -sfL "$LATEST_VERSION_URL" 2>/dev/null | tr -d '[:space:]'); then
+        log_error "下载LATEST_VERSION文件失败" >&2
+        exit 1
+    fi
+    
+    log_info "原始获取内容: '$LATEST_VERSION'" >&2
+    
+    if [[ -z "$LATEST_VERSION" ]]; then
+        log_error "获取到的版本信息为空" >&2
+        log_error "可能的原因:" >&2
+        log_error "  1. LATEST_VERSION文件为空" >&2
+        log_error "  2. 文件内容格式不正确" >&2
+        log_error "  3. 网络传输问题" >&2
+        log_error "请检查FTP服务器上的 /srv/ftp/share/LATEST_VERSION 文件" >&2
+        exit 1
+    fi
+    
+    log_info "检测到最新版本: $LATEST_VERSION" >&2
+    echo "$LATEST_VERSION"
+}
+
+# 解析参数
+ARGUS_VERSION=""  # 使用不同的变量名避免与系统VERSION冲突
+ACTION="install"
+
+while [[ $# -gt 0 ]]; do
+    case $1 in
+        --version)
+            ARGUS_VERSION="$2"
+            shift 2
+            ;;
+        --server)
+            FTP_SERVER="$2"
+            shift 2
+            ;;
+        --user)
+            FTP_USER="$2"
+            shift 2
+            ;;
+        --password)
+            FTP_PASS="$2"
+            shift 2
+            ;;
+        --port)
+            FTP_PORT="$2"
+            shift 2
+            ;;
+        --uninstall)
+            ACTION="uninstall"
+            shift
+            ;;
+        --install-dir)
+            INSTALL_DIR="$2"
+            shift 2
+            ;;
+        --rollback)
+            ACTION="rollback"
+            shift
+            ;;
+        --backup-list)
+            ACTION="backup-list"
+            shift
+            ;;
+        --status)
+            ACTION="status"
+            shift
+            ;;
+        --help)
+            echo "Argus Metric FTP在线安装脚本"
+            echo
+            echo "用法: curl -u <用户名>:<密码> ftp://<服务器>/setup.sh -o setup.sh && sh setup.sh [选项]"
+            echo
+            echo "必需参数 (必须通过命令行参数或环境变量设置):"
+            echo "  --server SERVER       FTP服务器地址 (必须)"
+            echo "  --user USER           FTP用户名 (必须)"
+            echo "  --password PASS       FTP密码 (必须)"
+            echo
+            echo "可选参数:"
+            echo "  --version VERSION     指定版本 (默认: 自动获取最新版本)"
+            echo "  --port PORT           FTP端口 (默认: 21)"
+            echo "  --install-dir DIR     安装目录 (默认: /opt/argus-metric)"
+            echo "  --uninstall           卸载 (自动确认)"
+            echo "  --rollback            回滚到上一个备份版本"
+            echo "  --backup-list         列出所有备份版本"
+            echo "  --status              显示当前安装状态"
+            echo "  --help                显示帮助"
+            echo
+            echo "环境变量:"
+            echo "  FTP_SERVER            FTP服务器地址 (必须)"
+            echo "  FTP_USER              FTP用户名 (必须)"
+            echo "  FTP_PASS              FTP密码 (必须)"
+            echo "  FTP_PORT              FTP端口 (默认: 21)"
+            echo
+            echo "示例:"
+            echo "  # 方式1: 使用命令行参数"
+            echo "  curl -u ftpuser:admin1234 ftp://10.211.55.4/setup.sh -o setup.sh"
+            echo "  sudo sh setup.sh --server 10.211.55.4 --user ftpuser --password admin1234"
+            echo "  "
+            echo "  # 方式2: 使用环境变量"
+            echo "  FTP_SERVER=10.211.55.4 FTP_USER=ftpuser FTP_PASS=admin1234 sudo sh setup.sh"
+            echo "  "
+            echo "  # 指定版本安装"
+            echo "  sudo sh setup.sh --server 10.211.55.4 --user ftpuser --password admin1234 --version 1.30.0"
+            echo "  "
+            echo "  # 卸载"
+            echo "  sudo sh setup.sh --server 10.211.55.4 --user ftpuser --password admin1234 --uninstall"
+            exit 0
+            ;;
+        *)
+            log_error "未知参数: $1"
+            echo "使用 --help 查看帮助信息"
+            exit 1
+            ;;
+    esac
+done
+
+# 清理函数
+cleanup() {
+    if [[ -d "$TEMP_DIR" ]]; then
+        rm -rf "$TEMP_DIR"
+    fi
+}
+
+trap cleanup EXIT
+
+# 创建安装目录结构
+create_install_directories() {
+    log_info "创建安装目录结构..."
+    
+    # 创建主要目录
+    mkdir -p "$VERSIONS_DIR"
+    mkdir -p "$BACKUPS_DIR"
+    
+    log_success "安装目录结构创建完成: $INSTALL_DIR"
+}
+
+# Print the currently installed version, or an empty line when none is found.
+# The LATEST_VERSION record file wins; the target of the "current" symlink is
+# the fallback.
+get_current_version() {
+    # 1) Version recorded in the LATEST_VERSION file (whitespace stripped)
+    if [[ -f "$LATEST_VERSION_FILE" ]]; then
+        local recorded_version=$(cat "$LATEST_VERSION_FILE" 2>/dev/null | tr -d '[:space:]')
+        if [[ -n "$recorded_version" ]]; then
+            echo "$recorded_version"
+            return 0
+        fi
+    fi
+
+    # 2) No usable record: without a symlink there is nothing to report
+    if [[ ! -L "$CURRENT_LINK" ]]; then
+        echo ""
+        return
+    fi
+
+    # 3) The version is the last path component of the link target
+    local link_target=$(readlink "$CURRENT_LINK")
+    basename "$link_target"
+}
+
+# 检查是否已安装
+check_installed() {
+    if [[ -L "$CURRENT_LINK" ]] && [[ -d "$CURRENT_LINK" ]]; then
+        local current_version=$(get_current_version)
+        if [[ -n "$current_version" ]]; then
+            log_info "检测到已安装版本: v$current_version"
+            return 0
+        fi
+    fi
+    return 1
+}
+
+# 更新LATEST_VERSION文件
+update_latest_version_file() {
+    local version="$1"
+    log_info "更新LATEST_VERSION文件: $version"
+    
+    if echo "$version" > "$LATEST_VERSION_FILE"; then
+        log_success "LATEST_VERSION文件已更新"
+    else
+        log_error "更新LATEST_VERSION文件失败"
+        return 1
+    fi
+}
+
+# Copy the currently installed version into $BACKUPS_DIR/<version>.
+# Returns 0 when there is nothing to back up; exits with status 1 when the
+# copy fails. After a successful copy old backups are pruned.
+backup_current_version() {
+    local current_version
+    current_version=$(get_current_version)
+    if [[ -z "$current_version" ]]; then
+        log_info "没有当前版本需要备份"
+        return 0
+    fi
+
+    local backup_name="$current_version"
+    local backup_path="$BACKUPS_DIR/$backup_name"
+
+    log_info "备份当前版本 $current_version 到: $backup_path"
+
+    # Replace an existing backup of the same version. -L also catches a
+    # dangling symlink left behind by an earlier link-only "backup".
+    if [[ -e "$backup_path" || -L "$backup_path" ]]; then
+        log_info "备份版本已存在，覆盖: $backup_path"
+        rm -rf "$backup_path"
+    fi
+
+    # $CURRENT_LINK is a symlink. `cp -r <symlink> <dest>` copies the link
+    # itself rather than the directory behind it, so the backup was only a
+    # pointer to the live version directory. Copying "<link>/." dereferences
+    # the link and copies the real contents (dotfiles included); -a keeps
+    # modes and timestamps.
+    if mkdir -p "$backup_path" && cp -a "$CURRENT_LINK/." "$backup_path/"; then
+        log_success "版本备份完成: $backup_name"
+
+        # Prune old backups, keeping only the three most recent
+        cleanup_old_backups
+    else
+        log_error "版本备份失败"
+        exit 1
+    fi
+}
+
+# 清理旧备份
+cleanup_old_backups() {
+    log_info "清理旧版本备份..."
+    
+    # 获取备份目录列表，按时间排序，保留最近3个
+    local backup_count=$(ls -1 "$BACKUPS_DIR" 2>/dev/null | wc -l)
+    if [[ $backup_count -gt 3 ]]; then
+        local to_remove=$((backup_count - 3))
+        ls -1t "$BACKUPS_DIR" | tail -n $to_remove | while read backup; do
+            log_info "删除旧备份: $backup"
+            rm -rf "$BACKUPS_DIR/$backup"
+        done
+    fi
+}
+
+# 回滚到备份版本
+rollback_to_backup() {
+    local backup_name="$1"
+    local backup_path="$BACKUPS_DIR/$backup_name"
+    
+    if [[ ! -d "$backup_path" ]]; then
+        log_error "备份不存在: $backup_path"
+        return 1
+    fi
+    
+    log_info "回滚到备份版本: $backup_name"
+    
+    # 停止当前服务
+    stop_services
+    
+    # 恢复软链接（备份目录应该包含版本内容）
+    if ln -sfn "$backup_path" "$CURRENT_LINK"; then
+        log_success "版本回滚完成: $backup_name"
+        
+        # 启动服务
+        start_services
+        return 0
+    else
+        log_error "版本回滚失败"
+        return 1
+    fi
+}
+
+# Stop the running exporters: first through the installed uninstall.sh,
+# falling back to killing the processes directly.
+stop_services() {
+    log_info "停止当前服务..."
+
+    # Nothing to do when no service is detected
+    if ! check_services_running; then
+        log_info "服务未运行，无需停止"
+        return 0
+    fi
+
+    if [[ ! -f "$CURRENT_LINK/uninstall.sh" ]]; then
+        log_warning "未找到卸载脚本，尝试手动停止服务"
+        manual_stop_services
+        return
+    fi
+
+    # Run uninstall.sh as the `if` condition: as a plain statement under
+    # `set -e`, a failing uninstall.sh terminated the script before its exit
+    # status could be inspected, so the manual fallback was unreachable.
+    # The subshell keeps the `cd` from leaking into the caller, and "y"
+    # answers the script's interactive confirmation prompt.
+    if (cd "$CURRENT_LINK" && chmod +x uninstall.sh && echo "y" | ./uninstall.sh >/dev/null 2>&1); then
+        log_success "服务停止完成"
+    else
+        log_warning "停止服务时出现警告，尝试手动停止"
+        manual_stop_services
+    fi
+}
+
+# 手动停止服务
+manual_stop_services() {
+    log_info "手动停止服务..."
+    
+    # 停止 node_exporter
+    if pgrep -f "node_exporter" >/dev/null 2>&1; then
+        pkill -f "node_exporter" && log_info "node_exporter 已停止"
+    fi
+    
+    # 停止 dcgm_exporter
+    if pgrep -f "dcgm_exporter" >/dev/null 2>&1; then
+        pkill -f "dcgm_exporter" && log_info "dcgm_exporter 已停止"
+    fi
+    
+    # 等待进程完全停止
+    sleep 2
+    
+    # 检查是否还有残留进程
+    if pgrep -f "node_exporter\|dcgm_exporter" >/dev/null 2>&1; then
+        log_warning "仍有服务进程运行，尝试强制停止"
+        pkill -9 -f "node_exporter\|dcgm_exporter" 2>/dev/null || true
+    fi
+    
+    log_success "手动停止服务完成"
+}
+
+# Start the services by running install.sh from the current version directory.
+# Returns 0 when the services are already running or install.sh succeeds,
+# 1 when install.sh is missing or fails.
+# Note: changes the working directory to $CURRENT_LINK.
+start_services() {
+    log_info "启动服务..."
+    
+    if [[ -f "$CURRENT_LINK/install.sh" ]]; then
+        cd "$CURRENT_LINK"
+        chmod +x install.sh
+        
+        # Skip the start when the services are already running
+        if check_services_running; then
+            log_info "服务已在运行，跳过启动"
+            return 0
+        fi
+        
+        # Start the services; install.sh receives $INSTALL_DIR and its stderr is discarded.
+        # NOTE(review): install_argus_metric passes the version directory to
+        # install.sh, not $INSTALL_DIR - confirm which one install.sh expects.
+        if ./install.sh "$INSTALL_DIR" 2>/dev/null; then
+            log_success "服务启动完成"
+        else
+            log_error "服务启动失败"
+            return 1
+        fi
+    else
+        log_error "未找到安装脚本"
+        return 1
+    fi
+}
+
+# Return 0 when an exporter appears to be running, 1 otherwise.
+# Detection: a TCP listener on port 9100 or 9400 (the default ports of
+# node-exporter and dcgm-exporter), or a matching process.
+check_services_running() {
+    local ports=(9100 9400)
+    local port
+    local listeners=""
+
+    # netstat is often absent on minimal installs, so fall back to ss.
+    # Both print the local address as "<addr>:<port>" followed by whitespace.
+    if command -v netstat >/dev/null 2>&1; then
+        listeners=$(netstat -tln 2>/dev/null || true)
+    elif command -v ss >/dev/null 2>&1; then
+        listeners=$(ss -tln 2>/dev/null || true)
+    fi
+
+    for port in "${ports[@]}"; do
+        if grep -Eq ":${port}[[:space:]]" <<< "$listeners"; then
+            log_info "检测到服务正在端口 $port 上运行"
+            return 0
+        fi
+    done
+
+    # pgrep takes an extended regular expression: alternation is a bare `|`.
+    # The previous `\|` matched a literal pipe and so never matched a process.
+    if pgrep -f "node_exporter|dcgm_exporter" >/dev/null 2>&1; then
+        log_info "检测到相关服务进程正在运行"
+        return 0
+    fi
+
+    return 1
+}
+
+# Exit with status 1 unless the script runs as root.
+check_root() {
+    if [[ $EUID -ne 0 ]]; then
+        log_error "此脚本需要 root 权限运行"
+        # The hint names bash: this script uses Bash-only syntax and does not
+        # run under a POSIX sh such as dash.
+        log_info "请使用: sudo bash setup.sh"
+        exit 1
+    fi
+}
+
+# Log basic facts about the host and warn when the root filesystem is short
+# on space. Exits with status 1 when /etc/os-release is missing.
+check_system() {
+    log_info "检查系统要求..."
+
+    # /etc/os-release identifies the distribution
+    if [[ ! -f /etc/os-release ]]; then
+        log_error "无法检测操作系统版本"
+        exit 1
+    fi
+
+    # Source the file in a subshell so its variables (NAME, VERSION, ...) do
+    # not leak into this script
+    local os_info
+    os_info=$(source /etc/os-release && echo "$NAME $VERSION_ID") || true
+    log_info "检测到操作系统: $os_info"
+
+    local arch
+    arch=$(uname -m)
+    log_info "系统架构: $arch"
+
+    # `df -Pk` reports 1 KiB blocks on one line per filesystem, so 1 GiB is
+    # 1048576 blocks. The previous threshold of 1024 only fired below 1 MiB,
+    # and the GB figure in the message always rounded down to 0.
+    local available_kb
+    available_kb=$(df -Pk / | awk 'NR==2 {print $4}')
+    if [[ "$available_kb" =~ ^[0-9]+$ ]] && (( available_kb < 1048576 )); then
+        log_warning "可用磁盘空间不足 1GB，当前可用: $((available_kb / 1024))MB"
+    fi
+}
+
+# 下载并安装
+install_argus_metric() {
+    # 如果没有指定版本，获取最新版本
+    if [[ -z "$ARGUS_VERSION" ]]; then
+        ARGUS_VERSION=$(get_latest_version)
+    fi
+    
+    log_info "开始安装 Argus Metric v$ARGUS_VERSION..."
+    log_info "安装目录: $INSTALL_DIR"
+    
+    # 检查是否已安装
+    local is_upgrade=false
+    if check_installed; then
+        local current_version=$(get_current_version)
+        if [[ "$current_version" == "$ARGUS_VERSION" ]]; then
+            log_info "版本 v$ARGUS_VERSION 已安装，无需重复安装"
+            return 0
+        fi
+        log_info "检测到版本升级: v$current_version -> v$ARGUS_VERSION"
+        is_upgrade=true
+        
+        # 备份当前版本
+        backup_current_version
+    fi
+    
+    # 创建安装目录结构
+    create_install_directories
+    
+    # 创建临时目录
+    mkdir -p "$TEMP_DIR"
+    cd "$TEMP_DIR"
+    
+    # 下载发布包，使用新的命名规范
+    TAR_NAME="argus-metric_$(echo $ARGUS_VERSION | tr '.' '_').tar.gz"
+    log_info "下载发布包: $TAR_NAME"
+    log_info "从FTP服务器下载: $FTP_SERVER:$FTP_PORT, 用户: $FTP_USER"
+    
+    # 构造curl命令并显示（隐藏密码）
+    CURL_CMD="curl -u \"${FTP_USER}:***\" -sfL \"$BASE_URL/$TAR_NAME\" -o \"$TAR_NAME\""
+    log_info "执行命令: $CURL_CMD"
+    
+    if ! curl -u "${FTP_USER}:${FTP_PASS}" -sfL "$BASE_URL/$TAR_NAME" -o "$TAR_NAME"; then
+        log_error "下载发布包失败: $BASE_URL/$TAR_NAME"
+        log_error "完整命令: curl -u \"${FTP_USER}:${FTP_PASS}\" -sfL \"$BASE_URL/$TAR_NAME\" -o \"$TAR_NAME\""
+        log_error "请检查FTP服务器连接、用户名密码是否正确"
+        exit 1
+    fi
+    
+    # 解压发布包到当前目录
+    log_info "解压发布包..."
+    if ! tar -xzf "$TAR_NAME"; then
+        log_error "解压发布包失败"
+        exit 1
+    fi
+    
+    # 显示解压后的文件结构
+    log_info "解压后的文件结构:"
+    ls -la "$TEMP_DIR"
+    
+    # 准备版本目录
+    local version_dir="$VERSIONS_DIR/$ARGUS_VERSION"
+    log_info "安装到版本目录: $version_dir"
+    
+    # 如果升级，先停止服务
+    if [[ "$is_upgrade" == true ]]; then
+        stop_services
+    fi
+    
+    # 创建版本目录
+    if [[ -d "$version_dir" ]]; then
+        log_info "版本目录已存在，备份后更新"
+        rm -rf "$version_dir"
+    fi
+    
+    # 创建新的版本目录
+    mkdir -p "$version_dir"
+    
+    # 移动解压的文件到版本目录
+    log_info "移动文件到版本目录: $TEMP_DIR/* -> $version_dir/"
+    
+    # 检查源目录是否有内容
+    if [[ ! "$(ls -A "$TEMP_DIR" 2>/dev/null)" ]]; then
+        log_error "临时目录为空，无法移动文件"
+        exit 1
+    fi
+    
+    # 检查目标目录是否存在
+    if [[ ! -d "$version_dir" ]]; then
+        log_error "目标版本目录不存在: $version_dir"
+        exit 1
+    fi
+    
+    # 执行文件移动
+    if mv "$TEMP_DIR"/* "$version_dir" 2>/dev/null; then
+        log_success "文件移动到版本目录完成"
+    else
+        log_error "移动文件到版本目录失败"
+        log_error "源目录内容:"
+        ls -la "$TEMP_DIR" || true
+        log_error "目标目录状态:"
+        ls -la "$version_dir" || true
+        log_error "权限检查:"
+        ls -ld "$TEMP_DIR" "$version_dir" || true
+        exit 1
+    fi
+    
+    # 执行安装脚本
+    log_info "执行安装脚本..."
+    cd "$version_dir"
+    if [[ -f "install.sh" ]]; then
+        chmod +x install.sh
+        # 传递版本目录作为安装目录给安装脚本
+        if ./install.sh "$version_dir"; then
+            log_success "安装脚本执行完成"
+        else
+            log_error "安装脚本执行失败"
+            # 如果是升级失败，尝试回滚
+            if [[ "$is_upgrade" == true ]]; then
+                log_warning "升级失败，尝试回滚到之前版本..."
+                local latest_backup=$(ls -1t "$BACKUPS_DIR" 2>/dev/null | head -n 1)
+                if [[ -n "$latest_backup" ]]; then
+                    rollback_to_backup "$latest_backup"
+                    return 1
+                fi
+            fi
+            exit 1
+        fi
+    else
+        log_error "未找到安装脚本 install.sh"
+        exit 1
+    fi
+    
+    # 更新软链接指向新版本
+    log_info "更新当前版本链接..."
+    if ln -sfn "$version_dir" "$CURRENT_LINK"; then
+        log_success "版本链接更新完成: $CURRENT_LINK -> $version_dir"
+    else
+        log_error "版本链接更新失败"
+        exit 1
+    fi
+    
+    # 更新LATEST_VERSION文件
+    update_latest_version_file "$ARGUS_VERSION"
+    
+    # 启动服务
+    start_services
+    
+    log_success "Argus Metric v$ARGUS_VERSION 安装完成！"
+    
+    # 显示安装信息
+    echo
+    log_info "安装信息:"
+    log_info "  版本: $ARGUS_VERSION"
+    log_info "  安装目录: $INSTALL_DIR"
+    log_info "  版本目录: $version_dir"
+    log_info "  当前链接: $CURRENT_LINK"
+    if [[ "$is_upgrade" == true ]]; then
+        log_info "  升级类型: 版本升级"
+    else
+        log_info "  安装类型: 全新安装"
+    fi
+}
+
+# 卸载
+uninstall_argus_metric() {
+    log_info "开始卸载 Argus Metric..."
+    log_info "安装目录: $INSTALL_DIR"
+    
+    # 检查是否已安装
+    if ! check_installed; then
+        log_info "未检测到已安装的 Argus Metric"
+        return 0
+    fi
+    
+    local current_version=$(get_current_version)
+    log_info "检测到当前版本: v$current_version"
+    
+    # 停止服务
+    stop_services
+    
+    # 执行卸载脚本
+    log_info "执行卸载脚本..."
+    if [[ -f "$CURRENT_LINK/uninstall.sh" ]]; then
+        cd "$CURRENT_LINK"
+        chmod +x uninstall.sh
+        
+        # 自动确认卸载（因为用户已经明确使用了 --uninstall 参数）
+        log_info "自动确认卸载操作..."
+        echo "y" | ./uninstall.sh
+        local uninstall_exit_code=$?
+        
+        if [[ $uninstall_exit_code -eq 0 ]]; then
+            log_success "卸载脚本执行完成"
+        else
+            log_error "卸载脚本执行失败 (退出码: $uninstall_exit_code)"
+            exit 1
+        fi
+    else
+        log_warning "未找到卸载脚本，执行基本清理"
+    fi
+    
+    # 清理安装目录
+    log_info "清理安装目录..."
+    if [[ -d "$INSTALL_DIR" ]]; then
+        # 询问是否完全删除安装目录
+        log_warning "这将删除整个安装目录: $INSTALL_DIR"
+        log_warning "包括所有版本、备份和配置文件"
+        
+        # 在自动化环境中，直接删除
+        if rm -rf "$INSTALL_DIR"; then
+            log_success "安装目录已完全清理: $INSTALL_DIR"
+        else
+            log_error "清理安装目录失败"
+            exit 1
+        fi
+    else
+        log_info "安装目录不存在，无需清理"
+    fi
+    
+    log_success "Argus Metric 卸载完成！"
+}
+
+# Print the installation status: current version, relevant paths, the
+# directory tree, the installed versions and the available backups.
+# Prints a "not installed" warning when check_installed fails.
+show_status() {
+    echo "=========================================="
+    echo "    Argus Metric 安装状态"
+    echo "=========================================="
+    echo
+    
+    if check_installed; then
+        local current_version=$(get_current_version)
+        log_info "当前版本: $current_version"
+        log_info "安装目录: $INSTALL_DIR"
+        log_info "当前链接: $CURRENT_LINK"
+        log_info "版本目录: $VERSIONS_DIR/$current_version"
+        log_info "版本文件: $LATEST_VERSION_FILE"
+        
+        # Show the content of the LATEST_VERSION record file (whitespace stripped)
+        if [[ -f "$LATEST_VERSION_FILE" ]]; then
+            local file_version=$(cat "$LATEST_VERSION_FILE" 2>/dev/null | tr -d '[:space:]')
+            log_info "版本文件内容: $file_version"
+        fi
+        
+        echo
+        log_info "目录结构:"
+        if [[ -d "$INSTALL_DIR" ]]; then
+            # Use tree when it works, otherwise fall back to ls
+            tree -L 2 "$INSTALL_DIR" 2>/dev/null || ls -la "$INSTALL_DIR"
+        fi
+        
+        echo
+        log_info "可用版本:"
+        if [[ -d "$VERSIONS_DIR" ]]; then
+            ls -1 "$VERSIONS_DIR" 2>/dev/null | sed 's/^/  - /'
+        else
+            echo "  无"
+        fi
+        
+        echo
+        log_info "备份版本:"
+        # Backups are listed newest first
+        if [[ -d "$BACKUPS_DIR" ]] && [[ $(ls -1 "$BACKUPS_DIR" 2>/dev/null | wc -l) -gt 0 ]]; then
+            ls -1t "$BACKUPS_DIR" 2>/dev/null | sed 's/^/  - /'
+        else
+            echo "  无"
+        fi
+    else
+        log_warning "Argus Metric 未安装"
+        log_info "安装目录: $INSTALL_DIR"
+    fi
+}
+
+# Print every backup under $BACKUPS_DIR, newest first, with its modification
+# time; print a warning when there is none.
+list_backups() {
+    echo "=========================================="
+    echo "    Argus Metric 备份列表"
+    echo "=========================================="
+    echo
+
+    # Guard: missing or empty backup directory
+    if [[ ! -d "$BACKUPS_DIR" ]] || [[ $(ls -1 "$BACKUPS_DIR" 2>/dev/null | wc -l) -le 0 ]]; then
+        log_warning "没有可用的备份版本"
+        return
+    fi
+
+    log_info "可用备份版本:"
+    ls -1t "$BACKUPS_DIR" 2>/dev/null | while read backup; do
+        # First two fields of `stat -c %y`: date and time
+        local backup_time=$(stat -c %y "$BACKUPS_DIR/$backup" 2>/dev/null | cut -d' ' -f1-2)
+        echo "  - $backup (创建时间: $backup_time)"
+    done
+}
+
+# Roll back to the most recently modified backup and print the new status.
+# Exits with status 1 when nothing is installed, no backup exists, or the
+# rollback itself fails.
+rollback_version() {
+    log_info "开始回滚操作..."
+
+    if ! check_installed; then
+        log_error "没有检测到已安装的版本，无法回滚"
+        exit 1
+    fi
+
+    # Newest entry in the backup directory
+    local latest_backup=$(ls -1t "$BACKUPS_DIR" 2>/dev/null | head -n 1)
+    if [[ -z "$latest_backup" ]]; then
+        log_error "没有找到可用的备份版本"
+        exit 1
+    fi
+
+    log_info "将回滚到备份版本: $latest_backup"
+
+    if ! rollback_to_backup "$latest_backup"; then
+        log_error "回滚失败"
+        exit 1
+    fi
+
+    log_success "回滚完成！"
+
+    # Show the resulting state
+    echo
+    show_status
+}
+
+# 主函数
+main() {
+    echo "=========================================="
+    echo "    Argus Metric 在线安装脚本 v1.0"
+    echo "=========================================="
+    echo
+    
+    # 对于状态和备份列表操作，不需要FTP参数和root权限
+    if [[ "$ACTION" == "status" || "$ACTION" == "backup-list" ]]; then
+        if [[ "$ACTION" == "status" ]]; then
+            show_status
+        elif [[ "$ACTION" == "backup-list" ]]; then
+            list_backups
+        fi
+        return 0
+    fi
+    
+    check_root
+    
+    # 更新目录配置变量（在设置INSTALL_DIR后）
+    VERSIONS_DIR="$INSTALL_DIR/versions"
+    BACKUPS_DIR="$INSTALL_DIR/backups"
+    CURRENT_LINK="$INSTALL_DIR/current"
+    LATEST_VERSION_FILE="$INSTALL_DIR/LATEST_VERSION"
+    
+    # 对于回滚操作，不需要FTP参数
+    if [[ "$ACTION" == "rollback" ]]; then
+        rollback_version
+        return 0
+    fi
+    
+    check_ftp_params
+    check_system
+    
+    if [[ "$ACTION" == "uninstall" ]]; then
+        uninstall_argus_metric
+    else
+        install_argus_metric
+    fi
+    
+    echo
+    log_info "操作完成！"
+}
+
+# 脚本入口
+if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
+    main "$@"
+fi
--- a/src/metric/client-plugins/demo-all-in-one/uninstall_artifact.sh
+++ b/src/metric/client-plugins/demo-all-in-one/uninstall_artifact.sh
@ -0,0 +1,274 @@
+#!/bin/bash
+
+set -e
+
+# 颜色定义
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+NC='\033[0m' # No Color
+
+# Logging helpers. Each public function prints "$1" behind a colored level tag.
+_log_with_level() {
+    # $1 = color escape sequence, $2 = level tag, $3 = message
+    echo -e "${1}[${2}]${NC} ${3}"
+}
+
+log_info() {
+    _log_with_level "$BLUE" "INFO" "$1"
+}
+
+log_success() {
+    _log_with_level "$GREEN" "SUCCESS" "$1"
+}
+
+log_warning() {
+    _log_with_level "$YELLOW" "WARNING" "$1"
+}
+
+log_error() {
+    _log_with_level "$RED" "ERROR" "$1"
+}
+
+# 配置变量
+INSTALL_DIR="/opt/aiops"
+TEMP_DIR="/tmp/aiops-uninstall-$$"
+VERSION_FILE="version.json"
+
+# 检查是否为 root 用户
+check_root() {
+    if [[ $EUID -ne 0 ]]; then
+        log_error "此脚本需要 root 权限运行"
+        log_info "请使用: sudo $0"
+        exit 1
+    fi
+}
+
+# 查找版本文件
+find_version_file() {
+    log_info "查找版本信息文件..."
+    
+    # 在当前目录查找
+    if [[ -f "$VERSION_FILE" ]]; then
+        VERSION_FILE_PATH="$VERSION_FILE"
+        log_success "找到版本文件: $VERSION_FILE"
+        return 0
+    fi
+    
+    # 在 artifact 目录查找
+    for version_dir in artifact/*/; do
+        if [[ -f "${version_dir}${VERSION_FILE}" ]]; then
+            VERSION_FILE_PATH="${version_dir}${VERSION_FILE}"
+            log_success "找到版本文件: $VERSION_FILE_PATH"
+            return 0
+        fi
+    done
+    
+    log_error "未找到版本信息文件 $VERSION_FILE"
+    log_info "请确保在正确的目录下运行此脚本"
+    exit 1
+}
+
+# 解析版本信息
+parse_version_info() {
+    log_info "解析版本信息..."
+    
+    if [[ ! -f "$VERSION_FILE_PATH" ]]; then
+        log_error "版本文件不存在: $VERSION_FILE_PATH"
+        exit 1
+    fi
+    
+    # 使用 jq 解析 JSON（如果可用）
+    if command -v jq &> /dev/null; then
+        VERSION=$(jq -r '.version' "$VERSION_FILE_PATH")
+        BUILD_TIME=$(jq -r '.build_time' "$VERSION_FILE_PATH")
+        
+        # 解析 install_order（现在包含完整的文件名）
+        if jq -e '.install_order' "$VERSION_FILE_PATH" > /dev/null 2>&1; then
+            jq -r '.install_order[]' "$VERSION_FILE_PATH" > "$TEMP_DIR/install_order.txt"
+        else
+            log_error "version.json 中缺少 install_order 字段"
+            exit 1
+        fi
+    else
+        log_warning "jq 未安装，使用简单的 JSON 解析"
+        VERSION=$(grep '"version"' "$VERSION_FILE_PATH" | sed 's/.*"version": *"\([^"]*\)".*/\1/')
+        BUILD_TIME=$(grep '"build_time"' "$VERSION_FILE_PATH" | sed 's/.*"build_time": *"\([^"]*\)".*/\1/')
+        
+        # 解析 install_order
+        grep -A 100 '"install_order"' "$VERSION_FILE_PATH" | grep -E '^\s*"[^"]+"' | while read line; do
+            component=$(echo "$line" | sed 's/.*"\([^"]*\)".*/\1/')
+            echo "$component" >> "$TEMP_DIR/install_order.txt"
+        done
+    fi
+    
+    log_success "版本信息解析完成"
+    log_info "  版本: $VERSION"
+    log_info "  构建时间: $BUILD_TIME"
+}
+
+# Create the temporary working directory $TEMP_DIR
+# (parse_version_info and uninstall_components place their files in it).
+create_temp_dirs() {
+    log_info "创建临时目录..."
+    mkdir -p "$TEMP_DIR"
+    log_success "临时目录创建完成: $TEMP_DIR"
+}
+
+# Uninstall every component listed in $TEMP_DIR/install_order.txt.
+# For each archive name: unpack it from the directory that holds the version
+# file into a temporary directory, run the uninstall.sh found in the first
+# extracted sub-directory (answering "y" on stdin), then remove the
+# temporary copy.
+# Exits with status 1 when an archive is missing, cannot be unpacked,
+# contains no directory, or its uninstall.sh fails.
+uninstall_components() {
+    log_info "开始卸载组件..."
+    
+    # Component archives are expected next to the version file
+    artifact_dir=$(dirname "$VERSION_FILE_PATH")
+    uninstall_count=0
+    total_count=0
+    
+    if [[ -f "$TEMP_DIR/install_order.txt" ]]; then
+        total_count=$(wc -l < "$TEMP_DIR/install_order.txt")
+    fi
+    
+    if [[ -f "$TEMP_DIR/install_order.txt" ]]; then
+        while IFS= read -r filename; do
+            uninstall_count=$((uninstall_count + 1))
+            
+            # Derive the component name by stripping the
+            # -YYYYMMDD-HHMMSS.tar.gz timestamp suffix from the file name
+            component=$(echo "$filename" | sed 's/-[0-9]\{8\}-[0-9]\{6\}\.tar\.gz$//')
+            
+            log_info "[$uninstall_count/$total_count] 卸载 $component..."
+            
+            # The list entry is the complete archive file name
+            tar_file="$artifact_dir/$filename"
+            
+            if [[ ! -f "$tar_file" ]]; then
+                log_error "找不到组件文件: $filename"
+                exit 1
+            fi
+            
+            # Unpack into a per-component temporary directory
+            component_temp_dir="$TEMP_DIR/$component"
+            mkdir -p "$component_temp_dir"
+            
+            if tar -xzf "$tar_file" -C "$component_temp_dir"; then
+                log_success "  $component 解压完成"
+            else
+                log_error "  $component 解压失败"
+                exit 1
+            fi
+            
+            # Use the first directory found in the unpacked archive
+            extracted_dir=""
+            for dir in "$component_temp_dir"/*; do
+                if [[ -d "$dir" ]]; then
+                    extracted_dir="$dir"
+                    break
+                fi
+            done
+            
+            if [[ -z "$extracted_dir" ]]; then
+                log_error "  $component 解压后未找到目录"
+                exit 1
+            fi
+            
+            # Run the component's uninstall script, if it ships one
+            if [[ -f "$extracted_dir/uninstall.sh" ]]; then
+                log_info "  执行 $component 卸载脚本..."
+                # A single "y" is piped in as the confirmation answer
+                if (cd "$extracted_dir" && echo "y" | ./uninstall.sh); then
+                    log_success "  $component 卸载完成"
+                else
+                    log_error "  $component 卸载失败"
+                    exit 1
+                fi
+            else
+                log_warning "  $component 缺少 uninstall.sh 文件，跳过卸载"
+            fi
+            
+            # Remove the unpacked copy
+            rm -rf "$component_temp_dir"
+        done < "$TEMP_DIR/install_order.txt"
+    fi
+    
+    log_success "所有组件卸载完成"
+}
+
+# Remove the installation directory and the global directories
+# /etc/aiops and /var/log/aiops, where they exist.
+cleanup_global_files() {
+    log_info "清理全局文件..."
+
+    # Installation directory
+    if [[ ! -d "$INSTALL_DIR" ]]; then
+        log_info "安装目录不存在: $INSTALL_DIR"
+    else
+        rm -rf "$INSTALL_DIR"
+        log_success "安装目录已清理: $INSTALL_DIR"
+    fi
+
+    # Global configuration and log directories
+    local leftover
+    for leftover in "/etc/aiops" "/var/log/aiops"; do
+        [[ -d "$leftover" ]] || continue
+        rm -rf "$leftover"
+        log_success "全局配置已清理: $leftover"
+    done
+}
+
+# 显示卸载信息
+show_uninstall_info() {
+    log_success "AIOps All-in-One 卸载完成！"
+    echo
+    echo "卸载信息:"
+    echo "  版本: $VERSION"
+    echo "  构建时间: $BUILD_TIME"
+    echo
+    echo "清理内容:"
+    echo "  - 二进制文件"
+    echo "  - 配置文件"
+    echo "  - 数据目录"
+    echo "  - 进程和服务"
+    echo "  - 全局安装目录"
+    echo
+    echo "注意:"
+    echo "  - 系统依赖包可能仍然存在"
+    echo "  - 如需完全清理，请手动检查并删除相关文件"
+    echo
+}
+
+# 清理函数
+cleanup() {
+    if [[ -d "$TEMP_DIR" ]]; then
+        rm -rf "$TEMP_DIR"
+    fi
+}
+
+# 设置清理陷阱
+trap cleanup EXIT
+
+# Entry point: verify root, locate and parse the version file, ask for
+# confirmation, then uninstall all components and remove global files.
+main() {
+    echo "=========================================="
+    echo "    AIOps All-in-One 卸载脚本"
+    echo "=========================================="
+    echo
+
+    check_root
+    find_version_file
+    create_temp_dirs
+    parse_version_info
+
+    log_warning "此操作将完全卸载 AIOps All-in-One"
+    # -r keeps backslashes literal. `|| confirm=""` treats EOF on stdin as
+    # "no" instead of letting `set -e` end the script without any message.
+    local confirm=""
+    read -r -p "确认继续？(y/N): " confirm || confirm=""
+
+    if [[ "$confirm" != "y" && "$confirm" != "Y" ]]; then
+        log_info "取消卸载操作"
+        exit 0
+    fi
+
+    uninstall_components
+    cleanup_global_files
+    show_uninstall_info
+}
+
+# Run main only when the script is executed, not when it is sourced
+if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
+    main "$@"
+fi
--- a/src/metric/client-plugins/demo-all-in-one/version-manager.sh
+++ b/src/metric/client-plugins/demo-all-in-one/version-manager.sh
@ -0,0 +1,350 @@
+#!/bin/bash
+
+set -e
+
+# 颜色定义
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+NC='\033[0m' # No Color
+
+# 日志函数
+log_info() {
+    echo -e "${BLUE}[INFO]${NC} $1"
+}
+
+log_success() {
+    echo -e "${GREEN}[SUCCESS]${NC} $1"
+}
+
+log_warning() {
+    echo -e "${YELLOW}[WARNING]${NC} $1"
+}
+
+log_error() {
+    echo -e "${RED}[ERROR]${NC} $1"
+}
+
+# Print usage information for the version manager.
+show_help() {
+    cat <<EOF
+AIOps 版本管理工具
+
+用法: $0 <command> [options]
+
+命令:
+  bump <type>     - 升级版本号 (major|minor|patch)
+  set <version>   - 设置指定版本号
+  show            - 显示当前版本信息
+  list            - 列出所有版本
+  clean           - 清理旧版本
+  validate        - 验证版本配置
+
+示例:
+  $0 bump minor    # 升级次版本号 1.0.0 -> 1.1.0
+  $0 set 2.0.0     # 设置版本为 2.0.0
+  $0 show          # 显示当前版本
+  $0 list          # 列出所有版本
+EOF
+}
+
+# Print the contents of ./VERSION, or "0.0.0" when the file does not exist.
+get_current_version() {
+    if [[ ! -f "VERSION" ]]; then
+        echo "0.0.0"
+        return
+    fi
+    cat VERSION
+}
+
+# Validate $1 as major.minor.patch and write it to ./VERSION.
+# Exits with status 1 when the format is invalid.
+set_version() {
+    local new_version="$1"
+    local semver_pattern='^[0-9]+\.[0-9]+\.[0-9]+$'
+
+    if ! [[ "$new_version" =~ $semver_pattern ]]; then
+        log_error "无效的版本号格式: $new_version"
+        log_info "版本号格式应为: major.minor.patch (如: 1.2.3)"
+        exit 1
+    fi
+
+    echo "$new_version" > VERSION
+    log_success "版本号已设置为: $new_version"
+}
+
+# 升级版本号
+bump_version() {
+    local bump_type="$1"
+    local current_version=$(get_current_version)
+    
+    # 解析当前版本号
+    IFS='.' read -r major minor patch <<< "$current_version"
+    
+    case "$bump_type" in
+        "major")
+            major=$((major + 1))
+            minor=0
+            patch=0
+            ;;
+        "minor")
+            minor=$((minor + 1))
+            patch=0
+            ;;
+        "patch")
+            patch=$((patch + 1))
+            ;;
+        *)
+            log_error "无效的升级类型: $bump_type"
+            log_info "支持的类型: major, minor, patch"
+            exit 1
+            ;;
+    esac
+    
+    local new_version="$major.$minor.$patch"
+    set_version "$new_version"
+    log_success "版本号已从 $current_version 升级到 $new_version"
+}
+
+# Print the current version, the components listed in ./checklist, and the
+# archives already built under artifact/<version>/.
+show_version() {
+    local current_version=$(get_current_version)
+    log_info "当前版本: $current_version"
+
+    if [[ -f "checklist" ]]; then
+        echo
+        echo "组件清单:"
+        while IFS= read -r line; do
+            # Skip blank lines and comment lines.
+            if [[ -z "$line" || "$line" =~ ^[[:space:]]*# ]]; then
+                continue
+            fi
+            # Only the first two fields are displayed; the rest are parsed
+            # so they do not end up inside $version.
+            read -r component version dep order <<< "$line"
+            if [[ -n "$component" && -n "$version" ]]; then
+                echo "  - $component v$version"
+            fi
+        done < checklist
+    fi
+
+    # Report on the artifact directory that matches the current version.
+    local artifact_dir="artifact/$current_version"
+    if [[ ! -d "$artifact_dir" ]]; then
+        echo
+        log_warning "未找到对应的构建目录: $artifact_dir"
+        log_info "运行 ./package.sh 进行构建"
+        return
+    fi
+
+    echo
+    echo "已构建的组件:"
+    for file in "$artifact_dir"/*.tar.gz; do
+        # An unmatched glob stays literal; skip it.
+        [[ -f "$file" ]] || continue
+        local filename=$(basename "$file")
+        local size=$(du -h "$file" | cut -f1)
+        echo "  - $filename ($size)"
+    done
+
+    if [[ -f "$artifact_dir/version.json" ]]; then
+        echo
+        echo "版本信息文件: $artifact_dir/version.json"
+    fi
+}
+
+# List every version directory under artifact/, marking the current version
+# and showing how many component archives each one contains.
+list_versions() {
+    log_info "所有版本列表:"
+    echo
+
+    if [[ ! -d "artifact" ]]; then
+        log_warning "artifact 目录不存在"
+        return
+    fi
+
+    # The current version does not change while listing: read it once
+    # instead of once per directory.
+    local current_version
+    current_version=$(get_current_version)
+
+    local version_dir version file component_count
+    for version_dir in artifact/*/; do
+        # An unmatched glob stays literal; skip it.
+        [[ -d "$version_dir" ]] || continue
+
+        # Drop the trailing slash left by the glob so derived paths do not
+        # contain "//".
+        version_dir=${version_dir%/}
+        version=${version_dir##*/}
+
+        if [[ "$version" == "$current_version" ]]; then
+            echo "  * $version (当前版本)"
+        else
+            echo "    $version"
+        fi
+
+        # Count the component archives of this version.
+        component_count=0
+        for file in "$version_dir"/*.tar.gz; do
+            if [[ -f "$file" ]]; then
+                component_count=$((component_count + 1))
+            fi
+        done
+
+        if [[ $component_count -gt 0 ]]; then
+            echo "      包含 $component_count 个组件"
+        fi
+    done
+}
+
+# Delete old version directories under artifact/, keeping the newest
+# $keep_versions ones. The current version is never deleted.
+clean_versions() {
+    local current_version
+    current_version=$(get_current_version)
+    local keep_versions=5  # number of most recent versions to keep
+
+    log_info "清理旧版本 (保留最近 $keep_versions 个版本)..."
+
+    if [[ ! -d "artifact" ]]; then
+        log_warning "artifact 目录不存在"
+        return
+    fi
+
+    # Collect the version directories oldest-first. Use a version sort (-V):
+    # a plain lexicographic sort puts 1.10.0 before 1.2.0, which would delete
+    # newer versions and keep older ones.
+    local versions=()
+    local version_dir
+    while IFS= read -r -d '' version_dir; do
+        versions+=("$(basename "$version_dir")")
+    done < <(find artifact -maxdepth 1 -type d -name "[0-9]*" -print0 | sort -z -V)
+
+    local total_versions=${#versions[@]}
+    local versions_to_remove=$((total_versions - keep_versions))
+
+    if [[ $versions_to_remove -le 0 ]]; then
+        log_info "无需清理，当前只有 $total_versions 个版本"
+        return
+    fi
+
+    log_info "将删除 $versions_to_remove 个旧版本..."
+
+    local i version
+    for ((i=0; i<versions_to_remove; i++)); do
+        version="${versions[i]}"
+        # Never remove the version VERSION currently points at.
+        if [[ "$version" != "$current_version" ]]; then
+            log_info "删除版本: $version"
+            rm -rf "artifact/$version"
+        fi
+    done
+
+    log_success "旧版本清理完成"
+}
+
+# Check that the helper script $1 exists in the current directory and warn
+# when it is not executable. Returns 1 only when the file is missing.
+_check_script_file() {
+    local script="$1"
+    if [[ ! -f "$script" ]]; then
+        log_error "$script 文件不存在"
+        return 1
+    fi
+    if [[ -x "$script" ]]; then
+        log_success "$script 可执行"
+    else
+        log_warning "$script 不可执行，请运行: chmod +x $script"
+    fi
+}
+
+# Validate the packaging layout in the current directory: VERSION format,
+# checklist entries and their component directories, and the presence of
+# package.sh / install.sh. Exits with status 1 if any error was found.
+validate_version() {
+    log_info "验证版本配置..."
+
+    local errors=0
+
+    # VERSION must exist and be in major.minor.patch form.
+    if [[ ! -f "VERSION" ]]; then
+        log_error "VERSION 文件不存在"
+        errors=$((errors + 1))
+    else
+        local version
+        version=$(get_current_version)
+        if [[ ! "$version" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
+            log_error "VERSION 文件格式无效: $version"
+            errors=$((errors + 1))
+        else
+            log_success "VERSION 文件格式正确: $version"
+        fi
+    fi
+
+    # checklist must list at least one component, each with a directory
+    # under components/.
+    if [[ ! -f "checklist" ]]; then
+        log_error "checklist 文件不存在"
+        errors=$((errors + 1))
+    else
+        local component_count=0
+        local line component component_version dep order
+        while IFS= read -r line; do
+            # Skip blank lines and comment lines.
+            [[ -z "$line" || "$line" =~ ^[[:space:]]*# ]] && continue
+            read -r component component_version dep order <<< "$line"
+            if [[ -n "$component" && -n "$component_version" ]]; then
+                component_count=$((component_count + 1))
+
+                if [[ ! -d "components/$component" ]]; then
+                    log_error "组件目录不存在: components/$component"
+                    errors=$((errors + 1))
+                fi
+            fi
+        done < checklist
+
+        if [[ $component_count -gt 0 ]]; then
+            log_success "checklist 包含 $component_count 个组件"
+        else
+            log_error "checklist 中没有有效组件"
+            errors=$((errors + 1))
+        fi
+    fi
+
+    # Both helper scripts must exist; a missing executable bit is a warning.
+    _check_script_file "package.sh" || errors=$((errors + 1))
+    _check_script_file "install.sh" || errors=$((errors + 1))
+
+    if [[ $errors -eq 0 ]]; then
+        log_success "版本配置验证通过"
+    else
+        log_error "发现 $errors 个配置问题"
+        exit 1
+    fi
+}
+
+# 主函数
+main() {
+    case "${1:-}" in
+        "bump")
+            if [[ -z "${2:-}" ]]; then
+                log_error "请指定升级类型: major, minor, patch"
+                exit 1
+            fi
+            bump_version "$2"
+            ;;
+        "set")
+            if [[ -z "${2:-}" ]]; then
+                log_error "请指定版本号"
+                exit 1
+            fi
+            set_version "$2"
+            ;;
+        "show")
+            show_version
+            ;;
+        "list")
+            list_versions
+            ;;
+        "clean")
+            clean_versions
+            ;;
+        "validate")
+            validate_version
+            ;;
+        "help"|"-h"|"--help")
+            show_help
+            ;;
+        "")
+            show_help
+            ;;
+        *)
+            log_error "未知命令: $1"
+            echo
+            show_help
+            exit 1
+            ;;
+    esac
+}
+
+# 脚本入口
+if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
+    main "$@"
+fi
--- a/src/metric/client-plugins/node-exporter-installer/bin/node_exporter
+++ b/src/metric/client-plugins/node-exporter-installer/bin/node_exporter
--- a/src/metric/client-plugins/node-exporter-installer/check_health.sh
+++ b/src/metric/client-plugins/node-exporter-installer/check_health.sh
@ -0,0 +1,55 @@
+#!/bin/bash
+
+# Node Exporter health-check script.
+# Prints one JSON object: {"name": ..., "status": ..., "reason": ...}
+
+set -e
+
+# Print the HTTP status code of a GET on $1, or "000" when there is no
+# response.
+_http_status_code() {
+    local code
+    # curl's -w output is already "000" when the request fails, so a fallback
+    # must not be appended to it (that produced "000000"). The time limit
+    # keeps a hung exporter from blocking the check.
+    code=$(curl -s -o /dev/null -w "%{http_code}" --max-time 10 "$1" 2>/dev/null) || true
+    echo "${code:-000}"
+}
+
+# Print the result object. Arguments: $1 name, $2 status, $3 reason.
+_print_health_json() {
+    echo "{\"name\": \"$1\", \"status\": \"$2\", \"reason\": \"$3\"}"
+}
+
+# Probe the Node Exporter root page and /metrics on localhost:9100.
+# Prints the JSON result and exits 0 when both return HTTP 200, 1 otherwise.
+check_health() {
+    local url="http://localhost:9100"
+    local metrics_url="$url/metrics"
+    local name="node-exporter"
+    local status="unhealth"
+    local reason=""
+    local http_code metrics_code
+
+    # curl is required for the probes.
+    if ! command -v curl &> /dev/null; then
+        reason="curl 命令不可用，无法进行健康检查"
+        _print_health_json "$name" "$status" "$reason"
+        exit 1
+    fi
+
+    # Root page.
+    http_code=$(_http_status_code "$url")
+    if [[ "$http_code" != "200" ]]; then
+        reason="HTTP 服务异常 (HTTP $http_code)，请检查 Node Exporter 是否正在运行在端口 9100"
+        _print_health_json "$name" "$status" "$reason"
+        exit 1
+    fi
+
+    # Metrics endpoint.
+    metrics_code=$(_http_status_code "$metrics_url")
+    if [[ "$metrics_code" != "200" ]]; then
+        reason="Metrics 端点异常 (HTTP $metrics_code)"
+        _print_health_json "$name" "$status" "$reason"
+        exit 1
+    fi
+
+    status="health"
+    reason="success"
+    _print_health_json "$name" "$status" "$reason"
+    exit 0
+}
+
+# Entry point: run the health check, which exits with the result status.
+main() {
+    check_health
+}
+
+# Run main only when the script is executed directly, not when sourced.
+if [[ "${0}" == "${BASH_SOURCE[0]}" ]]; then
+    main "$@"
+fi
--- a/src/metric/client-plugins/node-exporter-installer/install.sh
+++ b/src/metric/client-plugins/node-exporter-installer/install.sh
@ -0,0 +1,297 @@
+#!/bin/bash
+
+set -e
+
+# 颜色定义
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+NC='\033[0m' # No Color
+
+# 日志函数
+log_info() {
+    echo -e "${BLUE}[INFO]${NC} $1"
+}
+
+log_success() {
+    echo -e "${GREEN}[SUCCESS]${NC} $1"
+}
+
+log_warning() {
+    echo -e "${YELLOW}[WARNING]${NC} $1"
+}
+
+log_error() {
+    echo -e "${RED}[ERROR]${NC} $1"
+}
+
+# Print usage information for the installer.
+show_help() {
+    cat <<EOF
+Node Exporter 安装脚本
+
+用法: $0 [选项]
+
+选项:
+  --help     显示此帮助信息
+
+示例:
+  $0          # 安装 Node Exporter
+
+EOF
+}
+
+# Parse command-line arguments: only --help / -h is accepted; any other
+# argument is an error.
+for arg; do
+    if [[ "$arg" == "--help" || "$arg" == "-h" ]]; then
+        show_help
+        exit 0
+    fi
+    log_error "未知参数: $arg"
+    show_help
+    exit 1
+done
+
+# Exit with status 1 unless the script is running as root.
+check_root() {
+    if [[ $EUID -eq 0 ]]; then
+        return 0
+    fi
+    log_error "此脚本需要 root 权限运行"
+    log_info "请使用: sudo $0"
+    exit 1
+}
+
+# Report the detected distribution and CPU architecture, warning when either
+# is outside the set this installer was written for.
+# Exits with status 1 when /etc/os-release is missing.
+check_system() {
+    log_info "检查系统要求..."
+
+    if [[ ! -f /etc/os-release ]]; then
+        log_error "无法检测操作系统版本"
+        exit 1
+    fi
+
+    # Read os-release in subshells: sourcing it directly would overwrite
+    # globals such as NAME, VERSION and ID in the calling shell.
+    local os_name os_version os_id
+    os_name=$(. /etc/os-release && echo "${NAME:-}")
+    os_version=$(. /etc/os-release && echo "${VERSION:-}")
+    os_id=$(. /etc/os-release && echo "${ID:-}")
+    log_info "检测到操作系统: $os_name $os_version"
+
+    # Warn (but continue) on distributions outside the known list.
+    case "$os_id" in
+        ubuntu|debian|centos|rhel|fedora)
+            ;;
+        *)
+            log_warning "此脚本主要针对常见 Linux 发行版，其他系统可能需要调整"
+            ;;
+    esac
+
+    local arch
+    arch=$(uname -m)
+    log_info "系统架构: $arch"
+
+    if [[ "$arch" != "x86_64" && "$arch" != "amd64" ]]; then
+        log_warning "当前架构为 $arch，node_exporter 主要支持 x86_64/amd64"
+    fi
+}
+
+# Stop any running Node Exporter: first the process recorded in the PID
+# file, then any remaining process named node_exporter / node-exporter.
+# Each is sent TERM first and KILL only if it is still alive afterwards.
+stop_existing_service() {
+    log_info "检查并停止可能运行的服务..."
+
+    local pid_file="/var/run/node-exporter.pid"
+    # pgrep takes an extended regex, where alternation is a bare "|"
+    # ("\|" matches a literal pipe, so the old pattern never matched).
+    # It is used with -x (exact process name) rather than -f so that this
+    # installer, whose path may contain "node-exporter", is never matched.
+    local proc_pattern="node_exporter|node-exporter"
+    local pid pids remaining_pids
+
+    # Stop the process tracked by the PID file.
+    if [[ -f "$pid_file" ]]; then
+        pid=$(cat "$pid_file")
+        if kill -0 "$pid" 2>/dev/null; then
+            log_info "发现正在运行的 Node Exporter 服务 (PID: $pid)，正在停止..."
+            # The process may exit between the check and the signal.
+            kill "$pid" 2>/dev/null || true
+            sleep 2
+            if kill -0 "$pid" 2>/dev/null; then
+                log_warning "进程未响应，强制终止..."
+                kill -9 "$pid" 2>/dev/null || true
+            fi
+            rm -f "$pid_file"
+            log_success "服务已停止"
+        else
+            log_warning "发现过期的 PID 文件，正在清理..."
+            rm -f "$pid_file"
+        fi
+    fi
+
+    # Sweep up processes that are not tracked by the PID file.
+    pids=$(pgrep -x "$proc_pattern" 2>/dev/null || true)
+    if [[ -n "$pids" ]]; then
+        log_info "发现 node_exporter 或 node-exporter 进程，正在停止..."
+        for pid in $pids; do
+            log_info "停止进程 PID: $pid"
+            kill "$pid" 2>/dev/null || true
+        done
+        sleep 2
+
+        # Force-kill whatever survived TERM.
+        remaining_pids=$(pgrep -x "$proc_pattern" 2>/dev/null || true)
+        if [[ -n "$remaining_pids" ]]; then
+            log_warning "进程未响应，强制终止..."
+            for pid in $remaining_pids; do
+                log_info "强制终止进程 PID: $pid"
+                kill -9 "$pid" 2>/dev/null || true
+            done
+            sleep 1
+        fi
+
+        # Final check.
+        if pgrep -x "$proc_pattern" > /dev/null; then
+            log_error "无法停止所有 node_exporter 进程"
+        else
+            log_success "所有 node_exporter 进程已停止"
+        fi
+    fi
+}
+
+# 安装 Node Exporter 二进制文件
+install_node_exporter() {
+    log_info "安装 Node Exporter..."
+    
+    local binary_file="bin/node_exporter"
+    local install_dir="/usr/local/bin"
+    
+    if [[ ! -f "$binary_file" ]]; then
+        log_error "找不到 Node Exporter 二进制文件: $binary_file"
+        exit 1
+    fi
+    
+    # 停止可能运行的服务
+    stop_existing_service
+    
+    # 复制二进制文件并重命名为统一格式
+    cp "$binary_file" "$install_dir/node-exporter"
+    chmod +x "$install_dir/node-exporter"
+    
+    log_success "Node Exporter 二进制文件安装完成"
+}
+
+# 创建用户和组
+create_user() {
+    log_info "创建 node_exporter 用户..."
+    
+    # 检查用户是否已存在
+    if id "node_exporter" &>/dev/null; then
+        log_info "用户 node_exporter 已存在"
+    else
+        useradd --no-create-home --shell /bin/false node_exporter
+        log_success "用户 node_exporter 创建完成"
+    fi
+}
+
+# Create the configuration directory and the textfile-collector directory;
+# the latter is owned by the node_exporter user.
+install_config() {
+    log_info "安装配置文件..."
+
+    local textfile_dir="/var/lib/node_exporter/textfile_collector"
+
+    mkdir -p "/etc/node_exporter"
+    mkdir -p "$textfile_dir"
+    chown node_exporter:node_exporter "$textfile_dir"
+}
+
+# 启动 Node Exporter 服务
+start_node_exporter() {
+    log_info "启动 Node Exporter 服务..."
+    
+    local binary_path="/usr/local/bin/node-exporter"
+    local log_file="/var/log/node-exporter.log"
+    local pid_file="/var/run/node-exporter.pid"
+    
+    # 检查服务是否已经在运行
+    if [[ -f "$pid_file" ]]; then
+        local pid=$(cat "$pid_file")
+        if kill -0 "$pid" 2>/dev/null; then
+            log_info "Node Exporter 服务已在运行 (PID: $pid)"
+            return 0
+        else
+            log_warning "发现过期的 PID 文件，正在清理..."
+            rm -f "$pid_file"
+        fi
+    fi
+    
+    # 检查端口是否被占用
+    if netstat -tuln 2>/dev/null | grep -q ":9100 "; then
+        log_warning "端口 9100 已被占用，请检查是否有其他服务在运行"
+        return 1
+    fi
+    
+    # 启动服务
+    log_info "正在启动 Node Exporter..."
+    nohup "$binary_path" --web.listen-address=:9100 > "$log_file" 2>&1 &
+    local pid=$!
+    
+    # 保存 PID
+    echo "$pid" > "$pid_file"
+    
+    # 等待服务启动
+    sleep 2
+    
+    # 检查服务是否成功启动
+    if kill -0 "$pid" 2>/dev/null; then
+        log_success "Node Exporter 服务启动成功 (PID: $pid)"
+        log_info "日志文件: $log_file"
+        log_info "PID 文件: $pid_file"
+    else
+        log_error "Node Exporter 服务启动失败"
+        rm -f "$pid_file"
+        return 1
+    fi
+}
+
+
+
+# Print the post-install summary: installed paths, how to start and test the
+# exporter, and a sample Prometheus scrape configuration.
+show_install_info() {
+    log_success "Node Exporter 安装完成！"
+    cat <<EOF
+
+安装信息:
+  二进制文件: /usr/local/bin/node-exporter
+  运行用户: node_exporter
+  配置目录: /etc/node_exporter/
+  默认端口: 9100
+
+使用方法:
+  手动启动: /usr/local/bin/node-exporter --web.listen-address=:9100
+  后台启动: nohup /usr/local/bin/node-exporter --web.listen-address=:9100 &
+
+测试连接:
+  curl http://localhost:9100/metrics
+  curl http://localhost:9100
+
+Prometheus 配置示例:
+  - job_name: 'node_exporter'
+    static_configs:
+      - targets: ['localhost:9100']
+
+EOF
+}
+
+# 主函数
+main() {
+    echo "=========================================="
+    echo "    Node Exporter 安装脚本 v1.0"
+    echo "=========================================="
+    echo
+    
+    check_root
+    check_system
+    
+    log_info "开始安装 Node Exporter..."
+    
+    install_node_exporter
+    create_user
+    install_config
+    start_node_exporter
+    
+    show_install_info
+}
+
+# 脚本入口
+if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
+    main "$@"
+fi
--- a/src/metric/client-plugins/node-exporter-installer/package.sh
+++ b/src/metric/client-plugins/node-exporter-installer/package.sh
@ -0,0 +1,87 @@
+#!/bin/bash
+
+set -e
+
+# 颜色定义
+GREEN='\033[0;32m'
+BLUE='\033[0;34m'
+NC='\033[0m' # No Color
+
+log_info() {
+    echo -e "${BLUE}[INFO]${NC} $1"
+}
+
+log_success() {
+    echo -e "${GREEN}[SUCCESS]${NC} $1"
+}
+
+# 获取当前目录
+CURRENT_DIR=$(pwd)
+PACKAGE_NAME="node-exporter-installer-$(date +%Y%m%d-%H%M%S)"
+PACKAGE_FILE="${PACKAGE_NAME}.tar.gz"
+
+log_info "开始打包 Node Exporter 安装包..."
+
+# 检查必要文件
+log_info "检查必要文件..."
+
+required_files=(
+    "install.sh"
+    "uninstall.sh"
+    "bin/node_exporter"
+    "check_health.sh"
+)
+
+missing_files=()
+for file in "${required_files[@]}"; do
+    if [[ ! -f "$file" ]]; then
+        missing_files+=("$file")
+    fi
+done
+
+if [[ ${#missing_files[@]} -gt 0 ]]; then
+    echo "缺少以下文件:"
+    for file in "${missing_files[@]}"; do
+        echo "  - $file"
+    done
+    exit 1
+fi
+
+log_success "所有必要文件检查完成"
+
+# 创建临时目录
+TEMP_DIR=$(mktemp -d)
+log_info "创建临时目录: $TEMP_DIR"
+
+# 复制文件到临时目录
+cp -r . "$TEMP_DIR/$PACKAGE_NAME"
+
+# 进入临时目录
+cd "$TEMP_DIR"
+
+# 创建压缩包
+log_info "创建压缩包: $PACKAGE_FILE"
+tar -czf "$PACKAGE_FILE" "$PACKAGE_NAME"
+
+# 移动压缩包到原目录
+mv "$PACKAGE_FILE" "$CURRENT_DIR/"
+
+# 清理临时目录
+rm -rf "$TEMP_DIR"
+
+# 返回原目录
+cd "$CURRENT_DIR"
+
+# 显示结果
+log_success "打包完成！"
+echo
+echo "安装包文件: $PACKAGE_FILE"
+echo "文件大小: $(du -h "$PACKAGE_FILE" | cut -f1)"
+echo
+echo "使用方法:"
+echo "1. 将 $PACKAGE_FILE 传输到目标服务器"
+echo "2. 解压: tar -xzf $PACKAGE_FILE"
+echo "3. 进入目录: cd $PACKAGE_NAME"
+echo "4. 运行安装: sudo ./install.sh"
+echo
+echo "注意: 请确保所有必要文件都存在"
--- a/src/metric/client-plugins/node-exporter-installer/uninstall.sh
+++ b/src/metric/client-plugins/node-exporter-installer/uninstall.sh
@ -0,0 +1,239 @@
+#!/bin/bash
+
+# Node Exporter 卸载脚本
+# 版本: 1.0
+# 作者: AIOps Team
+# 日期: $(date +%Y-%m-%d)
+
+set -e
+
+# 颜色定义
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+NC='\033[0m' # No Color
+
+# 日志函数
+log_info() {
+    echo -e "${BLUE}[INFO]${NC} $1"
+}
+
+log_success() {
+    echo -e "${GREEN}[SUCCESS]${NC} $1"
+}
+
+log_warning() {
+    echo -e "${YELLOW}[WARNING]${NC} $1"
+}
+
+log_error() {
+    echo -e "${RED}[ERROR]${NC} $1"
+}
+
+# Exit with status 1 unless the script is running as root.
+check_root() {
+    if [[ $EUID -eq 0 ]]; then
+        return 0
+    fi
+    log_error "此脚本需要 root 权限运行"
+    log_info "请使用: sudo $0"
+    exit 1
+}
+
+# Stop every running Node Exporter: first the process recorded in the PID
+# file, then any remaining process named node_exporter / node-exporter.
+# Each is sent TERM first and KILL only if it is still alive afterwards.
+# The PID file is removed in all cases.
+stop_processes() {
+    log_info "停止 Node Exporter 进程..."
+
+    local pid_file="/var/run/node-exporter.pid"
+    local stopped=false
+    # pgrep takes an extended regex, where alternation is a bare "|"
+    # ("\|" matches a literal pipe, so the old pattern never matched).
+    # It is used with -x (exact process name) rather than -f so that this
+    # script, whose path may contain "node-exporter", is never matched.
+    local proc_pattern="node_exporter|node-exporter"
+    local pid pids remaining_pids
+
+    # Stop the process tracked by the PID file.
+    if [[ -f "$pid_file" ]]; then
+        pid=$(cat "$pid_file")
+        if kill -0 "$pid" 2>/dev/null; then
+            log_info "通过 PID 文件停止服务 (PID: $pid)..."
+            # The process may exit between the check and the signal.
+            kill "$pid" 2>/dev/null || true
+            sleep 3
+
+            if kill -0 "$pid" 2>/dev/null; then
+                log_warning "进程未响应，强制终止..."
+                kill -9 "$pid" 2>/dev/null || true
+            fi
+            log_success "Node Exporter 进程已停止"
+            stopped=true
+        else
+            log_warning "PID 文件存在但进程已不存在，清理 PID 文件"
+            rm -f "$pid_file"
+        fi
+    fi
+
+    # Sweep up processes that are not tracked by the PID file.
+    pids=$(pgrep -x "$proc_pattern" 2>/dev/null || true)
+    if [[ -n "$pids" ]]; then
+        log_info "发现 node_exporter 或 node-exporter 进程，正在停止..."
+        for pid in $pids; do
+            log_info "停止进程 PID: $pid"
+            kill "$pid" 2>/dev/null || true
+        done
+        sleep 2
+
+        # Force-kill whatever survived TERM.
+        remaining_pids=$(pgrep -x "$proc_pattern" 2>/dev/null || true)
+        if [[ -n "$remaining_pids" ]]; then
+            log_warning "进程未响应，强制终止..."
+            for pid in $remaining_pids; do
+                log_info "强制终止进程 PID: $pid"
+                kill -9 "$pid" 2>/dev/null || true
+            done
+            sleep 1
+        fi
+
+        # Final check.
+        if pgrep -x "$proc_pattern" > /dev/null; then
+            log_error "无法停止所有 node_exporter 进程"
+        else
+            log_success "所有 Node Exporter 进程已停止"
+            stopped=true
+        fi
+    else
+        log_info "Node Exporter 进程未运行"
+    fi
+
+    rm -f "$pid_file"
+
+    if [[ "$stopped" == "false" ]]; then
+        log_warning "未发现需要停止的 Node Exporter 进程"
+    fi
+}
+
+# 删除二进制文件
+remove_binary() {
+    log_info "删除 Node Exporter 二进制文件..."
+    
+    local binary_files=(
+        "/usr/local/bin/node-exporter"
+        "/usr/local/bin/node_exporter"
+    )
+    
+    local deleted=false
+    for binary_file in "${binary_files[@]}"; do
+        if [[ -f "$binary_file" ]]; then
+            rm -f "$binary_file"
+            log_success "二进制文件已删除: $binary_file"
+            deleted=true
+        fi
+    done
+    
+    if [[ "$deleted" == "false" ]]; then
+        log_info "二进制文件不存在"
+    fi
+}
+
+# Delete the configuration directory /etc/node_exporter if it exists.
+remove_config() {
+    log_info "删除配置文件..."
+
+    local target_dir="/etc/node_exporter"
+
+    if [[ ! -d "$target_dir" ]]; then
+        log_info "配置目录不存在"
+        return
+    fi
+    rm -rf "$target_dir"
+    log_success "配置目录已删除"
+}
+
+# Delete the data directory /var/lib/node_exporter if it exists.
+remove_data_dir() {
+    log_info "删除数据目录..."
+
+    local target_dir="/var/lib/node_exporter"
+
+    if [[ ! -d "$target_dir" ]]; then
+        log_info "数据目录不存在"
+        return
+    fi
+    rm -rf "$target_dir"
+    log_success "数据目录已删除"
+}
+
+# Report whether the node_exporter account exists. The account is kept on
+# purpose; only a hint on how to delete it manually is printed.
+check_user_status() {
+    log_info "检查 node_exporter 用户状态..."
+
+    if ! id "node_exporter" &>/dev/null; then
+        log_info "node_exporter 用户不存在"
+        return
+    fi
+
+    log_info "检测到 node_exporter 用户存在"
+    log_warning "node_exporter 是系统用户，可能被其他服务使用"
+    log_info "为了系统稳定性，将保留 node_exporter 用户"
+    log_info "如需手动删除，请运行: sudo userdel node_exporter"
+}
+
+# Delete the log file written by the installer.
+cleanup_logs() {
+    log_info "清理日志文件..."
+
+    # Only the installer's own log file is removed. Do NOT run
+    # `journalctl --vacuum-time=...` here: it prunes the archived journal of
+    # every service on the host, and the exporter (started with nohup and
+    # redirected to this file) does not log to the journal anyway.
+    rm -f /var/log/node-exporter.log
+
+    log_success "日志文件已清理"
+}
+
+# Print the post-uninstall summary.
+show_uninstall_info() {
+    log_success "Node Exporter 卸载完成！"
+    cat <<EOF
+
+已删除的内容:
+  - 二进制文件: /usr/local/bin/node-exporter
+  - 配置目录: /etc/node_exporter
+  - 数据目录: /var/lib/node_exporter
+  - 相关日志文件
+
+注意:
+  - node_exporter 用户已保留（系统用户，可能被其他服务使用）
+  - 如需完全清理，请手动检查并删除相关文件
+
+EOF
+}
+
+# 主函数
+main() {
+    echo "=========================================="
+    echo "    Node Exporter 卸载脚本 v1.0"
+    echo "=========================================="
+    echo
+    
+    check_root
+    
+    log_warning "此操作将完全卸载 Node Exporter"
+    read -p "确认继续？(y/N): " confirm
+    
+    if [[ "$confirm" != "y" && "$confirm" != "Y" ]]; then
+        log_info "取消卸载操作"
+        exit 0
+    fi
+    
+    log_info "开始卸载 Node Exporter..."
+    
+    stop_processes
+    remove_binary
+    remove_config
+    remove_data_dir
+    cleanup_logs
+    
+    # 检查用户状态
+    check_user_status
+    
+    show_uninstall_info
+}
+
+# 脚本入口
+if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
+    main "$@"
+fi
--- a/src/metric/ftp/deps/vsftpd_3.0.5-0ubuntu1.1_amd64.deb
+++ b/src/metric/ftp/deps/vsftpd_3.0.5-0ubuntu1.1_amd64.deb
--- a/src/metric/ftp/vsftpd-config-README.md
+++ b/src/metric/ftp/vsftpd-config-README.md
@ -0,0 +1,111 @@
+# vsftpd 配置
+
+配置 vsftpd FTP 服务器。
+
+## 安装 deps 目录下的 vsftpd 离线安装包
+
+```bash
+# 在 deps 目录下执行
+sudo dpkg -i vsftpd_3.0.5-0ubuntu1.1_amd64.deb
+
+# 如果提示依赖问题，执行以下命令修复依赖
+sudo apt-get install -f
+```
+
+## 启动服务
+
+```bash
+sudo service vsftpd start
+
+# 重启服务
+sudo service vsftpd restart
+
+# 查看状态
+sudo service vsftpd status
+```
+
+## 备份配置文件
+
+先备份默认配置，出问题能恢复：
+
+```bash
+sudo cp /etc/vsftpd.conf /etc/vsftpd.conf.bak
+```
+
+## 修改配置文件
+
+编辑配置：
+
+```bash
+sudo vim /etc/vsftpd.conf
+```
+
+### 基本配置参数
+
+```bash
+# 允许本地用户登录
+local_enable=YES
+
+# 允许写操作（上传/删除/修改）
+write_enable=YES
+
+# 限制用户在自己目录中，不能访问整个系统
+chroot_local_user=YES
+
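+# 允许 chroot 根目录可写（重要！）。否则当用户主目录可写时，vsftpd 会拒绝登录并报错：
+# 500 OOPS: vsftpd: refusing to run with writable root inside chroot()
+allow_writeable_chroot=YES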
+
+# 被动模式配置
+pasv_enable=YES
+pasv_min_port=30000
+pasv_max_port=31000
+```
+
+## 创建 FTP 目录和用户
+
+### 创建共享目录
+
+```bash
+sudo mkdir -p /srv/ftp/share
+sudo chmod 755 /srv/ftp/share
+```
+
+### 创建专用用户
+
+```bash
+sudo adduser ftpuser
+
+# 将用户主目录修改为共享目录
+sudo usermod -d /srv/ftp/share ftpuser
+
+# 将共享目录属主改为 ftpuser，否则该用户无法在其中上传或修改文件
+sudo chown ftpuser:ftpuser /srv/ftp/share
+```
+
+## 重启服务
+
+```bash
+sudo service vsftpd restart
+```
+
+## 防火墙配置
+
+### 开放基本端口
+
+```bash
+sudo ufw allow 21/tcp
+```
+
+### 开放被动模式端口
+
+```bash
+sudo ufw allow 30000:31000/tcp
+```
+
+## 测试连接
+
+```bash
+# 本地测试
+ftp localhost
+
+# 远程测试
+ftp 你的服务器IP
+```
+
+用户名：ftpuser  
+密码：设置的密码
--- a/src/metric/ftp/vsftpd-offline-install.sh
+++ b/src/metric/ftp/vsftpd-offline-install.sh
@ -0,0 +1,49 @@
+#!/bin/bash
+
+# vsftpd offline installer.
+# Usage: ./vsftpd-offline-install.sh
+# Expects the packages in ./vsftpd-offline/debs relative to the current
+# directory and must be run as root.
+
+set -e
+
+echo "开始 vsftpd 离线安装..."
+
+# Root is required for dpkg and systemctl.
+if [ "$EUID" -ne 0 ]; then
+    echo "请使用 root 权限运行此脚本"
+    exit 1
+fi
+
+# Location of the offline packages.
+OFFLINE_DIR="./vsftpd-offline"
+DEB_DIR="$OFFLINE_DIR/debs"
+
+# Explain how to prepare the offline package directory.
+print_prepare_hint() {
+    echo "请先准备离线包，方法："
+    echo "1. 在有网络的机器上运行："
+    echo "   mkdir -p $DEB_DIR"
+    echo "   cd $DEB_DIR"
+    echo "   apt download vsftpd"
+    echo "   apt download \$(apt-cache depends vsftpd | grep Depends | cut -d: -f2 | tr -d ' ')"
+    echo "2. 将整个 $OFFLINE_DIR 目录拷贝到目标机器"
+}
+
+if [ ! -d "$OFFLINE_DIR" ]; then
+    echo "错误：找不到离线包目录 $OFFLINE_DIR"
+    print_prepare_hint
+    exit 1
+fi
+
+# Collect the packages up front. With nullglob an empty or missing debs
+# directory yields an empty list instead of passing the literal "*.deb"
+# to dpkg.
+shopt -s nullglob
+deb_files=("$DEB_DIR"/*.deb)
+shopt -u nullglob
+
+if [ "${#deb_files[@]}" -eq 0 ]; then
+    echo "错误：$DEB_DIR 中没有找到 .deb 安装包"
+    print_prepare_hint
+    exit 1
+fi
+
+# Install the packages. If dpkg reports unmet dependencies, let apt try to
+# fix them (this only succeeds when apt can reach a package source).
+echo "安装 vsftpd 及依赖包..."
+dpkg -i "${deb_files[@]}" || apt-get install -f -y
+
+# Make sure the service is running.
+if systemctl is-active --quiet vsftpd; then
+    echo "vsftpd 安装成功并已启动"
+else
+    echo "启动 vsftpd 服务..."
+    systemctl start vsftpd
+    systemctl enable vsftpd
+fi
+
+echo "vsftpd 离线安装完成！"
+echo "配置文件位置: /etc/vsftpd.conf"
+echo "服务状态: $(systemctl is-active vsftpd)"
--- a/src/metric/prometheus/Dockerfile
+++ b/src/metric/prometheus/Dockerfile
@ -0,0 +1,67 @@
+# Prometheus image that runs Prometheus under supervisord.
+FROM ubuntu/prometheus:3-24.04_stable
+
+USER root
+
+# Build with --build-arg USE_INTRANET=true to use the internal apt mirror.
+ARG USE_INTRANET=false
+
+# Intranet apt source (build time): back up sources.list, point apt at the
+# internal mirror and disable HTTPS peer/host verification for apt.
+# NOTE(review): the base image tag suggests Ubuntu 24.04, while the mirror
+# path is ubuntu2204/jammy - confirm the two are meant to be mixed.
+RUN if [ "$USE_INTRANET" = "true" ]; then \
+        echo "Configuring intranet apt sources..." && \
+        cp /etc/apt/sources.list /etc/apt/sources.list.bak && \
+        echo "deb [trusted=yes] http://10.68.64.1/ubuntu2204/ jammy main" > /etc/apt/sources.list && \
+        echo 'Acquire::https::Verify-Peer "false";' > /etc/apt/apt.conf.d/99disable-ssl-check && \
+        echo 'Acquire::https::Verify-Host "false";' >> /etc/apt/apt.conf.d/99disable-ssl-check; \
+    fi
+
+# Common tools: supervisor runs Prometheus; net-tools provides ifconfig.
+RUN apt-get update && \
+    apt-get install -y supervisor net-tools inetutils-ping vim && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# For the deployment environment, replace the apt source with the
+# deployment-side mirror (takes effect for apt runs inside the container).
+RUN if [ "$USE_INTRANET" = "true" ]; then \
+    echo "deb [trusted=yes] https://10.92.132.52/mirrors/ubuntu2204/ jammy main" > /etc/apt/sources.list; \
+    fi
+
+# supervisor log directory
+RUN mkdir -p /var/log/supervisor
+
+# Base path for Prometheus configuration and data.
+ENV PROMETHEUS_BASE_PATH=/private/argus/metric/prometheus
+
+# UID/GID applied to the nobody/nogroup account below; overridable at build
+# time and also exported as environment variables.
+ARG PROMETHEUS_UID=2133
+ARG PROMETHEUS_GID=2015
+ENV PROMETHEUS_UID=${PROMETHEUS_UID}
+ENV PROMETHEUS_GID=${PROMETHEUS_GID}
+
+# Create the directory layout and replace /prometheus with a symlink to the
+# base path.
+RUN mkdir -p ${PROMETHEUS_BASE_PATH}/rules \
+    && mkdir -p ${PROMETHEUS_BASE_PATH}/targets \
+    && mkdir -p /private/argus/etc \
+    && rm -rf /prometheus \
+    && ln -s ${PROMETHEUS_BASE_PATH} /prometheus
+
+# Renumber nobody/nogroup to the configured UID/GID and give that account
+# ownership of the Prometheus directories (chown -h changes the symlink
+# itself rather than its target).
+RUN usermod -u ${PROMETHEUS_UID} nobody && \
+    groupmod -g ${PROMETHEUS_GID} nogroup && \
+    chown -h nobody:nogroup /prometheus && \
+    chown -R nobody:nogroup /private/argus/metric /etc/prometheus && \
+    chown -R nobody:nogroup ${PROMETHEUS_BASE_PATH}
+
+# supervisor configuration
+COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf
+
+# Startup script
+COPY start-prometheus-supervised.sh /usr/local/bin/start-prometheus-supervised.sh
+RUN chmod +x /usr/local/bin/start-prometheus-supervised.sh
+
+# Custom Prometheus configuration template
+COPY prometheus.yml /etc/prometheus/prometheus.yml
+
+# supervisord itself runs as root (this repeats the USER set above).
+USER root
+
+EXPOSE 9090
+
+# Run supervisord in the foreground (-n) as the container's main process.
+ENTRYPOINT ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf", "-n"]
--- a/src/metric/prometheus/README.md
+++ b/src/metric/prometheus/README.md
@ -0,0 +1,126 @@
+# Prometheus Docker 镜像配置
+
+## 环境变量配置
+
+### PROMETHEUS_BASE_PATH
+
+设置 Prometheus 配置和数据的基础路径。
+
+**默认值**: `/private/argus/metric/prometheus`
+
+**用途**:
+- 配置文件存储路径: `${PROMETHEUS_BASE_PATH}/prometheus.yml`
+- 规则文件路径: `${PROMETHEUS_BASE_PATH}/rules/*.yml`
+- 监控目标文件路径: `${PROMETHEUS_BASE_PATH}/targets/`
+
+## 使用示例
+
+### 1. 使用默认路径
+```bash
+docker run -d \
+  --name prometheus \
+  -p 9090:9090 \
+  -v /host/prometheus/data:/private/argus/metric/prometheus \
+  prometheus:latest
+```
+
+### 2. 自定义基础路径
+```bash
+docker run -d \
+  --name prometheus \
+  -p 9090:9090 \
+  -e PROMETHEUS_BASE_PATH=/custom/prometheus/path \
+  -v /host/prometheus/data:/custom/prometheus/path \
+  prometheus:latest
+```
+
+### 3. Kubernetes 部署示例
+```yaml
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: prometheus
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: prometheus
+  template:
+    metadata:
+      labels:
+        app: prometheus
+    spec:
+      containers:
+      - name: prometheus
+        image: prometheus:latest
+        env:
+        - name: PROMETHEUS_BASE_PATH
+          value: "/data/prometheus"
+        ports:
+        - containerPort: 9090
+        volumeMounts:
+        - name: prometheus-data
+          mountPath: /data/prometheus
+      volumes:
+      - name: prometheus-data
+        persistentVolumeClaim:
+          claimName: prometheus-pvc
+```
+
+## 目录结构
+
+容器启动后会在 `${PROMETHEUS_BASE_PATH}` 下创建以下目录结构：
+
+```
+${PROMETHEUS_BASE_PATH}/
+├── prometheus.yml          # 主配置文件
+├── rules/                  # 告警规则目录
+│   └── *.yml
+└── targets/                # 监控目标目录
+    ├── node_exporter.json
+    └── dcgm_exporter.json
+```
+
+## 动态配置
+
+- **规则文件**: 在 `rules/` 目录下添加 `.yml` 文件即可自动加载
+- **监控目标**: 修改 `targets/` 目录下的 JSON 文件即可动态更新监控目标
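+- **主配置**: 修改 `prometheus.yml` 后，可向 Prometheus 的 `/-/reload` 端点发送 POST 请求重新加载配置（需要以 `--web.enable-lifecycle` 参数启动 Prometheus），例如 `curl -X POST http://localhost:9090/-/reload`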
+
+## 权限管理
+
+### 默认路径权限
+- 默认路径 `/private/argus/metric/prometheus` 在 Dockerfile 中已设置正确的权限
+- nobody 用户（UID: 2133, GID: 2015）拥有完全读写权限
+
+### 自定义路径权限
+- 当使用自定义 `PROMETHEUS_BASE_PATH` 时，启动脚本会自动创建目录并设置权限
+- 确保 nobody 用户对自定义路径有读写权限
+
+### 挂载卷注意事项
+1. **主机目录权限**: 确保挂载的主机目录对 nobody 用户（UID: 2133）可写
+2. **SELinux**: 如果使用 SELinux，可能需要设置适当的上下文
+3. **Docker 用户映射**: 确保容器内的 nobody 用户与主机用户权限匹配
+
+## 故障排除
+
+### 权限问题
+如果遇到权限错误，可以检查：
+```bash
+# 检查目录权限
+ls -la /path/to/prometheus/data
+
+# 检查用户映射
+id nobody
+
+# 手动修复权限
+chown -R 2133:2015 /path/to/prometheus/data
+chmod -R 755 /path/to/prometheus/data
+```
+
+## 注意事项
+
+1. 确保挂载的目录有适当的读写权限
+2. 配置文件会在容器启动时自动生成，无需手动创建
+3. 可以通过修改环境变量 `PROMETHEUS_BASE_PATH` 来改变所有相关路径，无需重新构建镜像
+4. 自定义路径的目录会在启动时自动创建并设置权限
--- a/src/metric/prometheus/demo-targets/dcgm_exporter.json
+++ b/src/metric/prometheus/demo-targets/dcgm_exporter.json
@ -0,0 +1,9 @@
+[
+  {
+    "targets": ["localhost:9400"],
+    "labels": {
+      "job": "dcgm",
+      "instance": "dcgm-exporter"
+    }
+  }
+]
--- a/src/metric/prometheus/demo-targets/node_exporter.json
+++ b/src/metric/prometheus/demo-targets/node_exporter.json
@ -0,0 +1,9 @@
+[
+  {
+    "targets": ["localhost:9100", "192.168.16.116:9100"],
+    "labels": {
+      "job": "node",
+      "instance": "node-exporter"
+    }
+  }
+]
--- a/src/metric/prometheus/prometheus.yml
+++ b/src/metric/prometheus/prometheus.yml
@ -0,0 +1,27 @@
+# Prometheus configuration template.
+#
+# The literal ${PROMETHEUS_BASE_PATH} strings below are placeholders, not
+# something Prometheus expands by itself. start-prometheus-supervised.sh runs
+# sed over /etc/prometheus/prometheus.yml to substitute them with the real base
+# path (presumably this file is installed at that location - confirm against
+# the Dockerfile).
+global:
+  scrape_interval: 15s
+  evaluation_interval: 15s
+  scrape_timeout: 10s
+
+# Alertmanager integration. The target list is empty, so no Alertmanager
+# endpoint is configured here.
+alerting:
+  alertmanagers:
+    - static_configs:
+        - targets: []
+
+# Rule files: every *.yml file in the rules/ directory under the base path.
+rule_files:
+  - "${PROMETHEUS_BASE_PATH}/rules/*.yml"
+
+# Scrape targets come from file-based service discovery: each job reads its
+# target list from one JSON file in the targets/ directory under the base path.
+scrape_configs:
+  - job_name: "node"
+    file_sd_configs:
+      - files:
+          - "${PROMETHEUS_BASE_PATH}/targets/node_exporter.json"
+        refresh_interval: 30s
+
+  - job_name: "dcgm"
+    file_sd_configs:
+      - files:
+          - "${PROMETHEUS_BASE_PATH}/targets/dcgm_exporter.json"
+        refresh_interval: 30s
--- a/src/metric/prometheus/start-prometheus-supervised.sh
+++ b/src/metric/prometheus/start-prometheus-supervised.sh
@ -0,0 +1,26 @@
+#!/bin/bash
+set -euo pipefail
+
+echo "[INFO] Starting Prometheus under supervisor..."
+
+PROMETHEUS_BASE_PATH=${PROMETHEUS_BASE_PATH:-/private/argus/metric/prometheus}
+DOMAIN=prom.metric.argus.com
+
+echo "[INFO] Prometheus base path: ${PROMETHEUS_BASE_PATH}"
+
+# 生成配置文件
+echo "[INFO] Generating prometheus.yml with base path: ${PROMETHEUS_BASE_PATH}"
+sed "s|\${PROMETHEUS_BASE_PATH}|${PROMETHEUS_BASE_PATH}|g" \
+    /etc/prometheus/prometheus.yml > ${PROMETHEUS_BASE_PATH}/prometheus.yml
+
+# 记录容器 IP
+IP=$(ifconfig eth0 | awk '/inet /{print $2}')
+echo "current IP: ${IP}"
+echo "${IP}" > /private/argus/etc/${DOMAIN}
+
+exec /bin/prometheus \
+    --config.file=${PROMETHEUS_BASE_PATH}/prometheus.yml \
+    --storage.tsdb.path=/prometheus \
+    --web.enable-lifecycle \
+    --web.console.libraries=/usr/share/prometheus/console_libraries \
+    --web.console.templates=/usr/share/prometheus/consoles
--- a/src/metric/prometheus/supervisord.conf
+++ b/src/metric/prometheus/supervisord.conf
@ -0,0 +1,27 @@
+; Supervisor configuration: supervisord runs as root and manages one program,
+; the Prometheus start script, which runs as the unprivileged user nobody.
+[supervisord]
+; Stay in the foreground instead of daemonizing.
+nodaemon=true
+logfile=/var/log/supervisor/supervisord.log
+pidfile=/var/run/supervisord.pid
+user=root
+
+[program:prometheus]
+command=/usr/local/bin/start-prometheus-supervised.sh
+user=nobody
+stdout_logfile=/var/log/supervisor/prometheus.log
+stderr_logfile=/var/log/supervisor/prometheus_error.log
+; Restart the program whenever it exits. A start counts as successful only
+; after the process has stayed up for startsecs seconds, and at most
+; startretries consecutive failed starts are attempted.
+autorestart=true
+startretries=3
+startsecs=30
+; Seconds to wait after the stop signal before the process is killed.
+stopwaitsecs=30
+; Deliver stop/kill signals to the whole process group, not just the parent.
+killasgroup=true
+stopasgroup=true
+
+; Control socket used by supervisorctl; mode 0700 restricts it to the owner.
+[unix_http_server]
+file=/var/run/supervisor.sock
+chmod=0700
+
+[supervisorctl]
+serverurl=unix:///var/run/supervisor.sock
+
+; RPC interface that supervisorctl talks to over the socket above.
+[rpcinterface:supervisor]
+supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface
Author	SHA1	Message	Date
sundapeng.sdp	68b265624c	feat: 基于算力平台的Prometheus镜像改造，supervisor自启应用；调整Prometheus.yml结构； refs #9	2025-09-23 17:37:59 +08:00
sundapeng.sdp	d9d937f5d6	feat: FTP服务器离线安装及配置；数据采集客户端支持一键部署、版本校验、组件健康检查、失败回滚等功能； refs #3	2025-09-23 17:37:59 +08:00
sundapeng.sdp	095e8ee32d	feat: 提交示例Node-Exporter安装包； refs #3	2025-09-23 17:37:59 +08:00
sundapeng.sdp	a6c60b2edd	feat: 支持all-in-one模式构建、发布安装包；支持各组件健康状态监测；Prometheus相关配置文件； refs #3	2025-09-23 17:37:59 +08:00
yuyr	8a38d3d0b2	dev_1.0.0_yuyr 完成 log和bind模块开发部署测试 (#8 ) - [x] 完成log模块镜像构建、本地端到端写日志——收集——查询流程； - [x] 完成bind模块构建； - [x] 内置域名IP自动更新脚本，使用 /private/argus/etc目录下文件进行同步，容器启动时自动写IP，定时任务刷新更新DNS服务器IP和DNS规则； Co-authored-by: root <root@curious.host.com> Reviewed-on: #8 Reviewed-by: sundapeng <sundp@mail.zgclab.edu.cn>	2025-09-22 16:39:38 +08:00