290 lines
7.7 KiB
Bash
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
# Abort the script as soon as a command fails outside of a tested context.
set -e
# Color definitions: backslash escape sequences for ANSI colors. They are stored
# as literal text and only become real escape codes when printed by the log_*
# helpers through `echo -e`.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
# Logging helpers.
# log_info MESSAGE - print MESSAGE to stdout behind a blue "[INFO]" tag.
# Backslash escapes in the tag colors and in MESSAGE are interpreted.
log_info() {
  printf '%b\n' "${BLUE}[INFO]${NC} $1"
}
# log_success MESSAGE - print MESSAGE to stdout behind a green "[SUCCESS]" tag.
# Backslash escapes in the tag colors and in MESSAGE are interpreted.
log_success() {
  printf '%b\n' "${GREEN}[SUCCESS]${NC} $1"
}
# log_warning MESSAGE - print MESSAGE to stdout behind a yellow "[WARNING]" tag.
# Backslash escapes in the tag colors and in MESSAGE are interpreted.
log_warning() {
  printf '%b\n' "${YELLOW}[WARNING]${NC} $1"
}
# log_error MESSAGE - print MESSAGE to stdout behind a red "[ERROR]" tag.
# Backslash escapes in the tag colors and in MESSAGE are interpreted.
log_error() {
  printf '%b\n' "${RED}[ERROR]${NC} $1"
}
# Print the usage/help text to stdout.
# $0 is embedded so the text shows the name the script was invoked with.
show_help() {
  printf '%s\n' \
    "Argus Agent 安装脚本" \
    "" \
    "用法: $0 [选项]" \
    "" \
    "选项:" \
    " --help 显示此帮助信息" \
    "" \
    "示例:" \
    " $0 # 安装 Argus Agent" \
    ""
}
# Parse command-line arguments.
# --help / -h prints the usage text and exits 0; any other argument starting
# with "--" is rejected with exit status 1; every remaining argument is stored
# as the installation directory (the last one wins).
INSTALL_DIR=""
for arg in "$@"; do
  case "$arg" in
    --help | -h)
      show_help
      exit 0
      ;;
    --*)
      log_error "未知参数: $arg"
      show_help
      exit 1
      ;;
    *)
      INSTALL_DIR="$arg"
      ;;
  esac
done
# Verify that the script runs with root privileges.
# Returns 0 when the effective UID is 0; otherwise logs an error plus a sudo
# hint and terminates the script with exit status 1.
check_root() {
  (( EUID == 0 )) && return 0
  log_error "此脚本需要 root 权限运行"
  log_info "请使用: sudo $0"
  exit 1
}
# Check system requirements.
#
# Reports the distribution read from /etc/os-release and the CPU architecture
# reported by `uname -m`. An unrecognised distribution or a non-x86_64
# architecture only produces a warning.
#
# Globals:  sources /etc/os-release into the current shell, so the variables
#           that file defines (NAME, VERSION, ID are read here) become set.
# Outputs:  messages through log_info / log_warning / log_error.
# Returns:  status of the last check; exits the script with status 1 when
#           /etc/os-release does not exist.
check_system() {
  log_info "检查系统要求..."

  # Detect the operating system.
  if [[ ! -f /etc/os-release ]]; then
    log_error "无法检测操作系统版本"
    exit 1
  fi
  # shellcheck source=/dev/null
  source /etc/os-release
  log_info "检测到操作系统: ${NAME:-} ${VERSION:-}"

  # Warn when the distribution is not one of the explicitly listed ones.
  case "${ID:-}" in
    ubuntu | debian | centos | rhel | fedora) ;;
    *)
      log_warning "此脚本主要针对常见 Linux 发行版,其他系统可能需要调整"
      ;;
  esac

  # Detect the CPU architecture; declaration and assignment are split so the
  # status of uname is not hidden by `local`.
  local arch
  arch="$(uname -m)" || arch="unknown"
  log_info "系统架构: $arch"
  if [[ "$arch" != "x86_64" && "$arch" != "amd64" ]]; then
    # The braces delimit the variable name: without them the shell expands
    # "$archargus" (unset) and the architecture is missing from the message.
    log_warning "当前架构为 ${arch},argus-agent 主要支持 x86_64/amd64"
  fi
}
# Stop any argus-agent instance that may still be running (best effort).
#
# Steps:
#   1. If /var/run/argus-agent.pid holds the PID of a process whose command
#      name is argus-agent, send TERM, wait two seconds, then send KILL.
#      The PID file is removed in any case.
#   2. Send KILL to every remaining process named exactly argus-agent.
#   3. For every zombie argus-agent, send KILL to its parent process.
#      The parent is skipped when its PID cannot be determined or is 0, 1 or
#      this shell's own PID.
#
# Outputs:  messages through log_info / log_success / log_warning.
# Returns:  0; failures to signal a process are deliberately ignored.
stop_existing_service() {
  log_info "检查并停止可能运行的服务..."
  local pid_file="/var/run/argus-agent.pid"
  # Declared up front so the loop variables never leak into the global scope.
  local pid pids zombies ppid

  if [[ -f "$pid_file" ]]; then
    pid="$(cat "$pid_file" 2>/dev/null || true)"
    pid="${pid//[[:space:]]/}"
    # Trust the PID file only when it contains a number that still belongs
    # to an argus-agent process.
    if [[ "$pid" =~ ^[0-9]+$ ]] && ps -p "$pid" -o comm= | grep -q "^argus-agent$"; then
      kill "$pid" 2>/dev/null || true
      sleep 2
      kill -9 "$pid" 2>/dev/null || true
      log_success "服务已停止"
    fi
    rm -f "$pid_file"
  fi

  pids="$(pgrep -x argus-agent 2>/dev/null || true)"
  # Word splitting is intended: pgrep prints one PID per line.
  for pid in $pids; do
    kill -9 "$pid" 2>/dev/null || true
  done

  # Look for zombie processes: the second ps column is the process state.
  zombies="$(ps -eo pid,stat,comm | grep '[a]rgus-agent' | awk '$2 ~ /Z/ {print $1}' || true)"
  for pid in $zombies; do
    ppid="$(ps -o ppid= -p "$pid" 2>/dev/null || true)"
    # ps pads the column with blanks; strip them before comparing.
    ppid="${ppid//[[:space:]]/}"
    log_warning "检测到僵尸 argus-agent (PID=$pid, PPID=$ppid),尝试清理"
    # Never signal an empty PID, PID 0 (the whole process group), PID 1 or
    # the installer itself.
    if [[ "$ppid" =~ ^[0-9]+$ && "$ppid" != "0" && "$ppid" != "1" && "$ppid" != "$$" ]]; then
      kill -9 "$ppid" 2>/dev/null || true
    fi
  done
  return 0
}
# Install the Argus Agent binary.
#
# Copies bin/argus-agent (relative to the current working directory) to
# /usr/local/bin/argus-agent. Running instances are stopped first through
# stop_existing_service, then the function waits up to 10 seconds for them to
# disappear (the wait is cut short when a zombie argus-agent is detected).
# The binary is copied to "<target>.new" and renamed into place, so the target
# path is replaced in a single rename step.
#
# Outputs:  messages through log_info / log_warning / log_success / log_error.
# Returns:  0 on success; exits the script with status 1 when the source
#           binary is missing or the copy/rename fails.
install_argus_agent() {
  log_info "安装 Argus Agent..."
  local binary_file="bin/argus-agent"
  local install_dir="/usr/local/bin"
  local target_file="$install_dir/argus-agent"
  local timeout=10
  local remaining_pids

  if [[ ! -f "$binary_file" ]]; then
    log_error "找不到 Argus Agent 二进制文件: $binary_file"
    exit 1
  fi

  stop_existing_service

  while (( timeout > 0 )); do
    remaining_pids="$(pgrep -x argus-agent | grep -vw "$$" || true)"
    if [[ -z "$remaining_pids" ]]; then
      break
    fi
    # A zombie never exits by itself, so waiting for it is pointless.
    if ps -eo pid,stat,comm | grep -E 'argus-agent' | grep -q 'Z'; then
      log_warning "检测到僵尸 argus-agent跳过等待"
      break
    fi
    log_warning "等待 argus-agent 完全退出... ($timeout)"
    sleep 1
    # Plain assignment: unlike ((timeout--)) its exit status never depends on
    # the counter value, which matters while `set -e` is active.
    timeout=$(( timeout - 1 ))
  done

  # Checked explicitly so a failure is reported even when `set -e` is not in
  # effect (for example when this function is called from a sourcing shell).
  if ! {
    mkdir -p "$install_dir" \
      && cp "$binary_file" "${target_file}.new" \
      && chmod +x "${target_file}.new" \
      && mv -f "${target_file}.new" "$target_file"
  }; then
    rm -f "${target_file}.new"
    log_error "安装 Argus Agent 二进制文件失败: $target_file"
    exit 1
  fi
  log_success "Argus Agent 二进制文件安装完成"
}
# Create the argus-agent account unless it already exists.
# The account is created without a home directory and with /bin/false as
# its login shell.
create_user() {
  log_info "创建 argus-agent 用户..."
  local account="argus-agent"
  if ! id "$account" &>/dev/null; then
    useradd --no-create-home --shell /bin/false "$account"
    log_success "用户 argus-agent 创建完成"
  else
    log_info "用户 argus-agent 已存在"
  fi
}
# Prepare the directories used by the agent.
# Creates /etc/argus-agent and /var/lib/argus-agent/health, then hands the
# health directory over to argus-agent:argus-agent.
install_config() {
  local etc_dir="/etc/argus-agent"
  local health_dir="/var/lib/argus-agent/health"
  local owner="argus-agent:argus-agent"

  log_info "安装配置文件..."
  # Configuration directory.
  mkdir -p "$etc_dir"
  # Health-check directory, owned by the service account.
  mkdir -p "$health_dir"
  chown "$owner" "$health_dir"
}
# Start the Argus Agent service in the background.
#
# Launches /usr/local/bin/argus-agent through setsid with stdin detached and
# stdout/stderr redirected to /var/log/argus-agent.log, writes the background
# PID to /var/run/argus-agent.pid, and checks after two seconds that the
# process still exists.
#
# Outputs:  messages through log_info / log_success / log_error; on failure
#           the last 10 lines of the log file are printed.
# Returns:  0 when the process is alive after the grace period, 1 otherwise
#           (the PID file is removed in that case) so the caller does not
#           report a successful installation for an agent that is not running.
start_argus_agent() {
  log_info "启动 Argus Agent 服务..."
  local binary_path="/usr/local/bin/argus-agent"
  local log_file="/var/log/argus-agent.log"
  local pid_file="/var/run/argus-agent.pid"
  local pid

  # Drop any stale PID file from a previous run.
  rm -f "$pid_file"

  log_info "正在启动 Argus Agent..."
  setsid "$binary_path" > "$log_file" 2>&1 < /dev/null &
  pid=$!
  echo "$pid" > "$pid_file"

  # Give the process a moment to fail fast before probing it with signal 0.
  sleep 2
  if kill -0 "$pid" 2>/dev/null; then
    log_success "Argus Agent 服务启动成功 (PID: $pid)"
    return 0
  fi

  log_error "Argus Agent 启动失败"
  if [[ -f "$log_file" ]]; then
    tail -n 10 "$log_file" || true
  fi
  rm -f "$pid_file"
  return 1
}
# Update the PID stored for argus-agent in the installation record.
#
# Arguments:
#   $1 - new PID to store (written as a JSON string)
#   $2 - installation base directory; defaults to /opt/argus-metric/current
# The record is the JSON file "<base dir>/.install_record"; only the field
# .components."argus-agent".pid is changed, all other fields are kept.
#
# Outputs:  messages through log_info / log_warning / log_error.
# Returns:  0 when the record was updated, when the record does not exist
#           (first installation) or when jq is unavailable;
#           1 when the current PID cannot be read or the rewrite fails.
update_install_record() {
  local pid="$1"
  local install_base_dir="${2:-/opt/argus-metric/current}"
  local install_record="$install_base_dir/.install_record"
  local tmp_record="$install_record.tmp"
  local current_pid

  # No record yet: this is a first installation, nothing to update here.
  if [[ ! -f "$install_record" ]]; then
    log_info "安装记录文件不存在,将由主安装脚本创建"
    return 0
  fi

  if ! command -v jq &> /dev/null; then
    log_warning "jq 命令不可用,无法更新安装记录文件"
    return 0
  fi

  # Read the currently recorded PID; an empty result means the field is
  # missing or the file could not be parsed.
  current_pid="$(jq -r '.components."argus-agent".pid // ""' "$install_record" 2>/dev/null || true)"
  if [[ -z "$current_pid" ]]; then
    log_warning "无法读取当前 PID跳过更新"
    return 1
  fi

  # Write to a temporary file next to the record and rename it into place;
  # success is reported only when both steps worked.
  if jq --arg new_pid "$pid" '.components."argus-agent".pid = $new_pid' "$install_record" > "$tmp_record" \
    && mv "$tmp_record" "$install_record"; then
    log_info "PID 已更新: $current_pid -> $pid"
    return 0
  fi

  # Do not leave a partial temporary file behind.
  rm -f "$tmp_record"
  log_error "更新安装记录失败: $install_record"
  return 1
}
# Print the post-installation summary: a success line through log_success,
# then the installed paths, usage hints and the health-check command.
show_install_info() {
  log_success "Argus Agent 安装完成!"
  # Quoted delimiter: the text below is emitted literally, without expansion.
  cat <<'INFO_EOF'

安装信息:
 二进制文件: /usr/local/bin/argus-agent
 运行用户: argus-agent
 配置目录: /etc/argus-agent/
 健康检查目录: /var/lib/argus-agent/health

使用方法:
 手动启动: /usr/local/bin/argus-agent
 后台启动: nohup /usr/local/bin/argus-agent &

健康检查:
 ./check_health.sh

INFO_EOF
}
# Main entry point: prints the banner, verifies root privileges and the
# system, then runs the installation steps in order.
main() {
  local rule="=========================================="
  printf '%s\n' "$rule" " Argus Agent 安装脚本 v1.0" "$rule" ""

  # Pre-flight checks.
  check_root
  check_system

  log_info "开始安装 Argus Agent..."

  # Installation steps, executed strictly in this order.
  local step
  for step in \
    install_argus_agent \
    create_user \
    install_config \
    start_argus_agent \
    show_install_info; do
    "$step"
  done
}
# Script entry point: run main only when this file is executed directly.
# When the file is sourced, BASH_SOURCE[0] differs from $0 and main is skipped.
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
main "$@"
fi