From 46c34f3de6ba1744584b2428e3783688bf253c61 Mon Sep 17 00:00:00 2001 From: "sundapeng.sdp" Date: Thu, 25 Sep 2025 16:31:41 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E5=AE=9A=E6=97=B6=E8=A7=A3=E6=9E=90arg?= =?UTF-8?q?us-master=E4=BA=A7=E7=94=9F=E7=9A=84=E8=B5=84=E6=BA=90=E6=96=87?= =?UTF-8?q?=E4=BB=B6=EF=BC=8C=E7=94=9F=E6=88=90=E5=8A=A8=E6=80=81=E9=85=8D?= =?UTF-8?q?=E7=BD=AE=E6=96=87=E4=BB=B6=EF=BC=8C=E7=94=A8=E4=BA=8E=20Promet?= =?UTF-8?q?heus=20=E7=83=AD=E5=8A=A0=E8=BD=BD=E9=85=8D=E7=BD=AE=EF=BC=9B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit refs #9 --- src/metric/prometheus/{ => build}/Dockerfile | 40 +- src/metric/prometheus/{ => build}/README.md | 96 ++-- .../prometheus/build/exporter_config.json | 41 ++ .../prometheus/{ => build}/prometheus.yml | 0 .../start-prometheus-supervised.sh | 0 .../prometheus/build/start-targets-updater.sh | 29 ++ .../prometheus/{ => build}/supervisord.conf | 12 + src/metric/prometheus/build/update_targets.py | 416 ++++++++++++++++++ 8 files changed, 576 insertions(+), 58 deletions(-) rename src/metric/prometheus/{ => build}/Dockerfile (55%) mode change 100644 => 100755 rename src/metric/prometheus/{ => build}/README.md (62%) mode change 100644 => 100755 create mode 100755 src/metric/prometheus/build/exporter_config.json rename src/metric/prometheus/{ => build}/prometheus.yml (100%) mode change 100644 => 100755 rename src/metric/prometheus/{ => build}/start-prometheus-supervised.sh (100%) mode change 100644 => 100755 create mode 100755 src/metric/prometheus/build/start-targets-updater.sh rename src/metric/prometheus/{ => build}/supervisord.conf (67%) mode change 100644 => 100755 create mode 100755 src/metric/prometheus/build/update_targets.py diff --git a/src/metric/prometheus/Dockerfile b/src/metric/prometheus/build/Dockerfile old mode 100644 new mode 100755 similarity index 55% rename from src/metric/prometheus/Dockerfile rename to src/metric/prometheus/build/Dockerfile index 
08fbb41..465ec40 --- a/src/metric/prometheus/Dockerfile +++ b/src/metric/prometheus/build/Dockerfile @@ -11,13 +11,32 @@ RUN if [ "$USE_INTRANET" = "true" ]; then \ echo "deb [trusted=yes] http://10.68.64.1/ubuntu2204/ jammy main" > /etc/apt/sources.list && \ echo 'Acquire::https::Verify-Peer "false";' > /etc/apt/apt.conf.d/99disable-ssl-check && \ echo 'Acquire::https::Verify-Host "false";' >> /etc/apt/apt.conf.d/99disable-ssl-check; \ + else \ + echo "Configuring fast apt sources for external network..." && \ + # 查找并替换sources.list文件 + find /etc/apt -name "sources.list*" -exec sed -i 's/archive.ubuntu.com/mirrors.aliyun.com/g' {} \; && \ + find /etc/apt -name "sources.list*" -exec sed -i 's/security.ubuntu.com/mirrors.aliyun.com/g' {} \; && \ + # 使用阿里云源 + echo "deb http://mirrors.aliyun.com/ubuntu/ jammy main restricted universe multiverse" > /etc/apt/sources.list && \ + echo "deb http://mirrors.aliyun.com/ubuntu/ jammy-updates main restricted universe multiverse" >> /etc/apt/sources.list && \ + echo "deb http://mirrors.aliyun.com/ubuntu/ jammy-security main restricted universe multiverse" >> /etc/apt/sources.list; \ fi -# 常用工具 -RUN apt-get update && \ - apt-get install -y supervisor net-tools inetutils-ping vim && \ +# 验证源配置并安装常用工具 +RUN echo "=== Current apt sources ===" && \ + cat /etc/apt/sources.list && \ + echo "=== Updating package list ===" && \ + apt-get update && \ + echo "=== Installing packages ===" && \ + apt-get install -y --no-install-recommends \ + supervisor \ + net-tools \ + inetutils-ping \ + vim \ + python3 \ + python3-pip && \ apt-get clean && \ - rm -rf /var/lib/apt/lists/* + rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* # 如果是部署环境替换 apt 源 RUN if [ "$USE_INTRANET" = "true" ]; then \ @@ -57,9 +76,22 @@ COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf COPY start-prometheus-supervised.sh /usr/local/bin/start-prometheus-supervised.sh RUN chmod +x /usr/local/bin/start-prometheus-supervised.sh +# targets 更新脚本 +COPY 
start-targets-updater.sh /usr/local/bin/start-targets-updater.sh +RUN chmod +x /usr/local/bin/start-targets-updater.sh + +# targets 更新 Python 脚本 +COPY update_targets.py /usr/local/bin/update_targets.py +RUN chmod +x /usr/local/bin/update_targets.py + +# exporter 配置文件 +COPY exporter_config.json ${PROMETHEUS_BASE_PATH}/exporter_config.json + # 自定义 prometheus 配置文件 COPY prometheus.yml /etc/prometheus/prometheus.yml +RUN chown nobody:nogroup ${PROMETHEUS_BASE_PATH}/exporter_config.json /etc/prometheus/prometheus.yml + USER root EXPOSE 9090 diff --git a/src/metric/prometheus/README.md b/src/metric/prometheus/build/README.md old mode 100644 new mode 100755 similarity index 62% rename from src/metric/prometheus/README.md rename to src/metric/prometheus/build/README.md index e9ba826..63c7046 --- a/src/metric/prometheus/README.md +++ b/src/metric/prometheus/build/README.md @@ -13,60 +13,6 @@ - 规则文件路径: `${PROMETHEUS_BASE_PATH}/rules/*.yml` - 监控目标文件路径: `${PROMETHEUS_BASE_PATH}/targets/` -## 使用示例 - -### 1. 使用默认路径 -```bash -docker run -d \ - --name prometheus \ - -p 9090:9090 \ - -v /host/prometheus/data:/private/argus/metric/prometheus \ - prometheus:latest -``` - -### 2. 自定义基础路径 -```bash -docker run -d \ - --name prometheus \ - -p 9090:9090 \ - -e PROMETHEUS_BASE_PATH=/custom/prometheus/path \ - -v /host/prometheus/data:/custom/prometheus/path \ - prometheus:latest -``` - -### 3. 
Kubernetes 部署示例 -```yaml -apiVersion: apps/v1 -kind: Deployment -metadata: - name: prometheus -spec: - replicas: 1 - selector: - matchLabels: - app: prometheus - template: - metadata: - labels: - app: prometheus - spec: - containers: - - name: prometheus - image: prometheus:latest - env: - - name: PROMETHEUS_BASE_PATH - value: "/data/prometheus" - ports: - - containerPort: 9090 - volumeMounts: - - name: prometheus-data - mountPath: /data/prometheus - volumes: - - name: prometheus-data - persistentVolumeClaim: - claimName: prometheus-pvc -``` - ## 目录结构 容器启动后会在 `${PROMETHEUS_BASE_PATH}` 下创建以下目录结构: @@ -118,9 +64,51 @@ chown -R 2133:2015 /path/to/prometheus/data chmod -R 755 /path/to/prometheus/data ``` +## 动态 Targets 配置 + +### 配置流程 + +1. **节点资源清单**: `nodes.json` 包含所有监控节点的基本信息 + ```json + [ + { + "node_id": "A1", + "user_id": "user01", + "ip": "1.2.3.4", + "hostname": "dev-node-1", + "labels": ["production", "us-west-1"] + } + ] + ``` + +2. **Exporter 配置**: `exporter_config.json` 定义各类型 exporter 的端口和标签模板 + - 支持 dcgm (GPU监控) 和 node (系统监控) 两种类型 + - 配置端口映射和标签模板规则 + +3. **自动拆分生成**: `update_targets.py` 脚本根据节点清单自动生成对应的 targets 文件 + - 读取 `nodes.json` 获取节点信息 + - 按 exporter 类型拆分生成 `targets/*_exporter.json` + - 应用标签模板,生成完整的监控目标配置 + +4. **热加载机制**: + - 脚本支持守护进程模式,定期检查 `nodes.json` 变化 + - 文件内容变化时自动重新生成 targets 配置 + - Prometheus 自动发现并重新加载新的监控目标 + +### 使用方式 + +```bash +# 单次更新(注意用户权限,此方法用于测试,但生成文件是 root 权限) +python3 update_targets.py --config nodes.json --targets-dir targets/ + +# 守护进程模式, 该进程托管于supervisor +python3 update_targets.py --daemon --check-interval 30 +``` + ## 注意事项 1. 确保挂载的目录有适当的读写权限 2. 配置文件会在容器启动时自动生成,无需手动创建 3. 可以通过修改环境变量 `PROMETHEUS_BASE_PATH` 来改变所有相关路径,无需重新构建镜像 4. 自定义路径的目录会在启动时自动创建并设置权限 +5. 
`nodes.json` 文件变化后,targets 配置会自动更新,无需手动干预 diff --git a/src/metric/prometheus/build/exporter_config.json b/src/metric/prometheus/build/exporter_config.json new file mode 100755 index 0000000..75cee90 --- /dev/null +++ b/src/metric/prometheus/build/exporter_config.json @@ -0,0 +1,41 @@ +{ + "exporters": { + "dcgm": { + "port": 9400, + "job_name": "dcgm", + "instance_prefix": "dcgm-exporter", + "description": "DCGM GPU 监控 exporter" + }, + "node": { + "port": 9100, + "job_name": "node", + "instance_prefix": "node-exporter", + "description": "Node 系统监控 exporter" + } + }, + "label_templates": { + "dcgm": { + "job": "dcgm", + "instance": "dcgm-exporter-{node_id}", + "node_id": "{node_id}", + "ip": "{ip}", + "hostname": "{hostname}", + "user_id": "{user_id}", + "tag": "{tag}" + }, + "node": { + "job": "node", + "instance": "node-exporter-{node_id}", + "node_id": "{node_id}", + "ip": "{ip}", + "hostname": "{hostname}", + "user_id": "{user_id}", + "tag": "{tag}" + } + }, + "settings": { + "backup_retention_days": 7, + "log_retention_days": 30, + "refresh_interval": "30s" + } +} \ No newline at end of file diff --git a/src/metric/prometheus/prometheus.yml b/src/metric/prometheus/build/prometheus.yml old mode 100644 new mode 100755 similarity index 100% rename from src/metric/prometheus/prometheus.yml rename to src/metric/prometheus/build/prometheus.yml diff --git a/src/metric/prometheus/start-prometheus-supervised.sh b/src/metric/prometheus/build/start-prometheus-supervised.sh old mode 100644 new mode 100755 similarity index 100% rename from src/metric/prometheus/start-prometheus-supervised.sh rename to src/metric/prometheus/build/start-prometheus-supervised.sh diff --git a/src/metric/prometheus/build/start-targets-updater.sh b/src/metric/prometheus/build/start-targets-updater.sh new file mode 100755 index 0000000..68ac11f --- /dev/null +++ b/src/metric/prometheus/build/start-targets-updater.sh @@ -0,0 +1,29 @@ +#!/bin/bash +set -euo pipefail + +echo "[INFO] Starting 
Prometheus Targets Updater under supervisor..." + +# 配置变量 +PROMETHEUS_BASE_PATH=${PROMETHEUS_BASE_PATH:-/private/argus/metric/prometheus} +NODES_CONFIG_FILE=${NODES_CONFIG_FILE:-${PROMETHEUS_BASE_PATH}/nodes.json} +TARGETS_DIR=${PROMETHEUS_BASE_PATH}/targets +EXPORTER_CONFIG_FILE=${EXPORTER_CONFIG_FILE:-${PROMETHEUS_BASE_PATH}/exporter_config.json} +CHECK_INTERVAL=${CHECK_INTERVAL:-30} +LOG_LEVEL=${LOG_LEVEL:-INFO} + +echo "[INFO] Prometheus base path: ${PROMETHEUS_BASE_PATH}" +echo "[INFO] Nodes config file: ${NODES_CONFIG_FILE}" +echo "[INFO] Targets directory: ${TARGETS_DIR}" +echo "[INFO] Exporter config file: ${EXPORTER_CONFIG_FILE}" +echo "[INFO] Check interval: ${CHECK_INTERVAL}s" +echo "[INFO] Log level: ${LOG_LEVEL}" + +mkdir -p "${TARGETS_DIR}" + +exec python3 /usr/local/bin/update_targets.py \ + --config "${NODES_CONFIG_FILE}" \ + --targets-dir "${TARGETS_DIR}" \ + --exporter-config "${EXPORTER_CONFIG_FILE}" \ + --log-level "${LOG_LEVEL}" \ + --daemon \ + --check-interval "${CHECK_INTERVAL}" diff --git a/src/metric/prometheus/supervisord.conf b/src/metric/prometheus/build/supervisord.conf old mode 100644 new mode 100755 similarity index 67% rename from src/metric/prometheus/supervisord.conf rename to src/metric/prometheus/build/supervisord.conf index 3aca877..7a5f530 --- a/src/metric/prometheus/supervisord.conf +++ b/src/metric/prometheus/build/supervisord.conf @@ -16,6 +16,18 @@ stopwaitsecs=30 killasgroup=true stopasgroup=true +[program:targets-updater] +command=/usr/local/bin/start-targets-updater.sh +user=nobody +stdout_logfile=/var/log/supervisor/targets_updater.log +stderr_logfile=/var/log/supervisor/targets_updater_error.log +autorestart=true +startretries=3 +startsecs=10 +stopwaitsecs=30 +killasgroup=true +stopasgroup=true + [unix_http_server] file=/var/run/supervisor.sock chmod=0700 diff --git a/src/metric/prometheus/build/update_targets.py b/src/metric/prometheus/build/update_targets.py new file mode 100755 index 0000000..91b5dc8 --- /dev/null 
#!/usr/bin/env python3
# Path of this file in the originating patch:
#   src/metric/prometheus/build/update_targets.py
"""
Dynamic updater for Prometheus targets files.

The script reads node information from a node inventory file (a JSON array)
and generates one ``<exporter_type>_exporter.json`` targets file per exporter
type declared in the exporter configuration file.
"""

import json
import os
import sys
import logging
import argparse
import time
import hashlib
from datetime import datetime
from typing import Dict, List, Any, Optional
from pathlib import Path


class PrometheusTargetsManager:
    """Generate Prometheus targets files from a node inventory file."""

    # Keys every node entry must carry to produce a target.
    REQUIRED_NODE_FIELDS = ('node_id', 'ip')

    def __init__(self, config_file: str, targets_dir: str, exporter_config_file: Optional[str] = None, log_level: str = "INFO"):
        """
        Initialise the manager.

        Args:
            config_file: Path of the node inventory file.
            targets_dir: Directory the targets files are written to.
            exporter_config_file: Path of the exporter configuration file.
            log_level: Logging level name.

        Raises:
            FileNotFoundError: The exporter configuration path is missing or
                does not exist.
            json.JSONDecodeError: The exporter configuration is not valid JSON.
            ValueError: The exporter configuration has the wrong shape.
        """
        self.config_file = Path(config_file)
        self.targets_dir = Path(targets_dir)
        self.exporter_config_file = Path(exporter_config_file) if exporter_config_file else None
        self.log_level = log_level
        self.last_mtime = 0  # last modification time (kept for compatibility; not read here)
        self.last_content_hash = None  # hash of the inventory content seen last

        # The log file lives inside targets_dir, so the directory has to exist
        # BEFORE logging is configured; otherwise opening the log file fails.
        self.targets_dir.mkdir(parents=True, exist_ok=True)

        self._setup_logging()

        # The exporter configuration is mandatory: log and re-raise on failure.
        try:
            full_config = self._load_exporter_config()
            self.exporter_configs = full_config.get('exporters', {})
            self.label_templates = full_config.get('label_templates', {})
        except Exception as e:
            self.logger.error(f"初始化失败,无法加载 exporter 配置: {e}")
            raise

    def _setup_logging(self):
        """Configure root logging (stdout + log file) and create ``self.logger``."""
        # Fall back to INFO instead of crashing on an unknown level name.
        level = getattr(logging, str(self.log_level).upper(), None)
        if not isinstance(level, int):
            level = logging.INFO

        file_error = None
        # basicConfig() is a no-op when the root logger already has handlers,
        # so only build handlers (and open the log file) when they will be used.
        if not logging.getLogger().handlers:
            handlers = [logging.StreamHandler(sys.stdout)]
            try:
                # Log messages contain non-ASCII text; pin the encoding.
                handlers.append(
                    logging.FileHandler(self.targets_dir / 'targets_update.log', encoding='utf-8')
                )
            except OSError as e:
                # An unwritable log file must not prevent the updater from running.
                file_error = e

            logging.basicConfig(
                level=level,
                format='%(asctime)s - %(levelname)s - %(message)s',
                handlers=handlers
            )

        self.logger = logging.getLogger(__name__)
        if file_error is not None:
            self.logger.warning(f"无法打开日志文件,仅输出到标准输出: {file_error}")

    def _load_exporter_config(self) -> Dict[str, Any]:
        """
        Load and validate the exporter configuration file.

        Returns:
            The full configuration dictionary.

        Raises:
            FileNotFoundError: The path is unset or the file does not exist.
            json.JSONDecodeError: The file is not valid JSON.
            ValueError: The top level or the ``exporters`` entry is not an
                object, or ``exporters`` is empty.
        """
        if not self.exporter_config_file:
            raise FileNotFoundError("Exporter 配置文件路径未指定")

        if not self.exporter_config_file.exists():
            raise FileNotFoundError(f"Exporter 配置文件不存在: {self.exporter_config_file}")

        try:
            with open(self.exporter_config_file, 'r', encoding='utf-8') as f:
                config = json.load(f)

            if not isinstance(config, dict):
                raise ValueError("Exporter 配置文件必须是 JSON 对象格式")

            exporters = config.get('exporters', {})
            if not isinstance(exporters, dict):
                raise ValueError("exporters 配置必须是对象格式")

            if not exporters:
                raise ValueError("exporters 配置不能为空")

            self.logger.info(f"成功加载 exporter 配置: {len(exporters)} 个 exporter")
            return config

        except json.JSONDecodeError as e:
            self.logger.error(f"Exporter 配置文件 JSON 解析错误: {e}")
            raise
        except Exception as e:
            self.logger.error(f"加载 exporter 配置失败: {e}")
            raise

    def load_nodes_config(self) -> List[Dict[str, Any]]:
        """
        Load the node inventory file.

        Returns:
            The list of node entries, or an empty list when the file is
            missing, unreadable, not valid JSON, or not a JSON array.
        """
        try:
            if not self.config_file.exists():
                self.logger.warning(f"节点配置文件不存在: {self.config_file}")
                return []

            with open(self.config_file, 'r', encoding='utf-8') as f:
                nodes = json.load(f)

            if not isinstance(nodes, list):
                self.logger.error("节点配置必须是数组格式")
                return []

            self.logger.info(f"成功加载 {len(nodes)} 个节点配置")
            return nodes

        except json.JSONDecodeError as e:
            self.logger.error(f"JSON 解析错误: {e}")
            return []
        except Exception as e:
            self.logger.error(f"加载节点配置失败: {e}")
            return []

    def generate_targets(self, nodes: List[Dict[str, Any]], exporter_type: str) -> List[Dict[str, Any]]:
        """
        Build the targets list for one exporter type.

        Args:
            nodes: Node entries from the inventory file.
            exporter_type: Exporter type key (for example ``dcgm`` or ``node``).

        Returns:
            A list of ``{"targets": [...], "labels": {...}}`` dictionaries;
            empty when the exporter type is unknown or has no usable port.
        """
        if exporter_type not in self.exporter_configs:
            self.logger.error(f"不支持的 exporter 类型: {exporter_type}")
            return []

        config = self.exporter_configs[exporter_type]
        port = config.get('port') if isinstance(config, dict) else None
        if port is None:
            # Without a port no address can be built; skip this exporter
            # rather than raising from inside the per-node loop.
            self.logger.error(f"exporter 配置缺少 port 字段: {exporter_type}")
            return []

        label_template = self.label_templates.get(exporter_type, {})
        targets = []

        for node in nodes:
            # Entries must be objects carrying every required field.
            if not isinstance(node, dict) or not all(key in node for key in self.REQUIRED_NODE_FIELDS):
                self.logger.warning(f"节点配置缺少必要字段,跳过: {node}")
                continue

            target_address = f"{node['ip']}:{port}"

            # Variables available to label templates.
            context = {
                'node_id': node['node_id'],
                'ip': node['ip'],
                'hostname': node.get('hostname', ''),
                'user_id': node.get('user_id', ''),
                'tag': self._join_labels(node.get('labels', []))
            }

            labels = {}
            for label_key, template_value in label_template.items():
                if isinstance(template_value, str) and '{' in template_value:
                    # Template string: substitute the context variables.
                    labels[label_key] = self._render_label_template(template_value, context)
                elif isinstance(template_value, str):
                    # Fixed string value.
                    labels[label_key] = template_value
                else:
                    # Label values must be strings in the targets file.
                    labels[label_key] = str(template_value)

            targets.append({
                "targets": [target_address],
                "labels": labels
            })

        self.logger.info(f"为 {exporter_type} exporter 生成了 {len(targets)} 个 targets")
        return targets

    def write_targets_file(self, targets: List[Dict[str, Any]], exporter_type: str) -> None:
        """
        Write the targets file for one exporter type.

        The JSON is written to a temporary sibling file and then moved over
        the destination with ``os.replace`` so a concurrent reader never sees
        a truncated or half-written file.

        Args:
            targets: Targets list to serialise.
            exporter_type: Exporter type; the file is named
                ``<exporter_type>_exporter.json``.

        Raises:
            Exception: Any error raised while writing or replacing the file
                is logged and re-raised.
        """
        filename = f"{exporter_type}_exporter.json"
        filepath = self.targets_dir / filename
        # ".tmp" suffix keeps the scratch file from matching "*.json" globs.
        tmp_path = filepath.with_name(f"{filename}.tmp")

        try:
            with open(tmp_path, 'w', encoding='utf-8') as f:
                json.dump(targets, f, indent=2, ensure_ascii=False)

            os.replace(tmp_path, filepath)
            self.logger.info(f"成功写入 targets 文件: {filepath}")

        except Exception as e:
            self.logger.error(f"写入 targets 文件失败: {e}")
            try:
                tmp_path.unlink()
            except OSError:
                # Best-effort cleanup: the scratch file may never have been created.
                pass
            raise

    def update_all_targets(self) -> None:
        """
        Regenerate the targets file of every configured exporter type.

        Raises:
            Exception: Errors from generating or writing targets are logged
                and re-raised.
        """
        try:
            nodes = self.load_nodes_config()

            if not nodes:
                # NOTE(review): existing targets files are left untouched here,
                # so targets of removed nodes persist when the inventory
                # becomes empty - confirm this is intended.
                self.logger.warning("没有找到任何节点配置")
                return

            for exporter_type in self.exporter_configs:
                targets = self.generate_targets(nodes, exporter_type)
                if targets:  # only write a file when there is something to write
                    self.write_targets_file(targets, exporter_type)

            self.logger.info("所有 targets 文件更新完成")

        except Exception as e:
            self.logger.error(f"更新 targets 失败: {e}")
            raise

    def _calculate_file_hash(self, file_path: Path) -> str:
        """
        Compute the MD5 hex digest of a file's content.

        The digest is only used for change detection, not for security.

        Args:
            file_path: File to hash.

        Returns:
            The hex digest, or an empty string when the file cannot be read.
        """
        try:
            with open(file_path, 'rb') as f:
                content = f.read()
            return hashlib.md5(content).hexdigest()
        except Exception as e:
            self.logger.error(f"计算文件哈希失败: {e}")
            return ""

    def _render_label_template(self, template: str, context: Dict[str, str]) -> str:
        """
        Render a label template with ``str.format``.

        Args:
            template: Template string such as ``"dcgm-exporter-{node_id}"``.
            context: Variables available to the template.

        Returns:
            The rendered string, or the unrendered template when rendering
            fails (a warning is logged).
        """
        try:
            return template.format(**context)
        except KeyError as e:
            self.logger.warning(f"模板渲染失败,缺少变量 {e}: {template}")
            return template
        except Exception as e:
            self.logger.warning(f"模板渲染失败: {e}")
            return template

    def _join_labels(self, labels_list: List[str]) -> str:
        """
        Join a node's labels into one comma-separated string.

        ``None`` entries and entries that are empty after stripping are
        dropped; other non-string entries are converted with ``str``. A bare
        scalar (for example a single string) is treated as a one-element list.

        Args:
            labels_list: Labels of a node.

        Returns:
            The comma-separated labels, or an empty string.
        """
        if not labels_list:
            return ""

        if not isinstance(labels_list, (list, tuple)):
            # A lone string must not be iterated character by character.
            labels_list = [labels_list]

        stripped = (str(label).strip() for label in labels_list if label is not None)
        return ",".join(label for label in stripped if label)

    def check_file_changed(self) -> bool:
        """
        Report whether the node inventory content changed since the last call.

        The first successful check records the hash and reports a change so
        that an initial update is triggered.

        Returns:
            True when the content changed (or on the first check); False when
            it is unchanged, the file is missing, or hashing failed.
        """
        try:
            if not self.config_file.exists():
                return False

            current_hash = self._calculate_file_hash(self.config_file)
            if not current_hash:
                return False

            # First check: remember the hash and trigger the initial update.
            if self.last_content_hash is None:
                self.last_content_hash = current_hash
                self.logger.info("首次检查,记录文件内容哈希并触发初始更新")
                return True

            if current_hash != self.last_content_hash:
                self.last_content_hash = current_hash
                self.logger.info("检测到文件内容变化")
                return True

            return False

        except Exception as e:
            self.logger.error(f"检查文件变化失败: {e}")
            return False

    def run_daemon(self, check_interval: int = 30) -> None:
        """
        Poll the node inventory forever and regenerate targets on change.

        Args:
            check_interval: Seconds between checks; values below 1 are
                raised to 1 so the loop never spins or passes a negative
                value to ``time.sleep``.

        Raises:
            Exception: Any error from an update is logged and re-raised.
        """
        if check_interval < 1:
            self.logger.warning(f"检查间隔无效 ({check_interval}),使用 1 秒")
            check_interval = 1

        self.logger.info(f"启动守护进程模式,检查间隔: {check_interval}秒")

        try:
            while True:
                if self.check_file_changed():
                    self.logger.info("检测到配置文件变化,开始更新 targets")
                    self.update_all_targets()
                else:
                    self.logger.debug("配置文件无变化,跳过更新")

                time.sleep(check_interval)

        except KeyboardInterrupt:
            self.logger.info("收到中断信号,正在退出...")
        except Exception as e:
            self.logger.error(f"守护进程运行错误: {e}")
            raise


def main():
    """Parse command-line arguments and run a single update or the daemon loop."""
    parser = argparse.ArgumentParser(description="Prometheus Targets 动态更新脚本 (精简版)")
    parser.add_argument(
        "--config",
        default="/private/argus/metric/prometheus/nodes.json",
        help="节点配置文件路径 (默认: /private/argus/metric/prometheus/nodes.json)"
    )
    parser.add_argument(
        "--targets-dir",
        default="/private/argus/metric/prometheus/targets",
        help="targets 文件输出目录 (默认: /private/argus/metric/prometheus/targets)"
    )
    parser.add_argument(
        "--exporter-config",
        default="/private/argus/metric/prometheus/exporter_config.json",
        help="exporter 配置文件路径 (默认: /private/argus/metric/prometheus/exporter_config.json)"
    )
    parser.add_argument(
        "--log-level",
        choices=["DEBUG", "INFO", "WARNING", "ERROR"],
        default="INFO",
        help="日志级别 (默认: INFO)"
    )
    parser.add_argument(
        "--daemon",
        action="store_true",
        help="以守护进程模式运行"
    )
    parser.add_argument(
        "--check-interval",
        type=int,
        default=30,
        help="守护进程模式下的检查间隔(秒,默认: 30)"
    )

    args = parser.parse_args()

    try:
        manager = PrometheusTargetsManager(
            config_file=args.config,
            targets_dir=args.targets_dir,
            exporter_config_file=args.exporter_config,
            log_level=args.log_level
        )

        if args.daemon:
            # Daemon mode: poll until interrupted.
            manager.run_daemon(args.check_interval)
        else:
            # One-shot mode.
            manager.update_all_targets()
            print("成功更新所有 exporter targets")

    except Exception as e:
        print(f"错误: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()