diff --git a/src/agent/.gitignore b/src/agent/.gitignore new file mode 100644 index 0000000..a8d8cd8 --- /dev/null +++ b/src/agent/.gitignore @@ -0,0 +1,3 @@ +build/ +*.egg-info/ +__pycache__/ diff --git a/src/agent/Dockerfile b/src/agent/Dockerfile deleted file mode 100644 index c4dc076..0000000 --- a/src/agent/Dockerfile +++ /dev/null @@ -1,23 +0,0 @@ -FROM python:3.11-slim - -SHELL ["/bin/bash", "-c"] - -ARG PIP_INDEX_URL= -ENV PIP_NO_CACHE_DIR=1 \ - PYTHONUNBUFFERED=1 \ - PYTHONPATH=/app - -WORKDIR /app - -COPY requirements.txt ./ -RUN set -euxo pipefail \ - && python -m pip install --upgrade pip \ - && if [[ -n "$PIP_INDEX_URL" ]]; then \ - PIP_INDEX_URL="$PIP_INDEX_URL" python -m pip install -r requirements.txt; \ - else \ - python -m pip install -r requirements.txt; \ - fi - -COPY app ./app - -CMD ["python", "-m", "app.main"] diff --git a/src/agent/README.md b/src/agent/README.md index 04475db..eb576b5 100644 --- a/src/agent/README.md +++ b/src/agent/README.md @@ -6,26 +6,34 @@ Python agent that registers with the Argus master service, persists node informa ```bash cd src/agent -./scripts/build_images.sh # builds argus-agent:dev +./scripts/build_binary.sh # produces dist/argus-agent ``` -Runtime expects a configuration file (generated by installer) at `/private/argus/agent//config`. Key fields: +The resulting executable (`dist/argus-agent`) bundles the runtime via PyInstaller. Runtime configuration is now derived from environment variables and the container hostname—no local config file is required. -- `HOSTNAME`, `NODE_FILE`, `VERSION` -- `MASTER_ENDPOINT` (e.g. `http://master:3000`) -- `REPORT_INTERVAL_SECONDS` -- `SUBMODULE_HEALTH_FILE_DIR` (supports `{hostname}` placeholder) -- optional `GPU_NUMBER` +Required variables: -Health files live under `/private/argus/agent/health//` and must follow `-*.json` naming (e.g. `log-fluentbit.json`). The agent sends parsed JSON objects keyed by file stem. 
+- `MASTER_ENDPOINT`:Master 服务的完整地址,若未带协议会自动补全为 `http://`。 +- `REPORT_INTERVAL_SECONDS`:状态上报周期,可选,默认 60。 + +Additional overrides: + +- `AGENT_HOSTNAME`:可选,若需要覆盖容器 `hostname`。 + +At startup the agent会读取容器主机名(或 `AGENT_HOSTNAME` 覆盖值)并固定以下路径: + +- 节点状态持久化:`/private/argus/agent/<hostname>/node.json` +- 子模块健康目录:`/private/argus/agent/health/<hostname>/` + +健康文件需按 `<模块名前缀>-*.json` 命名,例如 `log-fluentbit.json`,文件内容会以文件名前缀为键写入上报 payload。 ## Tests -Docker-based E2E stack (master + agent): +Docker 端到端测试会启动 master 容器与一个普通 `ubuntu:24.04` 容器,在其中挂载并执行打包后的 agent(通过环境变量注入 `MASTER_ENDPOINT` 与 `REPORT_INTERVAL_SECONDS`): ```bash cd src/agent/tests ./scripts/00_e2e_test.sh ``` -The scripts provision configs/health directories under `tests/private/` and clean up via `07_down.sh`. +测试脚本会自动调用 `../scripts/build_binary.sh` 生成可执行文件,并在 `tests/private/` 下准备配置与健康目录,最后通过 `07_down.sh` 清理环境。 diff --git a/src/agent/app/collector.py b/src/agent/app/collector.py index c733fa4..1b61caa 100644 --- a/src/agent/app/collector.py +++ b/src/agent/app/collector.py @@ -27,7 +27,7 @@ def collect_metadata(config: AgentConfig) -> Dict[str, Any]: "instance": instance, "cpu_number": _detect_cpu_count(), "memory_in_bytes": _detect_memory_bytes(), - "gpu_number": _detect_gpu_count(config), + "gpu_number": _detect_gpu_count(), } return meta @@ -70,11 +70,8 @@ def _detect_memory_bytes() -> int: return 0 -def _detect_gpu_count(config: AgentConfig) -> int: - """采集 GPU 数量,可被配置覆盖。""" - if config.gpu_number_override is not None: - return config.gpu_number_override - +def _detect_gpu_count() -> int: + """采集 GPU 数量,如无法探测则默认为 0。""" try: proc = subprocess.run( ["nvidia-smi", "-L"], diff --git a/src/agent/app/config.py b/src/agent/app/config.py index 95f2d81..f5359f8 100644 --- a/src/agent/app/config.py +++ b/src/agent/app/config.py @@ -1,8 +1,14 @@ from __future__ import annotations import os +import socket from dataclasses import dataclass from pathlib import Path +from typing import Final + +from .version import VERSION + 
+DEFAULT_REPORT_INTERVAL_SECONDS: Final[int] = 60 @dataclass(frozen=True) @@ -12,90 +18,57 @@ class AgentConfig: version: str master_endpoint: str report_interval_seconds: int - health_dir_template: str - gpu_number_override: int | None + health_dir: str request_timeout_seconds: int = 10 - @property - def health_dir(self) -> str: - return self.health_dir_template.format(hostname=self.hostname) + +def _normalise_master_endpoint(value: str) -> str: + value = value.strip() + if not value: + raise ValueError("MASTER_ENDPOINT environment variable is required") + if not value.startswith("http://") and not value.startswith("https://"): + value = f"http://{value}" + return value.rstrip("/") -def _parse_config_file(path: str) -> dict[str, str]: - result: dict[str, str] = {} +def _read_report_interval(raw_value: str | None) -> int: + if raw_value is None or raw_value.strip() == "": + return DEFAULT_REPORT_INTERVAL_SECONDS try: - with open(path, "r", encoding="utf-8") as handle: - for raw_line in handle: - line = raw_line.strip() - if not line or line.startswith("#"): - continue - if "=" not in line: - continue - key, value = line.split("=", 1) - result[key.strip().upper()] = value.strip() - except FileNotFoundError: - raise FileNotFoundError(f"Agent config file not found: {path}") from None - return result - - -def load_config(path: str) -> AgentConfig: - """读取配置文件并结合环境变量,返回 AgentConfig。""" - config_values = _parse_config_file(path) - force_env = os.environ.get("AGENT_FORCE_ENV", "0").lower() in {"1", "true", "yes"} - - def read_key(key: str, default: str | None = None, *, required: bool = False) -> str: - env_key = f"AGENT_{key}" - if env_key in os.environ: - return os.environ[env_key] - if force_env and key in os.environ: - return os.environ[key] - if key in config_values: - return config_values[key] - if default is not None: - return default - if required: - raise ValueError(f"Missing required configuration key: {key}") - return "" - - hostname = read_key("HOSTNAME", 
required=True) - node_file = read_key("NODE_FILE", f"/private/argus/agent/{hostname}/node.json") - version = read_key("VERSION", "1.0.0") - master_endpoint = read_key("MASTER_ENDPOINT", required=True) - report_interval_raw = read_key("REPORT_INTERVAL_SECONDS", "60") - health_dir_template = read_key( - "SUBMODULE_HEALTH_FILE_DIR", - f"/private/argus/agent/health/{{hostname}}/", - ) - gpu_override_raw = read_key("GPU_NUMBER", "") - - try: - report_interval_seconds = int(report_interval_raw) + interval = int(raw_value) except ValueError as exc: raise ValueError("REPORT_INTERVAL_SECONDS must be an integer") from exc - if report_interval_seconds <= 0: + if interval <= 0: raise ValueError("REPORT_INTERVAL_SECONDS must be positive") + return interval - gpu_override = None - if gpu_override_raw: - try: - gpu_override = int(gpu_override_raw) - except ValueError as exc: - raise ValueError("GPU_NUMBER must be an integer when provided") from exc - if gpu_override < 0: - raise ValueError("GPU_NUMBER must be non-negative") - if not master_endpoint.startswith("http://") and not master_endpoint.startswith("https://"): - master_endpoint = f"http://{master_endpoint}" +def _resolve_hostname() -> str: + return os.environ.get("AGENT_HOSTNAME") or socket.gethostname() + + +def load_config() -> AgentConfig: + """从环境变量推导配置,移除了外部配置文件依赖。""" + + hostname = _resolve_hostname() + node_file = f"/private/argus/agent/{hostname}/node.json" + health_dir = f"/private/argus/agent/health/{hostname}/" + + master_endpoint_env = os.environ.get("MASTER_ENDPOINT") + if master_endpoint_env is None: + raise ValueError("MASTER_ENDPOINT environment variable is not set") + master_endpoint = _normalise_master_endpoint(master_endpoint_env) + + report_interval_seconds = _read_report_interval(os.environ.get("REPORT_INTERVAL_SECONDS")) Path(node_file).parent.mkdir(parents=True, exist_ok=True) - Path(health_dir_template.format(hostname=hostname)).mkdir(parents=True, exist_ok=True) + 
Path(health_dir).mkdir(parents=True, exist_ok=True) return AgentConfig( hostname=hostname, node_file=node_file, - version=version, - master_endpoint=master_endpoint.rstrip("/"), + version=VERSION, + master_endpoint=master_endpoint, report_interval_seconds=report_interval_seconds, - health_dir_template=health_dir_template, - gpu_number_override=gpu_override, + health_dir=health_dir, ) diff --git a/src/agent/app/main.py b/src/agent/app/main.py index 7d87595..c5e2ba0 100644 --- a/src/agent/app/main.py +++ b/src/agent/app/main.py @@ -1,8 +1,6 @@ from __future__ import annotations -import argparse import signal -import sys import time from datetime import datetime, timezone from typing import Optional @@ -32,28 +30,15 @@ class StopSignal: return self._stop -def parse_args(argv: list[str]) -> argparse.Namespace: - parser = argparse.ArgumentParser(description="Argus agent") - parser.add_argument( - "--config", - dest="config_path", - default=None, - help="Path to agent config file", - ) - return parser.parse_args(argv) - - -def main(argv: Optional[list[str]] = None) -> int: +def main(argv: Optional[list[str]] = None) -> int: # noqa: ARG001 - 保留签名以兼容入口调用 setup_logging() - args = parse_args(argv or sys.argv[1:]) stop_signal = StopSignal() signal.signal(signal.SIGTERM, stop_signal.set) signal.signal(signal.SIGINT, stop_signal.set) try: - config_path = args.config_path or _default_config_path() - config = load_config(config_path) + config = load_config() except Exception as exc: LOGGER.error("Failed to load configuration", extra={"error": str(exc)}) return 1 @@ -89,13 +74,6 @@ def main(argv: Optional[list[str]] = None) -> int: return 0 -def _default_config_path() -> str: - from socket import gethostname - - hostname = gethostname() - return f"/private/argus/agent/{hostname}/config" - - def _register_with_retry( client: AgentClient, config: AgentConfig, diff --git a/src/agent/app/version.py b/src/agent/app/version.py new file mode 100644 index 0000000..97a14f8 --- /dev/null 
+++ b/src/agent/app/version.py @@ -0,0 +1,69 @@ +from __future__ import annotations + +import os +import sys +from pathlib import Path +from typing import Optional + +import importlib.metadata + +try: + import tomllib +except ModuleNotFoundError: # pragma: no cover + import tomli as tomllib # type: ignore[no-redef] + + +def _candidate_paths() -> list[Path]: + paths = [] + bundle_dir: Optional[str] = getattr(sys, "_MEIPASS", None) + if bundle_dir: + paths.append(Path(bundle_dir) / "pyproject.toml") + paths.append(Path(__file__).resolve().parent.parent / "pyproject.toml") + paths.append(Path(__file__).resolve().parent / "pyproject.toml") + paths.append(Path.cwd() / "pyproject.toml") + return paths + + +def _read_from_pyproject() -> Optional[str]: + for path in _candidate_paths(): + if not path.exists(): + continue + try: + with path.open("rb") as handle: + data = tomllib.load(handle) + except (OSError, tomllib.TOMLDecodeError): + continue + project = data.get("project") + if isinstance(project, dict): + version = project.get("version") + if isinstance(version, str): + return version + tool = data.get("tool") + if isinstance(tool, dict): + argus_cfg = tool.get("argus") + if isinstance(argus_cfg, dict): + version = argus_cfg.get("version") + if isinstance(version, str): + return version + return None + + +def _detect_version() -> str: + try: + return importlib.metadata.version("argus-agent") + except importlib.metadata.PackageNotFoundError: + pass + override = os.environ.get("AGENT_VERSION_OVERRIDE") + if override: + return override + fallback = _read_from_pyproject() + if fallback: + return fallback + return "0.0.0" + + +VERSION: str = _detect_version() + + +def get_version() -> str: + return VERSION diff --git a/src/agent/dist/argus-agent b/src/agent/dist/argus-agent new file mode 100755 index 0000000..56920df Binary files /dev/null and b/src/agent/dist/argus-agent differ diff --git a/src/agent/entry.py b/src/agent/entry.py new file mode 100644 index 0000000..39197b1 
--- /dev/null +++ b/src/agent/entry.py @@ -0,0 +1,10 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +import sys + +from app.main import main as agent_main + + +if __name__ == "__main__": + sys.exit(agent_main()) diff --git a/src/agent/images/.gitkeep b/src/agent/images/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/src/agent/pyproject.toml b/src/agent/pyproject.toml new file mode 100644 index 0000000..627766e --- /dev/null +++ b/src/agent/pyproject.toml @@ -0,0 +1,19 @@ +[project] +name = "argus-agent" +version = "1.1.0" +description = "Argus agent binary" +readme = "README.md" +requires-python = ">=3.11" +dependencies = [ + "requests==2.31.0" +] + +[build-system] +requires = ["setuptools>=69", "wheel"] +build-backend = "setuptools.build_meta" + +[tool.argus] +entry = "app.main:main" + +[tool.setuptools] +packages = ["app"] diff --git a/src/agent/requirements.txt b/src/agent/requirements.txt deleted file mode 100644 index 2c24336..0000000 --- a/src/agent/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -requests==2.31.0 diff --git a/src/agent/scripts/build_binary.sh b/src/agent/scripts/build_binary.sh new file mode 100755 index 0000000..50671d4 --- /dev/null +++ b/src/agent/scripts/build_binary.sh @@ -0,0 +1,42 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +MODULE_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" +BUILD_ROOT="$MODULE_ROOT/build" +DIST_DIR="$MODULE_ROOT/dist" +PYINSTALLER_BUILD="$BUILD_ROOT/pyinstaller" +VENV_DIR="$BUILD_ROOT/venv" + +mkdir -p "$PYINSTALLER_BUILD" +mkdir -p "$DIST_DIR" + +if [[ ! 
-d "$VENV_DIR" ]]; then + python3 -m venv "$VENV_DIR" +fi + +# shellcheck disable=SC1091 +source "$VENV_DIR/bin/activate" + +pip install --upgrade pip +pip install "$MODULE_ROOT" +pip install "pyinstaller==6.6.0" + +rm -rf "$PYINSTALLER_BUILD"/* +rm -f "$DIST_DIR/argus-agent" + +pyinstaller \ + --clean \ + --onefile \ + --name argus-agent \ + --distpath "$DIST_DIR" \ + --workpath "$PYINSTALLER_BUILD/work" \ + --specpath "$PYINSTALLER_BUILD/spec" \ + --add-data "$MODULE_ROOT/pyproject.toml:." \ + "$MODULE_ROOT/entry.py" + +chmod +x "$DIST_DIR/argus-agent" + +deactivate + +echo "[INFO] Agent binary generated at $DIST_DIR/argus-agent" diff --git a/src/agent/scripts/build_images.sh b/src/agent/scripts/build_images.sh deleted file mode 100755 index 8bf88a5..0000000 --- a/src/agent/scripts/build_images.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -usage() { - echo "Usage: $0 [--intranet] [--tag ]" >&2 -} - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." 
&& pwd)" -IMAGE_TAG="${IMAGE_TAG:-argus-agent:dev}" -BUILD_ARGS=() - -while [[ "$#" -gt 0 ]]; do - case "$1" in - --intranet) - INTRANET_INDEX="${INTRANET_INDEX:-https://pypi.tuna.tsinghua.edu.cn/simple}" - BUILD_ARGS+=("--build-arg" "PIP_INDEX_URL=${INTRANET_INDEX}") - shift - ;; - --tag) - [[ $# -ge 2 ]] || { usage; exit 1; } - IMAGE_TAG="$2" - shift 2 - ;; - -h|--help) - usage - exit 0 - ;; - *) - echo "Unknown option: $1" >&2 - usage - exit 1 - ;; - esac - done - -echo "[INFO] Building image $IMAGE_TAG" -docker build "${BUILD_ARGS[@]}" -t "$IMAGE_TAG" "$PROJECT_ROOT" -echo "[OK] Image $IMAGE_TAG built" diff --git a/src/agent/scripts/load_images.sh b/src/agent/scripts/load_images.sh deleted file mode 100755 index 4b8c423..0000000 --- a/src/agent/scripts/load_images.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -usage() { - echo "Usage: $0 [--file ]" >&2 -} - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" -DEFAULT_INPUT="$PROJECT_ROOT/images/argus-agent-dev.tar" -IMAGE_TAR="$DEFAULT_INPUT" - -while [[ "$#" -gt 0 ]]; do - case "$1" in - --file) - [[ $# -ge 2 ]] || { usage; exit 1; } - IMAGE_TAR="$2" - shift 2 - ;; - -h|--help) - usage - exit 0 - ;; - *) - echo "Unknown option: $1" >&2 - usage - exit 1 - ;; - esac - done - -if [[ ! -f "$IMAGE_TAR" ]]; then - echo "[ERROR] Image tarball not found: $IMAGE_TAR" >&2 - exit 1 -fi - -echo "[INFO] Loading image from $IMAGE_TAR" -docker image load -i "$IMAGE_TAR" -echo "[OK] Image loaded" diff --git a/src/agent/scripts/save_images.sh b/src/agent/scripts/save_images.sh deleted file mode 100755 index e629de1..0000000 --- a/src/agent/scripts/save_images.sh +++ /dev/null @@ -1,41 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -usage() { - echo "Usage: $0 [--tag ] [--output ]" >&2 -} - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." 
&& pwd)" -DEFAULT_OUTPUT="$PROJECT_ROOT/images/argus-agent-dev.tar" -IMAGE_TAG="${IMAGE_TAG:-argus-agent:dev}" -OUTPUT_PATH="$DEFAULT_OUTPUT" - -while [[ "$#" -gt 0 ]]; do - case "$1" in - --tag) - [[ $# -ge 2 ]] || { usage; exit 1; } - IMAGE_TAG="$2" - shift 2 - ;; - --output) - [[ $# -ge 2 ]] || { usage; exit 1; } - OUTPUT_PATH="$2" - shift 2 - ;; - -h|--help) - usage - exit 0 - ;; - *) - echo "Unknown option: $1" >&2 - usage - exit 1 - ;; - esac - done - -mkdir -p "$(dirname "$OUTPUT_PATH")" -echo "[INFO] Saving image $IMAGE_TAG to $OUTPUT_PATH" -docker image save "$IMAGE_TAG" -o "$OUTPUT_PATH" -echo "[OK] Image saved" diff --git a/src/agent/tests/docker-compose.yml b/src/agent/tests/docker-compose.yml index 1442bb4..8386ac2 100644 --- a/src/agent/tests/docker-compose.yml +++ b/src/agent/tests/docker-compose.yml @@ -13,14 +13,20 @@ services: - ./private/argus/metric/prometheus:/private/argus/metric/prometheus agent: - image: argus-agent:dev + image: ubuntu:24.04 container_name: argus-agent-e2e hostname: dev-e2euser-e2einst-pod-0 depends_on: - master + environment: + - MASTER_ENDPOINT=http://master:3000 + - REPORT_INTERVAL_SECONDS=2 volumes: - ./private/argus/agent/dev-e2euser-e2einst-pod-0:/private/argus/agent/dev-e2euser-e2einst-pod-0 - ./private/argus/agent/health/dev-e2euser-e2einst-pod-0:/private/argus/agent/health/dev-e2euser-e2einst-pod-0 + - ../dist/argus-agent:/usr/local/bin/argus-agent:ro + entrypoint: + - /usr/local/bin/argus-agent networks: default: diff --git a/src/agent/tests/scripts/01_bootstrap.sh b/src/agent/tests/scripts/01_bootstrap.sh index 410d95a..95ca096 100755 --- a/src/agent/tests/scripts/01_bootstrap.sh +++ b/src/agent/tests/scripts/01_bootstrap.sh @@ -20,25 +20,23 @@ mkdir -p "$MASTER_PRIVATE_DIR" mkdir -p "$METRIC_PRIVATE_DIR" mkdir -p "$TMP_ROOT" -cat > "$AGENT_CONFIG_DIR/config" </dev/null ./scripts/build_images.sh --tag argus-master:dev popd >/dev/null +AGENT_BINARY="$AGENT_ROOT/dist/argus-agent" + pushd "$AGENT_ROOT" >/dev/null 
-./scripts/build_images.sh --tag argus-agent:dev +./scripts/build_binary.sh popd >/dev/null +if [[ ! -x "$AGENT_BINARY" ]]; then + echo "[ERROR] Agent binary not found at $AGENT_BINARY" >&2 + exit 1 +fi + +echo "$AGENT_BINARY" > "$TMP_ROOT/agent_binary_path" + echo "[INFO] Agent E2E bootstrap complete" diff --git a/src/agent/tests/scripts/02_up.sh b/src/agent/tests/scripts/02_up.sh index 06c1e8e..f1a1234 100755 --- a/src/agent/tests/scripts/02_up.sh +++ b/src/agent/tests/scripts/02_up.sh @@ -4,6 +4,19 @@ set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" TEST_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" +TMP_ROOT="$TEST_ROOT/tmp" + +if [[ ! -f "$TMP_ROOT/agent_binary_path" ]]; then + echo "[ERROR] Agent binary path missing; run 01_bootstrap.sh first" >&2 + exit 1 +fi + +AGENT_BINARY="$(cat "$TMP_ROOT/agent_binary_path")" +if [[ ! -x "$AGENT_BINARY" ]]; then + echo "[ERROR] Agent binary not executable: $AGENT_BINARY" >&2 + exit 1 +fi + compose() { if docker compose version >/dev/null 2>&1; then docker compose "$@" diff --git a/src/agent/tests/scripts/06_restart_agent_and_reregister.sh b/src/agent/tests/scripts/06_restart_agent_and_reregister.sh index 43e5ba5..6a517b5 100755 --- a/src/agent/tests/scripts/06_restart_agent_and_reregister.sh +++ b/src/agent/tests/scripts/06_restart_agent_and_reregister.sh @@ -10,6 +10,17 @@ AGENT_HOSTNAME="dev-e2euser-e2einst-pod-0" NETWORK_NAME="tests_default" NEW_AGENT_IP="172.28.0.200" +if [[ ! -f "$TMP_ROOT/agent_binary_path" ]]; then + echo "[ERROR] Agent binary path missing; rerun bootstrap" >&2 + exit 1 +fi + +AGENT_BINARY="$(cat "$TMP_ROOT/agent_binary_path")" +if [[ ! -x "$AGENT_BINARY" ]]; then + echo "[ERROR] Agent binary not executable: $AGENT_BINARY" >&2 + exit 1 +fi + compose() { if docker compose version >/dev/null 2>&1; then docker compose "$@" @@ -57,14 +68,17 @@ if ! 
docker run -d \ --ip "$NEW_AGENT_IP" \ -v "$AGENT_DIR:/private/argus/agent/$AGENT_HOSTNAME" \ -v "$HEALTH_DIR:/private/argus/agent/health/$AGENT_HOSTNAME" \ - argus-agent:dev \ + -v "$AGENT_BINARY:/usr/local/bin/argus-agent:ro" \ + -e MASTER_ENDPOINT=http://master:3000 \ + -e REPORT_INTERVAL_SECONDS=2 \ + ubuntu:24.04 \ sleep 300 >/dev/null; then echo "[ERROR] Failed to start agent container with custom IP" >&2 exit 1 fi # 在容器内启动真实 agent 进程 -if ! docker exec -d argus-agent-e2e python -m app.main; then +if ! docker exec -d argus-agent-e2e /usr/local/bin/argus-agent; then echo "[ERROR] Failed to spawn agent process inside container" >&2 exit 1 fi