from __future__ import annotations

import os
import socket
from dataclasses import dataclass
from pathlib import Path
from typing import Final

from .state import load_node_state
from .version import VERSION
from .log import get_logger

# Reporting interval applied when REPORT_INTERVAL_SECONDS is unset or blank.
DEFAULT_REPORT_INTERVAL_SECONDS: Final[int] = 60

LOGGER = get_logger("argus.agent.config")


@dataclass(frozen=True)
class AgentConfig:
    """Immutable bundle of agent settings, as assembled by ``load_config``."""

    # AGENT_HOSTNAME if set and non-empty, otherwise socket.gethostname().
    hostname: str
    # Metadata triple resolved by _resolve_metadata_fields.
    environment: str
    user: str
    instance: str
    # "/private/argus/agent/<hostname>/node.json"
    node_file: str
    # Copied from the package-level VERSION constant.
    version: str
    # Normalised MASTER_ENDPOINT: carries a scheme, no trailing slash.
    master_endpoint: str
    # Positive integer; DEFAULT_REPORT_INTERVAL_SECONDS when not configured.
    report_interval_seconds: int
    # "/private/argus/agent/<hostname>/health/"
    health_dir: str
    # Not populated by load_config, so it keeps this default there.
    request_timeout_seconds: int = 10


def _normalise_master_endpoint(value: str) -> str:
    """Return *value* as a URL with a scheme and without trailing slashes.

    Surrounding whitespace is stripped. When the value does not already start
    with ``http://`` or ``https://`` (compared case-insensitively), ``http://``
    is prepended.

    Raises:
        ValueError: if the value is empty after stripping.
    """
    value = value.strip()
    if not value:
        raise ValueError("MASTER_ENDPOINT environment variable is required")
    # URL schemes are case-insensitive; without lowering, "HTTPS://host" would
    # be turned into "http://HTTPS://host".
    if not value.lower().startswith(("http://", "https://")):
        value = f"http://{value}"
    return value.rstrip("/")


def _read_report_interval(raw_value: str | None) -> int:
    """Parse the report interval, defaulting when it is absent or blank.

    Raises:
        ValueError: if the value is not an integer or is not positive.
    """
    if raw_value is None or raw_value.strip() == "":
        return DEFAULT_REPORT_INTERVAL_SECONDS
    try:
        interval = int(raw_value)
    except ValueError as exc:
        raise ValueError("REPORT_INTERVAL_SECONDS must be an integer") from exc
    if interval <= 0:
        raise ValueError("REPORT_INTERVAL_SECONDS must be positive")
    return interval


def _resolve_hostname() -> str:
    """Return AGENT_HOSTNAME when set and non-empty, else the socket hostname."""
    return os.environ.get("AGENT_HOSTNAME") or socket.gethostname()


def _load_metadata_from_state(node_file: str) -> tuple[str, str, str] | None:
    """Read ``(env, user, instance)`` from the persisted node state.

    Each field is looked up under the ``meta_data`` mapping first and then at
    the top level of the state. Returns ``None`` when no state is loaded or
    when any of the three fields is missing or empty.
    """
    state = load_node_state(node_file)
    if not state:
        return None

    # A truthy non-dict "meta_data" has no .get(); treat it like missing
    # metadata so the lookups below fall back to the top-level keys instead
    # of raising AttributeError.
    raw_meta = state.get("meta_data")
    meta = raw_meta if isinstance(raw_meta, dict) else {}

    env = meta.get("env") or state.get("env")
    user = meta.get("user") or state.get("user")
    instance = meta.get("instance") or state.get("instance")

    if env and user and instance:
        LOGGER.debug("Metadata resolved from node state", extra={"node_file": node_file})
        return env, user, instance

    LOGGER.warning(
        "node.json missing metadata fields; ignoring",
        extra={"node_file": node_file, "meta_data": meta},
    )
    return None


def _resolve_metadata_fields(hostname: str, node_file: str) -> tuple[str, str, str]:
    """Determine ``(env, user, instance)`` for this agent.

    Sources are tried in order:

    1. AGENT_ENV / AGENT_USER / AGENT_INSTANCE, only when all three are set.
    2. Metadata persisted in *node_file*.
    3. Fields parsed out of *hostname* by ``collector._parse_hostname``.

    Raises:
        ValueError: if none of the sources yields all three fields.
    """
    env = os.environ.get("AGENT_ENV")
    user = os.environ.get("AGENT_USER")
    instance = os.environ.get("AGENT_INSTANCE")

    if env and user and instance:
        return env, user, instance

    # A partially configured environment is ignored as a whole, but is worth
    # a warning because it is most likely a deployment mistake.
    if any([env, user, instance]):
        LOGGER.warning(
            "Incomplete metadata environment variables; falling back to persisted metadata",
            extra={
                "has_env": bool(env),
                "has_user": bool(user),
                "has_instance": bool(instance),
            },
        )

    state_metadata = _load_metadata_from_state(node_file)
    if state_metadata is not None:
        return state_metadata

    from .collector import _parse_hostname  # Local import to avoid circular dependency

    env, user, instance = _parse_hostname(hostname)
    if not all([env, user, instance]):
        raise ValueError(
            "Failed to determine metadata fields; set AGENT_ENV/USER/INSTANCE or use supported hostname pattern"
        )
    return env, user, instance


def load_config() -> AgentConfig:
    """Derive the agent configuration from environment variables.

    No external configuration file is consulted. As a side effect, the
    directory holding node.json and the health directory are created if they
    do not exist yet.

    Raises:
        ValueError: if MASTER_ENDPOINT is unset or blank, if
            REPORT_INTERVAL_SECONDS is not a positive integer, or if the
            metadata fields cannot be determined.
    """
    hostname = _resolve_hostname()
    node_file = f"/private/argus/agent/{hostname}/node.json"
    environment, user, instance = _resolve_metadata_fields(hostname, node_file)

    health_dir = f"/private/argus/agent/{hostname}/health/"

    master_endpoint_env = os.environ.get("MASTER_ENDPOINT")
    if master_endpoint_env is None:
        raise ValueError("MASTER_ENDPOINT environment variable is not set")
    master_endpoint = _normalise_master_endpoint(master_endpoint_env)

    report_interval_seconds = _read_report_interval(os.environ.get("REPORT_INTERVAL_SECONDS"))

    Path(node_file).parent.mkdir(parents=True, exist_ok=True)
    Path(health_dir).mkdir(parents=True, exist_ok=True)

    return AgentConfig(
        hostname=hostname,
        environment=environment,
        user=user,
        instance=instance,
        node_file=node_file,
        version=VERSION,
        master_endpoint=master_endpoint,
        report_interval_seconds=report_interval_seconds,
        health_dir=health_dir,
    )