Reviewed-on: #26 Reviewed-by: xuxt <xuxt@zgclab.edu.cn> Reviewed-by: huhy <husteryezi@163.com> Reviewed-by: sundapeng <sundp@mail.zgclab.edu.cn>
142 lines
4.2 KiB
Python
142 lines
4.2 KiB
Python
from __future__ import annotations
|
|
|
|
import os
|
|
import socket
|
|
from dataclasses import dataclass
|
|
from pathlib import Path
|
|
from typing import Final
|
|
|
|
from .state import load_node_state
|
|
from .version import VERSION
|
|
from .log import get_logger
|
|
|
|
# Fallback reporting interval, in seconds, returned by _read_report_interval
# when the raw value is None or blank.
DEFAULT_REPORT_INTERVAL_SECONDS: Final[int] = 60
|
|
|
|
# Module-level logger shared by the configuration helpers in this file.
LOGGER = get_logger("argus.agent.config")
|
|
|
|
|
|
@dataclass(frozen=True)
class AgentConfig:
    """Immutable bundle of agent settings, as assembled by ``load_config``.

    Instances are frozen: attributes cannot be reassigned after construction.
    Field order is part of the constructor signature and must not change.
    """

    # --- Identity of this agent -------------------------------------------
    hostname: str
    environment: str
    user: str
    instance: str

    # --- Persisted state and build information ----------------------------
    # Path of the node state file (a ``node.json`` under the agent directory).
    node_file: str
    version: str

    # --- Reporting --------------------------------------------------------
    # Master URL; load_config stores it with a scheme and no trailing slash.
    master_endpoint: str
    # Seconds between reports; load_config only ever passes a positive value.
    report_interval_seconds: int
    # Directory path for health data (created by load_config).
    health_dir: str

    # NOTE(review): presumably the timeout for requests to the master; this
    # module never reads it -- confirm against the code that consumes it.
    request_timeout_seconds: int = 10
|
|
|
|
|
|
def _normalise_master_endpoint(value: str) -> str:
|
|
value = value.strip()
|
|
if not value:
|
|
raise ValueError("MASTER_ENDPOINT environment variable is required")
|
|
if not value.startswith("http://") and not value.startswith("https://"):
|
|
value = f"http://{value}"
|
|
return value.rstrip("/")
|
|
|
|
|
|
def _read_report_interval(raw_value: str | None) -> int:
|
|
if raw_value is None or raw_value.strip() == "":
|
|
return DEFAULT_REPORT_INTERVAL_SECONDS
|
|
try:
|
|
interval = int(raw_value)
|
|
except ValueError as exc:
|
|
raise ValueError("REPORT_INTERVAL_SECONDS must be an integer") from exc
|
|
if interval <= 0:
|
|
raise ValueError("REPORT_INTERVAL_SECONDS must be positive")
|
|
return interval
|
|
|
|
|
|
def _resolve_hostname() -> str:
|
|
return os.environ.get("AGENT_HOSTNAME") or socket.gethostname()
|
|
|
|
|
|
def _load_metadata_from_state(node_file: str) -> tuple[str, str, str] | None:
    """Read the ``(env, user, instance)`` triple from the persisted node state.

    Each field is looked up under the state's ``"meta_data"`` mapping first
    and, if missing or falsy there, at the top level of the state.

    Args:
        node_file: Path handed to ``load_node_state``.

    Returns:
        The ``(env, user, instance)`` tuple when all three values are truthy.
        ``None`` when the loaded state is falsy or any field is missing; in
        the latter case a warning is logged.
    """
    state = load_node_state(node_file)
    if not state:
        return None

    meta = state.get("meta_data") or {}
    env, user, instance = (
        meta.get(key) or state.get(key) for key in ("env", "user", "instance")
    )

    if not (env and user and instance):
        LOGGER.warning(
            "node.json missing metadata fields; ignoring",
            extra={"node_file": node_file, "meta_data": meta},
        )
        return None

    LOGGER.debug("Metadata resolved from node state", extra={"node_file": node_file})
    return env, user, instance
|
|
|
|
|
|
def _resolve_metadata_fields(hostname: str, node_file: str) -> tuple[str, str, str]:
|
|
env = os.environ.get("AGENT_ENV")
|
|
user = os.environ.get("AGENT_USER")
|
|
instance = os.environ.get("AGENT_INSTANCE")
|
|
|
|
if env and user and instance:
|
|
return env, user, instance
|
|
|
|
if any([env, user, instance]):
|
|
LOGGER.warning(
|
|
"Incomplete metadata environment variables; falling back to persisted metadata",
|
|
extra={
|
|
"has_env": bool(env),
|
|
"has_user": bool(user),
|
|
"has_instance": bool(instance),
|
|
},
|
|
)
|
|
|
|
state_metadata = _load_metadata_from_state(node_file)
|
|
if state_metadata is not None:
|
|
return state_metadata
|
|
|
|
from .collector import _parse_hostname # Local import to avoid circular dependency
|
|
|
|
env, user, instance = _parse_hostname(hostname)
|
|
|
|
if not all([env, user, instance]):
|
|
raise ValueError(
|
|
"Failed to determine metadata fields; set AGENT_ENV/USER/INSTANCE or use supported hostname pattern"
|
|
)
|
|
|
|
return env, user, instance
|
|
|
|
|
|
def load_config() -> AgentConfig:
    """Derive the agent configuration from environment variables.

    No external configuration file is read. The node state file and health
    directory paths are built from the resolved hostname, and the node file's
    parent directory and the health directory are created if missing.

    Returns:
        A populated ``AgentConfig``.

    Raises:
        ValueError: If ``MASTER_ENDPOINT`` is not set, or if one of the
            helper functions rejects its input (metadata, endpoint or
            report interval).
    """
    hostname = _resolve_hostname()
    node_file = f"/private/argus/agent/{hostname}/node.json"
    environment, user, instance = _resolve_metadata_fields(hostname, node_file)

    health_dir = f"/private/argus/agent/{hostname}/health/"

    raw_endpoint = os.environ.get("MASTER_ENDPOINT")
    if raw_endpoint is None:
        raise ValueError("MASTER_ENDPOINT environment variable is not set")
    master_endpoint = _normalise_master_endpoint(raw_endpoint)

    interval_seconds = _read_report_interval(os.environ.get("REPORT_INTERVAL_SECONDS"))

    # Directories are created only after every setting has been validated.
    for directory in (Path(node_file).parent, Path(health_dir)):
        directory.mkdir(parents=True, exist_ok=True)

    return AgentConfig(
        hostname=hostname,
        environment=environment,
        user=user,
        instance=instance,
        node_file=node_file,
        version=VERSION,
        master_endpoint=master_endpoint,
        report_interval_seconds=interval_seconds,
        health_dir=health_dir,
    )
|