argus-netconf-exporter/tests/test_scraper_behavior.py

392 lines
11 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from __future__ import annotations
import threading
import time
from types import SimpleNamespace
from exporter.config import DeviceConfig, GlobalConfig
from exporter.models import DeviceHealthState, DeviceMetricsSnapshot
from exporter.registry import DeviceRegistry, DeviceRuntimeState
from exporter.scraper import run_one_scrape_round, scrape_device, scraper_loop
class DummyConnectionManager:
def __init__(self) -> None:
self.acquired = []
self.invalidated = []
def acquire_session(self, cfg: DeviceConfig):
self.acquired.append(cfg.name)
return SimpleNamespace() # manager 对象对测试无关紧要
def mark_session_invalid(self, name: str) -> None:
self.invalidated.append(name)
def close_all(self) -> None: # pragma: no cover - not used here
pass
def test_scrape_device_success_updates_cache_and_health_and_registry():
global_cfg = GlobalConfig()
global_cfg.scrape_interval_seconds = 10
global_cfg.failure_threshold = 3
global_cfg.max_backoff_factor = 8
dev_cfg = DeviceConfig(
name="dev1",
host="1.1.1.1",
port=830,
username="u",
password="p",
)
registry = DeviceRegistry(global_scrape_interval=global_cfg.scrape_interval_seconds)
registry.register_static_device(dev_cfg)
state = registry.get_enabled_devices(time.time())[0]
cm = DummyConnectionManager()
# 构造一个包含简单 transceiver/channel 的 XML
xml_reply = """
<rpc-reply xmlns="urn:ietf:params:xml:ns:netconf:base:1.0">
<data>
<components xmlns="http://openconfig.net/yang/platform">
<component>
<name>comp1</name>
<state>
<type>TRANSCEIVER</type>
<temperature><instant>40.0</instant></temperature>
</state>
<transceiver xmlns="http://openconfig.net/yang/platform/transceiver">
<state>
<present>PRESENT</present>
<vendor>H3C</vendor>
<serial-no>SN001</serial-no>
</state>
<physical-channels>
<channel>
<index>0</index>
<state>
<description>1/0/1:1</description>
<output-power><instant>-2.5</instant></output-power>
</state>
</channel>
</physical-channels>
</transceiver>
</component>
</components>
</data>
</rpc-reply>
""".strip()
def fake_get_rpc(_mgr, _flt: str) -> str:
return xml_reply
cache: dict[str, DeviceMetricsSnapshot] = {}
health: dict[str, DeviceHealthState] = {}
now = time.time()
scrape_device(
now,
state,
registry,
cm,
fake_get_rpc,
cache,
health,
global_cfg,
failure_threshold=global_cfg.failure_threshold,
max_backoff_factor=global_cfg.max_backoff_factor,
)
# cache 中应有快照,且包含一个 transceiver 和一个 channel
assert "dev1" in cache
snapshot = cache["dev1"]
assert len(snapshot.transceivers) == 1
assert len(snapshot.channels) == 1
# health 状态应标记为成功
hs = health["dev1"]
assert hs.last_scrape_success is True
assert hs.last_error_type is None
# registry 的调度状态应更新(下次采集时间向后推进)
state_after = registry.get_enabled_devices(now + 100)[0]
assert state_after.next_scrape_at > now
def test_scrape_device_failure_updates_health_and_invalidates_session(monkeypatch):
global_cfg = GlobalConfig()
global_cfg.scrape_interval_seconds = 10
global_cfg.failure_threshold = 1
global_cfg.max_backoff_factor = 8
dev_cfg = DeviceConfig(
name="dev2",
host="1.1.1.2",
port=830,
username="u",
password="p",
)
registry = DeviceRegistry(global_scrape_interval=global_cfg.scrape_interval_seconds)
registry.register_static_device(dev_cfg)
state = registry.get_enabled_devices(time.time())[0]
cm = DummyConnectionManager()
def failing_get_rpc(_mgr, _flt: str) -> str:
raise RuntimeError("filter failed")
cache: dict[str, DeviceMetricsSnapshot] = {}
health: dict[str, DeviceHealthState] = {}
now = time.time()
scrape_device(
now,
state,
registry,
cm,
failing_get_rpc,
cache,
health,
global_cfg,
failure_threshold=global_cfg.failure_threshold,
max_backoff_factor=global_cfg.max_backoff_factor,
)
# cache 中不应有 dev2 的快照
assert "dev2" not in cache
# health 状态应为失败,且 error_type 为 FilterError
hs = health["dev2"]
assert hs.last_scrape_success is False
assert hs.last_error_type == "FilterError"
# 连接应被标记为无效
assert "dev2" in cm.invalidated
def test_run_one_scrape_round_invokes_scrape_for_enabled_devices(monkeypatch):
global_cfg = GlobalConfig()
global_cfg.scrape_interval_seconds = 5
global_cfg.failure_threshold = 3
global_cfg.max_backoff_factor = 8
dev_cfg = DeviceConfig(
name="dev3",
host="1.1.1.3",
port=830,
username="u",
password="p",
)
registry = DeviceRegistry(global_scrape_interval=global_cfg.scrape_interval_seconds)
registry.register_static_device(dev_cfg)
state = registry.get_enabled_devices(time.time())[0]
cm = DummyConnectionManager()
def fake_get_rpc(_mgr, _flt: str) -> str:
# 返回最小合法 XML
return """
<rpc-reply xmlns="urn:ietf:params:xml:ns:netconf:base:1.0">
<data>
<components xmlns="http://openconfig.net/yang/platform">
<component>
<name>compX</name>
<state><type>TRANSCEIVER</type></state>
<transceiver xmlns="http://openconfig.net/yang/platform/transceiver">
<physical-channels>
<channel>
<index>0</index>
<state></state>
</channel>
</physical-channels>
</transceiver>
</component>
</components>
</data>
</rpc-reply>
""".strip()
cache: dict[str, DeviceMetricsSnapshot] = {}
health: dict[str, DeviceHealthState] = {}
now = time.time()
# 调用 run_one_scrape_round确保会调用到 scrape_device
run_one_scrape_round(
now,
registry,
cm,
fake_get_rpc,
cache,
health,
global_cfg,
failure_threshold=global_cfg.failure_threshold,
max_backoff_factor=global_cfg.max_backoff_factor,
)
# dev3 应该被采集一次,并产生快照
assert "dev3" in cache
assert "dev3" in health
def test_scraper_loop_covers_wait_true_and_false(monkeypatch):
"""
覆盖 scraper_loop 中 stop_event.wait 的 True/False 两个分支,
以及 while 条件的退出分支。
"""
global_cfg = GlobalConfig()
global_cfg.scrape_interval_seconds = 0 # 立即触发多轮调度
registry = DeviceRegistry(global_scrape_interval=global_cfg.scrape_interval_seconds)
cm = DummyConnectionManager()
# 使用计数器控制 run_one_scrape_round 调用次数
call_count = {"n": 0}
def fake_get_rpc(_mgr, _flt: str) -> str:
# 返回最小合法 XML
return """
<rpc-reply xmlns="urn:ietf:params:xml:ns:netconf:base:1.0">
<data>
<components xmlns="http://openconfig.net/yang/platform">
<component>
<name>compY</name>
<state><type>TRANSCEIVER</type></state>
</component>
</components>
</data>
</rpc-reply>
""".strip()
cache: dict[str, DeviceMetricsSnapshot] = {}
health: dict[str, DeviceHealthState] = {}
# monkeypatch run_one_scrape_round使其在第二次调用时设置 stop_event
from exporter import scraper as scraper_mod
real_run_one = scraper_mod.run_one_scrape_round
def counting_run_one(
now: float,
registry_: DeviceRegistry,
connection_manager_,
netconf_get_rpc_,
cache_,
health_,
global_cfg_,
failure_threshold: int,
max_backoff_factor: int,
):
call_count["n"] += 1
if call_count["n"] >= 2:
# 第二次调用后设置 stop_event确保有一次 wait 返回 False一次返回 True
stop_event.set()
return real_run_one(
now,
registry_,
connection_manager_,
netconf_get_rpc_,
cache_,
health_,
global_cfg_,
failure_threshold,
max_backoff_factor,
)
monkeypatch.setattr(scraper_mod, "run_one_scrape_round", counting_run_one)
stop_event = threading.Event()
t = threading.Thread(
target=scraper_loop,
args=(stop_event, registry, cm, fake_get_rpc, cache, health, global_cfg),
daemon=True,
)
t.start()
t.join(timeout=5.0)
# 至少调用了两次 run_one_scrape_round一次 wait=False一次 wait=True
assert call_count["n"] >= 2
def test_scrape_device_preserves_existing_health_entry():
"""
第二次采集同一设备时health 字典中已存在条目,应走 device in health 分支。
"""
global_cfg = GlobalConfig()
dev_cfg = DeviceConfig(
name="dev4",
host="1.1.1.4",
port=830,
username="u",
password="p",
)
registry = DeviceRegistry(global_scrape_interval=global_cfg.scrape_interval_seconds)
registry.register_static_device(dev_cfg)
state = registry.get_enabled_devices(time.time())[0]
cm = DummyConnectionManager()
xml_reply = """
<rpc-reply xmlns="urn:ietf:params:xml:ns:netconf:base:1.0">
<data>
<components xmlns="http://openconfig.net/yang/platform">
<component>
<name>compZ</name>
<state><type>TRANSCEIVER</type></state>
<transceiver xmlns="http://openconfig.net/yang/platform/transceiver">
<physical-channels>
<channel>
<index>0</index>
<state></state>
</channel>
</physical-channels>
</transceiver>
</component>
</components>
</data>
</rpc-reply>
""".strip()
def fake_get_rpc(_mgr, _flt: str) -> str:
return xml_reply
cache: dict[str, DeviceMetricsSnapshot] = {}
health: dict[str, DeviceHealthState] = {}
now = time.time()
# 第一次采集health 中还没有 dev4
scrape_device(
now,
state,
registry,
cm,
fake_get_rpc,
cache,
health,
global_cfg,
failure_threshold=global_cfg.failure_threshold,
max_backoff_factor=global_cfg.max_backoff_factor,
)
assert "dev4" in health
# 第二次采集,应走 device 已存在分支
scrape_device(
now + 1,
state,
registry,
cm,
fake_get_rpc,
cache,
health,
global_cfg,
failure_threshold=global_cfg.failure_threshold,
max_backoff_factor=global_cfg.max_backoff_factor,
)
# health 条目仍然存在且状态为成功
assert health["dev4"].last_scrape_success is True