20260622 接入Routinator仓库同步指标

This commit is contained in:
yuyr 2026-06-22 16:27:19 +08:00
parent 4546d90c33
commit 92d184681b
3 changed files with 299 additions and 3 deletions

View File

@ -494,6 +494,174 @@
"refId": "A" "refId": "A"
} }
] ]
},
{
"id": 12,
"title": "Repo Sync Availability by RP",
"type": "timeseries",
"datasource": {
"type": "prometheus",
"uid": "Prometheus"
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 36
},
"fieldConfig": {
"defaults": {
"unit": "none",
"min": 0,
"decimals": 0
},
"overrides": []
},
"options": {
"legend": {
"calcs": [
"lastNotNull"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"expr": "inter_rp_repo_sync_total{exported_instance=\"remote200-inter-rp\",state=~\"available|failed\",exported_rp=~\"ours-rp|routinator\"}",
"legendFormat": "{{exported_rp}} {{state}}",
"refId": "A"
}
]
},
{
"id": 13,
"title": "Repo Sync Overlap Classes",
"type": "timeseries",
"datasource": {
"type": "prometheus",
"uid": "Prometheus"
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 36
},
"fieldConfig": {
"defaults": {
"unit": "none",
"min": 0,
"decimals": 0
},
"overrides": []
},
"options": {
"legend": {
"calcs": [
"lastNotNull"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"expr": "inter_rp_repo_sync_overlap_total{exported_instance=\"remote200-inter-rp\",left=\"ours-rp\",right=\"routinator\"}",
"legendFormat": "{{class}}",
"refId": "A"
}
]
},
{
"id": 14,
"title": "Repo Sync Diff URIs",
"type": "table",
"datasource": {
"type": "prometheus",
"uid": "Prometheus"
},
"gridPos": {
"h": 8,
"w": 24,
"x": 0,
"y": 44
},
"fieldConfig": {
"defaults": {
"unit": "none",
"decimals": 0
},
"overrides": []
},
"options": {
"showHeader": true,
"sortBy": []
},
"targets": [
{
"expr": "inter_rp_repo_sync_diff_info{exported_instance=\"remote200-inter-rp\",left=\"ours-rp\",right=\"routinator\"}",
"format": "table",
"instant": true,
"legendFormat": "{{class}} #{{rank}}",
"refId": "A"
}
]
},
{
"id": 15,
"title": "Only-Ours Repo Count",
"type": "stat",
"datasource": {
"type": "prometheus",
"uid": "Prometheus"
},
"gridPos": {
"h": 4,
"w": 6,
"x": 18,
"y": 32
},
"fieldConfig": {
"defaults": {
"unit": "short",
"decimals": 0
},
"overrides": []
},
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "auto",
"wideLayout": true
},
"targets": [
{
"expr": "max(inter_rp_repo_sync_overlap_total{exported_instance=\"remote200-inter-rp\",left=\"ours-rp\",right=\"routinator\",class=\"only_ours\"})",
"legendFormat": "only ours",
"refId": "A",
"instant": true
}
]
} }
], ],
"refresh": "10s", "refresh": "10s",
@ -513,5 +681,5 @@
"timezone": "browser", "timezone": "browser",
"title": "Ours RP vs Routinator", "title": "Ours RP vs Routinator",
"uid": "ours-rp-inter-rp", "uid": "ours-rp-inter-rp",
"version": 2 "version": 3
} }

View File

@ -3,6 +3,7 @@ import csv
import datetime as dt import datetime as dt
import json import json
import os import os
import re
import socket import socket
import sys import sys
import threading import threading
@ -138,6 +139,126 @@ def sample_routinator(now, errors):
sample["vaps"] = count_unique_csv(latest / "vaps.csv", 2) sample["vaps"] = count_unique_csv(latest / "vaps.csv", 2)
return sample return sample
def parse_prom_labels(label_text):
    """Parse the inside of a Prometheus ``{...}`` label set into a dict.

    Args:
        label_text: The text between the braces of one sample line, e.g.
            ``uri="https://example/notify.xml",repo_id="3"``.

    Returns:
        A dict mapping each label name to its unescaped value. If a label
        name occurs more than once, the last occurrence wins.
    """
    # The text exposition format defines exactly three escapes inside a
    # label value: backslash, double quote and line feed.
    escapes = {"\\\\": "\\", '\\"': '"', "\\n": "\n"}

    def unescape(match):
        # Unknown escape sequences are kept verbatim rather than dropped.
        return escapes.get(match.group(0), match.group(0))

    # Decode in a single left-to-right pass so that an escaped backslash
    # followed by "n" is not mistaken for an escaped line feed.
    return {
        key: re.sub(r"\\.", unescape, raw)
        for key, raw in re.findall(r'(\w+)="((?:[^"\\]|\\.)*)"', label_text)
    }
def parse_prometheus_samples(text, metric_name):
    """Yield ``(labels, value)`` for every labelled sample of one metric.

    Only lines of the form ``metric_name{...} value`` are considered, with an
    optional integer timestamp after the value. Samples without a label set
    are not matched, and values that are not plain decimal or exponent
    notation (for example ``NaN`` or ``+Inf``) are skipped.

    Args:
        text: Metrics text in the Prometheus exposition format.
        metric_name: Exact metric name to extract; it is regex-escaped.

    Yields:
        Tuples of (label dict from ``parse_prom_labels``, float value).
    """
    pattern = re.compile(
        r'^' + re.escape(metric_name)
        # Label set: quoted values are consumed as a unit so that a "}"
        # inside a label value does not end the label set early.
        + r'\{((?:[^"}]|"(?:[^"\\]|\\.)*")*)\}'
        + r'\s+([-+0-9.eE]+)'
        # Optional millisecond timestamp permitted by the text format.
        + r'(?:\s+-?[0-9]+)?\s*$',
        re.MULTILINE,
    )
    for match in pattern.finditer(text):
        try:
            value = float(match.group(2))
        except ValueError:
            # The character class admits strings such as "." or "e";
            # ignore anything float() rejects.
            continue
        yield parse_prom_labels(match.group(1)), value
def load_ours_repo_sets(errors):
    """Fetch ours-rp metrics and classify its repositories by sync outcome.

    Reads ``ours_rp_repository_info`` to learn the repository URIs and
    ``ours_rp_repository_terminal_state_publication_points`` to learn the
    per-state counts, joined on the ``repo_id`` label.

    Args:
        errors: List that receives a message if the metrics fetch fails.

    Returns:
        A dict with keys ``total``, ``available`` and ``failed`` (sets of
        URIs) and ``info`` (URI -> label dict). A repository is available
        when the sum of its state values, excluding ``failed_no_cache``, is
        positive; every other known repository is failed. All four entries
        are empty when the fetch fails.
    """
    try:
        import urllib.request

        # Close the HTTP response deterministically instead of leaving the
        # socket to the garbage collector.
        with urllib.request.urlopen("http://127.0.0.1:9556/metrics", timeout=10) as response:
            text = response.read().decode("utf-8", "replace")
    except Exception as exc:
        errors.append(f"ours-rp repo metrics: {exc}")
        return {"total": set(), "available": set(), "failed": set(), "info": {}}

    info = {}
    for labels, _value in parse_prometheus_samples(text, "ours_rp_repository_info"):
        uri = labels.get("uri")
        if uri:
            info[uri] = labels

    # Index repo_id -> URI once. setdefault keeps the first URI seen for a
    # repo_id, which is the URI a linear scan over ``info`` would return.
    uri_by_repo_id = {}
    for uri, labels in info.items():
        uri_by_repo_id.setdefault(labels.get("repo_id"), uri)

    states = {}
    for labels, value in parse_prometheus_samples(text, "ours_rp_repository_terminal_state_publication_points"):
        uri = uri_by_repo_id.get(labels.get("repo_id"))
        if not uri:
            # State sample for a repository with no matching info sample.
            continue
        states.setdefault(uri, {})[labels.get("terminal_state", "unknown")] = value

    total = set(info)
    available = set()
    for uri in total:
        non_failed = sum(
            value
            for state, value in states.get(uri, {}).items()
            if state != "failed_no_cache"
        )
        if non_failed > 0:
            available.add(uri)
    # Repositories with no state samples at all also land in ``failed``.
    failed = total - available
    return {"total": total, "available": available, "failed": failed, "info": info}
def load_routinator_repo_sets(errors):
    """Classify Routinator repositories from its mirrored metrics file.

    Reads ``routinator/routinator-metrics.prom`` under ``PEER_ROOT`` and
    collects the ``uri`` label of the RRDP and rsync status samples.

    Args:
        errors: List that receives a message when the file is missing or
            cannot be read.

    Returns:
        A dict with keys ``total``, ``success`` and ``failed`` (sets of
        URIs) and ``duration`` (URI -> largest duration value seen). A
        status value of 0, 200 or 304 counts as success; anything else as
        failed. All entries are empty when the file is unavailable.
    """
    metrics_path = PEER_ROOT / "routinator" / "routinator-metrics.prom"

    def empty_result():
        # Fresh containers on every call so callers never share state.
        return {"total": set(), "success": set(), "failed": set(), "duration": {}}

    if not metrics_path.exists():
        errors.append(f"routinator repo metrics: missing {metrics_path}")
        return empty_result()
    try:
        body = metrics_path.read_text(encoding="utf-8", errors="replace")
    except Exception as exc:
        errors.append(f"routinator repo metrics: read {metrics_path}: {exc}")
        return empty_result()

    seen = set()
    succeeded = set()
    errored = set()
    for status_metric in ("routinator_rrdp_status", "routinator_rsync_status"):
        for sample_labels, status in parse_prometheus_samples(body, status_metric):
            repo_uri = sample_labels.get("uri")
            if not repo_uri:
                continue
            seen.add(repo_uri)
            # presumably rsync exit code 0 and HTTP 200/304 — verify
            # against the Routinator metrics documentation.
            bucket = succeeded if status in (0.0, 200.0, 304.0) else errored
            bucket.add(repo_uri)

    longest = {}
    for duration_metric in ("routinator_rrdp_duration", "routinator_rsync_duration"):
        for sample_labels, observed in parse_prometheus_samples(body, duration_metric):
            repo_uri = sample_labels.get("uri")
            if not repo_uri:
                continue
            # Keep the largest duration when a URI appears more than once.
            longest[repo_uri] = max(longest.get(repo_uri, 0.0), observed)

    return {"total": seen, "success": succeeded, "failed": errored, "duration": longest}
def emit_repo_diff_metrics(out, errors):
    """Append repository sync comparison metrics for ours-rp vs Routinator.

    Emits three metric families into ``out``: per-RP counts
    (``inter_rp_repo_sync_total``), overlap class counts
    (``inter_rp_repo_sync_overlap_total``) and one info sample per differing
    URI (``inter_rp_repo_sync_diff_info``), limited to the first 50 URIs of
    each class in sorted order.

    Args:
        out: List of metric lines to append to.
        errors: List passed through to the two loaders for error messages.

    Returns:
        A summary dict with the counts and the (at most 50) URIs that only
        one of the two relying parties synced successfully.
    """
    listed_limit = 50

    ours = load_ours_repo_sets(errors)
    routinator = load_routinator_repo_sets(errors)
    ours_ok, rout_ok = ours["available"], routinator["success"]

    only_ours = sorted(ours_ok - rout_ok)
    only_routinator = sorted(rout_ok - ours_ok)
    both = ours_ok & rout_ok
    # Known to at least one side but successfully synced by neither.
    neither = (ours["total"] | routinator["total"]) - (ours_ok | rout_ok)

    pair_labels = {"instance": INSTANCE, "left": "ours-rp", "right": "routinator"}

    per_rp_counts = (
        ("ours-rp", "total", len(ours["total"])),
        ("ours-rp", "available", len(ours_ok)),
        ("ours-rp", "failed", len(ours["failed"])),
        ("routinator", "total", len(routinator["total"])),
        ("routinator", "available", len(rout_ok)),
        ("routinator", "failed", len(routinator["failed"])),
    )
    for rp_name, state, count in per_rp_counts:
        sample_labels = {"instance": INSTANCE, "rp": rp_name, "state": state}
        out.append(metric_line("inter_rp_repo_sync_total", sample_labels, count))

    overlap_counts = (
        ("both_available", len(both)),
        ("only_ours", len(only_ours)),
        ("only_routinator", len(only_routinator)),
        ("neither_available", len(neither)),
    )
    for overlap_class, count in overlap_counts:
        sample_labels = {**pair_labels, "class": overlap_class}
        out.append(metric_line("inter_rp_repo_sync_overlap_total", sample_labels, count))

    durations = routinator["duration"]
    for diff_class, uris in (("only_ours", only_ours), ("only_routinator", only_routinator)):
        for rank, uri in enumerate(uris[:listed_limit], start=1):
            sample_labels = {**pair_labels, "class": diff_class, "rank": rank, "uri": uri}
            if uri in durations:
                sample_labels["routinator_duration"] = f"{durations[uri]:.3f}"
            out.append(metric_line("inter_rp_repo_sync_diff_info", sample_labels, 1))

    return {
        "oursTotal": len(ours["total"]),
        "oursAvailable": len(ours_ok),
        "routinatorTotal": len(routinator["total"]),
        "routinatorAvailable": len(rout_ok),
        "onlyOurs": only_ours[:listed_limit],
        "onlyRoutinator": only_routinator[:listed_limit],
        "bothAvailable": len(both),
        "neitherAvailable": len(neither),
    }
def sync_metrics(now): def sync_metrics(now):
path = PEER_ROOT / "sync-status.json" path = PEER_ROOT / "sync-status.json"
if not path.exists(): if not path.exists():
@ -201,7 +322,8 @@ def build_metrics():
out.append(metric_line("inter_rp_vrps_diff", {"instance": INSTANCE, "left": "ours-rp", "right": "routinator"}, abs(int(ours["vrps"]) - int(rout["vrps"])))) out.append(metric_line("inter_rp_vrps_diff", {"instance": INSTANCE, "left": "ours-rp", "right": "routinator"}, abs(int(ours["vrps"]) - int(rout["vrps"]))))
if ours.get("vaps") is not None and rout.get("vaps") is not None: if ours.get("vaps") is not None and rout.get("vaps") is not None:
out.append(metric_line("inter_rp_vaps_diff", {"instance": INSTANCE, "left": "ours-rp", "right": "routinator"}, abs(int(ours["vaps"]) - int(rout["vaps"])))) out.append(metric_line("inter_rp_vaps_diff", {"instance": INSTANCE, "left": "ours-rp", "right": "routinator"}, abs(int(ours["vaps"]) - int(rout["vaps"]))))
return "".join(out), {"errors": errors, "samples": samples, "sync": sync} repo_diff = emit_repo_diff_metrics(out, errors)
return "".join(out), {"errors": errors, "samples": samples, "sync": sync, "repoDiff": repo_diff}
def get_metrics(): def get_metrics():
now = time.time() now = time.time()

View File

@ -47,7 +47,13 @@ sync_once() {
rm -rf "$tmp" rm -rf "$tmp"
return 2 return 2
fi fi
rm -rf "$PEER_ROOT/routinator/latest.next" "$PEER_ROOT/routinator/latest.prev" if ! ssh -o BatchMode=yes -o ConnectTimeout=8 "$REMOTE231" "curl -fsS --max-time 20 http://127.0.0.1:9558/metrics" >"$tmp/routinator-metrics.prom"; then
rm -rf "$tmp"
return 3
fi
rm -rf "$PEER_ROOT/routinator/latest.next" "$PEER_ROOT/routinator/latest.prev" "$PEER_ROOT/routinator/routinator-metrics.prom.next"
mv "$tmp/routinator-metrics.prom" "$PEER_ROOT/routinator/routinator-metrics.prom.next"
mv "$PEER_ROOT/routinator/routinator-metrics.prom.next" "$PEER_ROOT/routinator/routinator-metrics.prom"
mv "$tmp/latest" "$PEER_ROOT/routinator/latest.next" mv "$tmp/latest" "$PEER_ROOT/routinator/latest.next"
if [[ -e "$PEER_ROOT/routinator/latest" ]]; then if [[ -e "$PEER_ROOT/routinator/latest" ]]; then
mv "$PEER_ROOT/routinator/latest" "$PEER_ROOT/routinator/latest.prev" mv "$PEER_ROOT/routinator/latest" "$PEER_ROOT/routinator/latest.prev"