20260622 接入Routinator仓库同步指标
This commit is contained in:
parent
4546d90c33
commit
92d184681b
@ -494,6 +494,174 @@
|
|||||||
"refId": "A"
|
"refId": "A"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 12,
|
||||||
|
"title": "Repo Sync Availability by RP",
|
||||||
|
"type": "timeseries",
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "Prometheus"
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 8,
|
||||||
|
"w": 12,
|
||||||
|
"x": 0,
|
||||||
|
"y": 36
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"unit": "none",
|
||||||
|
"min": 0,
|
||||||
|
"decimals": 0
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"options": {
|
||||||
|
"legend": {
|
||||||
|
"calcs": [
|
||||||
|
"lastNotNull"
|
||||||
|
],
|
||||||
|
"displayMode": "table",
|
||||||
|
"placement": "bottom",
|
||||||
|
"showLegend": true
|
||||||
|
},
|
||||||
|
"tooltip": {
|
||||||
|
"mode": "single",
|
||||||
|
"sort": "none"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "inter_rp_repo_sync_total{exported_instance=\"remote200-inter-rp\",state=~\"available|failed\",exported_rp=~\"ours-rp|routinator\"}",
|
||||||
|
"legendFormat": "{{exported_rp}} {{state}}",
|
||||||
|
"refId": "A"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 13,
|
||||||
|
"title": "Repo Sync Overlap Classes",
|
||||||
|
"type": "timeseries",
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "Prometheus"
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 8,
|
||||||
|
"w": 12,
|
||||||
|
"x": 12,
|
||||||
|
"y": 36
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"unit": "none",
|
||||||
|
"min": 0,
|
||||||
|
"decimals": 0
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"options": {
|
||||||
|
"legend": {
|
||||||
|
"calcs": [
|
||||||
|
"lastNotNull"
|
||||||
|
],
|
||||||
|
"displayMode": "table",
|
||||||
|
"placement": "bottom",
|
||||||
|
"showLegend": true
|
||||||
|
},
|
||||||
|
"tooltip": {
|
||||||
|
"mode": "single",
|
||||||
|
"sort": "none"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "inter_rp_repo_sync_overlap_total{exported_instance=\"remote200-inter-rp\",left=\"ours-rp\",right=\"routinator\"}",
|
||||||
|
"legendFormat": "{{class}}",
|
||||||
|
"refId": "A"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 14,
|
||||||
|
"title": "Repo Sync Diff URIs",
|
||||||
|
"type": "table",
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "Prometheus"
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 8,
|
||||||
|
"w": 24,
|
||||||
|
"x": 0,
|
||||||
|
"y": 44
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"unit": "none",
|
||||||
|
"decimals": 0
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"options": {
|
||||||
|
"showHeader": true,
|
||||||
|
"sortBy": []
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "inter_rp_repo_sync_diff_info{exported_instance=\"remote200-inter-rp\",left=\"ours-rp\",right=\"routinator\"}",
|
||||||
|
"format": "table",
|
||||||
|
"instant": true,
|
||||||
|
"legendFormat": "{{class}} #{{rank}}",
|
||||||
|
"refId": "A"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 15,
|
||||||
|
"title": "Only-Ours Repo Count",
|
||||||
|
"type": "stat",
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "Prometheus"
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 4,
|
||||||
|
"w": 6,
|
||||||
|
"x": 18,
|
||||||
|
"y": 32
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"unit": "short",
|
||||||
|
"decimals": 0
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"options": {
|
||||||
|
"colorMode": "value",
|
||||||
|
"graphMode": "area",
|
||||||
|
"justifyMode": "auto",
|
||||||
|
"orientation": "auto",
|
||||||
|
"reduceOptions": {
|
||||||
|
"calcs": [
|
||||||
|
"lastNotNull"
|
||||||
|
],
|
||||||
|
"fields": "",
|
||||||
|
"values": false
|
||||||
|
},
|
||||||
|
"textMode": "auto",
|
||||||
|
"wideLayout": true
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "max(inter_rp_repo_sync_overlap_total{exported_instance=\"remote200-inter-rp\",left=\"ours-rp\",right=\"routinator\",class=\"only_ours\"})",
|
||||||
|
"legendFormat": "only ours",
|
||||||
|
"refId": "A",
|
||||||
|
"instant": true
|
||||||
|
}
|
||||||
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"refresh": "10s",
|
"refresh": "10s",
|
||||||
@ -513,5 +681,5 @@
|
|||||||
"timezone": "browser",
|
"timezone": "browser",
|
||||||
"title": "Ours RP vs Routinator",
|
"title": "Ours RP vs Routinator",
|
||||||
"uid": "ours-rp-inter-rp",
|
"uid": "ours-rp-inter-rp",
|
||||||
"version": 2
|
"version": 3
|
||||||
}
|
}
|
||||||
|
|||||||
@ -3,6 +3,7 @@ import csv
|
|||||||
import datetime as dt
|
import datetime as dt
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
|
import re
|
||||||
import socket
|
import socket
|
||||||
import sys
|
import sys
|
||||||
import threading
|
import threading
|
||||||
@ -138,6 +139,126 @@ def sample_routinator(now, errors):
|
|||||||
sample["vaps"] = count_unique_csv(latest / "vaps.csv", 2)
|
sample["vaps"] = count_unique_csv(latest / "vaps.csv", 2)
|
||||||
return sample
|
return sample
|
||||||
|
|
||||||
|
|
||||||
|
def parse_prom_labels(label_text):
    """Parse the inside of a Prometheus ``{...}`` label block into a dict.

    Args:
        label_text: The text between the braces of one sample line, e.g.
            ``uri="https://example/",state="ok"``.

    Returns:
        A dict mapping each label name to its unescaped value. When a label
        name occurs more than once, the last occurrence wins. Text that does
        not look like ``name="value"`` is ignored.
    """
    # The three escapes defined for label values in the text exposition
    # format. Any other backslash sequence is left exactly as written.
    escapes = {"\\": "\\", '"': '"', "n": "\n"}

    def unescape(match):
        return escapes.get(match.group(1), match.group(0))

    # Unescape in a single left-to-right pass so that every backslash is
    # interpreted exactly once, together with the character that follows it.
    return {
        key: re.sub(r'\\(.)', unescape, raw_value)
        for key, raw_value in re.findall(r'(\w+)="((?:[^"\\]|\\.)*)"', label_text)
    }
|
||||||
|
|
||||||
|
def parse_prometheus_samples(text, metric_name):
    """Yield ``(labels, value)`` for each labelled sample of *metric_name*.

    Only lines of the form ``metric_name{...} <number>`` are matched, with an
    optional trailing integer timestamp. Samples without a label block, and
    values spelled with letters other than ``e``/``E`` (such as ``NaN``), do
    not match the pattern and are skipped.

    Args:
        text: Metrics text to scan, one sample per line.
        metric_name: Exact metric name; it is regex-escaped before use.

    Yields:
        Tuples of ``(labels, value)`` where ``labels`` is the dict returned by
        ``parse_prom_labels`` and ``value`` is a ``float``.
    """
    # Quoted label values are consumed as whole units so that a "}" inside a
    # value (for example in a URI) does not end the label block early.
    label_block = r'\{((?:[^"}]|"(?:[^"\\]|\\.)*")*)\}'
    pattern = re.compile(
        r'^' + re.escape(metric_name) + label_block
        + r'\s+([-+0-9.eE]+)(?:[ \t]+-?[0-9]+)?\s*$',
        re.MULTILINE,
    )
    for match in pattern.finditer(text):
        try:
            value = float(match.group(2))
        except ValueError:
            # The character class admits strings such as "1.2.3" or "e";
            # those are not numbers, so the sample is dropped.
            continue
        yield parse_prom_labels(match.group(1)), value
|
||||||
|
|
||||||
|
def load_ours_repo_sets(errors):
    """Fetch the local ours-rp metrics and classify its repositories by URI.

    Reads ``http://127.0.0.1:9556/metrics`` and uses two metrics from it:
    ``ours_rp_repository_info`` (one entry per ``uri`` label) and
    ``ours_rp_repository_terminal_state_publication_points`` (joined to a URI
    through the ``repo_id`` label).

    Args:
        errors: List that receives a message if the metrics cannot be fetched.

    Returns:
        A dict with keys ``total``, ``available`` and ``failed`` (sets of
        URIs) and ``info`` (URI -> label dict). A repository is ``available``
        when the sum of its terminal-state values, excluding the
        ``failed_no_cache`` state, is positive; otherwise it is ``failed``.
        On a fetch error all four collections are empty.
    """
    try:
        import urllib.request

        # Use the response as a context manager so the connection is closed
        # on every call, including when read() or decode() raises.
        with urllib.request.urlopen("http://127.0.0.1:9556/metrics", timeout=10) as response:
            text = response.read().decode("utf-8", "replace")
    except Exception as exc:
        # Best-effort collection: record the failure and report empty sets.
        errors.append(f"ours-rp repo metrics: {exc}")
        return {"total": set(), "available": set(), "failed": set(), "info": {}}

    info = {}
    for labels, _ in parse_prometheus_samples(text, "ours_rp_repository_info"):
        uri = labels.get("uri")
        if uri:
            info[uri] = labels

    # Index repo_id -> URI once. setdefault keeps the first URI seen for a
    # repo_id, matching a front-to-back scan of ``info``.
    uri_by_repo_id = {}
    for uri, labels in info.items():
        uri_by_repo_id.setdefault(labels.get("repo_id"), uri)

    states = {}
    for labels, value in parse_prometheus_samples(text, "ours_rp_repository_terminal_state_publication_points"):
        uri = uri_by_repo_id.get(labels.get("repo_id"))
        if not uri:
            continue
        states.setdefault(uri, {})[labels.get("terminal_state", "unknown")] = value

    total = set(info)
    available = set()
    failed = set()
    for uri in total:
        repo_states = states.get(uri, {})
        non_failed = sum(value for state, value in repo_states.items() if state != "failed_no_cache")
        if non_failed > 0:
            available.add(uri)
        else:
            failed.add(uri)
    return {"total": total, "available": available, "failed": failed, "info": info}
|
||||||
|
|
||||||
|
def load_routinator_repo_sets(errors):
    """Classify Routinator repositories from the synced metrics snapshot.

    Reads ``routinator/routinator-metrics.prom`` under ``PEER_ROOT``. Every
    URI seen in ``routinator_rrdp_status`` or ``routinator_rsync_status`` is
    counted; a sample whose value is 0, 200 or 304 puts the URI in the
    success set, any other value puts it in the failed set. For each URI the
    largest value seen in ``routinator_rrdp_duration`` or
    ``routinator_rsync_duration`` is kept.

    Args:
        errors: List that receives a message when the file is missing or
            cannot be read.

    Returns:
        A dict with keys ``total``, ``success`` and ``failed`` (sets of URIs)
        and ``duration`` (URI -> float). All are empty when the file is
        missing or unreadable.
    """

    def empty_result():
        # Build fresh containers on every call so callers never share state.
        return {"total": set(), "success": set(), "failed": set(), "duration": {}}

    path = PEER_ROOT / "routinator" / "routinator-metrics.prom"
    if not path.exists():
        errors.append(f"routinator repo metrics: missing {path}")
        return empty_result()
    try:
        text = path.read_text(encoding="utf-8", errors="replace")
    except Exception as exc:
        errors.append(f"routinator repo metrics: read {path}: {exc}")
        return empty_result()

    seen = set()
    succeeded = set()
    errored = set()
    longest = {}
    ok_values = (0.0, 200.0, 304.0)

    for status_metric in ("routinator_rrdp_status", "routinator_rsync_status"):
        for sample_labels, status in parse_prometheus_samples(text, status_metric):
            repo_uri = sample_labels.get("uri")
            if not repo_uri:
                continue
            seen.add(repo_uri)
            bucket = succeeded if status in ok_values else errored
            bucket.add(repo_uri)

    for duration_metric in ("routinator_rrdp_duration", "routinator_rsync_duration"):
        for sample_labels, elapsed in parse_prometheus_samples(text, duration_metric):
            repo_uri = sample_labels.get("uri")
            if repo_uri:
                longest[repo_uri] = max(longest.get(repo_uri, 0.0), elapsed)

    return {"total": seen, "success": succeeded, "failed": errored, "duration": longest}
|
||||||
|
|
||||||
|
def emit_repo_diff_metrics(out, errors):
    """Append repository-sync comparison metrics and return a summary.

    Loads the repository sets of both relying parties and appends to *out*:
    six ``inter_rp_repo_sync_total`` samples (total/available/failed for each
    RP), four ``inter_rp_repo_sync_overlap_total`` samples (one per overlap
    class), and one ``inter_rp_repo_sync_diff_info`` sample for each of the
    first 50 sorted URIs that only one side has available.

    Args:
        out: List of metric lines; extended in place.
        errors: List passed through to the loaders for error messages.

    Returns:
        A dict with the counts per side, the overlap counts, and the first 50
        URIs of each one-sided class (``onlyOurs`` and ``onlyRoutinator``).
    """
    ours_sets = load_ours_repo_sets(errors)
    routinator_sets = load_routinator_repo_sets(errors)
    ours_available = ours_sets["available"]
    routinator_available = routinator_sets["success"]

    ours_only = sorted(ours_available - routinator_available)
    routinator_only = sorted(routinator_available - ours_available)
    in_both = ours_available & routinator_available
    known_uris = ours_sets["total"] | routinator_sets["total"]
    in_neither = known_uris - (ours_available | routinator_available)

    # Per-RP counts, listed in the order the samples are emitted.
    per_rp_counts = (
        ("ours-rp", "total", len(ours_sets["total"])),
        ("ours-rp", "available", len(ours_available)),
        ("ours-rp", "failed", len(ours_sets["failed"])),
        ("routinator", "total", len(routinator_sets["total"])),
        ("routinator", "available", len(routinator_available)),
        ("routinator", "failed", len(routinator_sets["failed"])),
    )
    for rp_name, state, count in per_rp_counts:
        sample_labels = {"instance": INSTANCE, "rp": rp_name, "state": state}
        out.append(metric_line("inter_rp_repo_sync_total", sample_labels, count))

    pair_labels = {"instance": INSTANCE, "left": "ours-rp", "right": "routinator"}
    overlap_counts = (
        ("both_available", len(in_both)),
        ("only_ours", len(ours_only)),
        ("only_routinator", len(routinator_only)),
        ("neither_available", len(in_neither)),
    )
    for overlap_class, count in overlap_counts:
        sample_labels = {**pair_labels, "class": overlap_class}
        out.append(metric_line("inter_rp_repo_sync_overlap_total", sample_labels, count))

    durations = routinator_sets["duration"]
    one_sided = (("only_ours", ours_only), ("only_routinator", routinator_only))
    for diff_class, uris in one_sided:
        # Only the first 50 URIs of each class get an info sample.
        for rank, uri in enumerate(uris[:50], start=1):
            sample_labels = {**pair_labels, "class": diff_class, "rank": rank, "uri": uri}
            if uri in durations:
                sample_labels["routinator_duration"] = f"{durations[uri]:.3f}"
            out.append(metric_line("inter_rp_repo_sync_diff_info", sample_labels, 1))

    summary = {
        "oursTotal": len(ours_sets["total"]),
        "oursAvailable": len(ours_available),
        "routinatorTotal": len(routinator_sets["total"]),
        "routinatorAvailable": len(routinator_available),
        "onlyOurs": ours_only[:50],
        "onlyRoutinator": routinator_only[:50],
        "bothAvailable": len(in_both),
        "neitherAvailable": len(in_neither),
    }
    return summary
|
||||||
|
|
||||||
def sync_metrics(now):
|
def sync_metrics(now):
|
||||||
path = PEER_ROOT / "sync-status.json"
|
path = PEER_ROOT / "sync-status.json"
|
||||||
if not path.exists():
|
if not path.exists():
|
||||||
@ -201,7 +322,8 @@ def build_metrics():
|
|||||||
out.append(metric_line("inter_rp_vrps_diff", {"instance": INSTANCE, "left": "ours-rp", "right": "routinator"}, abs(int(ours["vrps"]) - int(rout["vrps"]))))
|
out.append(metric_line("inter_rp_vrps_diff", {"instance": INSTANCE, "left": "ours-rp", "right": "routinator"}, abs(int(ours["vrps"]) - int(rout["vrps"]))))
|
||||||
if ours.get("vaps") is not None and rout.get("vaps") is not None:
|
if ours.get("vaps") is not None and rout.get("vaps") is not None:
|
||||||
out.append(metric_line("inter_rp_vaps_diff", {"instance": INSTANCE, "left": "ours-rp", "right": "routinator"}, abs(int(ours["vaps"]) - int(rout["vaps"]))))
|
out.append(metric_line("inter_rp_vaps_diff", {"instance": INSTANCE, "left": "ours-rp", "right": "routinator"}, abs(int(ours["vaps"]) - int(rout["vaps"]))))
|
||||||
return "".join(out), {"errors": errors, "samples": samples, "sync": sync}
|
repo_diff = emit_repo_diff_metrics(out, errors)
|
||||||
|
return "".join(out), {"errors": errors, "samples": samples, "sync": sync, "repoDiff": repo_diff}
|
||||||
|
|
||||||
def get_metrics():
|
def get_metrics():
|
||||||
now = time.time()
|
now = time.time()
|
||||||
|
|||||||
@ -47,7 +47,13 @@ sync_once() {
|
|||||||
rm -rf "$tmp"
|
rm -rf "$tmp"
|
||||||
return 2
|
return 2
|
||||||
fi
|
fi
|
||||||
rm -rf "$PEER_ROOT/routinator/latest.next" "$PEER_ROOT/routinator/latest.prev"
|
if ! ssh -o BatchMode=yes -o ConnectTimeout=8 "$REMOTE231" "curl -fsS --max-time 20 http://127.0.0.1:9558/metrics" >"$tmp/routinator-metrics.prom"; then
|
||||||
|
rm -rf "$tmp"
|
||||||
|
return 3
|
||||||
|
fi
|
||||||
|
rm -rf "$PEER_ROOT/routinator/latest.next" "$PEER_ROOT/routinator/latest.prev" "$PEER_ROOT/routinator/routinator-metrics.prom.next"
|
||||||
|
mv "$tmp/routinator-metrics.prom" "$PEER_ROOT/routinator/routinator-metrics.prom.next"
|
||||||
|
mv "$PEER_ROOT/routinator/routinator-metrics.prom.next" "$PEER_ROOT/routinator/routinator-metrics.prom"
|
||||||
mv "$tmp/latest" "$PEER_ROOT/routinator/latest.next"
|
mv "$tmp/latest" "$PEER_ROOT/routinator/latest.next"
|
||||||
if [[ -e "$PEER_ROOT/routinator/latest" ]]; then
|
if [[ -e "$PEER_ROOT/routinator/latest" ]]; then
|
||||||
mv "$PEER_ROOT/routinator/latest" "$PEER_ROOT/routinator/latest.prev"
|
mv "$PEER_ROOT/routinator/latest" "$PEER_ROOT/routinator/latest.prev"
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user