update inter-rp repo sync metrics

This commit is contained in:
yuyr 2026-06-22 18:01:57 +08:00
parent 92d184681b
commit 4e37b96aff
3 changed files with 171 additions and 65 deletions

View File

@ -11,7 +11,7 @@
"panels": [
{
"id": 1,
"title": "Metrics Reload OK",
"title": "Ours Only Repo Count",
"type": "stat",
"datasource": {
"type": "prometheus",
@ -25,7 +25,7 @@
},
"fieldConfig": {
"defaults": {
"unit": "none",
"unit": "short",
"decimals": 0
},
"overrides": []
@ -47,8 +47,8 @@
},
"targets": [
{
"expr": "max(inter_rp_service_last_reload_success{exported_instance=\"remote200-inter-rp\"})",
"legendFormat": "reload",
"expr": "max(inter_rp_repo_sync_overlap_total{exported_instance=\"remote200-inter-rp\",left=\"ours-rp\",right=\"routinator\",class=\"only_ours\"})",
"legendFormat": "only ours",
"refId": "A",
"instant": true
}
@ -56,7 +56,7 @@
},
{
"id": 2,
"title": "231 Sync Age",
"title": "Routinator Only Repo Count",
"type": "stat",
"datasource": {
"type": "prometheus",
@ -70,7 +70,7 @@
},
"fieldConfig": {
"defaults": {
"unit": "s",
"unit": "short",
"decimals": 0
},
"overrides": []
@ -92,8 +92,8 @@
},
"targets": [
{
"expr": "max(inter_rp_sync_age_seconds{exported_instance=\"remote200-inter-rp\"})",
"legendFormat": "sync age",
"expr": "max(inter_rp_repo_sync_overlap_total{exported_instance=\"remote200-inter-rp\",left=\"ours-rp\",right=\"routinator\",class=\"only_routinator\"})",
"legendFormat": "only routinator",
"refId": "A",
"instant": true
}
@ -101,7 +101,7 @@
},
{
"id": 3,
"title": "Parse Errors",
"title": "Ours vs Routinator VAP Diff",
"type": "stat",
"datasource": {
"type": "prometheus",
@ -137,8 +137,8 @@
},
"targets": [
{
"expr": "max(inter_rp_parse_errors{exported_instance=\"remote200-inter-rp\"})",
"legendFormat": "errors",
"expr": "max(inter_rp_vaps_diff{exported_instance=\"remote200-inter-rp\",left=\"ours-rp\",right=\"routinator\"})",
"legendFormat": "vap diff",
"refId": "A",
"instant": true
}
@ -592,7 +592,7 @@
"uid": "Prometheus"
},
"gridPos": {
"h": 8,
"h": 9,
"w": 24,
"x": 0,
"y": 44
@ -602,7 +602,48 @@
"unit": "none",
"decimals": 0
},
"overrides": []
"overrides": [
{
"matcher": {
"id": "byName",
"options": "uri"
},
"properties": [
{
"id": "custom.width",
"value": 760
}
]
},
{
"matcher": {
"id": "byName",
"options": "class"
},
"properties": [
{
"id": "custom.width",
"value": 140
}
]
},
{
"matcher": {
"id": "byRegexp",
"options": "^(mft|crl|crt|roa|aspa)$"
},
"properties": [
{
"id": "custom.align",
"value": "right"
},
{
"id": "custom.width",
"value": 80
}
]
}
]
},
"options": {
"showHeader": true,
@ -616,50 +657,42 @@
"legendFormat": "{{class}} #{{rank}}",
"refId": "A"
}
]
},
{
"id": 15,
"title": "Only-Ours Repo Count",
"type": "stat",
"datasource": {
"type": "prometheus",
"uid": "Prometheus"
},
"gridPos": {
"h": 4,
"w": 6,
"x": 18,
"y": 32
},
"fieldConfig": {
"defaults": {
"unit": "short",
"decimals": 0
},
"overrides": []
},
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "auto",
"wideLayout": true
},
"targets": [
],
"transformations": [
{
"expr": "max(inter_rp_repo_sync_overlap_total{exported_instance=\"remote200-inter-rp\",left=\"ours-rp\",right=\"routinator\",class=\"only_ours\"})",
"legendFormat": "only ours",
"refId": "A",
"instant": true
"id": "organize",
"options": {
"excludeByName": {
"Time": true,
"Value": true,
"__name__": true,
"exported_instance": true,
"instance": true,
"job": true,
"left": true,
"right": true,
"rank": true,
"routinator_duration": true
},
"indexByName": {
"class": 0,
"uri": 1,
"mft": 2,
"crl": 3,
"crt": 4,
"roa": 5,
"aspa": 6
},
"renameByName": {
"class": "class",
"uri": "uri",
"mft": "mft",
"crl": "crl",
"crt": "crt",
"roa": "roa",
"aspa": "aspa"
}
}
}
]
}
@ -681,5 +714,5 @@
"timezone": "browser",
"title": "Ours RP vs Routinator",
"uid": "ours-rp-inter-rp",
"version": 3
"version": 4
}

View File

@ -155,15 +155,44 @@ def parse_prometheus_samples(text, metric_name):
continue
yield parse_prom_labels(match.group(1)), value
def canonical_object_type(value):
    """Normalise an object-type label to its short canonical name.

    The label is trimmed and lower-cased before lookup; ``None`` (or any
    other falsy value) is treated as an empty string.

    Returns:
        One of ``"mft"``, ``"crl"``, ``"crt"``, ``"roa"`` or ``"aspa"``,
        or ``None`` when the label is not a recognised alias.
    """
    # Alias -> canonical short name. Several spellings collapse onto the
    # same canonical key (e.g. every certificate flavour becomes "crt").
    aliases = {
        "manifest": "mft",
        "mft": "mft",
        "crl": "crl",
        "certificate": "crt",
        "cert": "crt",
        "ca_cert": "crt",
        "router_cert": "crt",
        "ee_cert": "crt",
        "crt": "crt",
        "roa": "roa",
        "aspa": "aspa",
    }
    normalised = (value or "").strip().lower()
    return aliases.get(normalised)
def empty_object_counts():
    """Return a new counter dict with every canonical object type set to 0.

    A fresh dict is built on each call so callers can mutate the result
    without affecting other repositories' counters.
    """
    return dict.fromkeys(("mft", "crl", "crt", "roa", "aspa"), 0)
def add_object_count(counts_by_uri, uri, object_type, value):
    """Accumulate one object-count sample into the per-URI counters.

    Args:
        counts_by_uri: dict mapping repository URI -> counter dict; it is
            mutated in place and a zeroed counter dict is created the first
            time a URI is seen.
        uri: repository URI the sample belongs to. Falsy URIs are ignored.
        object_type: raw object-type label; it is normalised with
            ``canonical_object_type`` and ignored when unrecognised.
        value: sample value, truncated to ``int`` before being added.

    Samples whose value cannot be converted to an integer are skipped
    rather than raised, so one bad sample cannot abort the caller's loop.
    """
    canonical = canonical_object_type(object_type)
    if not uri or canonical is None:
        return
    try:
        increment = int(value)
    except (ValueError, OverflowError):
        # int(float("nan")) raises ValueError and int(float("inf")) raises
        # OverflowError; both are legal sample values in the Prometheus
        # text format, so drop the sample instead of crashing.
        return
    counts = counts_by_uri.setdefault(uri, empty_object_counts())
    counts[canonical] = counts.get(canonical, 0) + increment
def object_count_labels(counts_by_uri, uri):
    """Render the object counters of ``uri`` as string-valued labels.

    Args:
        counts_by_uri: dict mapping repository URI -> counter dict.
        uri: repository URI to look up; an unknown URI yields all zeros.

    Returns:
        A dict with the keys ``mft``, ``crl``, ``crt``, ``roa`` and ``aspa``
        (in that order), each holding the count formatted as a decimal
        integer string.
    """
    counts = counts_by_uri.get(uri, empty_object_counts())
    labels = {}
    for object_type in ("mft", "crl", "crt", "roa", "aspa"):
        labels[object_type] = str(int(counts.get(object_type, 0)))
    return labels
def load_ours_repo_sets(errors):
try:
import urllib.request
text = urllib.request.urlopen("http://127.0.0.1:9556/metrics", timeout=10).read().decode("utf-8", "replace")
except Exception as exc:
errors.append(f"ours-rp repo metrics: {exc}")
return {"total": set(), "available": set(), "failed": set(), "info": {}}
return {"total": set(), "available": set(), "failed": set(), "info": {}, "object_counts": {}}
info = {}
states = {}
object_counts = {}
for labels, value in parse_prometheus_samples(text, "ours_rp_repository_info"):
uri = labels.get("uri")
if uri:
@ -178,6 +207,15 @@ def load_ours_repo_sets(errors):
if not uri:
continue
states.setdefault(uri, {})[labels.get("terminal_state", "unknown")] = value
for labels, value in parse_prometheus_samples(text, "ours_rp_repository_objects_by_type"):
uri = labels.get("uri")
if not uri:
repo_id = labels.get("repo_id")
for candidate_uri, candidate in info.items():
if candidate.get("repo_id") == repo_id:
uri = candidate_uri
break
add_object_count(object_counts, uri, labels.get("object_type"), value)
total = set(info)
available = set()
failed = set()
@ -188,22 +226,23 @@ def load_ours_repo_sets(errors):
available.add(uri)
else:
failed.add(uri)
return {"total": total, "available": available, "failed": failed, "info": info}
return {"total": total, "available": available, "failed": failed, "info": info, "object_counts": object_counts}
def load_routinator_repo_sets(errors):
path = PEER_ROOT / "routinator" / "routinator-metrics.prom"
if not path.exists():
errors.append(f"routinator repo metrics: missing {path}")
return {"total": set(), "success": set(), "failed": set(), "duration": {}}
return {"total": set(), "success": set(), "failed": set(), "duration": {}, "object_counts": {}}
try:
text = path.read_text(encoding="utf-8", errors="replace")
except Exception as exc:
errors.append(f"routinator repo metrics: read {path}: {exc}")
return {"total": set(), "success": set(), "failed": set(), "duration": {}}
return {"total": set(), "success": set(), "failed": set(), "duration": {}, "object_counts": {}}
total = set()
success = set()
failed = set()
duration = {}
object_counts = {}
for metric in ["routinator_rrdp_status", "routinator_rsync_status"]:
for labels, value in parse_prometheus_samples(text, metric):
uri = labels.get("uri")
@ -220,7 +259,9 @@ def load_routinator_repo_sets(errors):
uri = labels.get("uri")
if uri:
duration[uri] = max(duration.get(uri, 0.0), value)
return {"total": total, "success": success, "failed": failed, "duration": duration}
for labels, value in parse_prometheus_samples(text, "routinator_repository_objects_total"):
add_object_count(object_counts, labels.get("uri"), labels.get("type"), value)
return {"total": total, "success": success, "failed": failed, "duration": duration, "object_counts": object_counts}
def emit_repo_diff_metrics(out, errors):
ours = load_ours_repo_sets(errors)
@ -242,9 +283,18 @@ def emit_repo_diff_metrics(out, errors):
out.append(metric_line("inter_rp_repo_sync_overlap_total", {**base, "class": "only_ours"}, len(only_ours)))
out.append(metric_line("inter_rp_repo_sync_overlap_total", {**base, "class": "only_routinator"}, len(only_routinator)))
out.append(metric_line("inter_rp_repo_sync_overlap_total", {**base, "class": "neither_available"}, len(neither)))
for diff_class, uris in [("only_ours", only_ours), ("only_routinator", only_routinator)]:
for diff_class, uris, counts_by_uri in [
("only_ours", only_ours, ours["object_counts"]),
("only_routinator", only_routinator, routinator["object_counts"]),
]:
for rank, uri in enumerate(uris[:50], start=1):
labels = {**base, "class": diff_class, "rank": rank, "uri": uri}
labels = {
**base,
"class": diff_class,
"rank": rank,
"uri": uri,
**object_count_labels(counts_by_uri, uri),
}
if uri in routinator["duration"]:
labels["routinator_duration"] = f"{routinator['duration'][uri]:.3f}"
out.append(metric_line("inter_rp_repo_sync_diff_info", labels, 1))

View File

@ -258,6 +258,7 @@ struct RepoMetrics {
duration_seconds_avg: f64,
phase_counts: BTreeMap<String, u64>,
terminal_state_counts: BTreeMap<String, u64>,
object_counts: BTreeMap<String, u64>,
}
#[derive(Clone, Debug, Default, Serialize)]
@ -859,7 +860,8 @@ fn extract_publication_point_metrics(
let result = json_str(object, &["result"])
.unwrap_or("unknown")
.to_string();
*object_counts.entry((kind, result)).or_default() += 1;
*object_counts.entry((kind.clone(), result)).or_default() += 1;
*repo.object_counts.entry(kind).or_default() += 1;
}
}
@ -1531,6 +1533,21 @@ fn render_repo_metrics(writer: &mut PromWriter<'_>, instance: &str, repos: &[Rep
*count as f64,
);
}
for kind in ["manifest", "crl", "certificate", "roa", "aspa"] {
let labels = [
label("instance", instance),
label("repo_id", &repo.repo_id),
label("host", &repo.host),
label("uri", &repo.uri),
label("object_type", kind),
];
writer.gauge(
"ours_rp_repository_objects_by_type",
"Repository object count by object type from latest run report",
&labels,
repo.object_counts.get(kind).copied().unwrap_or(0) as f64,
);
}
}
}
@ -2298,6 +2315,8 @@ mod tests {
assert_eq!(snapshot.repo_stats.len(), 1);
assert!(snapshot.repo_stats[0].sync_success);
assert_eq!(snapshot.repo_stats[0].download_bytes, 333);
assert_eq!(snapshot.repo_stats[0].object_counts["roa"], 2);
assert_eq!(snapshot.repo_stats[0].object_counts["manifest"], 1);
assert_eq!(snapshot.top_pp_by_object_count[0].object_count, 2);
assert_eq!(snapshot.cir.as_ref().unwrap().objects, 2);
assert_eq!(snapshot.ccr.as_ref().unwrap().state_items["tas"], 1);
@ -2305,6 +2324,10 @@ mod tests {
assert!(metrics.contains("ours_rp_repository_info"));
assert!(metrics.contains("ours_rp_repository_sync_success"));
assert!(metrics.contains("ours_rp_repository_download_bytes"));
assert!(metrics.contains("ours_rp_repository_objects_by_type"));
assert!(metrics.contains(r#"ours_rp_repository_objects_by_type{instance="test",repo_id="#));
assert!(metrics.contains(r#"object_type="roa"} 2"#));
assert!(metrics.contains(r#"object_type="manifest"} 1"#));
assert!(metrics.contains("ours_rp_large_publication_points"));
assert!(metrics.contains("ours_rp_cir_objects"));
assert!(metrics.contains("ours_rp_cir_objects_by_source"));