update inter-rp repo sync metrics
This commit is contained in:
parent
92d184681b
commit
4e37b96aff
@ -11,7 +11,7 @@
|
||||
"panels": [
|
||||
{
|
||||
"id": 1,
|
||||
"title": "Metrics Reload OK",
|
||||
"title": "Ours Only Repo Count",
|
||||
"type": "stat",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
@ -25,7 +25,7 @@
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "none",
|
||||
"unit": "short",
|
||||
"decimals": 0
|
||||
},
|
||||
"overrides": []
|
||||
@ -47,8 +47,8 @@
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "max(inter_rp_service_last_reload_success{exported_instance=\"remote200-inter-rp\"})",
|
||||
"legendFormat": "reload",
|
||||
"expr": "max(inter_rp_repo_sync_overlap_total{exported_instance=\"remote200-inter-rp\",left=\"ours-rp\",right=\"routinator\",class=\"only_ours\"})",
|
||||
"legendFormat": "only ours",
|
||||
"refId": "A",
|
||||
"instant": true
|
||||
}
|
||||
@ -56,7 +56,7 @@
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"title": "231 Sync Age",
|
||||
"title": "Routinator Only Repo Count",
|
||||
"type": "stat",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
@ -70,7 +70,7 @@
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "s",
|
||||
"unit": "short",
|
||||
"decimals": 0
|
||||
},
|
||||
"overrides": []
|
||||
@ -92,8 +92,8 @@
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "max(inter_rp_sync_age_seconds{exported_instance=\"remote200-inter-rp\"})",
|
||||
"legendFormat": "sync age",
|
||||
"expr": "max(inter_rp_repo_sync_overlap_total{exported_instance=\"remote200-inter-rp\",left=\"ours-rp\",right=\"routinator\",class=\"only_routinator\"})",
|
||||
"legendFormat": "only routinator",
|
||||
"refId": "A",
|
||||
"instant": true
|
||||
}
|
||||
@ -101,7 +101,7 @@
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"title": "Parse Errors",
|
||||
"title": "Ours vs Routinator VAP Diff",
|
||||
"type": "stat",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
@ -137,8 +137,8 @@
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "max(inter_rp_parse_errors{exported_instance=\"remote200-inter-rp\"})",
|
||||
"legendFormat": "errors",
|
||||
"expr": "max(inter_rp_vaps_diff{exported_instance=\"remote200-inter-rp\",left=\"ours-rp\",right=\"routinator\"})",
|
||||
"legendFormat": "vap diff",
|
||||
"refId": "A",
|
||||
"instant": true
|
||||
}
|
||||
@ -592,7 +592,7 @@
|
||||
"uid": "Prometheus"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"h": 9,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 44
|
||||
@ -602,7 +602,48 @@
|
||||
"unit": "none",
|
||||
"decimals": 0
|
||||
},
|
||||
"overrides": []
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "uri"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "custom.width",
|
||||
"value": 760
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "class"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "custom.width",
|
||||
"value": 140
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "^(mft|crl|crt|roa|aspa)$"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "custom.align",
|
||||
"value": "right"
|
||||
},
|
||||
{
|
||||
"id": "custom.width",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"options": {
|
||||
"showHeader": true,
|
||||
@ -616,50 +657,42 @@
|
||||
"legendFormat": "{{class}} #{{rank}}",
|
||||
"refId": "A"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 15,
|
||||
"title": "Only-Ours Repo Count",
|
||||
"type": "stat",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "Prometheus"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 6,
|
||||
"x": 18,
|
||||
"y": 32
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "short",
|
||||
"decimals": 0
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto",
|
||||
"wideLayout": true
|
||||
},
|
||||
"targets": [
|
||||
],
|
||||
"transformations": [
|
||||
{
|
||||
"expr": "max(inter_rp_repo_sync_overlap_total{exported_instance=\"remote200-inter-rp\",left=\"ours-rp\",right=\"routinator\",class=\"only_ours\"})",
|
||||
"legendFormat": "only ours",
|
||||
"refId": "A",
|
||||
"instant": true
|
||||
"id": "organize",
|
||||
"options": {
|
||||
"excludeByName": {
|
||||
"Time": true,
|
||||
"Value": true,
|
||||
"__name__": true,
|
||||
"exported_instance": true,
|
||||
"instance": true,
|
||||
"job": true,
|
||||
"left": true,
|
||||
"right": true,
|
||||
"rank": true,
|
||||
"routinator_duration": true
|
||||
},
|
||||
"indexByName": {
|
||||
"class": 0,
|
||||
"uri": 1,
|
||||
"mft": 2,
|
||||
"crl": 3,
|
||||
"crt": 4,
|
||||
"roa": 5,
|
||||
"aspa": 6
|
||||
},
|
||||
"renameByName": {
|
||||
"class": "class",
|
||||
"uri": "uri",
|
||||
"mft": "mft",
|
||||
"crl": "crl",
|
||||
"crt": "crt",
|
||||
"roa": "roa",
|
||||
"aspa": "aspa"
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
@ -681,5 +714,5 @@
|
||||
"timezone": "browser",
|
||||
"title": "Ours RP vs Routinator",
|
||||
"uid": "ours-rp-inter-rp",
|
||||
"version": 3
|
||||
"version": 4
|
||||
}
|
||||
|
||||
@ -155,15 +155,44 @@ def parse_prometheus_samples(text, metric_name):
|
||||
continue
|
||||
yield parse_prom_labels(match.group(1)), value
|
||||
|
||||
def canonical_object_type(value):
    """Translate a raw object-type label into its short canonical name.

    The input is trimmed and lower-cased before lookup; a falsy input
    (``None``, ``""``) is treated as an empty string.

    Returns one of ``"mft"``, ``"crl"``, ``"crt"``, ``"roa"``, ``"aspa"``,
    or ``None`` when the label is not one of the recognised spellings.
    """
    # Every accepted spelling, keyed to the short name it collapses to.
    aliases = {
        "manifest": "mft",
        "mft": "mft",
        "crl": "crl",
        "certificate": "crt",
        "cert": "crt",
        "ca_cert": "crt",
        "router_cert": "crt",
        "ee_cert": "crt",
        "crt": "crt",
        "roa": "roa",
        "aspa": "aspa",
    }
    normalized = (value or "").strip().lower()
    return aliases.get(normalized)
|
||||
|
||||
def empty_object_counts():
    """Return a fresh dict with a zero count for every canonical object type.

    A new dict is built on each call, so callers may mutate the result
    without affecting one another.
    """
    return dict.fromkeys(("mft", "crl", "crt", "roa", "aspa"), 0)
|
||||
|
||||
def add_object_count(counts_by_uri, uri, object_type, value):
    """Add ``value`` to the per-type object count recorded for ``uri``.

    Args:
        counts_by_uri: dict mapping a URI to a per-type count dict; it is
            updated in place, and an all-zero entry is created the first
            time a URI is seen.
        uri: key to accumulate under. A falsy value makes the call a no-op.
        object_type: raw type label; it is passed through
            ``canonical_object_type`` and the call is a no-op when that
            returns ``None``.
        value: sample value to add; it is truncated with ``int()``.

    A value that cannot be converted to an integer (NaN, +/-Inf, or a
    non-numeric object) is skipped instead of raising, so one bad sample
    cannot abort the whole metrics run or leave a stray all-zero entry.
    """
    canonical = canonical_object_type(object_type)
    if not uri or canonical is None:
        return
    # Convert before touching the dict: int(nan) raises ValueError and
    # int(inf) raises OverflowError, both of which are legal sample values.
    try:
        increment = int(value)
    except (TypeError, ValueError, OverflowError):
        return
    counts = counts_by_uri.setdefault(uri, empty_object_counts())
    counts[canonical] = counts.get(canonical, 0) + increment
|
||||
|
||||
def object_count_labels(counts_by_uri, uri):
    """Build the per-type count labels for ``uri`` as strings.

    Looks ``uri`` up in ``counts_by_uri``; when it is absent, an all-zero
    count dict is used. Returns a dict keyed by ``mft``, ``crl``, ``crt``,
    ``roa`` and ``aspa`` (in that order) whose values are the integer
    counts rendered as strings.
    """
    fallback = empty_object_counts()
    per_type = counts_by_uri.get(uri, fallback)
    labels = {}
    for object_type in ("mft", "crl", "crt", "roa", "aspa"):
        labels[object_type] = str(int(per_type.get(object_type, 0)))
    return labels
|
||||
|
||||
def load_ours_repo_sets(errors):
|
||||
try:
|
||||
import urllib.request
|
||||
text = urllib.request.urlopen("http://127.0.0.1:9556/metrics", timeout=10).read().decode("utf-8", "replace")
|
||||
except Exception as exc:
|
||||
errors.append(f"ours-rp repo metrics: {exc}")
|
||||
return {"total": set(), "available": set(), "failed": set(), "info": {}}
|
||||
return {"total": set(), "available": set(), "failed": set(), "info": {}, "object_counts": {}}
|
||||
info = {}
|
||||
states = {}
|
||||
object_counts = {}
|
||||
for labels, value in parse_prometheus_samples(text, "ours_rp_repository_info"):
|
||||
uri = labels.get("uri")
|
||||
if uri:
|
||||
@ -178,6 +207,15 @@ def load_ours_repo_sets(errors):
|
||||
if not uri:
|
||||
continue
|
||||
states.setdefault(uri, {})[labels.get("terminal_state", "unknown")] = value
|
||||
for labels, value in parse_prometheus_samples(text, "ours_rp_repository_objects_by_type"):
|
||||
uri = labels.get("uri")
|
||||
if not uri:
|
||||
repo_id = labels.get("repo_id")
|
||||
for candidate_uri, candidate in info.items():
|
||||
if candidate.get("repo_id") == repo_id:
|
||||
uri = candidate_uri
|
||||
break
|
||||
add_object_count(object_counts, uri, labels.get("object_type"), value)
|
||||
total = set(info)
|
||||
available = set()
|
||||
failed = set()
|
||||
@ -188,22 +226,23 @@ def load_ours_repo_sets(errors):
|
||||
available.add(uri)
|
||||
else:
|
||||
failed.add(uri)
|
||||
return {"total": total, "available": available, "failed": failed, "info": info}
|
||||
return {"total": total, "available": available, "failed": failed, "info": info, "object_counts": object_counts}
|
||||
|
||||
def load_routinator_repo_sets(errors):
|
||||
path = PEER_ROOT / "routinator" / "routinator-metrics.prom"
|
||||
if not path.exists():
|
||||
errors.append(f"routinator repo metrics: missing {path}")
|
||||
return {"total": set(), "success": set(), "failed": set(), "duration": {}}
|
||||
return {"total": set(), "success": set(), "failed": set(), "duration": {}, "object_counts": {}}
|
||||
try:
|
||||
text = path.read_text(encoding="utf-8", errors="replace")
|
||||
except Exception as exc:
|
||||
errors.append(f"routinator repo metrics: read {path}: {exc}")
|
||||
return {"total": set(), "success": set(), "failed": set(), "duration": {}}
|
||||
return {"total": set(), "success": set(), "failed": set(), "duration": {}, "object_counts": {}}
|
||||
total = set()
|
||||
success = set()
|
||||
failed = set()
|
||||
duration = {}
|
||||
object_counts = {}
|
||||
for metric in ["routinator_rrdp_status", "routinator_rsync_status"]:
|
||||
for labels, value in parse_prometheus_samples(text, metric):
|
||||
uri = labels.get("uri")
|
||||
@ -220,7 +259,9 @@ def load_routinator_repo_sets(errors):
|
||||
uri = labels.get("uri")
|
||||
if uri:
|
||||
duration[uri] = max(duration.get(uri, 0.0), value)
|
||||
return {"total": total, "success": success, "failed": failed, "duration": duration}
|
||||
for labels, value in parse_prometheus_samples(text, "routinator_repository_objects_total"):
|
||||
add_object_count(object_counts, labels.get("uri"), labels.get("type"), value)
|
||||
return {"total": total, "success": success, "failed": failed, "duration": duration, "object_counts": object_counts}
|
||||
|
||||
def emit_repo_diff_metrics(out, errors):
|
||||
ours = load_ours_repo_sets(errors)
|
||||
@ -242,9 +283,18 @@ def emit_repo_diff_metrics(out, errors):
|
||||
out.append(metric_line("inter_rp_repo_sync_overlap_total", {**base, "class": "only_ours"}, len(only_ours)))
|
||||
out.append(metric_line("inter_rp_repo_sync_overlap_total", {**base, "class": "only_routinator"}, len(only_routinator)))
|
||||
out.append(metric_line("inter_rp_repo_sync_overlap_total", {**base, "class": "neither_available"}, len(neither)))
|
||||
for diff_class, uris in [("only_ours", only_ours), ("only_routinator", only_routinator)]:
|
||||
for diff_class, uris, counts_by_uri in [
|
||||
("only_ours", only_ours, ours["object_counts"]),
|
||||
("only_routinator", only_routinator, routinator["object_counts"]),
|
||||
]:
|
||||
for rank, uri in enumerate(uris[:50], start=1):
|
||||
labels = {**base, "class": diff_class, "rank": rank, "uri": uri}
|
||||
labels = {
|
||||
**base,
|
||||
"class": diff_class,
|
||||
"rank": rank,
|
||||
"uri": uri,
|
||||
**object_count_labels(counts_by_uri, uri),
|
||||
}
|
||||
if uri in routinator["duration"]:
|
||||
labels["routinator_duration"] = f"{routinator['duration'][uri]:.3f}"
|
||||
out.append(metric_line("inter_rp_repo_sync_diff_info", labels, 1))
|
||||
|
||||
@ -258,6 +258,7 @@ struct RepoMetrics {
|
||||
duration_seconds_avg: f64,
|
||||
phase_counts: BTreeMap<String, u64>,
|
||||
terminal_state_counts: BTreeMap<String, u64>,
|
||||
object_counts: BTreeMap<String, u64>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Default, Serialize)]
|
||||
@ -859,7 +860,8 @@ fn extract_publication_point_metrics(
|
||||
let result = json_str(object, &["result"])
|
||||
.unwrap_or("unknown")
|
||||
.to_string();
|
||||
*object_counts.entry((kind, result)).or_default() += 1;
|
||||
*object_counts.entry((kind.clone(), result)).or_default() += 1;
|
||||
*repo.object_counts.entry(kind).or_default() += 1;
|
||||
}
|
||||
}
|
||||
|
||||
@ -1531,6 +1533,21 @@ fn render_repo_metrics(writer: &mut PromWriter<'_>, instance: &str, repos: &[Rep
|
||||
*count as f64,
|
||||
);
|
||||
}
|
||||
for kind in ["manifest", "crl", "certificate", "roa", "aspa"] {
|
||||
let labels = [
|
||||
label("instance", instance),
|
||||
label("repo_id", &repo.repo_id),
|
||||
label("host", &repo.host),
|
||||
label("uri", &repo.uri),
|
||||
label("object_type", kind),
|
||||
];
|
||||
writer.gauge(
|
||||
"ours_rp_repository_objects_by_type",
|
||||
"Repository object count by object type from latest run report",
|
||||
&labels,
|
||||
repo.object_counts.get(kind).copied().unwrap_or(0) as f64,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -2298,6 +2315,8 @@ mod tests {
|
||||
assert_eq!(snapshot.repo_stats.len(), 1);
|
||||
assert!(snapshot.repo_stats[0].sync_success);
|
||||
assert_eq!(snapshot.repo_stats[0].download_bytes, 333);
|
||||
assert_eq!(snapshot.repo_stats[0].object_counts["roa"], 2);
|
||||
assert_eq!(snapshot.repo_stats[0].object_counts["manifest"], 1);
|
||||
assert_eq!(snapshot.top_pp_by_object_count[0].object_count, 2);
|
||||
assert_eq!(snapshot.cir.as_ref().unwrap().objects, 2);
|
||||
assert_eq!(snapshot.ccr.as_ref().unwrap().state_items["tas"], 1);
|
||||
@ -2305,6 +2324,10 @@ mod tests {
|
||||
assert!(metrics.contains("ours_rp_repository_info"));
|
||||
assert!(metrics.contains("ours_rp_repository_sync_success"));
|
||||
assert!(metrics.contains("ours_rp_repository_download_bytes"));
|
||||
assert!(metrics.contains("ours_rp_repository_objects_by_type"));
|
||||
assert!(metrics.contains(r#"ours_rp_repository_objects_by_type{instance="test",repo_id="#));
|
||||
assert!(metrics.contains(r#"object_type="roa"} 2"#));
|
||||
assert!(metrics.contains(r#"object_type="manifest"} 1"#));
|
||||
assert!(metrics.contains("ours_rp_large_publication_points"));
|
||||
assert!(metrics.contains("ours_rp_cir_objects"));
|
||||
assert!(metrics.contains("ours_rp_cir_objects_by_source"));
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user