update inter-rp repo sync metrics
This commit is contained in:
parent
92d184681b
commit
4e37b96aff
@ -11,7 +11,7 @@
|
|||||||
"panels": [
|
"panels": [
|
||||||
{
|
{
|
||||||
"id": 1,
|
"id": 1,
|
||||||
"title": "Metrics Reload OK",
|
"title": "Ours Only Repo Count",
|
||||||
"type": "stat",
|
"type": "stat",
|
||||||
"datasource": {
|
"datasource": {
|
||||||
"type": "prometheus",
|
"type": "prometheus",
|
||||||
@ -25,7 +25,7 @@
|
|||||||
},
|
},
|
||||||
"fieldConfig": {
|
"fieldConfig": {
|
||||||
"defaults": {
|
"defaults": {
|
||||||
"unit": "none",
|
"unit": "short",
|
||||||
"decimals": 0
|
"decimals": 0
|
||||||
},
|
},
|
||||||
"overrides": []
|
"overrides": []
|
||||||
@ -47,8 +47,8 @@
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "max(inter_rp_service_last_reload_success{exported_instance=\"remote200-inter-rp\"})",
|
"expr": "max(inter_rp_repo_sync_overlap_total{exported_instance=\"remote200-inter-rp\",left=\"ours-rp\",right=\"routinator\",class=\"only_ours\"})",
|
||||||
"legendFormat": "reload",
|
"legendFormat": "only ours",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"instant": true
|
"instant": true
|
||||||
}
|
}
|
||||||
@ -56,7 +56,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 2,
|
"id": 2,
|
||||||
"title": "231 Sync Age",
|
"title": "Routinator Only Repo Count",
|
||||||
"type": "stat",
|
"type": "stat",
|
||||||
"datasource": {
|
"datasource": {
|
||||||
"type": "prometheus",
|
"type": "prometheus",
|
||||||
@ -70,7 +70,7 @@
|
|||||||
},
|
},
|
||||||
"fieldConfig": {
|
"fieldConfig": {
|
||||||
"defaults": {
|
"defaults": {
|
||||||
"unit": "s",
|
"unit": "short",
|
||||||
"decimals": 0
|
"decimals": 0
|
||||||
},
|
},
|
||||||
"overrides": []
|
"overrides": []
|
||||||
@ -92,8 +92,8 @@
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "max(inter_rp_sync_age_seconds{exported_instance=\"remote200-inter-rp\"})",
|
"expr": "max(inter_rp_repo_sync_overlap_total{exported_instance=\"remote200-inter-rp\",left=\"ours-rp\",right=\"routinator\",class=\"only_routinator\"})",
|
||||||
"legendFormat": "sync age",
|
"legendFormat": "only routinator",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"instant": true
|
"instant": true
|
||||||
}
|
}
|
||||||
@ -101,7 +101,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 3,
|
"id": 3,
|
||||||
"title": "Parse Errors",
|
"title": "Ours vs Routinator VAP Diff",
|
||||||
"type": "stat",
|
"type": "stat",
|
||||||
"datasource": {
|
"datasource": {
|
||||||
"type": "prometheus",
|
"type": "prometheus",
|
||||||
@ -137,8 +137,8 @@
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "max(inter_rp_parse_errors{exported_instance=\"remote200-inter-rp\"})",
|
"expr": "max(inter_rp_vaps_diff{exported_instance=\"remote200-inter-rp\",left=\"ours-rp\",right=\"routinator\"})",
|
||||||
"legendFormat": "errors",
|
"legendFormat": "vap diff",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"instant": true
|
"instant": true
|
||||||
}
|
}
|
||||||
@ -592,7 +592,7 @@
|
|||||||
"uid": "Prometheus"
|
"uid": "Prometheus"
|
||||||
},
|
},
|
||||||
"gridPos": {
|
"gridPos": {
|
||||||
"h": 8,
|
"h": 9,
|
||||||
"w": 24,
|
"w": 24,
|
||||||
"x": 0,
|
"x": 0,
|
||||||
"y": 44
|
"y": 44
|
||||||
@ -602,7 +602,48 @@
|
|||||||
"unit": "none",
|
"unit": "none",
|
||||||
"decimals": 0
|
"decimals": 0
|
||||||
},
|
},
|
||||||
"overrides": []
|
"overrides": [
|
||||||
|
{
|
||||||
|
"matcher": {
|
||||||
|
"id": "byName",
|
||||||
|
"options": "uri"
|
||||||
|
},
|
||||||
|
"properties": [
|
||||||
|
{
|
||||||
|
"id": "custom.width",
|
||||||
|
"value": 760
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"matcher": {
|
||||||
|
"id": "byName",
|
||||||
|
"options": "class"
|
||||||
|
},
|
||||||
|
"properties": [
|
||||||
|
{
|
||||||
|
"id": "custom.width",
|
||||||
|
"value": 140
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"matcher": {
|
||||||
|
"id": "byRegexp",
|
||||||
|
"options": "^(mft|crl|crt|roa|aspa)$"
|
||||||
|
},
|
||||||
|
"properties": [
|
||||||
|
{
|
||||||
|
"id": "custom.align",
|
||||||
|
"value": "right"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "custom.width",
|
||||||
|
"value": 80
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
},
|
},
|
||||||
"options": {
|
"options": {
|
||||||
"showHeader": true,
|
"showHeader": true,
|
||||||
@ -616,50 +657,42 @@
|
|||||||
"legendFormat": "{{class}} #{{rank}}",
|
"legendFormat": "{{class}} #{{rank}}",
|
||||||
"refId": "A"
|
"refId": "A"
|
||||||
}
|
}
|
||||||
]
|
],
|
||||||
},
|
"transformations": [
|
||||||
{
|
|
||||||
"id": 15,
|
|
||||||
"title": "Only-Ours Repo Count",
|
|
||||||
"type": "stat",
|
|
||||||
"datasource": {
|
|
||||||
"type": "prometheus",
|
|
||||||
"uid": "Prometheus"
|
|
||||||
},
|
|
||||||
"gridPos": {
|
|
||||||
"h": 4,
|
|
||||||
"w": 6,
|
|
||||||
"x": 18,
|
|
||||||
"y": 32
|
|
||||||
},
|
|
||||||
"fieldConfig": {
|
|
||||||
"defaults": {
|
|
||||||
"unit": "short",
|
|
||||||
"decimals": 0
|
|
||||||
},
|
|
||||||
"overrides": []
|
|
||||||
},
|
|
||||||
"options": {
|
|
||||||
"colorMode": "value",
|
|
||||||
"graphMode": "area",
|
|
||||||
"justifyMode": "auto",
|
|
||||||
"orientation": "auto",
|
|
||||||
"reduceOptions": {
|
|
||||||
"calcs": [
|
|
||||||
"lastNotNull"
|
|
||||||
],
|
|
||||||
"fields": "",
|
|
||||||
"values": false
|
|
||||||
},
|
|
||||||
"textMode": "auto",
|
|
||||||
"wideLayout": true
|
|
||||||
},
|
|
||||||
"targets": [
|
|
||||||
{
|
{
|
||||||
"expr": "max(inter_rp_repo_sync_overlap_total{exported_instance=\"remote200-inter-rp\",left=\"ours-rp\",right=\"routinator\",class=\"only_ours\"})",
|
"id": "organize",
|
||||||
"legendFormat": "only ours",
|
"options": {
|
||||||
"refId": "A",
|
"excludeByName": {
|
||||||
"instant": true
|
"Time": true,
|
||||||
|
"Value": true,
|
||||||
|
"__name__": true,
|
||||||
|
"exported_instance": true,
|
||||||
|
"instance": true,
|
||||||
|
"job": true,
|
||||||
|
"left": true,
|
||||||
|
"right": true,
|
||||||
|
"rank": true,
|
||||||
|
"routinator_duration": true
|
||||||
|
},
|
||||||
|
"indexByName": {
|
||||||
|
"class": 0,
|
||||||
|
"uri": 1,
|
||||||
|
"mft": 2,
|
||||||
|
"crl": 3,
|
||||||
|
"crt": 4,
|
||||||
|
"roa": 5,
|
||||||
|
"aspa": 6
|
||||||
|
},
|
||||||
|
"renameByName": {
|
||||||
|
"class": "class",
|
||||||
|
"uri": "uri",
|
||||||
|
"mft": "mft",
|
||||||
|
"crl": "crl",
|
||||||
|
"crt": "crt",
|
||||||
|
"roa": "roa",
|
||||||
|
"aspa": "aspa"
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
@ -681,5 +714,5 @@
|
|||||||
"timezone": "browser",
|
"timezone": "browser",
|
||||||
"title": "Ours RP vs Routinator",
|
"title": "Ours RP vs Routinator",
|
||||||
"uid": "ours-rp-inter-rp",
|
"uid": "ours-rp-inter-rp",
|
||||||
"version": 3
|
"version": 4
|
||||||
}
|
}
|
||||||
|
|||||||
@ -155,15 +155,44 @@ def parse_prometheus_samples(text, metric_name):
|
|||||||
continue
|
continue
|
||||||
yield parse_prom_labels(match.group(1)), value
|
yield parse_prom_labels(match.group(1)), value
|
||||||
|
|
||||||
|
def canonical_object_type(value):
|
||||||
|
value = (value or "").strip().lower()
|
||||||
|
if value in ("manifest", "mft"):
|
||||||
|
return "mft"
|
||||||
|
if value == "crl":
|
||||||
|
return "crl"
|
||||||
|
if value in ("certificate", "cert", "ca_cert", "router_cert", "ee_cert", "crt"):
|
||||||
|
return "crt"
|
||||||
|
if value == "roa":
|
||||||
|
return "roa"
|
||||||
|
if value == "aspa":
|
||||||
|
return "aspa"
|
||||||
|
return None
|
||||||
|
|
||||||
|
def empty_object_counts():
|
||||||
|
return {"mft": 0, "crl": 0, "crt": 0, "roa": 0, "aspa": 0}
|
||||||
|
|
||||||
|
def add_object_count(counts_by_uri, uri, object_type, value):
|
||||||
|
canonical = canonical_object_type(object_type)
|
||||||
|
if not uri or canonical is None:
|
||||||
|
return
|
||||||
|
counts = counts_by_uri.setdefault(uri, empty_object_counts())
|
||||||
|
counts[canonical] = counts.get(canonical, 0) + int(value)
|
||||||
|
|
||||||
|
def object_count_labels(counts_by_uri, uri):
|
||||||
|
counts = counts_by_uri.get(uri, empty_object_counts())
|
||||||
|
return {key: str(int(counts.get(key, 0))) for key in ["mft", "crl", "crt", "roa", "aspa"]}
|
||||||
|
|
||||||
def load_ours_repo_sets(errors):
|
def load_ours_repo_sets(errors):
|
||||||
try:
|
try:
|
||||||
import urllib.request
|
import urllib.request
|
||||||
text = urllib.request.urlopen("http://127.0.0.1:9556/metrics", timeout=10).read().decode("utf-8", "replace")
|
text = urllib.request.urlopen("http://127.0.0.1:9556/metrics", timeout=10).read().decode("utf-8", "replace")
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
errors.append(f"ours-rp repo metrics: {exc}")
|
errors.append(f"ours-rp repo metrics: {exc}")
|
||||||
return {"total": set(), "available": set(), "failed": set(), "info": {}}
|
return {"total": set(), "available": set(), "failed": set(), "info": {}, "object_counts": {}}
|
||||||
info = {}
|
info = {}
|
||||||
states = {}
|
states = {}
|
||||||
|
object_counts = {}
|
||||||
for labels, value in parse_prometheus_samples(text, "ours_rp_repository_info"):
|
for labels, value in parse_prometheus_samples(text, "ours_rp_repository_info"):
|
||||||
uri = labels.get("uri")
|
uri = labels.get("uri")
|
||||||
if uri:
|
if uri:
|
||||||
@ -178,6 +207,15 @@ def load_ours_repo_sets(errors):
|
|||||||
if not uri:
|
if not uri:
|
||||||
continue
|
continue
|
||||||
states.setdefault(uri, {})[labels.get("terminal_state", "unknown")] = value
|
states.setdefault(uri, {})[labels.get("terminal_state", "unknown")] = value
|
||||||
|
for labels, value in parse_prometheus_samples(text, "ours_rp_repository_objects_by_type"):
|
||||||
|
uri = labels.get("uri")
|
||||||
|
if not uri:
|
||||||
|
repo_id = labels.get("repo_id")
|
||||||
|
for candidate_uri, candidate in info.items():
|
||||||
|
if candidate.get("repo_id") == repo_id:
|
||||||
|
uri = candidate_uri
|
||||||
|
break
|
||||||
|
add_object_count(object_counts, uri, labels.get("object_type"), value)
|
||||||
total = set(info)
|
total = set(info)
|
||||||
available = set()
|
available = set()
|
||||||
failed = set()
|
failed = set()
|
||||||
@ -188,22 +226,23 @@ def load_ours_repo_sets(errors):
|
|||||||
available.add(uri)
|
available.add(uri)
|
||||||
else:
|
else:
|
||||||
failed.add(uri)
|
failed.add(uri)
|
||||||
return {"total": total, "available": available, "failed": failed, "info": info}
|
return {"total": total, "available": available, "failed": failed, "info": info, "object_counts": object_counts}
|
||||||
|
|
||||||
def load_routinator_repo_sets(errors):
|
def load_routinator_repo_sets(errors):
|
||||||
path = PEER_ROOT / "routinator" / "routinator-metrics.prom"
|
path = PEER_ROOT / "routinator" / "routinator-metrics.prom"
|
||||||
if not path.exists():
|
if not path.exists():
|
||||||
errors.append(f"routinator repo metrics: missing {path}")
|
errors.append(f"routinator repo metrics: missing {path}")
|
||||||
return {"total": set(), "success": set(), "failed": set(), "duration": {}}
|
return {"total": set(), "success": set(), "failed": set(), "duration": {}, "object_counts": {}}
|
||||||
try:
|
try:
|
||||||
text = path.read_text(encoding="utf-8", errors="replace")
|
text = path.read_text(encoding="utf-8", errors="replace")
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
errors.append(f"routinator repo metrics: read {path}: {exc}")
|
errors.append(f"routinator repo metrics: read {path}: {exc}")
|
||||||
return {"total": set(), "success": set(), "failed": set(), "duration": {}}
|
return {"total": set(), "success": set(), "failed": set(), "duration": {}, "object_counts": {}}
|
||||||
total = set()
|
total = set()
|
||||||
success = set()
|
success = set()
|
||||||
failed = set()
|
failed = set()
|
||||||
duration = {}
|
duration = {}
|
||||||
|
object_counts = {}
|
||||||
for metric in ["routinator_rrdp_status", "routinator_rsync_status"]:
|
for metric in ["routinator_rrdp_status", "routinator_rsync_status"]:
|
||||||
for labels, value in parse_prometheus_samples(text, metric):
|
for labels, value in parse_prometheus_samples(text, metric):
|
||||||
uri = labels.get("uri")
|
uri = labels.get("uri")
|
||||||
@ -220,7 +259,9 @@ def load_routinator_repo_sets(errors):
|
|||||||
uri = labels.get("uri")
|
uri = labels.get("uri")
|
||||||
if uri:
|
if uri:
|
||||||
duration[uri] = max(duration.get(uri, 0.0), value)
|
duration[uri] = max(duration.get(uri, 0.0), value)
|
||||||
return {"total": total, "success": success, "failed": failed, "duration": duration}
|
for labels, value in parse_prometheus_samples(text, "routinator_repository_objects_total"):
|
||||||
|
add_object_count(object_counts, labels.get("uri"), labels.get("type"), value)
|
||||||
|
return {"total": total, "success": success, "failed": failed, "duration": duration, "object_counts": object_counts}
|
||||||
|
|
||||||
def emit_repo_diff_metrics(out, errors):
|
def emit_repo_diff_metrics(out, errors):
|
||||||
ours = load_ours_repo_sets(errors)
|
ours = load_ours_repo_sets(errors)
|
||||||
@ -242,9 +283,18 @@ def emit_repo_diff_metrics(out, errors):
|
|||||||
out.append(metric_line("inter_rp_repo_sync_overlap_total", {**base, "class": "only_ours"}, len(only_ours)))
|
out.append(metric_line("inter_rp_repo_sync_overlap_total", {**base, "class": "only_ours"}, len(only_ours)))
|
||||||
out.append(metric_line("inter_rp_repo_sync_overlap_total", {**base, "class": "only_routinator"}, len(only_routinator)))
|
out.append(metric_line("inter_rp_repo_sync_overlap_total", {**base, "class": "only_routinator"}, len(only_routinator)))
|
||||||
out.append(metric_line("inter_rp_repo_sync_overlap_total", {**base, "class": "neither_available"}, len(neither)))
|
out.append(metric_line("inter_rp_repo_sync_overlap_total", {**base, "class": "neither_available"}, len(neither)))
|
||||||
for diff_class, uris in [("only_ours", only_ours), ("only_routinator", only_routinator)]:
|
for diff_class, uris, counts_by_uri in [
|
||||||
|
("only_ours", only_ours, ours["object_counts"]),
|
||||||
|
("only_routinator", only_routinator, routinator["object_counts"]),
|
||||||
|
]:
|
||||||
for rank, uri in enumerate(uris[:50], start=1):
|
for rank, uri in enumerate(uris[:50], start=1):
|
||||||
labels = {**base, "class": diff_class, "rank": rank, "uri": uri}
|
labels = {
|
||||||
|
**base,
|
||||||
|
"class": diff_class,
|
||||||
|
"rank": rank,
|
||||||
|
"uri": uri,
|
||||||
|
**object_count_labels(counts_by_uri, uri),
|
||||||
|
}
|
||||||
if uri in routinator["duration"]:
|
if uri in routinator["duration"]:
|
||||||
labels["routinator_duration"] = f"{routinator['duration'][uri]:.3f}"
|
labels["routinator_duration"] = f"{routinator['duration'][uri]:.3f}"
|
||||||
out.append(metric_line("inter_rp_repo_sync_diff_info", labels, 1))
|
out.append(metric_line("inter_rp_repo_sync_diff_info", labels, 1))
|
||||||
|
|||||||
@ -258,6 +258,7 @@ struct RepoMetrics {
|
|||||||
duration_seconds_avg: f64,
|
duration_seconds_avg: f64,
|
||||||
phase_counts: BTreeMap<String, u64>,
|
phase_counts: BTreeMap<String, u64>,
|
||||||
terminal_state_counts: BTreeMap<String, u64>,
|
terminal_state_counts: BTreeMap<String, u64>,
|
||||||
|
object_counts: BTreeMap<String, u64>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug, Default, Serialize)]
|
#[derive(Clone, Debug, Default, Serialize)]
|
||||||
@ -859,7 +860,8 @@ fn extract_publication_point_metrics(
|
|||||||
let result = json_str(object, &["result"])
|
let result = json_str(object, &["result"])
|
||||||
.unwrap_or("unknown")
|
.unwrap_or("unknown")
|
||||||
.to_string();
|
.to_string();
|
||||||
*object_counts.entry((kind, result)).or_default() += 1;
|
*object_counts.entry((kind.clone(), result)).or_default() += 1;
|
||||||
|
*repo.object_counts.entry(kind).or_default() += 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1531,6 +1533,21 @@ fn render_repo_metrics(writer: &mut PromWriter<'_>, instance: &str, repos: &[Rep
|
|||||||
*count as f64,
|
*count as f64,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
for kind in ["manifest", "crl", "certificate", "roa", "aspa"] {
|
||||||
|
let labels = [
|
||||||
|
label("instance", instance),
|
||||||
|
label("repo_id", &repo.repo_id),
|
||||||
|
label("host", &repo.host),
|
||||||
|
label("uri", &repo.uri),
|
||||||
|
label("object_type", kind),
|
||||||
|
];
|
||||||
|
writer.gauge(
|
||||||
|
"ours_rp_repository_objects_by_type",
|
||||||
|
"Repository object count by object type from latest run report",
|
||||||
|
&labels,
|
||||||
|
repo.object_counts.get(kind).copied().unwrap_or(0) as f64,
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2298,6 +2315,8 @@ mod tests {
|
|||||||
assert_eq!(snapshot.repo_stats.len(), 1);
|
assert_eq!(snapshot.repo_stats.len(), 1);
|
||||||
assert!(snapshot.repo_stats[0].sync_success);
|
assert!(snapshot.repo_stats[0].sync_success);
|
||||||
assert_eq!(snapshot.repo_stats[0].download_bytes, 333);
|
assert_eq!(snapshot.repo_stats[0].download_bytes, 333);
|
||||||
|
assert_eq!(snapshot.repo_stats[0].object_counts["roa"], 2);
|
||||||
|
assert_eq!(snapshot.repo_stats[0].object_counts["manifest"], 1);
|
||||||
assert_eq!(snapshot.top_pp_by_object_count[0].object_count, 2);
|
assert_eq!(snapshot.top_pp_by_object_count[0].object_count, 2);
|
||||||
assert_eq!(snapshot.cir.as_ref().unwrap().objects, 2);
|
assert_eq!(snapshot.cir.as_ref().unwrap().objects, 2);
|
||||||
assert_eq!(snapshot.ccr.as_ref().unwrap().state_items["tas"], 1);
|
assert_eq!(snapshot.ccr.as_ref().unwrap().state_items["tas"], 1);
|
||||||
@ -2305,6 +2324,10 @@ mod tests {
|
|||||||
assert!(metrics.contains("ours_rp_repository_info"));
|
assert!(metrics.contains("ours_rp_repository_info"));
|
||||||
assert!(metrics.contains("ours_rp_repository_sync_success"));
|
assert!(metrics.contains("ours_rp_repository_sync_success"));
|
||||||
assert!(metrics.contains("ours_rp_repository_download_bytes"));
|
assert!(metrics.contains("ours_rp_repository_download_bytes"));
|
||||||
|
assert!(metrics.contains("ours_rp_repository_objects_by_type"));
|
||||||
|
assert!(metrics.contains(r#"ours_rp_repository_objects_by_type{instance="test",repo_id="#));
|
||||||
|
assert!(metrics.contains(r#"object_type="roa"} 2"#));
|
||||||
|
assert!(metrics.contains(r#"object_type="manifest"} 1"#));
|
||||||
assert!(metrics.contains("ours_rp_large_publication_points"));
|
assert!(metrics.contains("ours_rp_large_publication_points"));
|
||||||
assert!(metrics.contains("ours_rp_cir_objects"));
|
assert!(metrics.contains("ours_rp_cir_objects"));
|
||||||
assert!(metrics.contains("ours_rp_cir_objects_by_source"));
|
assert!(metrics.contains("ours_rp_cir_objects_by_source"));
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user