20260623 迁移231并固定相位调度
This commit is contained in:
parent
4e37b96aff
commit
6ab044480a
@ -47,7 +47,7 @@
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "max(inter_rp_repo_sync_overlap_total{exported_instance=\"remote200-inter-rp\",left=\"ours-rp\",right=\"routinator\",class=\"only_ours\"})",
|
||||
"expr": "max(inter_rp_repo_sync_overlap_total{exported_instance=~\".*inter-rp\",left=\"ours-rp\",right=\"routinator\",class=\"only_ours\"})",
|
||||
"legendFormat": "only ours",
|
||||
"refId": "A",
|
||||
"instant": true
|
||||
@ -92,7 +92,7 @@
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "max(inter_rp_repo_sync_overlap_total{exported_instance=\"remote200-inter-rp\",left=\"ours-rp\",right=\"routinator\",class=\"only_routinator\"})",
|
||||
"expr": "max(inter_rp_repo_sync_overlap_total{exported_instance=~\".*inter-rp\",left=\"ours-rp\",right=\"routinator\",class=\"only_routinator\"})",
|
||||
"legendFormat": "only routinator",
|
||||
"refId": "A",
|
||||
"instant": true
|
||||
@ -137,7 +137,7 @@
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "max(inter_rp_vaps_diff{exported_instance=\"remote200-inter-rp\",left=\"ours-rp\",right=\"routinator\"})",
|
||||
"expr": "max(inter_rp_vaps_diff{exported_instance=~\".*inter-rp\",left=\"ours-rp\",right=\"routinator\"})",
|
||||
"legendFormat": "vap diff",
|
||||
"refId": "A",
|
||||
"instant": true
|
||||
@ -182,7 +182,7 @@
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "max(inter_rp_vrps_diff{exported_instance=\"remote200-inter-rp\",left=\"ours-rp\",right=\"routinator\"})",
|
||||
"expr": "max(inter_rp_vrps_diff{exported_instance=~\".*inter-rp\",left=\"ours-rp\",right=\"routinator\"})",
|
||||
"legendFormat": "vrp diff",
|
||||
"refId": "A",
|
||||
"instant": true
|
||||
@ -226,8 +226,8 @@
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "inter_rp_run_wall_seconds{exported_instance=\"remote200-inter-rp\",exported_rp=~\"ours-rp|routinator\"}",
|
||||
"legendFormat": "{{exported_rp}}",
|
||||
"expr": "inter_rp_run_wall_seconds{exported_instance=~\".*inter-rp\",rp=~\"ours-rp|routinator\"}",
|
||||
"legendFormat": "{{rp}}",
|
||||
"refId": "A"
|
||||
}
|
||||
]
|
||||
@ -269,8 +269,8 @@
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "inter_rp_run_max_rss_bytes{exported_instance=\"remote200-inter-rp\",kind=\"aggregate_peak\",exported_rp=~\"ours-rp|routinator\"}",
|
||||
"legendFormat": "{{exported_rp}}",
|
||||
"expr": "inter_rp_run_max_rss_bytes{exported_instance=~\".*inter-rp\",kind=\"aggregate_peak\",rp=~\"ours-rp|routinator\"}",
|
||||
"legendFormat": "{{rp}}",
|
||||
"refId": "A"
|
||||
}
|
||||
]
|
||||
@ -312,8 +312,8 @@
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "inter_rp_vrps{exported_instance=\"remote200-inter-rp\",exported_rp=~\"ours-rp|routinator\"}",
|
||||
"legendFormat": "{{exported_rp}}",
|
||||
"expr": "inter_rp_vrps{exported_instance=~\".*inter-rp\",rp=~\"ours-rp|routinator\"}",
|
||||
"legendFormat": "{{rp}}",
|
||||
"refId": "A"
|
||||
}
|
||||
]
|
||||
@ -355,8 +355,8 @@
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "inter_rp_vaps{exported_instance=\"remote200-inter-rp\",exported_rp=~\"ours-rp|routinator\"}",
|
||||
"legendFormat": "{{exported_rp}}",
|
||||
"expr": "inter_rp_vaps{exported_instance=~\".*inter-rp\",rp=~\"ours-rp|routinator\"}",
|
||||
"legendFormat": "{{rp}}",
|
||||
"refId": "A"
|
||||
}
|
||||
]
|
||||
@ -388,24 +388,24 @@
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "inter_rp_run_seq{exported_instance=\"remote200-inter-rp\",exported_rp=~\"ours-rp|routinator\"}",
|
||||
"expr": "inter_rp_run_seq{exported_instance=~\".*inter-rp\",rp=~\"ours-rp|routinator\"}",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"legendFormat": "{{exported_rp}} seq",
|
||||
"legendFormat": "{{rp}} seq",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "inter_rp_run_success{exported_instance=\"remote200-inter-rp\",exported_rp=~\"ours-rp|routinator\"}",
|
||||
"expr": "inter_rp_run_success{exported_instance=~\".*inter-rp\",rp=~\"ours-rp|routinator\"}",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"legendFormat": "{{exported_rp}} success",
|
||||
"legendFormat": "{{rp}} success",
|
||||
"refId": "B"
|
||||
},
|
||||
{
|
||||
"expr": "inter_rp_run_wall_seconds{exported_instance=\"remote200-inter-rp\",exported_rp=~\"ours-rp|routinator\"}",
|
||||
"expr": "inter_rp_run_wall_seconds{exported_instance=~\".*inter-rp\",rp=~\"ours-rp|routinator\"}",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"legendFormat": "{{exported_rp}} wall",
|
||||
"legendFormat": "{{rp}} wall",
|
||||
"refId": "C"
|
||||
}
|
||||
]
|
||||
@ -437,14 +437,14 @@
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "inter_rp_vrps_diff{exported_instance=\"remote200-inter-rp\",left=\"ours-rp\",right=\"routinator\"}",
|
||||
"expr": "inter_rp_vrps_diff{exported_instance=~\".*inter-rp\",left=\"ours-rp\",right=\"routinator\"}",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"legendFormat": "vrps ours-rp-routinator",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "inter_rp_vaps_diff{exported_instance=\"remote200-inter-rp\",left=\"ours-rp\",right=\"routinator\"}",
|
||||
"expr": "inter_rp_vaps_diff{exported_instance=~\".*inter-rp\",left=\"ours-rp\",right=\"routinator\"}",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"legendFormat": "vaps ours-rp-routinator",
|
||||
@ -489,8 +489,8 @@
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "inter_rp_artifact_age_seconds{exported_instance=\"remote200-inter-rp\",exported_rp=~\"ours-rp|routinator\"}",
|
||||
"legendFormat": "{{exported_rp}}",
|
||||
"expr": "inter_rp_artifact_age_seconds{exported_instance=~\".*inter-rp\",rp=~\"ours-rp|routinator\"}",
|
||||
"legendFormat": "{{rp}}",
|
||||
"refId": "A"
|
||||
}
|
||||
]
|
||||
@ -533,8 +533,8 @@
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "inter_rp_repo_sync_total{exported_instance=\"remote200-inter-rp\",state=~\"available|failed\",exported_rp=~\"ours-rp|routinator\"}",
|
||||
"legendFormat": "{{exported_rp}} {{state}}",
|
||||
"expr": "inter_rp_repo_sync_total{exported_instance=~\".*inter-rp\",state=~\"available|failed\",rp=~\"ours-rp|routinator\"}",
|
||||
"legendFormat": "{{rp}} {{state}}",
|
||||
"refId": "A"
|
||||
}
|
||||
]
|
||||
@ -577,7 +577,7 @@
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "inter_rp_repo_sync_overlap_total{exported_instance=\"remote200-inter-rp\",left=\"ours-rp\",right=\"routinator\"}",
|
||||
"expr": "inter_rp_repo_sync_overlap_total{exported_instance=~\".*inter-rp\",left=\"ours-rp\",right=\"routinator\"}",
|
||||
"legendFormat": "{{class}}",
|
||||
"refId": "A"
|
||||
}
|
||||
@ -651,7 +651,7 @@
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "inter_rp_repo_sync_diff_info{exported_instance=\"remote200-inter-rp\",left=\"ours-rp\",right=\"routinator\"}",
|
||||
"expr": "inter_rp_repo_sync_diff_info{exported_instance=~\".*inter-rp\",left=\"ours-rp\",right=\"routinator\"}",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"legendFormat": "{{class}} #{{rank}}",
|
||||
|
||||
@ -186,6 +186,205 @@
|
||||
"title": "Publication Points",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "Prometheus"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"decimals": 0,
|
||||
"unit": "none"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"x": 0,
|
||||
"y": 4,
|
||||
"w": 6,
|
||||
"h": 4
|
||||
},
|
||||
"id": 9,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto",
|
||||
"wideLayout": true
|
||||
},
|
||||
"pluginVersion": "11.3.1",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "ours_rp_run_sequence",
|
||||
"legendFormat": "seq",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Latest Run Sequence",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "Prometheus"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"decimals": 2,
|
||||
"unit": "percent",
|
||||
"min": 0,
|
||||
"max": 100,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "red",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "orange",
|
||||
"value": 90
|
||||
},
|
||||
{
|
||||
"color": "green",
|
||||
"value": 98
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"x": 6,
|
||||
"y": 4,
|
||||
"w": 6,
|
||||
"h": 4
|
||||
},
|
||||
"id": 10,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto",
|
||||
"wideLayout": true
|
||||
},
|
||||
"pluginVersion": "11.3.1",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "100 * sum by (job, instance, exported_instance) (ours_rp_repo_terminal_state_count{terminal_state=\"publication_point_cache\"}) / sum by (job, instance, exported_instance) (ours_rp_publication_points)",
|
||||
"legendFormat": "PP cache hit ratio",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Latest PP Cache Hit Ratio",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "Prometheus"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "none",
|
||||
"decimals": 0
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"x": 12,
|
||||
"y": 4,
|
||||
"w": 6,
|
||||
"h": 4
|
||||
},
|
||||
"id": 11,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto",
|
||||
"wideLayout": true
|
||||
},
|
||||
"pluginVersion": "11.3.1",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "ours_rp_vrps{kind=\"total\"}",
|
||||
"legendFormat": "VRPs raw",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "VRPs",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "Prometheus"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "none",
|
||||
"decimals": 0
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"x": 18,
|
||||
"y": 4,
|
||||
"w": 6,
|
||||
"h": 4
|
||||
},
|
||||
"id": 12,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto",
|
||||
"wideLayout": true
|
||||
},
|
||||
"pluginVersion": "11.3.1",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "ours_rp_vaps",
|
||||
"legendFormat": "VAPs",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "VAPs",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
@ -334,186 +533,6 @@
|
||||
"title": "Large Publication Points by Object Count",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "Prometheus"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"decimals": 0,
|
||||
"unit": "none"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"x": 0,
|
||||
"y": 4,
|
||||
"w": 6,
|
||||
"h": 4
|
||||
},
|
||||
"id": 9,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto",
|
||||
"wideLayout": true
|
||||
},
|
||||
"pluginVersion": "11.3.1",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "ours_rp_run_sequence",
|
||||
"legendFormat": "seq",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Latest Run Sequence",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "Prometheus"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"decimals": 0,
|
||||
"unit": "none"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"x": 6,
|
||||
"y": 4,
|
||||
"w": 6,
|
||||
"h": 4
|
||||
},
|
||||
"id": 10,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto",
|
||||
"wideLayout": true
|
||||
},
|
||||
"pluginVersion": "11.3.1",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "ours_rp_run_success",
|
||||
"legendFormat": "success",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Latest Run Success",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "Prometheus"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "none",
|
||||
"decimals": 0
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"x": 12,
|
||||
"y": 4,
|
||||
"w": 6,
|
||||
"h": 4
|
||||
},
|
||||
"id": 11,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto",
|
||||
"wideLayout": true
|
||||
},
|
||||
"pluginVersion": "11.3.1",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "ours_rp_vrps{kind=\"total\"}",
|
||||
"legendFormat": "VRPs raw",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "VRPs",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "Prometheus"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "none",
|
||||
"decimals": 0
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"x": 18,
|
||||
"y": 4,
|
||||
"w": 6,
|
||||
"h": 4
|
||||
},
|
||||
"id": 12,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto",
|
||||
"wideLayout": true
|
||||
},
|
||||
"pluginVersion": "11.3.1",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "ours_rp_vaps",
|
||||
"legendFormat": "VAPs",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "VAPs",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
@ -586,7 +605,7 @@
|
||||
"gridPos": {
|
||||
"x": 0,
|
||||
"y": 24,
|
||||
"w": 24,
|
||||
"w": 12,
|
||||
"h": 8
|
||||
},
|
||||
"id": 14,
|
||||
@ -615,6 +634,70 @@
|
||||
"title": "Max RSS Over Time",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "Prometheus"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "percent",
|
||||
"decimals": 2,
|
||||
"min": 0,
|
||||
"max": 100,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "red",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "orange",
|
||||
"value": 90
|
||||
},
|
||||
{
|
||||
"color": "green",
|
||||
"value": 98
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"x": 12,
|
||||
"y": 24,
|
||||
"w": 12,
|
||||
"h": 8
|
||||
},
|
||||
"id": 17,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"lastNotNull",
|
||||
"min",
|
||||
"max"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "100 * sum by (job, instance, exported_instance) (ours_rp_repo_terminal_state_count{terminal_state=\"publication_point_cache\"}) / sum by (job, instance, exported_instance) (ours_rp_publication_points)",
|
||||
"legendFormat": "PP cache hit ratio",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "PP Cache Hit Ratio",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
|
||||
@ -243,6 +243,7 @@ def load_routinator_repo_sets(errors):
|
||||
failed = set()
|
||||
duration = {}
|
||||
object_counts = {}
|
||||
publication_point_states = {}
|
||||
for metric in ["routinator_rrdp_status", "routinator_rsync_status"]:
|
||||
for labels, value in parse_prometheus_samples(text, metric):
|
||||
uri = labels.get("uri")
|
||||
@ -254,6 +255,20 @@ def load_routinator_repo_sets(errors):
|
||||
success.add(uri)
|
||||
else:
|
||||
failed.add(uri)
|
||||
for labels, value in parse_prometheus_samples(text, "routinator_repository_publication_points_total"):
|
||||
uri = labels.get("uri")
|
||||
state = labels.get("state", "unknown")
|
||||
if not uri:
|
||||
continue
|
||||
total.add(uri)
|
||||
publication_point_states.setdefault(uri, {})[state] = publication_point_states.setdefault(uri, {}).get(state, 0.0) + value
|
||||
for uri, states in publication_point_states.items():
|
||||
valid_count = states.get("valid", 0.0)
|
||||
non_valid_count = sum(value for state, value in states.items() if state != "valid")
|
||||
if valid_count > 0:
|
||||
success.add(uri)
|
||||
elif non_valid_count > 0:
|
||||
failed.add(uri)
|
||||
for metric in ["routinator_rrdp_duration", "routinator_rsync_duration"]:
|
||||
for labels, value in parse_prometheus_samples(text, metric):
|
||||
uri = labels.get("uri")
|
||||
@ -261,6 +276,7 @@ def load_routinator_repo_sets(errors):
|
||||
duration[uri] = max(duration.get(uri, 0.0), value)
|
||||
for labels, value in parse_prometheus_samples(text, "routinator_repository_objects_total"):
|
||||
add_object_count(object_counts, labels.get("uri"), labels.get("type"), value)
|
||||
failed = failed - success
|
||||
return {"total": total, "success": success, "failed": failed, "duration": duration, "object_counts": object_counts}
|
||||
|
||||
def emit_repo_diff_metrics(out, errors):
|
||||
|
||||
@ -87,6 +87,7 @@ mkdir -p "$STAGE_DIR/bin" "$STAGE_DIR/fixtures" "$STAGE_DIR/scripts" "$STAGE_DIR
|
||||
|
||||
install -m 0755 "$SCRIPT_DIR/run_soak.sh" "$STAGE_DIR/run_soak.sh"
|
||||
install -m 0755 "$SCRIPT_DIR/run_24h_soak_with_metrics.sh" "$STAGE_DIR/run_24h_soak_with_metrics.sh"
|
||||
install -m 0755 "$SCRIPT_DIR/fixed_phase_loop.sh" "$STAGE_DIR/scripts/soak/fixed_phase_loop.sh"
|
||||
install -m 0755 "$SCRIPT_DIR/hourly_soak_report.py" "$STAGE_DIR/scripts/soak/hourly_soak_report.py"
|
||||
install -m 0644 "$SCRIPT_DIR/portable-soak.env.example" "$STAGE_DIR/.env"
|
||||
install -m 0644 "$SCRIPT_DIR/portable-soak.env.example" "$STAGE_DIR/portable-soak.env.example"
|
||||
|
||||
126
scripts/soak/fixed_phase_loop.sh
Normal file
126
scripts/soak/fixed_phase_loop.sh
Normal file
@ -0,0 +1,126 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
NAME="fixed-phase"
|
||||
CYCLE_SECS="${PHASE_CYCLE_SECS:-900}"
|
||||
OFFSET_SECS="${PHASE_OFFSET_SECS:-0}"
|
||||
LOCK_FILE="${RPKI_HEAVY_LOCK:-/var/lock/rpki-heavy-run.lock}"
|
||||
LOCK_WAIT_SECS="${LOCK_WAIT_SECS:-30}"
|
||||
|
||||
# Print the command-line help text to stdout, one line per printf argument.
usage() {
  printf '%s\n' \
    'Usage:' \
    'fixed_phase_loop.sh [--name <name>] [--cycle-secs <seconds>] [--offset-secs <seconds>]' \
    '[--lock-file <path>] [--lock-wait-secs <seconds>] -- <command> [args...]' \
    '' \
    'Runs one command at fixed wall-clock phases. Missed phases are skipped rather than caught up,' \
    'which keeps independent RP jobs from drifting into each other. A shared flock protects against' \
    'unexpected overruns.'
}
|
||||
|
||||
# Report a fatal error on stderr (prefixed with "error: ") and terminate the
# script with exit status 2.
# Arguments: $* - message text
die() {
  printf 'error: %s\n' "$*" >&2
  exit 2
}
|
||||
|
||||
# Succeed (status 0) when $1 is a non-empty string made only of decimal
# digits; fail (status 1) otherwise.
is_non_negative_int() {
  case "$1" in
    '' | *[!0-9]*) return 1 ;;
    *) return 0 ;;
  esac
}
|
||||
|
||||
# Consume leading options from the positional parameters. Each value-taking
# option shifts once to reach its value; the shared shift at the bottom of the
# loop then drops that value (or a flag). "--" ends option parsing and leaves
# the command to run in "$@".
while (( $# > 0 )); do
  case "$1" in
    --name) shift; NAME="${1:?--name requires a value}" ;;
    --cycle-secs) shift; CYCLE_SECS="${1:?--cycle-secs requires a value}" ;;
    --offset-secs) shift; OFFSET_SECS="${1:?--offset-secs requires a value}" ;;
    --lock-file) shift; LOCK_FILE="${1:?--lock-file requires a value}" ;;
    --lock-wait-secs) shift; LOCK_WAIT_SECS="${1:?--lock-wait-secs requires a value}" ;;
    -h | --help) usage; exit 0 ;;
    --) shift; break ;;
    *) die "unknown argument: $1" ;;
  esac
  shift
done
|
||||
|
||||
# Validate the parsed settings before entering the scheduling loop.
[[ $# -gt 0 ]] || die "missing command after --"
is_non_negative_int "$CYCLE_SECS" || die "--cycle-secs must be a non-negative integer"
is_non_negative_int "$OFFSET_SECS" || die "--offset-secs must be a non-negative integer"
is_non_negative_int "$LOCK_WAIT_SECS" || die "--lock-wait-secs must be a non-negative integer"

# The digit check above accepts leading zeros, but bash arithmetic reads a
# leading "0" as octal ("0100" -> 64, "0900" -> arithmetic error). Force
# base 10 so the values mean what the operator typed everywhere below.
CYCLE_SECS=$((10#$CYCLE_SECS))
OFFSET_SECS=$((10#$OFFSET_SECS))
LOCK_WAIT_SECS=$((10#$LOCK_WAIT_SECS))

(( CYCLE_SECS > 0 )) || die "--cycle-secs must be > 0"
(( OFFSET_SECS < CYCLE_SECS )) || die "--offset-secs must be < --cycle-secs"

# flock needs the directory that holds the lock file to exist.
mkdir -p -- "$(dirname -- "$LOCK_FILE")"
|
||||
|
||||
# Print the current time in UTC as YYYY-MM-DDTHH:MM:SSZ.
timestamp_utc() {
  date -u '+%FT%TZ'
}
|
||||
|
||||
# Print the epoch-seconds value in $1 as a UTC timestamp
# (YYYY-MM-DDTHH:MM:SSZ). Relies on the "@<epoch>" date-string form of date -d.
format_epoch_utc() {
  date -u --date="@${1}" '+%FT%TZ'
}
|
||||
|
||||
LAST_TARGET_EPOCH=-1
|
||||
|
||||
# Print the epoch second of the next phase boundary at or after $1.
# Boundaries are the instants t where (t - OFFSET_SECS) is a multiple of
# CYCLE_SECS; when $1 is already on a boundary it is returned unchanged.
# Globals:   CYCLE_SECS, OFFSET_SECS (read)
# Arguments: $1 - current time in epoch seconds
next_phase_epoch() {
  local current="$1"
  # Position inside the current cycle, normalised to [0, CYCLE_SECS) because
  # bash "%" keeps the sign of a negative dividend.
  local into_cycle=$(( ((current - OFFSET_SECS) % CYCLE_SECS + CYCLE_SECS) % CYCLE_SECS ))
  # Seconds left until the boundary; the outer modulo maps "a full cycle" to 0.
  local until_boundary=$(( (CYCLE_SECS - into_cycle) % CYCLE_SECS ))
  printf '%s\n' "$((current + until_boundary))"
}
|
||||
|
||||
# Write one timestamped, job-tagged event line to stderr.
log_phase_event() {
  echo "[$(timestamp_utc)] $NAME $1" >&2
}

# Scheduling loop: sleep until the next phase boundary, then run the command
# under the shared lock. Runs until the script is killed.
while :; do
  now_epoch="$(date +%s)"
  target_epoch="$(next_phase_epoch "$now_epoch")"
  # Never schedule the same (or an earlier) boundary twice; move one full
  # cycle past the previous target instead.
  (( target_epoch > LAST_TARGET_EPOCH )) || target_epoch=$((LAST_TARGET_EPOCH + CYCLE_SECS))
  LAST_TARGET_EPOCH="$target_epoch"
  sleep_secs=$((target_epoch - now_epoch))
  log_phase_event "next_phase=$(format_epoch_utc "$target_epoch") sleep=${sleep_secs}s cycle=${CYCLE_SECS}s offset=${OFFSET_SECS}s"
  (( sleep_secs <= 0 )) || sleep "$sleep_secs"

  started_epoch="$(date +%s)"
  log_phase_event "phase_start target=$(format_epoch_utc "$target_epoch") lock=$LOCK_FILE wait=${LOCK_WAIT_SECS}s"
  # Capture the status without tripping errexit: a failed command or a lock
  # that could not be acquired within the wait window must not end the loop.
  code=0
  flock -w "$LOCK_WAIT_SECS" "$LOCK_FILE" "$@" || code=$?
  ended_epoch="$(date +%s)"
  elapsed_secs=$((ended_epoch - started_epoch))
  case "$code" in
    0) log_phase_event "phase_done exit=0 elapsed=${elapsed_secs}s" ;;
    *) log_phase_event "phase_done exit=$code elapsed=${elapsed_secs}s skipped_or_failed=1" ;;
  esac
done
|
||||
@ -17,6 +17,17 @@ STOP_AFTER_SECS=0
|
||||
# 示例:RIRS=apnic,arin 或 RIRS=afrinic,apnic,arin,lacnic,ripe
|
||||
RIRS=afrinic,apnic,arin,lacnic,ripe
|
||||
|
||||
# TAL/TA 输入模式。
|
||||
# file-with-ta:使用 package 内置 fixtures/tal + fixtures/ta,完全离线固定输入。
|
||||
# file-live-ta:使用 package 内置 fixtures/tal;每轮后台 best-effort 刷新 TA 到 state/live-ta,
|
||||
# 子进程不等待刷新,直接使用当前已有的 state/live-ta,首次缺失时从 fixtures/ta 初始化。
|
||||
# url:直接把 TAL URL 传给子进程,由子进程处理 TAL/TA 获取。
|
||||
TAL_INPUT_MODE=file-with-ta
|
||||
|
||||
# file-live-ta 后台刷新 TA 的 curl 超时配置。刷新失败只写日志,不阻断本轮 run。
|
||||
LIVE_TA_REFRESH_CONNECT_TIMEOUT_SECS=15
|
||||
LIVE_TA_REFRESH_MAX_TIME_SECS=120
|
||||
|
||||
# 运行根目录。默认使用 package 根目录;如需把产物写到独立数据盘,可改成绝对路径。
|
||||
RUN_ROOT="${PACKAGE_ROOT}"
|
||||
|
||||
|
||||
@ -43,6 +43,9 @@ META_DIR="${META_DIR:-$STATE_ROOT/meta}"
|
||||
TMP_DIR="${TMP_DIR:-$RUN_ROOT/tmp}"
|
||||
RSYNC_MIRROR_ROOT="${RSYNC_MIRROR_ROOT:-$STATE_ROOT/rsync-mirror}"
|
||||
INVALID_ROOT="$STATE_ROOT/invalid"
|
||||
LIVE_TA_REFRESH_DIR="${LIVE_TA_REFRESH_DIR:-$META_DIR/live-ta-refresh}"
|
||||
LIVE_TA_REFRESH_CONNECT_TIMEOUT_SECS="${LIVE_TA_REFRESH_CONNECT_TIMEOUT_SECS:-15}"
|
||||
LIVE_TA_REFRESH_MAX_TIME_SECS="${LIVE_TA_REFRESH_MAX_TIME_SECS:-120}"
|
||||
|
||||
RPKI_BIN="$BIN_DIR/rpki"
|
||||
RPKI_DAEMON_BIN="$BIN_DIR/rpki_daemon"
|
||||
@ -194,22 +197,129 @@ live_ta_file_for_rir() {
|
||||
printf '%s' "$STATE_ROOT/live-ta/$(basename "$(tal_file_for_rir "$1")" .tal).cer"
|
||||
}
|
||||
|
||||
# Print (without a trailing newline) the path of the PID file used to track
# the background TA refresh for the RIR named in $1.
# Globals: LIVE_TA_REFRESH_DIR (read)
live_ta_refresh_pid_file_for_rir() {
  printf '%s/%s.pid' "$LIVE_TA_REFRESH_DIR" "$1"
}
|
||||
|
||||
# Best-effort download of the trust-anchor certificate for one RIR into the
# live-TA location. Every failure is logged and reported through the return
# status; the function never calls die, so a failed refresh cannot abort the
# run that started it.
# Globals:   LIVE_TA_REFRESH_CONNECT_TIMEOUT_SECS, LIVE_TA_REFRESH_MAX_TIME_SECS (read)
# Arguments: $1 - RIR name
#            $2 - run id used in log lines (default: "manual")
#            $3 - optional log file; when given, all output of the refresh is
#                 appended there instead of going to the caller's stdout/stderr
# Returns:   0 when the live TA file was replaced, 1 otherwise
refresh_live_ta_for_rir() {
  local rir_name="$1"
  local run_id="${2:-manual}"
  local log_path="${3:-}"
  local tal_path ta_uri ta_file tmp_file rc

  if [[ -n "$log_path" ]]; then
    mkdir -p "$(dirname "$log_path")"
    # Redirect only this refresh. A bare "exec >> file" would also redirect
    # the calling shell when the function is invoked in the foreground.
    rc=0
    refresh_live_ta_for_rir "$rir_name" "$run_id" >> "$log_path" 2>&1 || rc=$?
    return "$rc"
  fi

  echo "live-ta-refresh start run=$run_id rir=$rir_name at=$(date -u +%Y-%m-%dT%H:%M:%SZ)"
  tal_path="$(tal_file_for_rir "$rir_name")"
  # A failing helper is handled like an empty result so it is logged below
  # instead of silently ending a background job under errexit.
  ta_uri="$(tal_https_uri_from_fixture "$tal_path")" || ta_uri=""
  if [[ -z "$ta_uri" ]]; then
    echo "live-ta-refresh failed rir=$rir_name reason=missing_https_uri tal=$tal_path"
    return 1
  fi

  ta_file="$(live_ta_file_for_rir "$rir_name")"
  mkdir -p "$(dirname "$ta_file")"
  # Download next to the destination so the final mv is a same-directory
  # rename; PID plus $RANDOM keeps concurrent refreshes from sharing a name.
  tmp_file="${ta_file}.tmp.$$.$RANDOM"
  if ! curl -fsSL --connect-timeout "$LIVE_TA_REFRESH_CONNECT_TIMEOUT_SECS" --max-time "$LIVE_TA_REFRESH_MAX_TIME_SECS" "$ta_uri" -o "$tmp_file"; then
    rm -f "$tmp_file"
    echo "live-ta-refresh failed rir=$rir_name reason=curl uri=$ta_uri"
    return 1
  fi
  if [[ ! -s "$tmp_file" ]]; then
    rm -f "$tmp_file"
    echo "live-ta-refresh failed rir=$rir_name reason=empty_download uri=$ta_uri"
    return 1
  fi
  if ! mv "$tmp_file" "$ta_file"; then
    rm -f "$tmp_file"
    echo "live-ta-refresh failed rir=$rir_name reason=rename output=$ta_file"
    return 1
  fi
  echo "live-ta-refresh success rir=$rir_name uri=$ta_uri output=$ta_file bytes=$(wc -c < "$ta_file" | tr -d ' ')"
}
|
||||
|
||||
# Make sure a non-empty live TA file exists for the RIR in $1. An existing
# live file is left untouched; otherwise it is seeded by copying the fixture
# TA. Calls die when neither a live file nor a non-empty fixture is available.
ensure_live_ta_for_rir() {
  local rir_name="$1"
  local live_path seed_path

  live_path="$(live_ta_file_for_rir "$rir_name")"
  # Nothing to do once a usable live TA is already in place.
  [[ ! -s "$live_path" ]] || return 0

  seed_path="$(ta_file_for_rir "$rir_name")"
  if [[ ! -s "$seed_path" ]]; then
    die "missing live TA and fixture TA for $rir_name: $live_path / $seed_path"
  fi
  mkdir -p "$(dirname "$live_path")"
  cp "$seed_path" "$live_path"
}
|
||||
|
||||
# Decide whether a previous background TA refresh for the RIR in $1 is still
# in progress, cleaning up its PID file when it is not.
# Globals: LIVE_TA_REFRESH_MAX_TIME_SECS (read)
# Returns: 0 when no refresh is considered running (no PID file, process gone,
#            zombie, or PID file older than the curl max time plus 60 s);
#          1 when the recorded process is alive and the PID file is fresh.
reap_finished_live_ta_refresh_for_rir() {
  local rir_name="$1"
  local pid_file pid pid_state stat_line
  local pid_file_mtime now_epoch stale_after_secs

  pid_file="$(live_ta_refresh_pid_file_for_rir "$rir_name")"
  [[ -f "$pid_file" ]] || return 0
  pid="$(cat "$pid_file" 2>/dev/null || true)"

  if [[ "$pid" =~ ^[0-9]+$ ]] && kill -0 "$pid" >/dev/null 2>&1; then
    # kill -0 also succeeds for a zombie, so look at the process state.
    pid_state=""
    if [[ -r "/proc/$pid/stat" ]]; then
      stat_line="$(cat "/proc/$pid/stat" 2>/dev/null || true)"
      # The second field is "(comm)" and comm may itself contain spaces or
      # parentheses, so plain field splitting can pick the wrong column. The
      # state is the first word after the last ") ".
      if [[ "$stat_line" == *") "* ]]; then
        stat_line="${stat_line##*) }"
        pid_state="${stat_line%% *}"
      fi
    fi
    if [[ "$pid_state" == "Z" ]]; then
      # Collect the exit status; this fails harmlessly when the PID is not a
      # child of the current shell.
      wait "$pid" >/dev/null 2>&1 || true
      rm -f "$pid_file"
      return 0
    fi
    # A live PID with an old PID file is treated as stale: the refresh is
    # bounded by the curl max time, so anything older is not our job anymore.
    pid_file_mtime="$(stat -c %Y "$pid_file" 2>/dev/null || date +%s)"
    now_epoch="$(date +%s)"
    stale_after_secs=$((LIVE_TA_REFRESH_MAX_TIME_SECS + 60))
    if (( now_epoch - pid_file_mtime > stale_after_secs )); then
      rm -f "$pid_file"
      return 0
    fi
    return 1
  fi

  # Process is gone (or the PID file is unreadable/garbled): reap if possible
  # and forget it.
  if [[ "$pid" =~ ^[0-9]+$ ]]; then
    wait "$pid" >/dev/null 2>&1 || true
  fi
  rm -f "$pid_file"
  return 0
}
|
||||
|
||||
# Launch a background TA refresh for one RIR unless an earlier refresh for
# that RIR is still running, in which case only a "skip" line is appended to
# this run's refresh log.
# Globals:   LIVE_TA_REFRESH_DIR, LOG_ROOT (read)
# Arguments: $1 - RIR name
#            $2 - run id (part of the log file name)
start_live_ta_refresh_for_rir() {
  local rir_name="$1"
  local run_id="$2"
  local pid_file log_path pid

  mkdir -p "$LIVE_TA_REFRESH_DIR" "$LOG_ROOT"
  pid_file="$(live_ta_refresh_pid_file_for_rir "$rir_name")"
  log_path="$LOG_ROOT/live-ta-refresh-$run_id-$rir_name.log"

  if reap_finished_live_ta_refresh_for_rir "$rir_name"; then
    # No refresh in flight: start one and record its PID for the next round.
    refresh_live_ta_for_rir "$rir_name" "$run_id" "$log_path" &
    pid=$!
    printf '%s\n' "$pid" > "$pid_file"
  else
    pid="$(cat "$pid_file" 2>/dev/null || true)"
    echo "live-ta-refresh skip run=$run_id rir=$rir_name reason=previous_refresh_running pid=$pid" \
      >> "$log_path"
    return 0
  fi
}
|
||||
|
||||
# Prepare TA inputs for one run when TAL_INPUT_MODE is "file-live-ta"; a no-op
# in every other mode. All RIRs get a usable live TA first, and only then are
# the background refreshes started, so a missing TA is detected before any
# job is spawned.
# Globals:   TAL_INPUT_MODE, RIR_LIST (read)
# Arguments: $1 - run id passed on to the refresh jobs
prepare_live_ta_inputs_for_run() {
  local run_id="$1"
  local rir

  [[ "$TAL_INPUT_MODE" == "file-live-ta" ]] || return 0

  for rir in "${RIR_LIST[@]}"; do
    ensure_live_ta_for_rir "$rir"
  done
  for rir in "${RIR_LIST[@]}"; do
    start_live_ta_refresh_for_rir "$rir" "$run_id"
  done
}
|
||||
|
||||
compare_view_trust_anchor() {
|
||||
@ -432,7 +542,6 @@ build_child_args() {
|
||||
if [[ "$TAL_INPUT_MODE" == "url" ]]; then
|
||||
CHILD_ARGS+=(--tal-url "$(tal_url_for_rir "$rir_name")")
|
||||
elif [[ "$TAL_INPUT_MODE" == "file-live-ta" ]]; then
|
||||
refresh_live_ta_for_rir "$rir_name"
|
||||
CHILD_ARGS+=(--tal-path "$(tal_file_for_rir "$rir_name")")
|
||||
CHILD_ARGS+=(--ta-path "$(live_ta_file_for_rir "$rir_name")")
|
||||
else
|
||||
@ -613,6 +722,7 @@ run_one_round() {
|
||||
"$snapshot_reason" "$previous_run_id" "$previous_success_value" "$started_at" "" \
|
||||
"$INVALID_DB_PATH" "$INVALID_STATE_PATH" "$INVALID_TMP_PATH" "" "$PACKAGE_ROOT" "$ENV_FILE"
|
||||
|
||||
prepare_live_ta_inputs_for_run "$run_id"
|
||||
build_child_args
|
||||
if is_true "$RPKI_ANALYZE"; then
|
||||
CHILD_ARGS+=(--analyze --analysis-out "$run_dir/analyze")
|
||||
@ -674,6 +784,8 @@ main() {
|
||||
require_command find
|
||||
if [[ "$TAL_INPUT_MODE" == "file-live-ta" ]]; then
|
||||
require_command curl
|
||||
validate_positive_int "LIVE_TA_REFRESH_CONNECT_TIMEOUT_SECS" "$LIVE_TA_REFRESH_CONNECT_TIMEOUT_SECS"
|
||||
validate_positive_int "LIVE_TA_REFRESH_MAX_TIME_SECS" "$LIVE_TA_REFRESH_MAX_TIME_SECS"
|
||||
fi
|
||||
validate_max_runs
|
||||
validate_non_negative_int "INTERVAL_SECS" "$INTERVAL_SECS"
|
||||
@ -701,7 +813,7 @@ main() {
|
||||
fi
|
||||
done
|
||||
|
||||
mkdir -p "$RUNS_ROOT" "$LOG_ROOT" "$DB_DIR" "$META_DIR" "$TMP_DIR" "$INVALID_ROOT"
|
||||
mkdir -p "$RUNS_ROOT" "$LOG_ROOT" "$DB_DIR" "$META_DIR" "$TMP_DIR" "$INVALID_ROOT" "$LIVE_TA_REFRESH_DIR"
|
||||
if is_true "$ALLOW_RSYNC_MIRROR_REUSE"; then
|
||||
mkdir -p "$RSYNC_MIRROR_ROOT"
|
||||
fi
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user