20260622 恢复Routinator inter-RP监控
This commit is contained in:
parent
61d3e636ae
commit
4546d90c33
@ -7,12 +7,22 @@
|
|||||||
"graphTooltip": 0,
|
"graphTooltip": 0,
|
||||||
"id": null,
|
"id": null,
|
||||||
"links": [],
|
"links": [],
|
||||||
|
"liveNow": false,
|
||||||
"panels": [
|
"panels": [
|
||||||
{
|
{
|
||||||
|
"id": 1,
|
||||||
|
"title": "Metrics Reload OK",
|
||||||
|
"type": "stat",
|
||||||
"datasource": {
|
"datasource": {
|
||||||
"type": "prometheus",
|
"type": "prometheus",
|
||||||
"uid": "Prometheus"
|
"uid": "Prometheus"
|
||||||
},
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 4,
|
||||||
|
"w": 6,
|
||||||
|
"x": 0,
|
||||||
|
"y": 0
|
||||||
|
},
|
||||||
"fieldConfig": {
|
"fieldConfig": {
|
||||||
"defaults": {
|
"defaults": {
|
||||||
"unit": "none",
|
"unit": "none",
|
||||||
@ -20,13 +30,6 @@
|
|||||||
},
|
},
|
||||||
"overrides": []
|
"overrides": []
|
||||||
},
|
},
|
||||||
"gridPos": {
|
|
||||||
"h": 4,
|
|
||||||
"w": 6,
|
|
||||||
"x": 0,
|
|
||||||
"y": 0
|
|
||||||
},
|
|
||||||
"id": 1,
|
|
||||||
"options": {
|
"options": {
|
||||||
"colorMode": "value",
|
"colorMode": "value",
|
||||||
"graphMode": "area",
|
"graphMode": "area",
|
||||||
@ -42,22 +45,29 @@
|
|||||||
"textMode": "auto",
|
"textMode": "auto",
|
||||||
"wideLayout": true
|
"wideLayout": true
|
||||||
},
|
},
|
||||||
"pluginVersion": "11.3.1",
|
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "inter_rp_service_last_reload_success",
|
"expr": "max(inter_rp_service_last_reload_success{exported_instance=\"remote200-inter-rp\"})",
|
||||||
"legendFormat": "reload",
|
"legendFormat": "reload",
|
||||||
"refId": "A"
|
"refId": "A",
|
||||||
|
"instant": true
|
||||||
}
|
}
|
||||||
],
|
]
|
||||||
"title": "Metrics Reload OK",
|
|
||||||
"type": "stat"
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"id": 2,
|
||||||
|
"title": "231 Sync Age",
|
||||||
|
"type": "stat",
|
||||||
"datasource": {
|
"datasource": {
|
||||||
"type": "prometheus",
|
"type": "prometheus",
|
||||||
"uid": "Prometheus"
|
"uid": "Prometheus"
|
||||||
},
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 4,
|
||||||
|
"w": 6,
|
||||||
|
"x": 6,
|
||||||
|
"y": 0
|
||||||
|
},
|
||||||
"fieldConfig": {
|
"fieldConfig": {
|
||||||
"defaults": {
|
"defaults": {
|
||||||
"unit": "s",
|
"unit": "s",
|
||||||
@ -65,13 +75,6 @@
|
|||||||
},
|
},
|
||||||
"overrides": []
|
"overrides": []
|
||||||
},
|
},
|
||||||
"gridPos": {
|
|
||||||
"h": 4,
|
|
||||||
"w": 6,
|
|
||||||
"x": 6,
|
|
||||||
"y": 0
|
|
||||||
},
|
|
||||||
"id": 2,
|
|
||||||
"options": {
|
"options": {
|
||||||
"colorMode": "value",
|
"colorMode": "value",
|
||||||
"graphMode": "area",
|
"graphMode": "area",
|
||||||
@ -87,22 +90,29 @@
|
|||||||
"textMode": "auto",
|
"textMode": "auto",
|
||||||
"wideLayout": true
|
"wideLayout": true
|
||||||
},
|
},
|
||||||
"pluginVersion": "11.3.1",
|
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "inter_rp_sync_age_seconds",
|
"expr": "max(inter_rp_sync_age_seconds{exported_instance=\"remote200-inter-rp\"})",
|
||||||
"legendFormat": "sync age",
|
"legendFormat": "sync age",
|
||||||
"refId": "A"
|
"refId": "A",
|
||||||
|
"instant": true
|
||||||
}
|
}
|
||||||
],
|
]
|
||||||
"title": "Remote200 Sync Age",
|
|
||||||
"type": "stat"
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"id": 3,
|
||||||
|
"title": "Parse Errors",
|
||||||
|
"type": "stat",
|
||||||
"datasource": {
|
"datasource": {
|
||||||
"type": "prometheus",
|
"type": "prometheus",
|
||||||
"uid": "Prometheus"
|
"uid": "Prometheus"
|
||||||
},
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 4,
|
||||||
|
"w": 6,
|
||||||
|
"x": 12,
|
||||||
|
"y": 0
|
||||||
|
},
|
||||||
"fieldConfig": {
|
"fieldConfig": {
|
||||||
"defaults": {
|
"defaults": {
|
||||||
"unit": "short",
|
"unit": "short",
|
||||||
@ -110,13 +120,6 @@
|
|||||||
},
|
},
|
||||||
"overrides": []
|
"overrides": []
|
||||||
},
|
},
|
||||||
"gridPos": {
|
|
||||||
"h": 4,
|
|
||||||
"w": 6,
|
|
||||||
"x": 12,
|
|
||||||
"y": 0
|
|
||||||
},
|
|
||||||
"id": 3,
|
|
||||||
"options": {
|
"options": {
|
||||||
"colorMode": "value",
|
"colorMode": "value",
|
||||||
"graphMode": "area",
|
"graphMode": "area",
|
||||||
@ -132,36 +135,36 @@
|
|||||||
"textMode": "auto",
|
"textMode": "auto",
|
||||||
"wideLayout": true
|
"wideLayout": true
|
||||||
},
|
},
|
||||||
"pluginVersion": "11.3.1",
|
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "inter_rp_parse_errors",
|
"expr": "max(inter_rp_parse_errors{exported_instance=\"remote200-inter-rp\"})",
|
||||||
"legendFormat": "errors",
|
"legendFormat": "errors",
|
||||||
"refId": "A"
|
"refId": "A",
|
||||||
|
"instant": true
|
||||||
}
|
}
|
||||||
],
|
]
|
||||||
"title": "Parse Errors",
|
|
||||||
"type": "stat"
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"id": 4,
|
||||||
|
"title": "Ours vs Routinator VRP Diff",
|
||||||
|
"type": "stat",
|
||||||
"datasource": {
|
"datasource": {
|
||||||
"type": "prometheus",
|
"type": "prometheus",
|
||||||
"uid": "Prometheus"
|
"uid": "Prometheus"
|
||||||
},
|
},
|
||||||
"fieldConfig": {
|
|
||||||
"defaults": {
|
|
||||||
"unit": "none",
|
|
||||||
"decimals": 0
|
|
||||||
},
|
|
||||||
"overrides": []
|
|
||||||
},
|
|
||||||
"gridPos": {
|
"gridPos": {
|
||||||
"h": 4,
|
"h": 4,
|
||||||
"w": 6,
|
"w": 6,
|
||||||
"x": 18,
|
"x": 18,
|
||||||
"y": 0
|
"y": 0
|
||||||
},
|
},
|
||||||
"id": 4,
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"unit": "short",
|
||||||
|
"decimals": 0
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
"options": {
|
"options": {
|
||||||
"colorMode": "value",
|
"colorMode": "value",
|
||||||
"graphMode": "area",
|
"graphMode": "area",
|
||||||
@ -177,22 +180,29 @@
|
|||||||
"textMode": "auto",
|
"textMode": "auto",
|
||||||
"wideLayout": true
|
"wideLayout": true
|
||||||
},
|
},
|
||||||
"pluginVersion": "11.3.1",
|
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "inter_rp_ccr_digest_match{state=\"overall\"}",
|
"expr": "max(inter_rp_vrps_diff{exported_instance=\"remote200-inter-rp\",left=\"ours-rp\",right=\"routinator\"})",
|
||||||
"legendFormat": "overall",
|
"legendFormat": "vrp diff",
|
||||||
"refId": "A"
|
"refId": "A",
|
||||||
|
"instant": true
|
||||||
}
|
}
|
||||||
],
|
]
|
||||||
"title": "Ours vs rpki-client CCR Match",
|
|
||||||
"type": "stat"
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"id": 5,
|
||||||
|
"title": "Wall Time by RP",
|
||||||
|
"type": "timeseries",
|
||||||
"datasource": {
|
"datasource": {
|
||||||
"type": "prometheus",
|
"type": "prometheus",
|
||||||
"uid": "Prometheus"
|
"uid": "Prometheus"
|
||||||
},
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 8,
|
||||||
|
"w": 12,
|
||||||
|
"x": 0,
|
||||||
|
"y": 4
|
||||||
|
},
|
||||||
"fieldConfig": {
|
"fieldConfig": {
|
||||||
"defaults": {
|
"defaults": {
|
||||||
"unit": "s",
|
"unit": "s",
|
||||||
@ -200,13 +210,6 @@
|
|||||||
},
|
},
|
||||||
"overrides": []
|
"overrides": []
|
||||||
},
|
},
|
||||||
"gridPos": {
|
|
||||||
"h": 8,
|
|
||||||
"w": 12,
|
|
||||||
"x": 0,
|
|
||||||
"y": 4
|
|
||||||
},
|
|
||||||
"id": 5,
|
|
||||||
"options": {
|
"options": {
|
||||||
"legend": {
|
"legend": {
|
||||||
"calcs": [
|
"calcs": [
|
||||||
@ -223,19 +226,26 @@
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "inter_rp_run_wall_seconds",
|
"expr": "inter_rp_run_wall_seconds{exported_instance=\"remote200-inter-rp\",exported_rp=~\"ours-rp|routinator\"}",
|
||||||
"legendFormat": "{{rp}}",
|
"legendFormat": "{{exported_rp}}",
|
||||||
"refId": "A"
|
"refId": "A"
|
||||||
}
|
}
|
||||||
],
|
]
|
||||||
"title": "Wall Time by RP",
|
|
||||||
"type": "timeseries"
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"id": 6,
|
||||||
|
"title": "Max RSS Aggregate Peak by RP",
|
||||||
|
"type": "timeseries",
|
||||||
"datasource": {
|
"datasource": {
|
||||||
"type": "prometheus",
|
"type": "prometheus",
|
||||||
"uid": "Prometheus"
|
"uid": "Prometheus"
|
||||||
},
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 8,
|
||||||
|
"w": 12,
|
||||||
|
"x": 12,
|
||||||
|
"y": 4
|
||||||
|
},
|
||||||
"fieldConfig": {
|
"fieldConfig": {
|
||||||
"defaults": {
|
"defaults": {
|
||||||
"unit": "bytes",
|
"unit": "bytes",
|
||||||
@ -243,13 +253,6 @@
|
|||||||
},
|
},
|
||||||
"overrides": []
|
"overrides": []
|
||||||
},
|
},
|
||||||
"gridPos": {
|
|
||||||
"h": 8,
|
|
||||||
"w": 12,
|
|
||||||
"x": 12,
|
|
||||||
"y": 4
|
|
||||||
},
|
|
||||||
"id": 6,
|
|
||||||
"options": {
|
"options": {
|
||||||
"legend": {
|
"legend": {
|
||||||
"calcs": [
|
"calcs": [
|
||||||
@ -266,34 +269,33 @@
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "inter_rp_run_max_rss_bytes{kind=\"aggregate_peak\"}",
|
"expr": "inter_rp_run_max_rss_bytes{exported_instance=\"remote200-inter-rp\",kind=\"aggregate_peak\",exported_rp=~\"ours-rp|routinator\"}",
|
||||||
"legendFormat": "{{rp}}",
|
"legendFormat": "{{exported_rp}}",
|
||||||
"refId": "A"
|
"refId": "A"
|
||||||
}
|
}
|
||||||
],
|
]
|
||||||
"title": "Max RSS Aggregate Peak by RP",
|
|
||||||
"type": "timeseries"
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"datasource": {
|
|
||||||
"type": "prometheus",
|
|
||||||
"uid": "Prometheus"
|
|
||||||
},
|
|
||||||
"fieldConfig": {
|
|
||||||
"defaults": {
|
|
||||||
"unit": "none",
|
|
||||||
"decimals": 0,
|
|
||||||
"min": 0
|
|
||||||
},
|
|
||||||
"overrides": []
|
|
||||||
},
|
|
||||||
"gridPos": {
|
|
||||||
"h": 8,
|
|
||||||
"w": 12,
|
|
||||||
"x": 0,
|
|
||||||
"y": 12
|
|
||||||
},
|
|
||||||
"id": 7,
|
"id": 7,
|
||||||
|
"title": "VRPs by RP (unique ASN/Prefix/MaxLen)",
|
||||||
|
"type": "timeseries",
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "Prometheus"
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 8,
|
||||||
|
"w": 12,
|
||||||
|
"x": 0,
|
||||||
|
"y": 12
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"unit": "none",
|
||||||
|
"min": 0
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
"options": {
|
"options": {
|
||||||
"legend": {
|
"legend": {
|
||||||
"calcs": [
|
"calcs": [
|
||||||
@ -310,34 +312,33 @@
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "inter_rp_vrps",
|
"expr": "inter_rp_vrps{exported_instance=\"remote200-inter-rp\",exported_rp=~\"ours-rp|routinator\"}",
|
||||||
"legendFormat": "{{rp}}",
|
"legendFormat": "{{exported_rp}}",
|
||||||
"refId": "A"
|
"refId": "A"
|
||||||
}
|
}
|
||||||
],
|
]
|
||||||
"title": "VRPs by RP (unique ASN/Prefix/MaxLen)",
|
|
||||||
"type": "timeseries"
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"id": 8,
|
||||||
|
"title": "VAPs / ASPAs by RP (unique Customer/Providers)",
|
||||||
|
"type": "timeseries",
|
||||||
"datasource": {
|
"datasource": {
|
||||||
"type": "prometheus",
|
"type": "prometheus",
|
||||||
"uid": "Prometheus"
|
"uid": "Prometheus"
|
||||||
},
|
},
|
||||||
"fieldConfig": {
|
|
||||||
"defaults": {
|
|
||||||
"unit": "none",
|
|
||||||
"decimals": 0,
|
|
||||||
"min": 0
|
|
||||||
},
|
|
||||||
"overrides": []
|
|
||||||
},
|
|
||||||
"gridPos": {
|
"gridPos": {
|
||||||
"h": 8,
|
"h": 8,
|
||||||
"w": 12,
|
"w": 12,
|
||||||
"x": 12,
|
"x": 12,
|
||||||
"y": 12
|
"y": 12
|
||||||
},
|
},
|
||||||
"id": 8,
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"unit": "none",
|
||||||
|
"min": 0
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
"options": {
|
"options": {
|
||||||
"legend": {
|
"legend": {
|
||||||
"calcs": [
|
"calcs": [
|
||||||
@ -354,54 +355,75 @@
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "inter_rp_vaps",
|
"expr": "inter_rp_vaps{exported_instance=\"remote200-inter-rp\",exported_rp=~\"ours-rp|routinator\"}",
|
||||||
"legendFormat": "{{rp}}",
|
"legendFormat": "{{exported_rp}}",
|
||||||
"refId": "A"
|
"refId": "A"
|
||||||
}
|
}
|
||||||
],
|
]
|
||||||
"title": "VAPs / ASPAs by RP (unique Customer/Providers)",
|
|
||||||
"type": "timeseries"
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"id": 9,
|
||||||
|
"title": "Latest RP Runs",
|
||||||
|
"type": "table",
|
||||||
"datasource": {
|
"datasource": {
|
||||||
"type": "prometheus",
|
"type": "prometheus",
|
||||||
"uid": "Prometheus"
|
"uid": "Prometheus"
|
||||||
},
|
},
|
||||||
"fieldConfig": {
|
|
||||||
"defaults": {
|
|
||||||
"unit": "none",
|
|
||||||
"decimals": 0
|
|
||||||
},
|
|
||||||
"overrides": []
|
|
||||||
},
|
|
||||||
"gridPos": {
|
"gridPos": {
|
||||||
"h": 8,
|
"h": 8,
|
||||||
"w": 12,
|
"w": 12,
|
||||||
"x": 0,
|
"x": 0,
|
||||||
"y": 20
|
"y": 20
|
||||||
},
|
},
|
||||||
"id": 9,
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"unit": "none",
|
||||||
|
"decimals": 0
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
"options": {
|
"options": {
|
||||||
"showHeader": true,
|
"showHeader": true,
|
||||||
"sortBy": []
|
"sortBy": []
|
||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "inter_rp_ccr_digest_match{left=\"ours-rp\",right=\"rpki-client\"}",
|
"expr": "inter_rp_run_seq{exported_instance=\"remote200-inter-rp\",exported_rp=~\"ours-rp|routinator\"}",
|
||||||
"format": "table",
|
"format": "table",
|
||||||
"instant": true,
|
"instant": true,
|
||||||
"legendFormat": "{{state}}",
|
"legendFormat": "{{exported_rp}} seq",
|
||||||
"refId": "A"
|
"refId": "A"
|
||||||
}
|
|
||||||
],
|
|
||||||
"title": "CCR Digest Match States",
|
|
||||||
"type": "table"
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"expr": "inter_rp_run_success{exported_instance=\"remote200-inter-rp\",exported_rp=~\"ours-rp|routinator\"}",
|
||||||
|
"format": "table",
|
||||||
|
"instant": true,
|
||||||
|
"legendFormat": "{{exported_rp}} success",
|
||||||
|
"refId": "B"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "inter_rp_run_wall_seconds{exported_instance=\"remote200-inter-rp\",exported_rp=~\"ours-rp|routinator\"}",
|
||||||
|
"format": "table",
|
||||||
|
"instant": true,
|
||||||
|
"legendFormat": "{{exported_rp}} wall",
|
||||||
|
"refId": "C"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 10,
|
||||||
|
"title": "Output Count Diffs (unique)",
|
||||||
|
"type": "table",
|
||||||
"datasource": {
|
"datasource": {
|
||||||
"type": "prometheus",
|
"type": "prometheus",
|
||||||
"uid": "Prometheus"
|
"uid": "Prometheus"
|
||||||
},
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 8,
|
||||||
|
"w": 12,
|
||||||
|
"x": 12,
|
||||||
|
"y": 20
|
||||||
|
},
|
||||||
"fieldConfig": {
|
"fieldConfig": {
|
||||||
"defaults": {
|
"defaults": {
|
||||||
"unit": "none",
|
"unit": "none",
|
||||||
@ -409,41 +431,41 @@
|
|||||||
},
|
},
|
||||||
"overrides": []
|
"overrides": []
|
||||||
},
|
},
|
||||||
"gridPos": {
|
|
||||||
"h": 8,
|
|
||||||
"w": 12,
|
|
||||||
"x": 12,
|
|
||||||
"y": 20
|
|
||||||
},
|
|
||||||
"id": 10,
|
|
||||||
"options": {
|
"options": {
|
||||||
"showHeader": true,
|
"showHeader": true,
|
||||||
"sortBy": []
|
"sortBy": []
|
||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "inter_rp_vrps_diff",
|
"expr": "inter_rp_vrps_diff{exported_instance=\"remote200-inter-rp\",left=\"ours-rp\",right=\"routinator\"}",
|
||||||
"format": "table",
|
"format": "table",
|
||||||
"instant": true,
|
"instant": true,
|
||||||
"legendFormat": "vrps {{left}}-{{right}}",
|
"legendFormat": "vrps ours-rp-routinator",
|
||||||
"refId": "A"
|
"refId": "A"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"expr": "inter_rp_vaps_diff",
|
"expr": "inter_rp_vaps_diff{exported_instance=\"remote200-inter-rp\",left=\"ours-rp\",right=\"routinator\"}",
|
||||||
"format": "table",
|
"format": "table",
|
||||||
"instant": true,
|
"instant": true,
|
||||||
"legendFormat": "vaps {{left}}-{{right}}",
|
"legendFormat": "vaps ours-rp-routinator",
|
||||||
"refId": "B"
|
"refId": "B"
|
||||||
}
|
}
|
||||||
],
|
]
|
||||||
"title": "Output Count Diffs (VRP/VAP unique)",
|
|
||||||
"type": "table"
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"id": 11,
|
||||||
|
"title": "Artifact Age by RP",
|
||||||
|
"type": "timeseries",
|
||||||
"datasource": {
|
"datasource": {
|
||||||
"type": "prometheus",
|
"type": "prometheus",
|
||||||
"uid": "Prometheus"
|
"uid": "Prometheus"
|
||||||
},
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 8,
|
||||||
|
"w": 24,
|
||||||
|
"x": 0,
|
||||||
|
"y": 28
|
||||||
|
},
|
||||||
"fieldConfig": {
|
"fieldConfig": {
|
||||||
"defaults": {
|
"defaults": {
|
||||||
"unit": "s",
|
"unit": "s",
|
||||||
@ -451,13 +473,6 @@
|
|||||||
},
|
},
|
||||||
"overrides": []
|
"overrides": []
|
||||||
},
|
},
|
||||||
"gridPos": {
|
|
||||||
"h": 8,
|
|
||||||
"w": 24,
|
|
||||||
"x": 0,
|
|
||||||
"y": 28
|
|
||||||
},
|
|
||||||
"id": 11,
|
|
||||||
"options": {
|
"options": {
|
||||||
"legend": {
|
"legend": {
|
||||||
"calcs": [
|
"calcs": [
|
||||||
@ -474,21 +489,19 @@
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "inter_rp_artifact_age_seconds",
|
"expr": "inter_rp_artifact_age_seconds{exported_instance=\"remote200-inter-rp\",exported_rp=~\"ours-rp|routinator\"}",
|
||||||
"legendFormat": "{{rp}}",
|
"legendFormat": "{{exported_rp}}",
|
||||||
"refId": "A"
|
"refId": "A"
|
||||||
}
|
}
|
||||||
],
|
]
|
||||||
"title": "Artifact Age by RP",
|
|
||||||
"type": "timeseries"
|
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"preload": false,
|
|
||||||
"refresh": "10s",
|
"refresh": "10s",
|
||||||
"schemaVersion": 40,
|
"schemaVersion": 40,
|
||||||
"tags": [
|
"tags": [
|
||||||
"rpki",
|
"rpki",
|
||||||
"inter-rp"
|
"inter-rp",
|
||||||
|
"routinator"
|
||||||
],
|
],
|
||||||
"templating": {
|
"templating": {
|
||||||
"list": []
|
"list": []
|
||||||
@ -497,9 +510,8 @@
|
|||||||
"from": "now-6h",
|
"from": "now-6h",
|
||||||
"to": "now"
|
"to": "now"
|
||||||
},
|
},
|
||||||
"timepicker": {},
|
|
||||||
"timezone": "browser",
|
"timezone": "browser",
|
||||||
"title": "Ours RP Inter-RP",
|
"title": "Ours RP vs Routinator",
|
||||||
"uid": "ours-rp-inter-rp",
|
"uid": "ours-rp-inter-rp",
|
||||||
"version": 1
|
"version": 2
|
||||||
}
|
}
|
||||||
|
|||||||
250
scripts/inter_rp/inter_rp_ours_routinator_exporter.py
Executable file
250
scripts/inter_rp/inter_rp_ours_routinator_exporter.py
Executable file
@ -0,0 +1,250 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
import csv
|
||||||
|
import datetime as dt
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import socket
|
||||||
|
import sys
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
RUN_ROOT = Path(os.environ.get("OURS_RUN_ROOT", "/root/ours-rp-continuous/portable-soak"))
|
||||||
|
PEER_ROOT = Path(os.environ.get("PEER_ROOT", "/root/ours-rp-continuous/portable-soak/inter-rp-peers"))
|
||||||
|
INSTANCE = os.environ.get("INTER_RP_INSTANCE", "remote200-inter-rp")
|
||||||
|
LISTEN = os.environ.get("INTER_RP_LISTEN", "0.0.0.0:9557")
|
||||||
|
SCAN_TTL = float(os.environ.get("INTER_RP_SCAN_TTL_SECONDS", "10"))
|
||||||
|
|
||||||
|
_cache_lock = threading.Lock()
|
||||||
|
_cache = {"deadline": 0.0, "metrics": "", "status": {}}
|
||||||
|
_count_cache = {}
|
||||||
|
|
||||||
|
def unix_now():
    """Return the current wall-clock time as Unix epoch seconds (float)."""
    current = time.time()
    return current
|
||||||
|
|
||||||
|
def parse_rfc3339(value):
    """Convert an RFC 3339 / ISO 8601 timestamp to Unix epoch seconds.

    Args:
        value: Timestamp text (anything falsy is treated as "missing").

    Returns:
        Epoch seconds as a float, or None when the value is missing or
        cannot be parsed.
    """
    if not value:
        return None
    text = str(value).strip()
    # datetime.fromisoformat() only understands the "Z" designator on newer
    # Python versions; normalise it (RFC 3339 also permits lowercase "z").
    if text[-1:] in ("Z", "z"):
        text = text[:-1] + "+00:00"
    try:
        parsed = dt.datetime.fromisoformat(text)
    except ValueError:
        return None
    # Every field fed to this helper is named "...Utc", so a timestamp with
    # no offset is UTC -- not the exporter host's local time, which is what
    # a naive datetime's .timestamp() would assume.
    if parsed.tzinfo is None:
        parsed = parsed.replace(tzinfo=dt.timezone.utc)
    try:
        return parsed.timestamp()
    except (OverflowError, OSError, ValueError):
        return None
|
||||||
|
|
||||||
|
def read_json(path):
    """Read the UTF-8 file at `path` and return its decoded JSON document.

    Errors from opening the file or decoding the JSON propagate to the caller.
    """
    with open(path, mode="r", encoding="utf-8") as source:
        document = json.loads(source.read())
    return document
|
||||||
|
|
||||||
|
def latest_ours_run():
    """Return the newest run directory under RUN_ROOT/runs that has a summary.

    Only directories matching ``run_*`` that contain ``run-summary.json`` are
    considered.

    Returns:
        The Path of the latest such run directory, or None when there is none.
    """

    def order(run_dir):
        # Rank by the numeric suffix so run_10 sorts after run_9; a plain
        # lexicographic sort gets this wrong once suffixes are not
        # zero-padded. Names without a numeric suffix rank below numbered
        # ones and fall back to name order among themselves.
        suffix = run_dir.name.rsplit("_", 1)[-1]
        if suffix.isdecimal():
            return (1, int(suffix), run_dir.name)
        return (0, 0, run_dir.name)

    finished = [
        candidate
        for candidate in (RUN_ROOT / "runs").glob("run_*")
        if (candidate / "run-summary.json").exists()
    ]
    return max(finished, key=order, default=None)
|
||||||
|
|
||||||
|
def count_unique_csv(path, cols):
    """Count distinct data rows of a CSV file, comparing the first `cols` cells.

    The first row is treated as a header and skipped. Rows that are empty or
    have fewer than `cols` cells are ignored. Cells are stripped of
    surrounding whitespace before comparison.

    Results are memoised in `_count_cache` keyed by path, mtime, size and
    `cols`, so an unchanged file is not parsed again.

    Args:
        path: pathlib.Path of the CSV file.
        cols: Number of leading columns that identify a row.

    Returns:
        Number of unique rows, or None when the file does not exist.
    """
    if not path.exists():
        return None
    stat = path.stat()
    key = (str(path), stat.st_mtime_ns, stat.st_size, cols)
    if key in _count_cache:
        return _count_cache[key]

    seen = set()
    with open(path, "r", encoding="utf-8", newline="") as handle:
        reader = csv.reader(handle)
        next(reader, None)  # header row
        for row in reader:
            if not row or len(row) < cols:
                continue
            seen.add(tuple(cell.strip() for cell in row[:cols]))
    value = len(seen)

    # Evict only the stale entries for this same file/column count. Clearing
    # the whole cache here meant it never held more than one entry, so with
    # several files counted per scan every lookup missed.
    for stale in [k for k in _count_cache if k[0] == key[0] and k[3] == cols]:
        del _count_cache[stale]
    _count_cache[key] = value
    return value
|
||||||
|
|
||||||
|
def metric_line(name, labels, value):
    """Render one sample line in the Prometheus text exposition format.

    Args:
        name: Metric name.
        labels: Mapping of label name to label value (values are str()-ed).
        value: Sample value, formatted with str().

    Returns:
        ``name{k="v",...} value`` followed by a newline.
    """

    def escape(raw):
        # The exposition format requires backslash, double quote AND line
        # feed to be escaped inside label values; a raw newline would split
        # the sample across two lines and break the whole scrape.
        return str(raw).replace("\\", "\\\\").replace('"', '\\"').replace("\n", "\\n")

    label_text = ",".join(f'{key}="{escape(val)}"' for key, val in labels.items())
    return f"{name}{{{label_text}}} {value}\n"
|
||||||
|
|
||||||
|
def bool_num(value):
    """Map any truthy value to 1 and any falsy value to 0."""
    return int(bool(value))
|
||||||
|
|
||||||
|
def sample_ours(now, errors):
    """Collect a sample dict describing the latest finished run of our RP.

    Args:
        now: Current time in epoch seconds, used to compute artifact age.
        errors: Shared list; a human-readable message is appended for every
            problem encountered.

    Returns:
        A dict that always has "rp", "present", "success", "max" and
        "errors"; "run_id", "run_seq", "wall", "finished", "age", "vrps"
        and "vaps" are added when they can be derived.
    """
    sample = {"rp": "ours-rp", "present": False, "success": False, "max": {}, "errors": 0}
    run_dir = latest_ours_run()
    if run_dir is None:
        sample["errors"] += 1
        errors.append("ours-rp: no run-summary.json")
        return sample
    sample["present"] = True
    summary_path = run_dir / "run-summary.json"
    try:
        summary = read_json(summary_path)
    except Exception as exc:
        sample["errors"] += 1
        errors.append(f"ours-rp: read {summary_path}: {exc}")
        return sample
    try:
        sample["run_id"] = summary.get("runId") or run_dir.name
        sample["run_seq"] = summary.get("runSeq") or int(run_dir.name.split("_")[-1])
        sample["success"] = summary.get("status") == "success" and int(summary.get("exitCode", 0)) == 0
        if summary.get("wallMs") is not None:
            sample["wall"] = float(summary["wallMs"]) / 1000.0
        finished = parse_rfc3339(summary.get("finishedAtRfc3339Utc"))
        if finished is not None:
            sample["finished"] = finished
            sample["age"] = max(0.0, now - finished)
        # `or {}` tolerates an explicit null for processMetrics.
        rss = (summary.get("processMetrics") or {}).get("maxRssKb")
        if rss is not None:
            # The summary is read in KiB; both kinds get the same value here.
            sample["max"]["parent"] = int(rss) * 1024
            sample["max"]["aggregate_peak"] = int(rss) * 1024
        sample["vrps"] = count_unique_csv(run_dir / "vrps.csv", 3)
        sample["vaps"] = count_unique_csv(run_dir / "vaps.csv", 2)
    except (AttributeError, TypeError, ValueError, OSError, csv.Error) as exc:
        # A malformed summary or unreadable CSV must show up as a parse
        # error, not abort the whole scrape. Fields set before the failure
        # are kept.
        sample["errors"] += 1
        errors.append(f"ours-rp: parse {summary_path}: {exc}")
    return sample
|
||||||
|
|
||||||
|
def sample_routinator(now, errors):
    """Collect a sample dict for Routinator from PEER_ROOT/routinator/latest.

    Args:
        now: Current time in epoch seconds, used to compute artifact age.
        errors: Shared list; a human-readable message is appended for every
            problem encountered.

    Returns:
        A dict that always has "rp", "present", "success", "max" and
        "errors"; "run_id", "run_seq", "wall", "finished", "age", "vrps"
        and "vaps" are added when they can be derived.
    """
    rp = "routinator"
    latest = PEER_ROOT / rp / "latest"
    sample = {"rp": rp, "present": False, "success": False, "max": {}, "errors": 0}
    if not latest.exists():
        sample["errors"] += 1
        errors.append(f"routinator: missing latest directory: {latest}")
        return sample
    sample["present"] = True
    meta_path = latest / "run-meta.json"
    try:
        meta = read_json(meta_path)
    except Exception as exc:
        sample["errors"] += 1
        errors.append(f"routinator: read {meta_path}: {exc}")
        return sample
    try:
        sample["run_id"] = meta.get("runId")
        sample["run_seq"] = meta.get("runSeq")
        sample["success"] = bool(meta.get("success"))
        if meta.get("wallMs") is not None:
            sample["wall"] = float(meta["wallMs"]) / 1000.0
        finished = parse_rfc3339(meta.get("finishedAtRfc3339Utc"))
        if finished is not None:
            sample["finished"] = finished
            sample["age"] = max(0.0, now - finished)
        # `or {}` tolerates an explicit null for maxRssKb.
        max_rss = meta.get("maxRssKb") or {}
        for label, key in [("parent", "parent"), ("child_max", "childMax"), ("aggregate_peak", "aggregatePeak")]:
            if max_rss.get(key) is not None:
                sample["max"][label] = int(max_rss[key]) * 1024
        sample["vrps"] = count_unique_csv(latest / "vrps.csv", 3)
        sample["vaps"] = count_unique_csv(latest / "vaps.csv", 2)
    except (AttributeError, TypeError, ValueError, OSError, csv.Error) as exc:
        # A malformed run-meta.json or unreadable CSV must show up as a
        # parse error, not abort the whole scrape. Fields set before the
        # failure are kept.
        sample["errors"] += 1
        errors.append(f"routinator: parse {meta_path}: {exc}")
    return sample
|
||||||
|
|
||||||
|
def sync_metrics(now):
    """Summarise PEER_ROOT/sync-status.json.

    Args:
        now: Current time in epoch seconds, used to compute the sync age.

    Returns:
        A dict with at least "present", "success" and "message". When the
        file is readable it also carries "timestamp", "age" (None if the
        timestamp is missing or unparseable) and "remote".
    """
    path = PEER_ROOT / "sync-status.json"
    if not path.exists():
        return {"present": False, "success": False, "message": f"missing {path}"}
    try:
        value = read_json(path)
    except Exception as exc:
        return {"present": True, "success": False, "message": str(exc)}
    if not isinstance(value, dict):
        # Valid JSON that is not an object (list, string, null, ...) would
        # otherwise raise AttributeError on .get() and abort the scrape.
        return {"present": True, "success": False, "message": f"unexpected JSON document in {path}"}
    ts = parse_rfc3339(value.get("lastSyncAtRfc3339Utc"))
    return {
        "present": True,
        "success": bool(value.get("success")),
        "timestamp": ts,
        "age": max(0.0, now - ts) if ts is not None else None,
        "remote": value.get("remoteHost", ""),
        "message": value.get("message", ""),
    }
|
||||||
|
|
||||||
|
def build_metrics():
    """Scan both RPs plus the sync status and render the scrape payload.

    Returns:
        A ``(text, status)`` tuple: `text` is the concatenated metric lines,
        `status` is a dict with the collected "errors", "samples" and "sync".
    """
    now = unix_now()
    errors = []
    start = time.time()
    samples = [sample_ours(now, errors), sample_routinator(now, errors)]
    sync = sync_metrics(now)
    if not sync.get("success"):
        errors.append("sync: " + str(sync.get("message", "failed")))

    lines = []

    def emit(name, labels, value):
        lines.append(metric_line(name, labels, value))

    # Service-level and sync gauges.
    emit("inter_rp_service_up", {"instance": INSTANCE}, 1)
    emit("inter_rp_service_last_scan_timestamp_seconds", {"instance": INSTANCE}, now)
    emit("inter_rp_service_last_scan_duration_seconds", {"instance": INSTANCE}, time.time() - start)
    emit("inter_rp_service_last_reload_success", {"instance": INSTANCE}, bool_num(len(errors) == 0))
    emit("inter_rp_parse_errors", {"instance": INSTANCE}, len(errors))
    emit("inter_rp_sync_present", {"instance": INSTANCE}, bool_num(sync.get("present")))
    emit("inter_rp_sync_last_success", {"instance": INSTANCE}, bool_num(sync.get("success")))
    if sync.get("age") is not None:
        emit("inter_rp_sync_age_seconds", {"instance": INSTANCE}, sync["age"])
    if sync.get("timestamp") is not None:
        emit("inter_rp_sync_last_timestamp_seconds", {"instance": INSTANCE}, sync["timestamp"])

    # Per-RP gauges; optional ones are emitted only when the sample has them.
    optional_gauges = (
        ("run_seq", "inter_rp_run_seq"),
        ("wall", "inter_rp_run_wall_seconds"),
        ("age", "inter_rp_artifact_age_seconds"),
        ("vrps", "inter_rp_vrps"),
        ("vaps", "inter_rp_vaps"),
    )
    for entry in samples:
        labels = {"instance": INSTANCE, "rp": entry["rp"]}
        emit("inter_rp_run_present", labels, bool_num(entry.get("present")))
        emit("inter_rp_run_success", labels, bool_num(entry.get("success")))
        emit("inter_rp_sample_parse_errors", labels, entry.get("errors", 0))
        for field, metric in optional_gauges:
            if entry.get(field) is not None:
                emit(metric, labels, entry[field])
        for kind, value in entry.get("max", {}).items():
            emit("inter_rp_run_max_rss_bytes", dict(labels, kind=kind), value)

    # Absolute count differences between the two RPs.
    by_rp = {entry["rp"]: entry for entry in samples}
    ours, rout = by_rp.get("ours-rp", {}), by_rp.get("routinator", {})
    diff_gauges = (("vrps", "inter_rp_vrps_diff"), ("vaps", "inter_rp_vaps_diff"))
    for field, metric in diff_gauges:
        if ours.get(field) is not None and rout.get(field) is not None:
            pair = {"instance": INSTANCE, "left": "ours-rp", "right": "routinator"}
            emit(metric, pair, abs(int(ours[field]) - int(rout[field])))

    return "".join(lines), {"errors": errors, "samples": samples, "sync": sync}
|
||||||
|
|
||||||
|
def get_metrics():
    """Return the metrics text, rebuilding it once the cached copy expires.

    The cached text is reused while it is non-empty and its deadline is in
    the future; otherwise build_metrics() runs under `_cache_lock` and the
    cache (text, status, deadline) is replaced.
    """
    requested_at = time.time()
    with _cache_lock:
        still_fresh = bool(_cache["metrics"]) and _cache["deadline"] > requested_at
        if not still_fresh:
            rendered, status = build_metrics()
            _cache.update(metrics=rendered, status=status, deadline=requested_at + SCAN_TTL)
        return _cache["metrics"]
|
||||||
|
|
||||||
|
class Handler(BaseHTTPRequestHandler):
    """HTTP handler serving /metrics (Prometheus text) and /status (JSON)."""

    def do_GET(self):
        """Dispatch GET requests; any path other than the two above is a 404."""
        # Ignore a query string so "/metrics?x=1" still matches.
        route = self.path.split("?", 1)[0]
        if route == "/metrics":
            body = get_metrics().encode("utf-8")
            self._reply("text/plain; version=0.0.4; charset=utf-8", body)
            return
        if route == "/status":
            # Refresh through get_metrics(), which stores the status dict in
            # the cache. Calling build_metrics() directly and dropping its
            # return value left _cache["status"] untouched, so this endpoint
            # served "{}" until /metrics had been scraped. get_metrics()
            # takes _cache_lock itself, so it is called before the lock.
            get_metrics()
            with _cache_lock:
                body = json.dumps(_cache["status"], indent=2).encode("utf-8")
            self._reply("application/json", body)
            return
        self.send_response(404)
        self.end_headers()

    def _reply(self, content_type, body):
        """Send a 200 response carrying `body` (bytes) with the given type."""
        self.send_response(200)
        self.send_header("Content-Type", content_type)
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def log_message(self, fmt, *args):
        """Suppress the default per-request access log."""
        return
|
||||||
|
|
||||||
|
def main():
    """Bind the threaded HTTP server to LISTEN ("host:port") and serve forever."""
    bind_host, bind_port = LISTEN.rsplit(":", 1)
    address = (bind_host, int(bind_port))
    httpd = ThreadingHTTPServer(address, Handler)
    print(f"inter-rp ours+routinator exporter listen={LISTEN} instance={INSTANCE}", flush=True)
    httpd.serve_forever()


if __name__ == "__main__":
    main()
|
||||||
73
scripts/inter_rp/sync_routinator_from_remote231.sh
Executable file
73
scripts/inter_rp/sync_routinator_from_remote231.sh
Executable file
@ -0,0 +1,73 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
set -euo pipefail

# Each setting keeps a non-empty value from the environment, else the default.
: "${REMOTE231:=root@47.251.127.231}"            # ssh target the artifacts are pulled from
: "${REMOTE_ROOT:=/var/lib/inter-rp-runners}"    # runner root on the remote host
: "${PEER_ROOT:=/root/ours-rp-continuous/portable-soak/inter-rp-peers}"  # local mirror root
: "${SYNC_INTERVAL_SECS:=60}"                    # pause between sync attempts
: "${MAX_SYNCS:=-1}"                             # non-numeric (e.g. -1) means loop forever
LOG_PREFIX="[inter-rp-sync]"

mkdir -p "$PEER_ROOT/routinator"
|
||||||
|
# write_status SUCCESS MESSAGE
#   Record the outcome of the latest sync attempt in $PEER_ROOT/sync-status.json.
#   SUCCESS is the literal string "true" for a successful sync (anything else is
#   stored as false); MESSAGE is free text.
#   The file is written to a temporary sibling and renamed into place, so a
#   concurrent reader never sees a half-written document.
write_status() {
  local success="$1"
  local message="$2"
  env SYNC_SUCCESS="$success" SYNC_MESSAGE="$message" SYNC_REMOTE="$REMOTE231" SYNC_REMOTE_ROOT="$REMOTE_ROOT" python3 - "$PEER_ROOT/sync-status.json" <<'PY'
import datetime, json, os, socket, sys

path = sys.argv[1]
payload = {
    "schemaVersion": 1,
    "success": os.environ["SYNC_SUCCESS"] == "true",
    # datetime.timezone.utc also works on interpreters older than 3.11,
    # where the datetime.UTC alias does not exist.
    "lastSyncAtRfc3339Utc": datetime.datetime.now(datetime.timezone.utc).replace(microsecond=0).isoformat().replace("+00:00", "Z"),
    "remoteHost": os.environ["SYNC_REMOTE"],
    "remoteRoot": os.environ["SYNC_REMOTE_ROOT"],
    "localHost": socket.gethostname(),
    "message": os.environ["SYNC_MESSAGE"],
}
tmp_path = f"{path}.tmp.{os.getpid()}"
with open(tmp_path, "w", encoding="utf-8") as handle:
    json.dump(payload, handle, indent=2)
    handle.write("\n")
# Rename within the same directory replaces the old file in one step.
os.replace(tmp_path, path)
PY
}
|
||||||
|
# sync_once
#   Pull the whitelisted files of the remote routinator "latest" run into a
#   scratch directory, then swap it in as $PEER_ROOT/routinator/latest.
#   Returns: 0 on success, 1 if rsync failed, 2 if run-meta.json was not
#   transferred, 3 if a local filesystem step (mkdir/mv) failed.
#
#   This function is called as an `if` condition, where `set -e` is ignored
#   for everything inside it, so every step that can fail is checked
#   explicitly. Without that a failed `mv` was silently skipped and the final
#   `rm -rf` made the function report success.
sync_once() {
  local dest="$PEER_ROOT/routinator"
  local tmp="$PEER_ROOT/.sync-routinator-$$"
  rm -rf "$tmp"
  mkdir -p "$tmp" || return 3
  if ! rsync -aL --delete \
      --include='run-meta.json' \
      --include='result.ccr' \
      --include='vrps.csv' \
      --include='vaps.csv' \
      --include='stdout.log' \
      --include='stderr.log' \
      --exclude='*' \
      "$REMOTE231:$REMOTE_ROOT/routinator/latest/" "$tmp/latest/"; then
    rm -rf "$tmp"
    return 1
  fi
  if [[ ! -f "$tmp/latest/run-meta.json" ]]; then
    rm -rf "$tmp"
    return 2
  fi
  rm -rf "$dest/latest.next" "$dest/latest.prev"
  if ! mv "$tmp/latest" "$dest/latest.next"; then
    rm -rf "$tmp"
    return 3
  fi
  if [[ -e "$dest/latest" ]] && ! mv "$dest/latest" "$dest/latest.prev"; then
    rm -rf "$dest/latest.next" "$tmp"
    return 3
  fi
  if ! mv "$dest/latest.next" "$dest/latest"; then
    # Put the previous copy back so readers are not left without "latest".
    if [[ -e "$dest/latest.prev" ]]; then
      mv "$dest/latest.prev" "$dest/latest"
    fi
    rm -rf "$dest/latest.next" "$tmp"
    return 3
  fi
  rm -rf "$dest/latest.prev" "$tmp"
}
|
||||||
|
# Main loop: attempt a sync, log and record the outcome, then sleep. The loop
# ends only when MAX_SYNCS is a plain non-negative integer and that many
# attempts have been made.
completed=0
while :; do
  rc=0
  sync_once || rc=$?
  if (( rc == 0 )); then
    echo "$LOG_PREFIX $(date -u +%Y-%m-%dT%H:%M:%SZ) ok"
    write_status true "ok"
  else
    echo "$LOG_PREFIX $(date -u +%Y-%m-%dT%H:%M:%SZ) sync failed code=$rc" >&2
    write_status false "routinator rsync failed code=$rc"
  fi
  completed=$((completed + 1))
  if [[ "$MAX_SYNCS" =~ ^[0-9]+$ ]]; then
    (( completed < MAX_SYNCS )) || break
  fi
  sleep "$SYNC_INTERVAL_SECS"
done
|
||||||
Loading…
x
Reference in New Issue
Block a user