20260623 迁移231并固定相位调度

This commit is contained in:
yuyr 2026-06-23 11:37:36 +08:00
parent 4e37b96aff
commit 6ab044480a
7 changed files with 563 additions and 214 deletions

View File

@ -47,7 +47,7 @@
}, },
"targets": [ "targets": [
{ {
"expr": "max(inter_rp_repo_sync_overlap_total{exported_instance=\"remote200-inter-rp\",left=\"ours-rp\",right=\"routinator\",class=\"only_ours\"})", "expr": "max(inter_rp_repo_sync_overlap_total{exported_instance=~\".*inter-rp\",left=\"ours-rp\",right=\"routinator\",class=\"only_ours\"})",
"legendFormat": "only ours", "legendFormat": "only ours",
"refId": "A", "refId": "A",
"instant": true "instant": true
@ -92,7 +92,7 @@
}, },
"targets": [ "targets": [
{ {
"expr": "max(inter_rp_repo_sync_overlap_total{exported_instance=\"remote200-inter-rp\",left=\"ours-rp\",right=\"routinator\",class=\"only_routinator\"})", "expr": "max(inter_rp_repo_sync_overlap_total{exported_instance=~\".*inter-rp\",left=\"ours-rp\",right=\"routinator\",class=\"only_routinator\"})",
"legendFormat": "only routinator", "legendFormat": "only routinator",
"refId": "A", "refId": "A",
"instant": true "instant": true
@ -137,7 +137,7 @@
}, },
"targets": [ "targets": [
{ {
"expr": "max(inter_rp_vaps_diff{exported_instance=\"remote200-inter-rp\",left=\"ours-rp\",right=\"routinator\"})", "expr": "max(inter_rp_vaps_diff{exported_instance=~\".*inter-rp\",left=\"ours-rp\",right=\"routinator\"})",
"legendFormat": "vap diff", "legendFormat": "vap diff",
"refId": "A", "refId": "A",
"instant": true "instant": true
@ -182,7 +182,7 @@
}, },
"targets": [ "targets": [
{ {
"expr": "max(inter_rp_vrps_diff{exported_instance=\"remote200-inter-rp\",left=\"ours-rp\",right=\"routinator\"})", "expr": "max(inter_rp_vrps_diff{exported_instance=~\".*inter-rp\",left=\"ours-rp\",right=\"routinator\"})",
"legendFormat": "vrp diff", "legendFormat": "vrp diff",
"refId": "A", "refId": "A",
"instant": true "instant": true
@ -226,8 +226,8 @@
}, },
"targets": [ "targets": [
{ {
"expr": "inter_rp_run_wall_seconds{exported_instance=\"remote200-inter-rp\",exported_rp=~\"ours-rp|routinator\"}", "expr": "inter_rp_run_wall_seconds{exported_instance=~\".*inter-rp\",rp=~\"ours-rp|routinator\"}",
"legendFormat": "{{exported_rp}}", "legendFormat": "{{rp}}",
"refId": "A" "refId": "A"
} }
] ]
@ -269,8 +269,8 @@
}, },
"targets": [ "targets": [
{ {
"expr": "inter_rp_run_max_rss_bytes{exported_instance=\"remote200-inter-rp\",kind=\"aggregate_peak\",exported_rp=~\"ours-rp|routinator\"}", "expr": "inter_rp_run_max_rss_bytes{exported_instance=~\".*inter-rp\",kind=\"aggregate_peak\",rp=~\"ours-rp|routinator\"}",
"legendFormat": "{{exported_rp}}", "legendFormat": "{{rp}}",
"refId": "A" "refId": "A"
} }
] ]
@ -312,8 +312,8 @@
}, },
"targets": [ "targets": [
{ {
"expr": "inter_rp_vrps{exported_instance=\"remote200-inter-rp\",exported_rp=~\"ours-rp|routinator\"}", "expr": "inter_rp_vrps{exported_instance=~\".*inter-rp\",rp=~\"ours-rp|routinator\"}",
"legendFormat": "{{exported_rp}}", "legendFormat": "{{rp}}",
"refId": "A" "refId": "A"
} }
] ]
@ -355,8 +355,8 @@
}, },
"targets": [ "targets": [
{ {
"expr": "inter_rp_vaps{exported_instance=\"remote200-inter-rp\",exported_rp=~\"ours-rp|routinator\"}", "expr": "inter_rp_vaps{exported_instance=~\".*inter-rp\",rp=~\"ours-rp|routinator\"}",
"legendFormat": "{{exported_rp}}", "legendFormat": "{{rp}}",
"refId": "A" "refId": "A"
} }
] ]
@ -388,24 +388,24 @@
}, },
"targets": [ "targets": [
{ {
"expr": "inter_rp_run_seq{exported_instance=\"remote200-inter-rp\",exported_rp=~\"ours-rp|routinator\"}", "expr": "inter_rp_run_seq{exported_instance=~\".*inter-rp\",rp=~\"ours-rp|routinator\"}",
"format": "table", "format": "table",
"instant": true, "instant": true,
"legendFormat": "{{exported_rp}} seq", "legendFormat": "{{rp}} seq",
"refId": "A" "refId": "A"
}, },
{ {
"expr": "inter_rp_run_success{exported_instance=\"remote200-inter-rp\",exported_rp=~\"ours-rp|routinator\"}", "expr": "inter_rp_run_success{exported_instance=~\".*inter-rp\",rp=~\"ours-rp|routinator\"}",
"format": "table", "format": "table",
"instant": true, "instant": true,
"legendFormat": "{{exported_rp}} success", "legendFormat": "{{rp}} success",
"refId": "B" "refId": "B"
}, },
{ {
"expr": "inter_rp_run_wall_seconds{exported_instance=\"remote200-inter-rp\",exported_rp=~\"ours-rp|routinator\"}", "expr": "inter_rp_run_wall_seconds{exported_instance=~\".*inter-rp\",rp=~\"ours-rp|routinator\"}",
"format": "table", "format": "table",
"instant": true, "instant": true,
"legendFormat": "{{exported_rp}} wall", "legendFormat": "{{rp}} wall",
"refId": "C" "refId": "C"
} }
] ]
@ -437,14 +437,14 @@
}, },
"targets": [ "targets": [
{ {
"expr": "inter_rp_vrps_diff{exported_instance=\"remote200-inter-rp\",left=\"ours-rp\",right=\"routinator\"}", "expr": "inter_rp_vrps_diff{exported_instance=~\".*inter-rp\",left=\"ours-rp\",right=\"routinator\"}",
"format": "table", "format": "table",
"instant": true, "instant": true,
"legendFormat": "vrps ours-rp-routinator", "legendFormat": "vrps ours-rp-routinator",
"refId": "A" "refId": "A"
}, },
{ {
"expr": "inter_rp_vaps_diff{exported_instance=\"remote200-inter-rp\",left=\"ours-rp\",right=\"routinator\"}", "expr": "inter_rp_vaps_diff{exported_instance=~\".*inter-rp\",left=\"ours-rp\",right=\"routinator\"}",
"format": "table", "format": "table",
"instant": true, "instant": true,
"legendFormat": "vaps ours-rp-routinator", "legendFormat": "vaps ours-rp-routinator",
@ -489,8 +489,8 @@
}, },
"targets": [ "targets": [
{ {
"expr": "inter_rp_artifact_age_seconds{exported_instance=\"remote200-inter-rp\",exported_rp=~\"ours-rp|routinator\"}", "expr": "inter_rp_artifact_age_seconds{exported_instance=~\".*inter-rp\",rp=~\"ours-rp|routinator\"}",
"legendFormat": "{{exported_rp}}", "legendFormat": "{{rp}}",
"refId": "A" "refId": "A"
} }
] ]
@ -533,8 +533,8 @@
}, },
"targets": [ "targets": [
{ {
"expr": "inter_rp_repo_sync_total{exported_instance=\"remote200-inter-rp\",state=~\"available|failed\",exported_rp=~\"ours-rp|routinator\"}", "expr": "inter_rp_repo_sync_total{exported_instance=~\".*inter-rp\",state=~\"available|failed\",rp=~\"ours-rp|routinator\"}",
"legendFormat": "{{exported_rp}} {{state}}", "legendFormat": "{{rp}} {{state}}",
"refId": "A" "refId": "A"
} }
] ]
@ -577,7 +577,7 @@
}, },
"targets": [ "targets": [
{ {
"expr": "inter_rp_repo_sync_overlap_total{exported_instance=\"remote200-inter-rp\",left=\"ours-rp\",right=\"routinator\"}", "expr": "inter_rp_repo_sync_overlap_total{exported_instance=~\".*inter-rp\",left=\"ours-rp\",right=\"routinator\"}",
"legendFormat": "{{class}}", "legendFormat": "{{class}}",
"refId": "A" "refId": "A"
} }
@ -651,7 +651,7 @@
}, },
"targets": [ "targets": [
{ {
"expr": "inter_rp_repo_sync_diff_info{exported_instance=\"remote200-inter-rp\",left=\"ours-rp\",right=\"routinator\"}", "expr": "inter_rp_repo_sync_diff_info{exported_instance=~\".*inter-rp\",left=\"ours-rp\",right=\"routinator\"}",
"format": "table", "format": "table",
"instant": true, "instant": true,
"legendFormat": "{{class}} #{{rank}}", "legendFormat": "{{class}} #{{rank}}",

View File

@ -186,6 +186,205 @@
"title": "Publication Points", "title": "Publication Points",
"type": "stat" "type": "stat"
}, },
{
"datasource": {
"type": "prometheus",
"uid": "Prometheus"
},
"fieldConfig": {
"defaults": {
"decimals": 0,
"unit": "none"
},
"overrides": []
},
"gridPos": {
"x": 0,
"y": 4,
"w": 6,
"h": 4
},
"id": 9,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "11.3.1",
"targets": [
{
"expr": "ours_rp_run_sequence",
"legendFormat": "seq",
"refId": "A"
}
],
"title": "Latest Run Sequence",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "Prometheus"
},
"fieldConfig": {
"defaults": {
"decimals": 2,
"unit": "percent",
"min": 0,
"max": 100,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "red",
"value": null
},
{
"color": "orange",
"value": 90
},
{
"color": "green",
"value": 98
}
]
}
},
"overrides": []
},
"gridPos": {
"x": 6,
"y": 4,
"w": 6,
"h": 4
},
"id": 10,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "11.3.1",
"targets": [
{
"expr": "100 * sum by (job, instance, exported_instance) (ours_rp_repo_terminal_state_count{terminal_state=\"publication_point_cache\"}) / sum by (job, instance, exported_instance) (ours_rp_publication_points)",
"legendFormat": "PP cache hit ratio",
"refId": "A"
}
],
"title": "Latest PP Cache Hit Ratio",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "Prometheus"
},
"fieldConfig": {
"defaults": {
"unit": "none",
"decimals": 0
},
"overrides": []
},
"gridPos": {
"x": 12,
"y": 4,
"w": 6,
"h": 4
},
"id": 11,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "11.3.1",
"targets": [
{
"expr": "ours_rp_vrps{kind=\"total\"}",
"legendFormat": "VRPs raw",
"refId": "A"
}
],
"title": "VRPs",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "Prometheus"
},
"fieldConfig": {
"defaults": {
"unit": "none",
"decimals": 0
},
"overrides": []
},
"gridPos": {
"x": 18,
"y": 4,
"w": 6,
"h": 4
},
"id": 12,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "11.3.1",
"targets": [
{
"expr": "ours_rp_vaps",
"legendFormat": "VAPs",
"refId": "A"
}
],
"title": "VAPs",
"type": "stat"
},
{ {
"datasource": { "datasource": {
"type": "prometheus", "type": "prometheus",
@ -334,186 +533,6 @@
"title": "Large Publication Points by Object Count", "title": "Large Publication Points by Object Count",
"type": "timeseries" "type": "timeseries"
}, },
{
"datasource": {
"type": "prometheus",
"uid": "Prometheus"
},
"fieldConfig": {
"defaults": {
"decimals": 0,
"unit": "none"
},
"overrides": []
},
"gridPos": {
"x": 0,
"y": 4,
"w": 6,
"h": 4
},
"id": 9,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "11.3.1",
"targets": [
{
"expr": "ours_rp_run_sequence",
"legendFormat": "seq",
"refId": "A"
}
],
"title": "Latest Run Sequence",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "Prometheus"
},
"fieldConfig": {
"defaults": {
"decimals": 0,
"unit": "none"
},
"overrides": []
},
"gridPos": {
"x": 6,
"y": 4,
"w": 6,
"h": 4
},
"id": 10,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "11.3.1",
"targets": [
{
"expr": "ours_rp_run_success",
"legendFormat": "success",
"refId": "A"
}
],
"title": "Latest Run Success",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "Prometheus"
},
"fieldConfig": {
"defaults": {
"unit": "none",
"decimals": 0
},
"overrides": []
},
"gridPos": {
"x": 12,
"y": 4,
"w": 6,
"h": 4
},
"id": 11,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "11.3.1",
"targets": [
{
"expr": "ours_rp_vrps{kind=\"total\"}",
"legendFormat": "VRPs raw",
"refId": "A"
}
],
"title": "VRPs",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "Prometheus"
},
"fieldConfig": {
"defaults": {
"unit": "none",
"decimals": 0
},
"overrides": []
},
"gridPos": {
"x": 18,
"y": 4,
"w": 6,
"h": 4
},
"id": 12,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "11.3.1",
"targets": [
{
"expr": "ours_rp_vaps",
"legendFormat": "VAPs",
"refId": "A"
}
],
"title": "VAPs",
"type": "stat"
},
{ {
"datasource": { "datasource": {
"type": "prometheus", "type": "prometheus",
@ -586,7 +605,7 @@
"gridPos": { "gridPos": {
"x": 0, "x": 0,
"y": 24, "y": 24,
"w": 24, "w": 12,
"h": 8 "h": 8
}, },
"id": 14, "id": 14,
@ -615,6 +634,70 @@
"title": "Max RSS Over Time", "title": "Max RSS Over Time",
"type": "timeseries" "type": "timeseries"
}, },
{
"datasource": {
"type": "prometheus",
"uid": "Prometheus"
},
"fieldConfig": {
"defaults": {
"unit": "percent",
"decimals": 2,
"min": 0,
"max": 100,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "red",
"value": null
},
{
"color": "orange",
"value": 90
},
{
"color": "green",
"value": 98
}
]
}
},
"overrides": []
},
"gridPos": {
"x": 12,
"y": 24,
"w": 12,
"h": 8
},
"id": 17,
"options": {
"legend": {
"calcs": [
"lastNotNull",
"min",
"max"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"expr": "100 * sum by (job, instance, exported_instance) (ours_rp_repo_terminal_state_count{terminal_state=\"publication_point_cache\"}) / sum by (job, instance, exported_instance) (ours_rp_publication_points)",
"legendFormat": "PP cache hit ratio",
"refId": "A"
}
],
"title": "PP Cache Hit Ratio",
"type": "timeseries"
},
{ {
"datasource": { "datasource": {
"type": "prometheus", "type": "prometheus",

View File

@ -243,6 +243,7 @@ def load_routinator_repo_sets(errors):
failed = set() failed = set()
duration = {} duration = {}
object_counts = {} object_counts = {}
publication_point_states = {}
for metric in ["routinator_rrdp_status", "routinator_rsync_status"]: for metric in ["routinator_rrdp_status", "routinator_rsync_status"]:
for labels, value in parse_prometheus_samples(text, metric): for labels, value in parse_prometheus_samples(text, metric):
uri = labels.get("uri") uri = labels.get("uri")
@ -254,6 +255,20 @@ def load_routinator_repo_sets(errors):
success.add(uri) success.add(uri)
else: else:
failed.add(uri) failed.add(uri)
for labels, value in parse_prometheus_samples(text, "routinator_repository_publication_points_total"):
uri = labels.get("uri")
state = labels.get("state", "unknown")
if not uri:
continue
total.add(uri)
publication_point_states.setdefault(uri, {})[state] = publication_point_states.setdefault(uri, {}).get(state, 0.0) + value
for uri, states in publication_point_states.items():
valid_count = states.get("valid", 0.0)
non_valid_count = sum(value for state, value in states.items() if state != "valid")
if valid_count > 0:
success.add(uri)
elif non_valid_count > 0:
failed.add(uri)
for metric in ["routinator_rrdp_duration", "routinator_rsync_duration"]: for metric in ["routinator_rrdp_duration", "routinator_rsync_duration"]:
for labels, value in parse_prometheus_samples(text, metric): for labels, value in parse_prometheus_samples(text, metric):
uri = labels.get("uri") uri = labels.get("uri")
@ -261,6 +276,7 @@ def load_routinator_repo_sets(errors):
duration[uri] = max(duration.get(uri, 0.0), value) duration[uri] = max(duration.get(uri, 0.0), value)
for labels, value in parse_prometheus_samples(text, "routinator_repository_objects_total"): for labels, value in parse_prometheus_samples(text, "routinator_repository_objects_total"):
add_object_count(object_counts, labels.get("uri"), labels.get("type"), value) add_object_count(object_counts, labels.get("uri"), labels.get("type"), value)
failed = failed - success
return {"total": total, "success": success, "failed": failed, "duration": duration, "object_counts": object_counts} return {"total": total, "success": success, "failed": failed, "duration": duration, "object_counts": object_counts}
def emit_repo_diff_metrics(out, errors): def emit_repo_diff_metrics(out, errors):

View File

@ -87,6 +87,7 @@ mkdir -p "$STAGE_DIR/bin" "$STAGE_DIR/fixtures" "$STAGE_DIR/scripts" "$STAGE_DIR
install -m 0755 "$SCRIPT_DIR/run_soak.sh" "$STAGE_DIR/run_soak.sh" install -m 0755 "$SCRIPT_DIR/run_soak.sh" "$STAGE_DIR/run_soak.sh"
install -m 0755 "$SCRIPT_DIR/run_24h_soak_with_metrics.sh" "$STAGE_DIR/run_24h_soak_with_metrics.sh" install -m 0755 "$SCRIPT_DIR/run_24h_soak_with_metrics.sh" "$STAGE_DIR/run_24h_soak_with_metrics.sh"
install -m 0755 "$SCRIPT_DIR/fixed_phase_loop.sh" "$STAGE_DIR/scripts/soak/fixed_phase_loop.sh"
install -m 0755 "$SCRIPT_DIR/hourly_soak_report.py" "$STAGE_DIR/scripts/soak/hourly_soak_report.py" install -m 0755 "$SCRIPT_DIR/hourly_soak_report.py" "$STAGE_DIR/scripts/soak/hourly_soak_report.py"
install -m 0644 "$SCRIPT_DIR/portable-soak.env.example" "$STAGE_DIR/.env" install -m 0644 "$SCRIPT_DIR/portable-soak.env.example" "$STAGE_DIR/.env"
install -m 0644 "$SCRIPT_DIR/portable-soak.env.example" "$STAGE_DIR/portable-soak.env.example" install -m 0644 "$SCRIPT_DIR/portable-soak.env.example" "$STAGE_DIR/portable-soak.env.example"

View File

@ -0,0 +1,126 @@
#!/usr/bin/env bash
# Runs one command repeatedly at fixed wall-clock phases, wrapping each run in a flock on LOCK_FILE.
# Abort on errors, on unset variables, and on failures anywhere in a pipeline.
set -euo pipefail
# Label printed in every log line; can be replaced with --name.
NAME="fixed-phase"
# Cycle length in seconds; read from PHASE_CYCLE_SECS (default 900), can be replaced with --cycle-secs.
CYCLE_SECS="${PHASE_CYCLE_SECS:-900}"
# Offset of the phase inside each cycle, in seconds; read from PHASE_OFFSET_SECS (default 0).
OFFSET_SECS="${PHASE_OFFSET_SECS:-0}"
# Lock file handed to flock; read from RPKI_HEAVY_LOCK, with a default under /var/lock.
LOCK_FILE="${RPKI_HEAVY_LOCK:-/var/lock/rpki-heavy-run.lock}"
# Seconds flock may wait for the lock (passed as -w); taken from the environment when set, default 30.
LOCK_WAIT_SECS="${LOCK_WAIT_SECS:-30}"
# Print the command-line help text to stdout.
# The heredoc delimiter is quoted, so the text is emitted literally with no expansion.
usage() {
cat <<'USAGE'
Usage:
fixed_phase_loop.sh [--name <name>] [--cycle-secs <seconds>] [--offset-secs <seconds>]
[--lock-file <path>] [--lock-wait-secs <seconds>] -- <command> [args...]
Runs one command at fixed wall-clock phases. Missed phases are skipped rather than caught up,
which keeps independent RP jobs from drifting into each other. A shared flock protects against
unexpected overruns.
USAGE
}
# Write "error: <message>" to stderr and terminate the shell with status 2.
#   $* - words of the message, joined with single spaces
die() {
  printf 'error: %s\n' "$*" >&2
  exit 2
}
# Succeed (status 0) only when the first argument is a non-empty string of decimal digits;
# any other input, including the empty string, yields status 1.
#   $1 - candidate value
is_non_negative_int() {
  case "$1" in
    '' | *[!0-9]*) return 1 ;;
    *) return 0 ;;
  esac
}
# Parse options until the "--" separator; whatever follows it stays in "$@" as the command to run.
while [[ $# -gt 0 ]]; do
case "$1" in
--name)
# Value-taking options shift once to reach their value; the shift after esac then consumes it.
shift
NAME="${1:?--name requires a value}"
;;
--cycle-secs)
shift
CYCLE_SECS="${1:?--cycle-secs requires a value}"
;;
--offset-secs)
shift
OFFSET_SECS="${1:?--offset-secs requires a value}"
;;
--lock-file)
shift
LOCK_FILE="${1:?--lock-file requires a value}"
;;
--lock-wait-secs)
shift
LOCK_WAIT_SECS="${1:?--lock-wait-secs requires a value}"
;;
--help|-h)
usage
exit 0
;;
--)
# Drop the separator and stop parsing; the remaining arguments are the command.
shift
break
;;
*)
# Anything else before "--" is rejected.
die "unknown argument: $1"
;;
esac
shift
done
# A command to run must remain after option parsing.
[[ $# -gt 0 ]] || die "missing command after --"
# All three timing values must be plain digit strings.
is_non_negative_int "$CYCLE_SECS" || die "--cycle-secs must be a non-negative integer"
is_non_negative_int "$OFFSET_SECS" || die "--offset-secs must be a non-negative integer"
is_non_negative_int "$LOCK_WAIT_SECS" || die "--lock-wait-secs must be a non-negative integer"
# Force base-10 interpretation. Without this, a value with leading zeros is treated as octal by
# shell arithmetic: "08" fails with a misleading error and "010" is silently read as 8.
CYCLE_SECS=$((10#$CYCLE_SECS))
OFFSET_SECS=$((10#$OFFSET_SECS))
LOCK_WAIT_SECS=$((10#$LOCK_WAIT_SECS))
# The cycle must be non-empty and the offset must fall inside it.
(( CYCLE_SECS > 0 )) || die "--cycle-secs must be > 0"
(( OFFSET_SECS < CYCLE_SECS )) || die "--offset-secs must be < --cycle-secs"
# flock needs the lock file's directory to exist.
mkdir -p "$(dirname "$LOCK_FILE")"
# Print the current time in UTC formatted as YYYY-MM-DDTHH:MM:SSZ.
timestamp_utc() {
  local fmt='+%Y-%m-%dT%H:%M:%SZ'
  date -u "$fmt"
}
# Print an epoch timestamp in UTC formatted as YYYY-MM-DDTHH:MM:SSZ.
#   $1 - seconds since the Unix epoch
# Uses the "-d @<epoch>" form of date.
format_epoch_utc() {
  local epoch="$1"
  date -u -d "@${epoch}" '+%Y-%m-%dT%H:%M:%SZ'
}
# Target epoch of the most recently scheduled phase; starts at -1.
LAST_TARGET_EPOCH=-1
# Print the epoch of the first phase boundary at or after the given time.
# A boundary is any instant t for which (t - OFFSET_SECS) is a multiple of CYCLE_SECS.
#   $1 - current time in seconds since the Unix epoch
next_phase_epoch() {
  local current="$1"
  # Position inside the cycle, folded into [0, CYCLE_SECS) even when the shifted time is negative.
  local phase_pos=$(( ((current - OFFSET_SECS) % CYCLE_SECS + CYCLE_SECS) % CYCLE_SECS ))
  # Seconds remaining until the next boundary; 0 when the current time is exactly on one.
  local wait_secs=$(( (CYCLE_SECS - phase_pos) % CYCLE_SECS ))
  printf '%s\n' "$((current + wait_secs))"
}
# Exit status requested from flock when the lock is not obtained within the wait window.
# flock's default for that case is 1, which cannot be told apart from the command itself
# exiting with 1; 75 is used here as a dedicated "temporary failure" code.
LOCK_CONFLICT_EXIT=75
# Main scheduling loop: sleep until the next phase boundary, run the command under the lock, repeat.
while true; do
  now_epoch="$(date +%s)"
  target_epoch="$(next_phase_epoch "$now_epoch")"
  # Never schedule the same (or an earlier) phase twice, e.g. when the command returns within
  # the very second of its own phase boundary; move on to the following cycle instead.
  if (( target_epoch <= LAST_TARGET_EPOCH )); then
    target_epoch=$((LAST_TARGET_EPOCH + CYCLE_SECS))
  fi
  LAST_TARGET_EPOCH="$target_epoch"
  sleep_secs=$((target_epoch - now_epoch))
  echo "[$(timestamp_utc)] $NAME next_phase=$(format_epoch_utc "$target_epoch") sleep=${sleep_secs}s cycle=${CYCLE_SECS}s offset=${OFFSET_SECS}s" >&2
  if (( sleep_secs > 0 )); then
    sleep "$sleep_secs"
  fi
  started_epoch="$(date +%s)"
  echo "[$(timestamp_utc)] $NAME phase_start target=$(format_epoch_utc "$target_epoch") lock=$LOCK_FILE wait=${LOCK_WAIT_SECS}s" >&2
  # A failing command or a lock timeout must not terminate the loop, so errexit is
  # suspended just long enough to capture the exit status.
  set +e
  flock -E "$LOCK_CONFLICT_EXIT" -w "$LOCK_WAIT_SECS" "$LOCK_FILE" "$@"
  code=$?
  set -e
  ended_epoch="$(date +%s)"
  if (( code == 0 )); then
    echo "[$(timestamp_utc)] $NAME phase_done exit=0 elapsed=$((ended_epoch - started_epoch))s" >&2
  elif (( code == LOCK_CONFLICT_EXIT )); then
    # The lock was still held elsewhere when the wait expired; the command was not run.
    echo "[$(timestamp_utc)] $NAME phase_done exit=$code elapsed=$((ended_epoch - started_epoch))s skipped_or_failed=1 reason=lock_timeout" >&2
  else
    echo "[$(timestamp_utc)] $NAME phase_done exit=$code elapsed=$((ended_epoch - started_epoch))s skipped_or_failed=1 reason=command_failed" >&2
  fi
done

View File

@ -17,6 +17,17 @@ STOP_AFTER_SECS=0
# 示例RIRS=apnic,arin 或 RIRS=afrinic,apnic,arin,lacnic,ripe # 示例RIRS=apnic,arin 或 RIRS=afrinic,apnic,arin,lacnic,ripe
RIRS=afrinic,apnic,arin,lacnic,ripe RIRS=afrinic,apnic,arin,lacnic,ripe
# TAL/TA input mode.
# file-with-ta: use the fixtures/tal + fixtures/ta bundled in the package; fully offline, fixed inputs.
# file-live-ta: use the bundled fixtures/tal; every round starts a best-effort background refresh of the TA into state/live-ta.
# The child process does not wait for the refresh and uses whatever state/live-ta currently holds; if it is missing on first use it is initialized from fixtures/ta.
# url: pass the TAL URL straight to the child process, which handles fetching the TAL/TA itself.
TAL_INPUT_MODE=file-with-ta
# curl timeouts for the background TA refresh in file-live-ta mode. A failed refresh is only logged and does not block the current run.
LIVE_TA_REFRESH_CONNECT_TIMEOUT_SECS=15
LIVE_TA_REFRESH_MAX_TIME_SECS=120
# 运行根目录。默认使用 package 根目录;如需把产物写到独立数据盘,可改成绝对路径。 # 运行根目录。默认使用 package 根目录;如需把产物写到独立数据盘,可改成绝对路径。
RUN_ROOT="${PACKAGE_ROOT}" RUN_ROOT="${PACKAGE_ROOT}"

View File

@ -43,6 +43,9 @@ META_DIR="${META_DIR:-$STATE_ROOT/meta}"
TMP_DIR="${TMP_DIR:-$RUN_ROOT/tmp}" TMP_DIR="${TMP_DIR:-$RUN_ROOT/tmp}"
RSYNC_MIRROR_ROOT="${RSYNC_MIRROR_ROOT:-$STATE_ROOT/rsync-mirror}" RSYNC_MIRROR_ROOT="${RSYNC_MIRROR_ROOT:-$STATE_ROOT/rsync-mirror}"
INVALID_ROOT="$STATE_ROOT/invalid" INVALID_ROOT="$STATE_ROOT/invalid"
LIVE_TA_REFRESH_DIR="${LIVE_TA_REFRESH_DIR:-$META_DIR/live-ta-refresh}"
LIVE_TA_REFRESH_CONNECT_TIMEOUT_SECS="${LIVE_TA_REFRESH_CONNECT_TIMEOUT_SECS:-15}"
LIVE_TA_REFRESH_MAX_TIME_SECS="${LIVE_TA_REFRESH_MAX_TIME_SECS:-120}"
RPKI_BIN="$BIN_DIR/rpki" RPKI_BIN="$BIN_DIR/rpki"
RPKI_DAEMON_BIN="$BIN_DIR/rpki_daemon" RPKI_DAEMON_BIN="$BIN_DIR/rpki_daemon"
@ -194,22 +197,129 @@ live_ta_file_for_rir() {
printf '%s' "$STATE_ROOT/live-ta/$(basename "$(tal_file_for_rir "$1")" .tal).cer" printf '%s' "$STATE_ROOT/live-ta/$(basename "$(tal_file_for_rir "$1")" .tal).cer"
} }
# Print (without a trailing newline) the path of the pid file that tracks the background
# TA refresh for one RIR: "<LIVE_TA_REFRESH_DIR>/<rir>.pid".
#   $1 - RIR name
live_ta_refresh_pid_file_for_rir() {
  local rir_name="$1"
  printf '%s/%s.pid' "$LIVE_TA_REFRESH_DIR" "$rir_name"
}
refresh_live_ta_for_rir() { refresh_live_ta_for_rir() {
local rir_name="$1" local rir_name="$1"
local run_id="${2:-manual}"
local log_path="${3:-}"
local tal_path local tal_path
local ta_uri local ta_uri
local ta_file local ta_file
local tmp_file local tmp_file
if [[ -n "$log_path" ]]; then
mkdir -p "$(dirname "$log_path")"
exec >> "$log_path" 2>&1
fi
echo "live-ta-refresh start run=$run_id rir=$rir_name at=$(date -u +%Y-%m-%dT%H:%M:%SZ)"
tal_path="$(tal_file_for_rir "$rir_name")" tal_path="$(tal_file_for_rir "$rir_name")"
ta_uri="$(tal_https_uri_from_fixture "$tal_path")" ta_uri="$(tal_https_uri_from_fixture "$tal_path")"
[[ -n "$ta_uri" ]] || die "missing http(s) TA URI in TAL fixture for $rir_name: $tal_path" if [[ -z "$ta_uri" ]]; then
echo "live-ta-refresh failed rir=$rir_name reason=missing_https_uri tal=$tal_path"
return 1
fi
ta_file="$(live_ta_file_for_rir "$rir_name")" ta_file="$(live_ta_file_for_rir "$rir_name")"
mkdir -p "$(dirname "$ta_file")" mkdir -p "$(dirname "$ta_file")"
tmp_file="${ta_file}.tmp.$$" tmp_file="${ta_file}.tmp.$$.$RANDOM"
curl -fsSL --connect-timeout 15 --max-time 120 "$ta_uri" -o "$tmp_file" \ if ! curl -fsSL --connect-timeout "$LIVE_TA_REFRESH_CONNECT_TIMEOUT_SECS" --max-time "$LIVE_TA_REFRESH_MAX_TIME_SECS" "$ta_uri" -o "$tmp_file"; then
|| { rm -f "$tmp_file"; die "failed to refresh TA for $rir_name from $ta_uri"; } rm -f "$tmp_file"
[[ -s "$tmp_file" ]] || { rm -f "$tmp_file"; die "empty TA download for $rir_name from $ta_uri"; } echo "live-ta-refresh failed rir=$rir_name reason=curl uri=$ta_uri"
return 1
fi
if [[ ! -s "$tmp_file" ]]; then
rm -f "$tmp_file"
echo "live-ta-refresh failed rir=$rir_name reason=empty_download uri=$ta_uri"
return 1
fi
mv "$tmp_file" "$ta_file" mv "$tmp_file" "$ta_file"
echo "live-ta-refresh success rir=$rir_name uri=$ta_uri output=$ta_file bytes=$(wc -c < "$ta_file" | tr -d ' ')"
}
# Make sure a non-empty live TA file exists for one RIR, seeding it from the fixture TA if needed.
#   $1 - RIR name
# Returns 0 when the live TA is already present or has just been seeded; calls die when
# neither a live TA nor a non-empty fixture TA is available, or when the copy fails.
ensure_live_ta_for_rir() {
  local rir_name="$1"
  local live_ta_file
  local fixture_ta_file
  local tmp_file
  live_ta_file="$(live_ta_file_for_rir "$rir_name")"
  if [[ -s "$live_ta_file" ]]; then
    return 0
  fi
  fixture_ta_file="$(ta_file_for_rir "$rir_name")"
  [[ -s "$fixture_ta_file" ]] || die "missing live TA and fixture TA for $rir_name: $live_ta_file / $fixture_ta_file"
  mkdir -p "$(dirname "$live_ta_file")"
  # Copy to a temporary name and rename into place. Copying straight onto the live path could
  # leave a truncated, non-empty file behind if interrupted, and the -s check above would
  # then accept that partial file on every later run.
  tmp_file="${live_ta_file}.tmp.$$.$RANDOM"
  if ! cp "$fixture_ta_file" "$tmp_file"; then
    rm -f "$tmp_file"
    die "failed to seed live TA for $rir_name from $fixture_ta_file"
  fi
  mv "$tmp_file" "$live_ta_file"
}
# Check the pid file of a previously started TA refresh for one RIR and clean it up when possible.
#   $1 - RIR name
# Returns 0 when no refresh is considered in flight (the pid file is removed if it existed);
# returns 1 when the recorded pid still exists, is not a zombie, and the pid file is not stale.
reap_finished_live_ta_refresh_for_rir() {
local rir_name="$1"
local pid_file
local pid
local pid_state
local pid_file_mtime
local now_epoch
local stale_after_secs
pid_file="$(live_ta_refresh_pid_file_for_rir "$rir_name")"
# No pid file: nothing was started, or it has already been cleaned up.
[[ -f "$pid_file" ]] || return 0
pid="$(cat "$pid_file" 2>/dev/null || true)"
# kill -0 only probes for the process; no signal is delivered.
if [[ "$pid" =~ ^[0-9]+$ ]] && kill -0 "$pid" >/dev/null 2>&1; then
pid_state=""
if [[ -r "/proc/$pid/stat" ]]; then
# Third whitespace-separated field of /proc/<pid>/stat; presumably the Linux process state letter.
pid_state="$(awk '{ print $3 }' "/proc/$pid/stat" 2>/dev/null || true)"
fi
# State "Z" is handled as a finished process that only needs to be waited on.
if [[ "$pid_state" == "Z" ]]; then
wait "$pid" >/dev/null 2>&1 || true
rm -f "$pid_file"
return 0
fi
# Age of the pid file; if stat fails the current time is used, which makes the age zero.
pid_file_mtime="$(stat -c %Y "$pid_file" 2>/dev/null || date +%s)"
now_epoch="$(date +%s)"
# A pid file older than the curl max time plus 60 seconds is treated as stale.
stale_after_secs=$((LIVE_TA_REFRESH_MAX_TIME_SECS + 60))
if (( now_epoch - pid_file_mtime > stale_after_secs )); then
# Only the pid file is removed here; the process it names is not signalled.
rm -f "$pid_file"
return 0
fi
# The recorded process exists and the pid file is recent: report it as still running.
return 1
fi
# The recorded process is gone (or the pid file content was not a number).
if [[ "$pid" =~ ^[0-9]+$ ]]; then
# Errors from wait are ignored.
wait "$pid" >/dev/null 2>&1 || true
fi
rm -f "$pid_file"
return 0
}
# Start a background TA refresh for one RIR, unless the previous one is still reported as running.
#   $1 - RIR name
#   $2 - run id, used in the per-run log file name and in the log lines
# On a skip, one line is appended to the per-run log and 0 is returned; otherwise the
# background job's pid is written to the RIR's pid file.
start_live_ta_refresh_for_rir() {
  local rir_name="$1"
  local run_id="$2"
  local pid_file
  local log_path
  local pid
  mkdir -p "$LIVE_TA_REFRESH_DIR" "$LOG_ROOT"
  pid_file="$(live_ta_refresh_pid_file_for_rir "$rir_name")"
  # The skip notice and the refresh output both go to the same per-run, per-RIR log file.
  log_path="$LOG_ROOT/live-ta-refresh-$run_id-$rir_name.log"
  if reap_finished_live_ta_refresh_for_rir "$rir_name"; then
    # Nothing in flight: launch the refresh in the background and remember its pid.
    refresh_live_ta_for_rir "$rir_name" "$run_id" "$log_path" &
    pid=$!
    printf '%s\n' "$pid" > "$pid_file"
  else
    # A previous refresh is still running: record the skip and leave that job alone.
    pid="$(cat "$pid_file" 2>/dev/null || true)"
    echo "live-ta-refresh skip run=$run_id rir=$rir_name reason=previous_refresh_running pid=$pid" \
      >> "$log_path"
    return 0
  fi
}
prepare_live_ta_inputs_for_run() {
local run_id="$1"
local rir_name
if [[ "$TAL_INPUT_MODE" != "file-live-ta" ]]; then
return 0
fi
for rir_name in "${RIR_LIST[@]}"; do
ensure_live_ta_for_rir "$rir_name"
done
for rir_name in "${RIR_LIST[@]}"; do
start_live_ta_refresh_for_rir "$rir_name" "$run_id"
done
} }
compare_view_trust_anchor() { compare_view_trust_anchor() {
@ -432,7 +542,6 @@ build_child_args() {
if [[ "$TAL_INPUT_MODE" == "url" ]]; then if [[ "$TAL_INPUT_MODE" == "url" ]]; then
CHILD_ARGS+=(--tal-url "$(tal_url_for_rir "$rir_name")") CHILD_ARGS+=(--tal-url "$(tal_url_for_rir "$rir_name")")
elif [[ "$TAL_INPUT_MODE" == "file-live-ta" ]]; then elif [[ "$TAL_INPUT_MODE" == "file-live-ta" ]]; then
refresh_live_ta_for_rir "$rir_name"
CHILD_ARGS+=(--tal-path "$(tal_file_for_rir "$rir_name")") CHILD_ARGS+=(--tal-path "$(tal_file_for_rir "$rir_name")")
CHILD_ARGS+=(--ta-path "$(live_ta_file_for_rir "$rir_name")") CHILD_ARGS+=(--ta-path "$(live_ta_file_for_rir "$rir_name")")
else else
@ -613,6 +722,7 @@ run_one_round() {
"$snapshot_reason" "$previous_run_id" "$previous_success_value" "$started_at" "" \ "$snapshot_reason" "$previous_run_id" "$previous_success_value" "$started_at" "" \
"$INVALID_DB_PATH" "$INVALID_STATE_PATH" "$INVALID_TMP_PATH" "" "$PACKAGE_ROOT" "$ENV_FILE" "$INVALID_DB_PATH" "$INVALID_STATE_PATH" "$INVALID_TMP_PATH" "" "$PACKAGE_ROOT" "$ENV_FILE"
prepare_live_ta_inputs_for_run "$run_id"
build_child_args build_child_args
if is_true "$RPKI_ANALYZE"; then if is_true "$RPKI_ANALYZE"; then
CHILD_ARGS+=(--analyze --analysis-out "$run_dir/analyze") CHILD_ARGS+=(--analyze --analysis-out "$run_dir/analyze")
@ -674,6 +784,8 @@ main() {
require_command find require_command find
if [[ "$TAL_INPUT_MODE" == "file-live-ta" ]]; then if [[ "$TAL_INPUT_MODE" == "file-live-ta" ]]; then
require_command curl require_command curl
validate_positive_int "LIVE_TA_REFRESH_CONNECT_TIMEOUT_SECS" "$LIVE_TA_REFRESH_CONNECT_TIMEOUT_SECS"
validate_positive_int "LIVE_TA_REFRESH_MAX_TIME_SECS" "$LIVE_TA_REFRESH_MAX_TIME_SECS"
fi fi
validate_max_runs validate_max_runs
validate_non_negative_int "INTERVAL_SECS" "$INTERVAL_SECS" validate_non_negative_int "INTERVAL_SECS" "$INTERVAL_SECS"
@ -701,7 +813,7 @@ main() {
fi fi
done done
mkdir -p "$RUNS_ROOT" "$LOG_ROOT" "$DB_DIR" "$META_DIR" "$TMP_DIR" "$INVALID_ROOT" mkdir -p "$RUNS_ROOT" "$LOG_ROOT" "$DB_DIR" "$META_DIR" "$TMP_DIR" "$INVALID_ROOT" "$LIVE_TA_REFRESH_DIR"
if is_true "$ALLOW_RSYNC_MIRROR_REUSE"; then if is_true "$ALLOW_RSYNC_MIRROR_REUSE"; then
mkdir -p "$RSYNC_MIRROR_ROOT" mkdir -p "$RSYNC_MIRROR_ROOT"
fi fi