# NOTE(review): the lines below are file-viewer metadata that ended up above the
# shebang. With them present the kernel cannot honor `#!/usr/bin/env bash` when
# the file is executed directly, and the first line would run as a command.
# Commented out so the file parses; ideally delete them so the shebang is line 1.
# rpki/scripts/manual_sync/full_sync.sh
# 2026-03-04 11:12:53 +08:00
#
# 169 lines
# 5.6 KiB
# Bash
# Executable File
#!/usr/bin/env bash
set -euo pipefail
# Full sync + validation from a TAL URL (default: APNIC).
#
# Produces artifacts under OUT_DIR (default: target/live/manual_sync):
# - *_db_* RocksDB directory
# - *_report_*.json audit report
# - *_run_*.log stdout/stderr log (includes summary)
# - *_db_stats_*.txt db_stats --exact output
# - *_rrdp_state_*.tsv rrdp_state_dump output
#
# Resolve the repo root (two levels up from this script) and run from there so
# relative cargo/target paths behave the same regardless of the invocation dir.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
cd "$ROOT_DIR"
# Every knob below is overridable from the environment; the ':-' default is
# used only when the variable is unset or empty.
TAL_URL="${TAL_URL:-https://tal.apnic.net/tal-archive/apnic-rfc7730-https.tal}"
HTTP_TIMEOUT_SECS="${HTTP_TIMEOUT_SECS:-1800}"
RSYNC_TIMEOUT_SECS="${RSYNC_TIMEOUT_SECS:-1800}"
RSYNC_MIRROR_ROOT="${RSYNC_MIRROR_ROOT:-}"  # empty => --rsync-mirror-root not passed
VALIDATION_TIME="${VALIDATION_TIME:-}"      # empty => --validation-time not passed
OUT_DIR="${OUT_DIR:-$ROOT_DIR/target/live/manual_sync}"
mkdir -p "$OUT_DIR"
# UTC timestamp makes the default artifact names unique per run.
TS="$(date -u +%Y%m%dT%H%M%SZ)"
RUN_NAME="${RUN_NAME:-apnic_full_${TS}}"
# Individual artifact paths; each may also be overridden independently of RUN_NAME.
DB_DIR="${DB_DIR:-$OUT_DIR/${RUN_NAME}_db}"
REPORT_JSON="${REPORT_JSON:-$OUT_DIR/${RUN_NAME}_report.json}"
RUN_LOG="${RUN_LOG:-$OUT_DIR/${RUN_NAME}_run.log}"
DB_STATS_TXT="${DB_STATS_TXT:-$OUT_DIR/${RUN_NAME}_db_stats.txt}"
RRDP_STATE_TSV="${RRDP_STATE_TSV:-$OUT_DIR/${RUN_NAME}_rrdp_state.tsv}"
RUN_META_JSON="${RUN_META_JSON:-$OUT_DIR/${RUN_NAME}_meta.json}"
SUMMARY_MD="${SUMMARY_MD:-$OUT_DIR/${RUN_NAME}_summary.md}"
# Announce the run parameters on stderr so they survive stdout redirection.
{
  echo "== rpki manual full sync =="
  echo "tal_url=$TAL_URL"
  echo "db=$DB_DIR"
  echo "report_json=$REPORT_JSON"
  echo "out_dir=$OUT_DIR"
} >&2
# Assemble the rpki invocation as an array so every argument stays one word
# even if a path contains spaces.
cmd=(
  cargo run --release --bin rpki --
  --db "$DB_DIR"
  --tal-url "$TAL_URL"
  --http-timeout-secs "$HTTP_TIMEOUT_SECS"
  --rsync-timeout-secs "$RSYNC_TIMEOUT_SECS"
  --report-json "$REPORT_JSON"
)
# Optional flags are appended only when their variables are non-empty.
[[ -z "$RSYNC_MIRROR_ROOT" ]] || cmd+=(--rsync-mirror-root "$RSYNC_MIRROR_ROOT")
[[ -z "$VALIDATION_TIME" ]] || cmd+=(--validation-time "$VALIDATION_TIME")
# Wall-clock timing: the whole script and each phase are timed separately.
script_start_s="$(date +%s)"
run_start_s="$(date +%s)"
# Run in a subshell so the exact shell-quoted command line is logged ahead of
# its output; stdout+stderr go to RUN_LOG only (tee's stdout is discarded).
# With `set -o pipefail` a cargo failure inside the subshell aborts the script.
(
echo "# command:"
printf '%q ' "${cmd[@]}"
echo
echo
"${cmd[@]}"
) 2>&1 | tee "$RUN_LOG" >/dev/null
run_end_s="$(date +%s)"
run_duration_s="$((run_end_s - run_start_s))"
echo "== db_stats (exact) ==" >&2
db_stats_start_s="$(date +%s)"
# Exact (--exact) DB statistics; output captured to DB_STATS_TXT only.
cargo run --release --bin db_stats -- --db "$DB_DIR" --exact 2>&1 | tee "$DB_STATS_TXT" >/dev/null
db_stats_end_s="$(date +%s)"
db_stats_duration_s="$((db_stats_end_s - db_stats_start_s))"
echo "== rrdp_state_dump ==" >&2
state_start_s="$(date +%s)"
# RRDP state dump is written directly to the TSV artifact (stdout only).
cargo run --release --bin rrdp_state_dump -- --db "$DB_DIR" >"$RRDP_STATE_TSV"
state_end_s="$(date +%s)"
state_duration_s="$((state_end_s - state_start_s))"
script_end_s="$(date +%s)"
total_duration_s="$((script_end_s - script_start_s))"
echo "== write run meta + summary ==" >&2
# Hand run parameters and measured durations to the Python helper through the
# environment; the three output/input file paths go through argv.  The heredoc
# delimiter is quoted ('PY'), so the shell performs NO expansion inside the
# Python source — which also means backslash escapes reach Python verbatim.
TAL_URL="$TAL_URL" \
DB_DIR="$DB_DIR" \
REPORT_JSON="$REPORT_JSON" \
RUN_LOG="$RUN_LOG" \
HTTP_TIMEOUT_SECS="$HTTP_TIMEOUT_SECS" \
RSYNC_TIMEOUT_SECS="$RSYNC_TIMEOUT_SECS" \
VALIDATION_TIME_ARG="$VALIDATION_TIME" \
RUN_DURATION_S="$run_duration_s" \
DB_STATS_DURATION_S="$db_stats_duration_s" \
STATE_DURATION_S="$state_duration_s" \
TOTAL_DURATION_S="$total_duration_s" \
python3 - "$REPORT_JSON" "$RUN_META_JSON" "$SUMMARY_MD" <<'PY'
"""Read the audit report (argv[1]) and write run metadata JSON (argv[2])
plus a human-readable Markdown summary (argv[3])."""
import json
import os
import sys
from datetime import datetime, timezone
from pathlib import Path

report_path = Path(sys.argv[1])
meta_path = Path(sys.argv[2])
summary_path = Path(sys.argv[3])

rep = json.loads(report_path.read_text(encoding="utf-8"))
now = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

meta = {
    "recorded_at_utc": now,
    "tal_url": os.environ["TAL_URL"],
    "db_dir": os.environ["DB_DIR"],
    "report_json": os.environ["REPORT_JSON"],
    "run_log": os.environ["RUN_LOG"],
    # Effective validation time is taken from the report, not the env arg.
    "validation_time_rfc3339_utc": rep["meta"]["validation_time_rfc3339_utc"],
    "http_timeout_secs": int(os.environ["HTTP_TIMEOUT_SECS"]),
    "rsync_timeout_secs": int(os.environ["RSYNC_TIMEOUT_SECS"]),
    "validation_time_arg": os.environ.get("VALIDATION_TIME_ARG", ""),
    "durations_secs": {
        "rpki_run": int(os.environ["RUN_DURATION_S"]),
        "db_stats_exact": int(os.environ["DB_STATS_DURATION_S"]),
        "rrdp_state_dump": int(os.environ["STATE_DURATION_S"]),
        "total_script": int(os.environ["TOTAL_DURATION_S"]),
    },
    "counts": {
        "publication_points_processed": rep["tree"]["instances_processed"],
        "publication_points_failed": rep["tree"]["instances_failed"],
        "vrps": len(rep["vrps"]),
        "aspas": len(rep["aspas"]),
        "audit_publication_points": len(rep["publication_points"]),
    },
}
meta_path.write_text(json.dumps(meta, ensure_ascii=False, indent=2) + "\n",
                     encoding="utf-8")

# Markdown summary.  NOTE: single-backslash "\n" escapes are required here —
# the quoted heredoc passes the source verbatim, so a doubled "\\n" would end
# up as a literal backslash-n in the Markdown file instead of a newline.
lines = []
lines.append("# Manual full sync summary\n\n")
lines.append(f"- recorded_at_utc: `{now}`\n")
lines.append(f"- tal_url: `{meta['tal_url']}`\n")
lines.append(f"- db: `{meta['db_dir']}`\n")
lines.append(f"- report_json: `{meta['report_json']}`\n")
lines.append(f"- validation_time: `{meta['validation_time_rfc3339_utc']}`\n\n")
lines.append("## Results\n\n")
lines.append("| metric | value |\n")
lines.append("|---|---:|\n")
for k in ["publication_points_processed", "publication_points_failed",
          "vrps", "aspas", "audit_publication_points"]:
    lines.append(f"| {k} | {meta['counts'][k]} |\n")
lines.append("\n")
lines.append("## Durations (seconds)\n\n")
lines.append("| step | seconds |\n")
lines.append("|---|---:|\n")
for k, v in meta["durations_secs"].items():
    lines.append(f"| {k} | {v} |\n")
lines.append("\n")
summary_path.write_text("".join(lines), encoding="utf-8")
print(summary_path)
PY
# Final artifact index, on stderr so it survives stdout redirection.
{
  echo "== done =="
  echo "artifacts:"
  echo "- db: $DB_DIR"
  echo "- report: $REPORT_JSON"
  echo "- run log: $RUN_LOG"
  echo "- db stats: $DB_STATS_TXT"
  echo "- rrdp state: $RRDP_STATE_TSV"
  echo "- meta json: $RUN_META_JSON"
  echo "- summary md: $SUMMARY_MD"
} >&2