diff --git a/scripts/coverage.sh b/scripts/coverage.sh index b56efbc..e12b5fc 100755 --- a/scripts/coverage.sh +++ b/scripts/coverage.sh @@ -14,7 +14,7 @@ cleanup() { } trap cleanup EXIT -IGNORE_REGEX='src/bin/replay_bundle_capture\.rs|src/bin/replay_bundle_capture_delta\.rs|src/bundle/live_capture\.rs' +IGNORE_REGEX='src/bin/replay_bundle_capture\.rs|src/bin/replay_bundle_capture_delta\.rs|src/bin/replay_bundle_capture_sequence\.rs|src/bundle/live_capture\.rs' # Preserve colored output even though we post-process output by running under a pseudo-TTY. # We run tests only once, then generate both CLI text + HTML reports without rerunning tests. diff --git a/scripts/replay_bundle/README.md b/scripts/replay_bundle/README.md index 2e0634c..fc70ac2 100644 --- a/scripts/replay_bundle/README.md +++ b/scripts/replay_bundle/README.md @@ -59,6 +59,38 @@ target/replay/live_bundle_matrix_ target/replay/live_bundle_matrix_/_live_bundle_ ``` +如果要录制单个 RIR 的 `1 base + N delta` 序列,使用: + +```bash +cd rpki +./scripts/replay_bundle/run_live_bundle_record_sequence.sh \ + --rir apnic \ + --tal-path tests/fixtures/tal/apnic-rfc7730-https.tal \ + --ta-path tests/fixtures/ta/apnic-ta.cer \ + --delta-count 2 \ + --delta-interval-secs 0 +``` + +默认输出目录: + +```text +target/replay/<rir>_live_bundle_sequence_<timestamp> +``` + +如果要一次录制多个 RIR 的 `1 base + N delta` 序列,使用: + +```bash +cd rpki +./scripts/replay_bundle/run_live_bundle_record_multi_rir_sequence.sh \ + --rir afrinic,apnic,arin,lacnic,ripe +``` + +默认输出目录: + +```text +target/replay/live_bundle_sequence_matrix_<timestamp> +``` + ## 可选参数 - `--out-dir ` @@ -72,6 +104,9 @@ target/replay/live_bundle_matrix_/_live_bundle_ - `--trust-anchor ` - `--bin-dir ` - `--no-build` +- `--delta-count <n>`(sequence 入口) +- `--delta-interval-secs <secs>`(sequence 入口) +- `--keep-db`(sequence 入口) `run_live_bundle_record_multi_rir.sh` 会自动按 RIR 选择当前仓库内置的: diff --git a/scripts/replay_bundle/run_live_bundle_record_multi_rir_sequence.sh 
b/scripts/replay_bundle/run_live_bundle_record_multi_rir_sequence.sh
new file mode 100755
index 0000000..6dac573
--- /dev/null
+++ b/scripts/replay_bundle/run_live_bundle_record_multi_rir_sequence.sh
@@ -0,0 +1,181 @@
+#!/usr/bin/env bash
+# Records a `1 base + N delta` live bundle sequence for every requested RIR by calling
+# run_live_bundle_record_sequence.sh once per RIR, then aggregates the per-RIR results
+# into summary.json and summary.md under the output root.
+set -euo pipefail
+
+ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
+cd "$ROOT_DIR"
+
+RIRS=""
+OUT_ROOT=""
+BASE_VALIDATION_TIME=""
+DELTA_COUNT=""
+DELTA_INTERVAL_SECS=""
+HTTP_TIMEOUT_SECS=""
+RSYNC_TIMEOUT_SECS=""
+RSYNC_MIRROR_ROOT=""
+MAX_DEPTH=""
+MAX_INSTANCES=""
+NO_BUILD=0
+KEEP_DB=0
+CAPTURE_INPUTS_ONLY=0
+BIN_DIR="target/release"
+
+usage() {
+  cat <<'EOF'
+Usage:
+  ./scripts/replay_bundle/run_live_bundle_record_multi_rir_sequence.sh \
+    --rir <rir[,rir...]> \
+    [--out-root <dir>] \
+    [--base-validation-time <rfc3339>] \
+    [--delta-count <n>] \
+    [--delta-interval-secs <secs>] \
+    [--http-timeout-secs <secs>] \
+    [--rsync-timeout-secs <secs>] \
+    [--rsync-mirror-root <dir>] \
+    [--max-depth <n>] \
+    [--max-instances <n>] \
+    [--bin-dir <dir>] \
+    [--no-build] \
+    [--keep-db] \
+    [--capture-inputs-only]
+EOF
+}
+
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --rir) RIRS="${2:?}"; shift 2 ;;
+    --out-root) OUT_ROOT="${2:?}"; shift 2 ;;
+    --base-validation-time) BASE_VALIDATION_TIME="${2:?}"; shift 2 ;;
+    --delta-count) DELTA_COUNT="${2:?}"; shift 2 ;;
+    --delta-interval-secs) DELTA_INTERVAL_SECS="${2:?}"; shift 2 ;;
+    --http-timeout-secs) HTTP_TIMEOUT_SECS="${2:?}"; shift 2 ;;
+    --rsync-timeout-secs) RSYNC_TIMEOUT_SECS="${2:?}"; shift 2 ;;
+    --rsync-mirror-root) RSYNC_MIRROR_ROOT="${2:?}"; shift 2 ;;
+    --max-depth) MAX_DEPTH="${2:?}"; shift 2 ;;
+    --max-instances) MAX_INSTANCES="${2:?}"; shift 2 ;;
+    --bin-dir) BIN_DIR="${2:?}"; shift 2 ;;
+    --no-build) NO_BUILD=1; shift ;;
+    --keep-db) KEEP_DB=1; shift ;;
+    --capture-inputs-only) CAPTURE_INPUTS_ONLY=1; shift ;;
+    --help|-h) usage; exit 0 ;;
+    *) echo "unknown argument: $1" >&2; usage >&2; exit 2 ;;
+  esac
+done
+
+if [[ -z "$RIRS" ]]; then
+  usage >&2
+  exit 2
+fi
+
+RUN_TAG="$(date -u +%Y%m%dT%H%M%SZ)"
+if [[ -z "$OUT_ROOT" ]]; then
+  OUT_ROOT="target/replay/live_bundle_sequence_matrix_${RUN_TAG}"
+fi
+mkdir -p "$OUT_ROOT"
+
+# Maps a lower-cased RIR name to the TAL fixture shipped in this repository.
+resolve_tal_path() {
+  case "$1" in
+    afrinic) printf 'tests/fixtures/tal/afrinic.tal' ;;
+    apnic) printf 'tests/fixtures/tal/apnic-rfc7730-https.tal' ;;
+    arin) printf 'tests/fixtures/tal/arin.tal' ;;
+    lacnic) printf 'tests/fixtures/tal/lacnic.tal' ;;
+    ripe) printf 'tests/fixtures/tal/ripe-ncc.tal' ;;
+    *) echo "unsupported rir: $1" >&2; exit 2 ;;
+  esac
+}
+
+# Maps a lower-cased RIR name to the trust-anchor certificate fixture.
+resolve_ta_path() {
+  case "$1" in
+    afrinic) printf 'tests/fixtures/ta/afrinic-ta.cer' ;;
+    apnic) printf 'tests/fixtures/ta/apnic-ta.cer' ;;
+    arin) printf 'tests/fixtures/ta/arin-ta.cer' ;;
+    lacnic) printf 'tests/fixtures/ta/lacnic-ta.cer' ;;
+    ripe) printf 'tests/fixtures/ta/ripe-ncc-ta.cer' ;;
+    *) echo "unsupported rir: $1" >&2; exit 2 ;;
+  esac
+}
+
+SUMMARY_JSON="$OUT_ROOT/summary.json"
+SUMMARY_MD="$OUT_ROOT/summary.md"
+# Start from an empty summary; one result is appended after each RIR completes.
+python3 - "$SUMMARY_JSON" "$RUN_TAG" <<'PY'
+import json, sys
+path, run_tag = sys.argv[1:]
+with open(path, "w") as fh:
+    json.dump({"runTag": run_tag, "results": []}, fh, indent=2)
+PY
+
+IFS=',' read -r -a RIR_LIST <<< "$RIRS"
+for raw_rir in "${RIR_LIST[@]}"; do
+  # Normalize the name: lower-case it and trim surrounding whitespace (via xargs).
+  rir="$(printf '%s' "$raw_rir" | tr '[:upper:]' '[:lower:]' | xargs)"
+  [[ -n "$rir" ]] || continue
+  tal_path="$(resolve_tal_path "$rir")"
+  ta_path="$(resolve_ta_path "$rir")"
+  out_dir="$OUT_ROOT/${rir}_live_bundle_sequence_${RUN_TAG}"
+  cmd=(
+    ./scripts/replay_bundle/run_live_bundle_record_sequence.sh
+    --rir "$rir"
+    --out-dir "$out_dir"
+    --tal-path "$tal_path"
+    --ta-path "$ta_path"
+    --trust-anchor "$rir"
+    --bin-dir "$BIN_DIR"
+  )
+  [[ -n "$BASE_VALIDATION_TIME" ]] && cmd+=(--base-validation-time "$BASE_VALIDATION_TIME")
+  [[ -n "$DELTA_COUNT" ]] && cmd+=(--delta-count "$DELTA_COUNT")
+  [[ -n "$DELTA_INTERVAL_SECS" ]] && cmd+=(--delta-interval-secs "$DELTA_INTERVAL_SECS")
+  [[ -n "$HTTP_TIMEOUT_SECS" ]] && cmd+=(--http-timeout-secs "$HTTP_TIMEOUT_SECS")
+  [[ -n "$RSYNC_TIMEOUT_SECS" ]] && cmd+=(--rsync-timeout-secs "$RSYNC_TIMEOUT_SECS")
+  [[ -n "$RSYNC_MIRROR_ROOT" ]] && cmd+=(--rsync-mirror-root "$RSYNC_MIRROR_ROOT")
+  [[ -n "$MAX_DEPTH" ]] && cmd+=(--max-depth "$MAX_DEPTH")
+  [[ -n "$MAX_INSTANCES" ]] && cmd+=(--max-instances "$MAX_INSTANCES")
+  [[ "$NO_BUILD" -eq 1 ]] && cmd+=(--no-build)
+  [[ "$KEEP_DB" -eq 1 ]] && cmd+=(--keep-db)
+  [[ "$CAPTURE_INPUTS_ONLY" -eq 1 ]] && cmd+=(--capture-inputs-only)
+  "${cmd[@]}"
+
+  python3 - "$SUMMARY_JSON" "$rir" "$out_dir" <<'PY'
+import json, pathlib, sys
+summary_path, rir, out_dir = sys.argv[1:]
+summary = json.loads(pathlib.Path(summary_path).read_text())
+bundle = json.loads(pathlib.Path(out_dir, rir, "bundle.json").read_text())
+verification = json.loads(pathlib.Path(out_dir, rir, "verification.json").read_text())
+summary["results"].append({
+    "rir": rir,
+    "outDir": out_dir,
+    "stepCount": len(bundle["deltaSequence"]["steps"]),
+    "baseVrpCount": bundle["base"]["vrpCount"],
+    "baseVapCount": bundle["base"]["vapCount"],
+    "allStepsSelfReplayOk": verification["summary"]["allStepsSelfReplayOk"],
+})
+pathlib.Path(summary_path).write_text(json.dumps(summary, indent=2))
+PY
+done
+
+# Render the accumulated JSON summary as a Markdown table.
+python3 - "$SUMMARY_JSON" "$SUMMARY_MD" <<'PY'
+import json, pathlib, sys
+summary = json.loads(pathlib.Path(sys.argv[1]).read_text())
+out = pathlib.Path(sys.argv[2])
+lines = [
+    "# Multi-RIR Live Bundle Sequence Summary",
+    "",
+    f"- runTag: `{summary['runTag']}`",
+    "",
+    "| rir | step_count | base_vrps | base_vaps | all_steps_self_replay | out_dir |",
+    "|---|---:|---:|---:|---|---|",
+]
+for item in summary["results"]:
+    lines.append(
+        f"| {item['rir']} | {item['stepCount']} | {item['baseVrpCount']} | {item['baseVapCount']} | "
+        f"{str(item['allStepsSelfReplayOk']).lower()} | `{item['outDir']}` |"
+    )
+out.write_text("\n".join(lines) + "\n")
+PY
+
+echo "$OUT_ROOT"
diff --git a/scripts/replay_bundle/run_live_bundle_record_sequence.sh b/scripts/replay_bundle/run_live_bundle_record_sequence.sh new file mode 100755 index 
0000000..41d9224
--- /dev/null
+++ b/scripts/replay_bundle/run_live_bundle_record_sequence.sh
@@ -0,0 +1,122 @@
+#!/usr/bin/env bash
+# Records one RIR's `1 base + N delta` live replay bundle sequence by building (unless
+# --no-build is given) and running the replay_bundle_capture_sequence binary.
+set -euo pipefail
+
+ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
+cd "$ROOT_DIR"
+
+RIR=""
+OUT_DIR=""
+TAL_PATH=""
+TA_PATH=""
+BASE_VALIDATION_TIME=""
+DELTA_COUNT=""
+DELTA_INTERVAL_SECS=""
+HTTP_TIMEOUT_SECS=""
+RSYNC_TIMEOUT_SECS=""
+RSYNC_MIRROR_ROOT=""
+MAX_DEPTH=""
+MAX_INSTANCES=""
+TRUST_ANCHOR=""
+NO_BUILD=0
+KEEP_DB=0
+CAPTURE_INPUTS_ONLY=0
+BIN_DIR="target/release"
+# Progress logging defaults; override through the RPKI_PROGRESS_* environment variables.
+PROGRESS_LOG="${RPKI_PROGRESS_LOG:-1}"
+PROGRESS_SLOW_SECS="${RPKI_PROGRESS_SLOW_SECS:-30}"
+
+usage() {
+  cat <<'EOF'
+Usage:
+  ./scripts/replay_bundle/run_live_bundle_record_sequence.sh \
+    --rir <rir> \
+    --tal-path <path> \
+    --ta-path <path> \
+    [--out-dir <dir>] \
+    [--base-validation-time <rfc3339>] \
+    [--delta-count <n>] \
+    [--delta-interval-secs <secs>] \
+    [--http-timeout-secs <secs>] \
+    [--rsync-timeout-secs <secs>] \
+    [--rsync-mirror-root <dir>] \
+    [--max-depth <n>] \
+    [--max-instances <n>] \
+    [--trust-anchor <name>] \
+    [--bin-dir <dir>] \
+    [--no-build] \
+    [--keep-db] \
+    [--capture-inputs-only]
+EOF
+}
+
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --rir) RIR="${2:?}"; shift 2 ;;
+    --out-dir) OUT_DIR="${2:?}"; shift 2 ;;
+    --tal-path) TAL_PATH="${2:?}"; shift 2 ;;
+    --ta-path) TA_PATH="${2:?}"; shift 2 ;;
+    --base-validation-time) BASE_VALIDATION_TIME="${2:?}"; shift 2 ;;
+    --delta-count) DELTA_COUNT="${2:?}"; shift 2 ;;
+    --delta-interval-secs) DELTA_INTERVAL_SECS="${2:?}"; shift 2 ;;
+    --http-timeout-secs) HTTP_TIMEOUT_SECS="${2:?}"; shift 2 ;;
+    --rsync-timeout-secs) RSYNC_TIMEOUT_SECS="${2:?}"; shift 2 ;;
+    --rsync-mirror-root) RSYNC_MIRROR_ROOT="${2:?}"; shift 2 ;;
+    --max-depth) MAX_DEPTH="${2:?}"; shift 2 ;;
+    --max-instances) MAX_INSTANCES="${2:?}"; shift 2 ;;
+    --trust-anchor) TRUST_ANCHOR="${2:?}"; shift 2 ;;
+    --bin-dir) BIN_DIR="${2:?}"; shift 2 ;;
+    --no-build) NO_BUILD=1; shift ;;
+    --keep-db) KEEP_DB=1; shift ;;
+    --capture-inputs-only) CAPTURE_INPUTS_ONLY=1; shift ;;
+    --help|-h) usage; exit 0 ;;
+    *) echo "unknown argument: $1" >&2; usage >&2; exit 2 ;;
+  esac
+done
+
+if [[ -z "$RIR" || -z "$TAL_PATH" || -z "$TA_PATH" ]]; then
+  usage >&2
+  exit 2
+fi
+
+TS="$(date -u +%Y%m%dT%H%M%SZ)"
+if [[ -z "$OUT_DIR" ]]; then
+  OUT_DIR="target/replay/${RIR}_live_bundle_sequence_${TS}"
+fi
+
+SEQUENCE_BIN="$BIN_DIR/replay_bundle_capture_sequence"
+if [[ "$NO_BUILD" -eq 0 ]]; then
+  echo "[1/1] build release binary"
+  cargo build --release --bin replay_bundle_capture_sequence
+else
+  echo "[1/1] reuse existing binary from $BIN_DIR"
+fi
+
+if [[ ! -x "$SEQUENCE_BIN" ]]; then
+  echo "missing executable: $SEQUENCE_BIN" >&2
+  exit 1
+fi
+
+# Forward only the options the caller actually set.
+cmd=(
+  "$SEQUENCE_BIN"
+  --rir "$RIR"
+  --out-dir "$OUT_DIR"
+  --tal-path "$TAL_PATH"
+  --ta-path "$TA_PATH"
+)
+[[ -n "$BASE_VALIDATION_TIME" ]] && cmd+=(--base-validation-time "$BASE_VALIDATION_TIME")
+[[ -n "$DELTA_COUNT" ]] && cmd+=(--delta-count "$DELTA_COUNT")
+[[ -n "$DELTA_INTERVAL_SECS" ]] && cmd+=(--delta-interval-secs "$DELTA_INTERVAL_SECS")
+[[ -n "$HTTP_TIMEOUT_SECS" ]] && cmd+=(--http-timeout-secs "$HTTP_TIMEOUT_SECS")
+[[ -n "$RSYNC_TIMEOUT_SECS" ]] && cmd+=(--rsync-timeout-secs "$RSYNC_TIMEOUT_SECS")
+[[ -n "$RSYNC_MIRROR_ROOT" ]] && cmd+=(--rsync-mirror-root "$RSYNC_MIRROR_ROOT")
+[[ -n "$MAX_DEPTH" ]] && cmd+=(--max-depth "$MAX_DEPTH")
+[[ -n "$MAX_INSTANCES" ]] && cmd+=(--max-instances "$MAX_INSTANCES")
+[[ -n "$TRUST_ANCHOR" ]] && cmd+=(--trust-anchor "$TRUST_ANCHOR")
+[[ "$KEEP_DB" -eq 1 ]] && cmd+=(--keep-db)
+[[ "$CAPTURE_INPUTS_ONLY" -eq 1 ]] && cmd+=(--capture-inputs-only)
+
+RPKI_PROGRESS_LOG="$PROGRESS_LOG" \
+RPKI_PROGRESS_SLOW_SECS="$PROGRESS_SLOW_SECS" \
+"${cmd[@]}"
diff --git a/scripts/replay_verify/run_peer_bundle_matrix.sh b/scripts/replay_verify/run_peer_bundle_matrix.sh index 2e79965..2026cd0 100755 --- a/scripts/replay_verify/run_peer_bundle_matrix.sh +++ b/scripts/replay_verify/run_peer_bundle_matrix.sh @@ -10,6 +10,7 @@ OUT_ROOT="" 
ROUTINATOR_ROOT="/home/yuyr/dev/rust_playground/routinator" RPKI_CLIENT_ROOT="/home/yuyr/dev/rpki-client-9.7" RPKI_CLIENT_BUILD_DIR="/home/yuyr/dev/rpki-client-9.7/build-m5" +KEEP_DB=0 usage() { cat <<'EOF' @@ -20,7 +21,8 @@ Usage: [--out-root ] \ [--routinator-root ] \ [--rpki-client-root ] \ - [--rpki-client-build-dir ] + [--rpki-client-build-dir ] \ + [--keep-db] EOF } @@ -32,6 +34,7 @@ while [[ $# -gt 0 ]]; do --routinator-root) ROUTINATOR_ROOT="${2:?}"; shift 2 ;; --rpki-client-root) RPKI_CLIENT_ROOT="${2:?}"; shift 2 ;; --rpki-client-build-dir) RPKI_CLIENT_BUILD_DIR="${2:?}"; shift 2 ;; + --keep-db) KEEP_DB=1; shift ;; --help|-h) usage; exit 0 ;; *) echo "unknown argument: $1" >&2; usage >&2; exit 2 ;; esac @@ -42,11 +45,24 @@ if [[ -z "$BUNDLE_ROOT" ]]; then exit 2 fi +BUNDLE_ROOT="$(python3 - "$BUNDLE_ROOT" <<'PY' +from pathlib import Path +import sys +print(Path(sys.argv[1]).resolve()) +PY +)" + RUN_TAG="$(date -u +%Y%m%dT%H%M%SZ)" if [[ -z "$OUT_ROOT" ]]; then OUT_ROOT="target/replay/peer_bundle_matrix_${RUN_TAG}" fi mkdir -p "$OUT_ROOT" +OUT_ROOT="$(python3 - "$OUT_ROOT" <<'PY' +from pathlib import Path +import sys +print(Path(sys.argv[1]).resolve()) +PY +)" discover_rirs() { python3 - "$BUNDLE_ROOT" <<'PY' @@ -100,9 +116,13 @@ for raw_rir in "${RIR_LIST[@]}"; do source_bundle_dir="$match" fi ln -sfn "$source_bundle_dir" "$NORMALIZED_BUNDLE_ROOT/$rir" - "$ROUTINATOR_ROOT/bench/multi_rir_demo_ours/run_single_rir_ours_bundle.sh" \ - "$source_bundle_dir" \ + ROUTI_CMD=( + "$ROUTINATOR_ROOT/bench/multi_rir_demo_ours/run_single_rir_ours_bundle.sh" + "$source_bundle_dir" "$ROUTI_OUT/$rir" + ) + [[ "$KEEP_DB" -eq 1 ]] && ROUTI_CMD=( "$ROUTINATOR_ROOT/bench/multi_rir_demo_ours/run_single_rir_ours_bundle.sh" --keep-db "$source_bundle_dir" "$ROUTI_OUT/$rir" ) + "${ROUTI_CMD[@]}" done CLIENT_ARGS=( @@ -111,6 +131,7 @@ CLIENT_ARGS=( --build-dir "$RPKI_CLIENT_BUILD_DIR" --work-dir "$CLIENT_OUT" ) +[[ "$KEEP_DB" -eq 1 ]] && CLIENT_ARGS+=(--keep-db) for raw_rir in 
"${RIR_LIST[@]}"; do rir="$(printf '%s' "$raw_rir" | tr '[:upper:]' '[:lower:]' | xargs)" [[ -n "$rir" ]] || continue @@ -147,27 +168,41 @@ lines = [ "", "## Routinator", "", - "| rir | base_vrp | delta_vrp | base_vap | delta_vap |", + "| rir | base_vrp | base_vap | sequence_vrp | sequence_vap |", "|---|---|---|---|---|", ] for rir, data in sorted(summary["routinator"].items()): - lines.append( - f"| {rir} | {str(data.get('baseMatch')).lower()} | {str(data.get('deltaMatch')).lower()} | " - f"{str(data.get('baseVapsMatch')).lower()} | {str(data.get('deltaVapsMatch')).lower()} |" - ) + if "steps" in data: + lines.append( + f"| {rir} | {str(data.get('baseMatch')).lower()} | {str(data.get('baseVapsMatch')).lower()} | " + f"{str(data.get('summary', {}).get('allStepsMatch')).lower()} | " + f"{str(data.get('summary', {}).get('allStepsVapsMatch')).lower()} |" + ) + else: + lines.append( + f"| {rir} | {str(data.get('baseMatch')).lower()} | {str(data.get('baseVapsMatch')).lower()} | " + f"{str(data.get('deltaMatch')).lower()} | {str(data.get('deltaVapsMatch')).lower()} |" + ) lines += [ "", "## rpki-client", "", - "| rir | base_vrp | delta_vrp | base_vap | delta_vap |", + "| rir | base_vrp | base_vap | sequence_vrp | sequence_vap |", "|---|---|---|---|---|", ] for rir, phases in sorted(summary["rpki_client"].items()): base = phases.get("base", {}) - delta = phases.get("delta", {}) + step_items = [ + value for key, value in phases.items() + if key not in ("base", "delta") and isinstance(value, dict) + ] + if "delta" in phases: + step_items.append(phases["delta"]) + all_step_match = all(item.get("match") for item in step_items) if step_items else None + all_step_vap_match = all(item.get("vaps_match") for item in step_items) if step_items else None lines.append( - f"| {rir} | {str(base.get('match')).lower()} | {str(delta.get('match')).lower()} | " - f"{str(base.get('vaps_match')).lower()} | {str(delta.get('vaps_match')).lower()} |" + f"| {rir} | {str(base.get('match')).lower()} 
| {str(base.get('vaps_match')).lower()} | " + f"{str(all_step_match).lower()} | {str(all_step_vap_match).lower()} |" ) out.write_text("\n".join(lines) + "\n") PY diff --git a/src/audit.rs b/src/audit.rs index 9707f61..4cd64c1 100644 --- a/src/audit.rs +++ b/src/audit.rs @@ -72,6 +72,12 @@ pub struct PublicationPointAudit { pub rrdp_notification_uri: Option, pub source: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub repo_sync_source: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub repo_sync_duration_ms: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub repo_sync_error: Option, pub this_update_rfc3339_utc: String, pub next_update_rfc3339_utc: String, pub verified_at_rfc3339_utc: String,
diff --git a/src/bin/measure_sequence_replay.rs b/src/bin/measure_sequence_replay.rs
new file mode 100644
index 0000000..2eb4877
--- /dev/null
+++ b/src/bin/measure_sequence_replay.rs
@@ -0,0 +1,313 @@
+//! Measures how long replaying a recorded `1 base + N delta` bundle sequence takes.
+//!
+//! For every selected RIR directory under `--bundle-root`, the base archive is replayed
+//! and then each delta step in order; the per-phase durations and VRP/VAP counts are
+//! written to `--out` as JSON.
+
+use rpki::bundle::record_io::load_validation_time;
+use rpki::storage::RocksStore;
+use rpki::validation::run_tree_from_tal::{
+    run_tree_from_tal_and_ta_der_payload_delta_replay_step_serial_audit,
+    run_tree_from_tal_and_ta_der_payload_replay_serial_audit,
+};
+use rpki::validation::tree::TreeRunConfig;
+use serde::Serialize;
+use std::fs;
+use std::path::{Path, PathBuf};
+use std::time::Instant;
+
+/// Returns the one-line command-line synopsis.
+fn usage() -> &'static str {
+    "Usage: measure_sequence_replay --bundle-root <dir> [--rir <rir[,rir...]>] --out <file> [--keep-db]"
+}
+
+/// Parsed command-line options.
+#[derive(Default)]
+struct Args {
+    /// Directory holding one sub-directory per RIR bundle (`--bundle-root`, required).
+    bundle_root: Option<PathBuf>,
+    /// Lower-cased RIR names from `--rir`; `None` means discover them from `bundle_root`.
+    rirs: Option<Vec<String>>,
+    /// Path of the JSON report to write (`--out`, required).
+    out: Option<PathBuf>,
+    /// Keep the per-RIR scratch databases instead of deleting them (`--keep-db`).
+    keep_db: bool,
+}
+
+/// Parses `std::env::args()` into [`Args`].
+///
+/// `--help`/`-h` prints the usage to stdout and exits the process with status 0.
+///
+/// # Errors
+///
+/// Returns a message for an unknown argument, a flag that is missing its value, or when
+/// `--bundle-root` or `--out` is absent.
+fn parse_args() -> Result<Args, String> {
+    let mut out = Args::default();
+    let argv: Vec<String> = std::env::args().skip(1).collect();
+    let mut i = 0usize;
+    while i < argv.len() {
+        match argv[i].as_str() {
+            "--bundle-root" => {
+                i += 1;
+                out.bundle_root = Some(PathBuf::from(
+                    argv.get(i).ok_or("--bundle-root requires a value")?,
+                ));
+            }
+            "--rir" => {
+                i += 1;
+                let value = argv.get(i).ok_or("--rir requires a value")?;
+                out.rirs = Some(
+                    value
+                        .split(',')
+                        .map(|s| s.trim().to_lowercase())
+                        .filter(|s| !s.is_empty())
+                        .collect(),
+                );
+            }
+            "--out" => {
+                i += 1;
+                out.out = Some(PathBuf::from(argv.get(i).ok_or("--out requires a value")?));
+            }
+            "--keep-db" => out.keep_db = true,
+            "--help" | "-h" => {
+                // An explicit help request is not a failure: print to stdout and exit 0
+                // instead of routing the usage through the error path (stderr, exit 1).
+                println!("{}", usage());
+                std::process::exit(0);
+            }
+            other => return Err(format!("unknown argument: {other}\n{}", usage())),
+        }
+        i += 1;
+    }
+    if out.bundle_root.is_none() || out.out.is_none() {
+        return Err(format!(
+            "--bundle-root and --out are required\n{}",
+            usage()
+        ));
+    }
+    Ok(out)
+}
+
+/// Wall-clock duration and output sizes of one replay phase (the base or one delta step).
+#[derive(Serialize)]
+struct PhaseTiming {
+    duration_seconds: f64,
+    vrp_count: usize,
+    vap_count: usize,
+}
+
+/// Replay timings of one RIR: the base phase plus `(step id, timing)` per delta step.
+#[derive(Serialize)]
+struct RirTiming {
+    rir: String,
+    base: PhaseTiming,
+    steps: Vec<(String, PhaseTiming)>,
+}
+
+/// Lists the sub-directories of `bundle_root` that contain both `bundle.json` and
+/// `tal.tal`, sorted by name.
+///
+/// # Errors
+///
+/// Returns a message when the directory cannot be read or a name is not valid UTF-8.
+fn discover_rirs(bundle_root: &Path) -> Result<Vec<String>, String> {
+    let mut out = Vec::new();
+    for entry in fs::read_dir(bundle_root)
+        .map_err(|e| format!("read_dir failed: {}: {e}", bundle_root.display()))?
+    {
+        let entry = entry.map_err(|e| format!("read_dir entry failed: {e}"))?;
+        let path = entry.path();
+        if path.is_dir() && path.join("bundle.json").exists() && path.join("tal.tal").exists() {
+            out.push(
+                path.file_name()
+                    .and_then(|s| s.to_str())
+                    .ok_or_else(|| format!("invalid rir dir name: {}", path.display()))?
+                    .to_string(),
+            );
+        }
+    }
+    out.sort();
+    Ok(out)
+}
+
+/// Resolves a bundle-relative path string against `root`.
+fn path_join(root: &Path, relative: &str) -> PathBuf {
+    root.join(relative)
+}
+
+/// Runs [`real_main`]; on failure prints the message to stderr and exits with status 1.
+fn main() {
+    if let Err(err) = real_main() {
+        eprintln!("{err}");
+        std::process::exit(1);
+    }
+}
+
+/// Replays every selected RIR bundle and writes the collected timings as pretty JSON.
+///
+/// Each RIR gets a fresh scratch database at `.tmp-sequence-replay/<rir>-db` next to the
+/// output file; it is deleted afterwards unless `--keep-db` was given.
+///
+/// # Errors
+///
+/// Returns a message on the first I/O, JSON, or replay failure.
+fn real_main() -> Result<(), String> {
+    let args = parse_args()?;
+    let bundle_root = args.bundle_root.expect("parse_args rejects a missing --bundle-root");
+    let out_path = args.out.expect("parse_args rejects a missing --out");
+    let rirs = match args.rirs {
+        Some(v) => v,
+        None => discover_rirs(&bundle_root)?,
+    };
+    let mut results = Vec::new();
+    let tmp_root = out_path
+        .parent()
+        .unwrap_or_else(|| Path::new("."))
+        .join(".tmp-sequence-replay");
+    fs::create_dir_all(&tmp_root)
+        .map_err(|e| format!("create tmp root failed: {}: {e}", tmp_root.display()))?;
+
+    for rir in rirs {
+        let rir_dir = bundle_root.join(&rir);
+        let bundle: serde_json::Value = serde_json::from_slice(
+            &fs::read(rir_dir.join("bundle.json"))
+                .map_err(|e| format!("read bundle failed: {}: {e}", rir_dir.display()))?,
+        )
+        .map_err(|e| format!("parse bundle failed for {}: {e}", rir_dir.display()))?;
+        let tal_bytes = fs::read(rir_dir.join("tal.tal"))
+            .map_err(|e| format!("read tal.tal failed for {}: {e}", rir_dir.display()))?;
+        let ta_bytes = fs::read(rir_dir.join("ta.cer"))
+            .map_err(|e| format!("read ta.cer failed for {}: {e}", rir_dir.display()))?;
+
+        let db_dir = tmp_root.join(format!("{rir}-db"));
+        if db_dir.exists() {
+            fs::remove_dir_all(&db_dir)
+                .map_err(|e| format!("remove old db failed: {}: {e}", db_dir.display()))?;
+        }
+        let store =
+            RocksStore::open(&db_dir).map_err(|e| format!("open rocksdb failed for {rir}: {e}"))?;
+
+        let base_archive = path_join(
+            &rir_dir,
+            bundle["base"]["relativeArchivePath"]
+                .as_str()
+                .ok_or("bundle missing base.relativeArchivePath")?,
+        );
+        let base_locks = path_join(
+            &rir_dir,
+            bundle["base"]["relativeLocksPath"]
+                .as_str()
+                .ok_or("bundle missing base.relativeLocksPath")?,
+        );
+        let base_validation_time = load_validation_time(&base_locks)
+            .map_err(|e| format!("load base validation time failed for {rir}: {e}"))?;
+
+        let start = Instant::now();
+        let base_out = run_tree_from_tal_and_ta_der_payload_replay_serial_audit(
+            &store,
+            &rpki::policy::Policy::default(),
+            &tal_bytes,
+            &ta_bytes,
+            None,
+            &base_archive,
+            &base_locks,
+            base_validation_time,
+            &TreeRunConfig {
+                max_depth: None,
+                max_instances: None,
+            },
+        )
+        .map_err(|e| format!("base replay failed for {rir}: {e}"))?;
+        let base_timing = PhaseTiming {
+            duration_seconds: start.elapsed().as_secs_f64(),
+            vrp_count: base_out.tree.vrps.len(),
+            vap_count: base_out.tree.aspas.len(),
+        };
+
+        let mut previous_locks = base_locks.clone();
+        let mut step_timings = Vec::new();
+        for step in bundle["deltaSequence"]["steps"]
+            .as_array()
+            .ok_or("bundle missing deltaSequence.steps")?
+        {
+            let step_id = step["id"]
+                .as_str()
+                .ok_or("step missing id")?
+                .to_string();
+            let step_dir = path_join(
+                &rir_dir,
+                step["relativePath"]
+                    .as_str()
+                    .ok_or("step missing relativePath")?,
+            );
+            let delta_archive = path_join(
+                &rir_dir,
+                step["relativeArchivePath"]
+                    .as_str()
+                    .ok_or("step missing relativeArchivePath")?,
+            );
+            let delta_locks = path_join(
+                &rir_dir,
+                step["relativeTransitionLocksPath"]
+                    .as_str()
+                    .ok_or("step missing relativeTransitionLocksPath")?,
+            );
+            let validation_time = load_validation_time(&delta_locks)
+                .map_err(|e| format!("load step validation time failed for {rir}/{step_id}: {e}"))?;
+            let start = Instant::now();
+            let step_out = run_tree_from_tal_and_ta_der_payload_delta_replay_step_serial_audit(
+                &store,
+                &rpki::policy::Policy::default(),
+                &tal_bytes,
+                &ta_bytes,
+                None,
+                &delta_archive,
+                &previous_locks,
+                &delta_locks,
+                validation_time,
+                &TreeRunConfig {
+                    max_depth: None,
+                    max_instances: None,
+                },
+            )
+            .map_err(|e| format!("delta step replay failed for {rir}/{step_id}: {e}"))?;
+            step_timings.push((
+                step_id,
+                PhaseTiming {
+                    duration_seconds: start.elapsed().as_secs_f64(),
+                    vrp_count: step_out.tree.vrps.len(),
+                    vap_count: step_out.tree.aspas.len(),
+                },
+            ));
+            // The next step is replayed on top of this step's target locks.
+            previous_locks = step_dir.join("target-locks.json");
+        }
+
+        results.push(RirTiming {
+            rir,
+            base: base_timing,
+            steps: step_timings,
+        });
+
+        // Release the database handle before deleting the files underneath it.
+        drop(store);
+        if !args.keep_db && db_dir.exists() {
+            fs::remove_dir_all(&db_dir)
+                .map_err(|e| format!("remove db failed: {}: {e}", db_dir.display()))?;
+        }
+    }
+
+    if !args.keep_db {
+        // Best effort: `remove_dir` only succeeds once the scratch root is empty.
+        let _ = fs::remove_dir(&tmp_root);
+    }
+
+    fs::write(
+        &out_path,
+        serde_json::to_vec_pretty(&results).map_err(|e| format!("encode json failed: {e}"))?,
+    )
+    .map_err(|e| format!("write out failed: {}: {e}", out_path.display()))?;
+    println!("{}", out_path.display());
+    Ok(())
+}
diff --git a/src/bin/replay_bundle_capture.rs b/src/bin/replay_bundle_capture.rs index 79185c5..dfa88de 100644 --- a/src/bin/replay_bundle_capture.rs +++ b/src/bin/replay_bundle_capture.rs @@ -1,7 +1,8 @@ use rpki::bundle::{ - BundleManifest, BundleManifestEntry, RirBundleMetadata, RecordingHttpFetcher, - RecordingRsyncFetcher, build_vap_compare_rows, build_vrp_compare_rows, - write_live_base_replay_bundle_inputs, write_vap_csv, write_vrp_csv, + RirBundleMetadata, RecordingHttpFetcher, RecordingRsyncFetcher, + build_single_rir_bundle_manifest, build_vap_compare_rows, build_vrp_compare_rows, + sha256_hex, write_json, write_live_base_replay_bundle_inputs, write_live_bundle_rir_readme, + write_live_bundle_top_readme, write_timing_json, write_vap_csv, write_vrp_csv, }; use rpki::ccr::{build_ccr_from_run, verify_content_info, write_ccr_file}; use rpki::fetch::http::{BlockingHttpFetcher, HttpFetcherConfig}; @@ -13,9 +14,8 @@ use rpki::validation::run_tree_from_tal::{ run_tree_from_tal_and_ta_der_serial_audit, }; use rpki::validation::tree::TreeRunConfig; -use sha2::Digest; use std::fs; -use std::path::{Path, PathBuf}; +use std::path::PathBuf; use std::time::Instant; use time::format_description::well_known::Rfc3339; @@ -135,57 +135,6 @@ fn parse_args(argv: &[String]) -> Result { Ok(args) } -fn sha256_hex(bytes: &[u8]) -> String { - 
hex::encode(sha2::Sha256::digest(bytes)) -} - -fn write_json(path: &Path, value: &impl serde::Serialize) -> Result<(), String> { - if let Some(parent) = path.parent() { - fs::create_dir_all(parent) - .map_err(|e| format!("create parent failed: {}: {e}", parent.display()))?; - } - let bytes = serde_json::to_vec_pretty(value).map_err(|e| e.to_string())?; - fs::write(path, bytes).map_err(|e| format!("write json failed: {}: {e}", path.display())) -} - -fn write_timing_json( - path: &Path, - mode: &str, - validation_time: &time::OffsetDateTime, - duration: std::time::Duration, -) -> Result<(), String> { - write_json( - path, - &serde_json::json!({ - "mode": mode, - "validationTime": validation_time - .format(&Rfc3339) - .map_err(|e| format!("format validation time failed: {e}"))?, - "durationSeconds": duration.as_secs_f64(), - }), - ) -} - -fn write_top_readme(path: &Path, rir: &str) -> Result<(), String> { - fs::write( - path, - format!( - "# Ours Live Replay Bundle\n\nThis run contains one per-RIR bundle recorded online by `ours`.\n\n- RIR: `{rir}`\n- Reference result format: `CCR`\n" - ), - ) - .map_err(|e| format!("write readme failed: {}: {e}", path.display())) -} - -fn write_rir_readme(path: &Path, rir: &str, base_validation_time: &str) -> Result<(), String> { - fs::write( - path, - format!( - "# {rir} live replay bundle\n\n- `tal.tal` and `ta.cer` are the actual live run inputs.\n- `base-locks.json.validationTime` = `{base_validation_time}`.\n- `base.ccr` is the authoritative reference result.\n- `base-vrps.csv` and `base-vaps.csv` are compare views derived from `base.ccr`.\n" - ), - ) - .map_err(|e| format!("write rir readme failed: {}: {e}", path.display())) -} - fn run(args: Args) -> Result { let rir = args.rir.as_ref().unwrap(); let rir_normalized = rir.to_ascii_lowercase(); @@ -370,25 +319,18 @@ fn run(args: Args) -> Result { } }), )?; - write_top_readme(&out_root.join("README.md"), &rir_normalized)?; - write_rir_readme(&rir_dir.join("README.md"), 
&rir_normalized, &metadata.base_validation_time)?; + write_live_bundle_top_readme(&out_root.join("README.md"), &rir_normalized)?; + write_live_bundle_rir_readme(&rir_dir.join("README.md"), &rir_normalized, &metadata.base_validation_time)?; write_json( &out_root.join("bundle-manifest.json"), - &BundleManifest { - schema_version: "20260330-v1".to_string(), - bundle_producer: "ours".to_string(), - recorded_at_rfc3339_utc: time::OffsetDateTime::now_utc() - .format(&Rfc3339) - .map_err(|e| format!("format recorded_at failed: {e}"))?, - rirs: vec![rir_normalized.clone()], - per_rir_bundles: vec![BundleManifestEntry { - rir: rir_normalized.clone(), - relative_path: rir_normalized, - base_validation_time: metadata.base_validation_time.clone(), - delta_validation_time: None, - has_aspa: metadata.has_aspa, - }], - }, + &build_single_rir_bundle_manifest( + "20260330-v1", + "ours", + &rir_normalized, + &validation_time, + None, + metadata.has_aspa, + )?, )?; let _ = fs::remove_dir_all(&db_dir); @@ -424,7 +366,7 @@ mod tests { ]; let args = parse_args(&argv).expect("parse"); assert_eq!(args.rir.as_deref(), Some("apnic")); - assert_eq!(args.out_dir.as_deref(), Some(Path::new("out"))); + assert_eq!(args.out_dir.as_deref(), Some(std::path::Path::new("out"))); assert_eq!(args.http_timeout_secs, 20); assert_eq!(args.rsync_timeout_secs, 60); } diff --git a/src/bin/replay_bundle_capture_delta.rs b/src/bin/replay_bundle_capture_delta.rs index 1bad5dd..fc9ce31 100644 --- a/src/bin/replay_bundle_capture_delta.rs +++ b/src/bin/replay_bundle_capture_delta.rs @@ -1,7 +1,7 @@ use rpki::bundle::{ - BundleManifest, BundleManifestEntry, RecordingHttpFetcher, - RecordingRsyncFetcher, build_vap_compare_rows, build_vrp_compare_rows, - write_live_delta_replay_bundle_inputs, write_vap_csv, write_vrp_csv, + RecordingHttpFetcher, RecordingRsyncFetcher, build_single_rir_bundle_manifest, + build_vap_compare_rows, build_vrp_compare_rows, copy_dir_all, load_validation_time, + sha256_hex, write_json, 
write_live_delta_replay_bundle_inputs, write_vap_csv, write_vrp_csv, }; use rpki::ccr::{build_ccr_from_run, decode_content_info, verify_content_info, write_ccr_file}; use rpki::fetch::http::{BlockingHttpFetcher, HttpFetcherConfig}; @@ -15,7 +15,6 @@ use rpki::validation::run_tree_from_tal::{ run_tree_from_tal_and_ta_der_serial_audit, }; use rpki::validation::tree::TreeRunConfig; -use sha2::Digest; use std::fs; use std::path::{Path, PathBuf}; use std::time::Instant; @@ -130,56 +129,6 @@ fn parse_args(argv: &[String]) -> Result { Ok(args) } -fn sha256_hex(bytes: &[u8]) -> String { - hex::encode(sha2::Sha256::digest(bytes)) -} - -fn write_json(path: &Path, value: &impl serde::Serialize) -> Result<(), String> { - if let Some(parent) = path.parent() { - fs::create_dir_all(parent) - .map_err(|e| format!("create parent failed: {}: {e}", parent.display()))?; - } - let bytes = serde_json::to_vec_pretty(value).map_err(|e| e.to_string())?; - fs::write(path, bytes).map_err(|e| format!("write json failed: {}: {e}", path.display())) -} - -fn copy_dir_all(src: &Path, dst: &Path) -> Result<(), String> { - fs::create_dir_all(dst) - .map_err(|e| format!("create directory failed: {}: {e}", dst.display()))?; - for entry in fs::read_dir(src).map_err(|e| format!("read_dir failed: {}: {e}", src.display()))? 
{ - let entry = entry.map_err(|e| format!("read_dir entry failed: {}: {e}", src.display()))?; - let ty = entry - .file_type() - .map_err(|e| format!("file_type failed: {}: {e}", entry.path().display()))?; - let to = dst.join(entry.file_name()); - if ty.is_dir() { - copy_dir_all(&entry.path(), &to)?; - } else if ty.is_file() { - if let Some(parent) = to.parent() { - fs::create_dir_all(parent) - .map_err(|e| format!("create parent failed: {}: {e}", parent.display()))?; - } - fs::copy(entry.path(), &to) - .map_err(|e| format!("copy failed: {} -> {}: {e}", entry.path().display(), to.display()))?; - } - } - Ok(()) -} - -fn load_validation_time(path: &Path) -> Result { - let json: serde_json::Value = serde_json::from_slice( - &fs::read(path).map_err(|e| format!("read json failed: {}: {e}", path.display()))?, - ) - .map_err(|e| format!("parse json failed: {}: {e}", path.display()))?; - let value = json - .get("validationTime") - .or_else(|| json.get("validation_time")) - .and_then(|v| v.as_str()) - .ok_or_else(|| format!("validationTime missing in {}", path.display()))?; - time::OffsetDateTime::parse(value, &Rfc3339) - .map_err(|e| format!("invalid validationTime in {}: {e}", path.display())) -} - fn ensure_recorded_target_snapshots( store: &RocksStore, base_bundle_dir: &Path, @@ -466,27 +415,14 @@ fn run(args: Args) -> Result { }); write_json(&rir_dir.join("verification.json"), &verification_json)?; - let bundle_manifest = BundleManifest { - schema_version: "20260330-v1".to_string(), - bundle_producer: "ours".to_string(), - recorded_at_rfc3339_utc: time::OffsetDateTime::now_utc() - .format(&Rfc3339) - .map_err(|e| format!("format recorded_at failed: {e}"))?, - rirs: vec![rir_normalized.clone()], - per_rir_bundles: vec![BundleManifestEntry { - rir: rir_normalized.clone(), - relative_path: rir_normalized, - base_validation_time: base_validation_time - .format(&Rfc3339) - .map_err(|e| format!("format base validation time failed: {e}"))?, - delta_validation_time: Some( - 
target_validation_time - .format(&Rfc3339) - .map_err(|e| format!("format delta validation time failed: {e}"))?, - ), - has_aspa: bundle_json["hasAspa"].as_bool().unwrap_or(false), - }], - }; + let bundle_manifest = build_single_rir_bundle_manifest( + "20260330-v1", + "ours", + &rir_normalized, + &base_validation_time, + Some(&target_validation_time), + bundle_json["hasAspa"].as_bool().unwrap_or(false), + )?; write_json(&out_root.join("bundle-manifest.json"), &bundle_manifest)?; let _ = fs::remove_dir_all(&target_store_dir); diff --git a/src/bin/replay_bundle_capture_sequence.rs b/src/bin/replay_bundle_capture_sequence.rs new file mode 100644 index 0000000..3aa7fae --- /dev/null +++ b/src/bin/replay_bundle_capture_sequence.rs @@ -0,0 +1,929 @@ +use rpki::bundle::{ + BaseBundleStateMetadataV2, BundleManifestEntryV2, BundleManifestV2, DeltaSequenceMetadataV2, + DeltaStepMetadataV2, RecordingHttpFetcher, RecordingRsyncFetcher, RirBundleMetadataV2, + build_vap_compare_rows, build_vrp_compare_rows, sha256_hex, write_current_replay_state_locks, + write_json, write_live_base_replay_bundle_inputs, write_live_delta_replay_step_inputs, + write_vap_csv, write_vrp_csv, +}; +use rpki::ccr::{ + CcrVerifySummary, build_ccr_from_run, decode_content_info, verify_content_info, write_ccr_file, +}; +use rpki::fetch::http::{BlockingHttpFetcher, HttpFetcherConfig}; +use rpki::fetch::rsync_system::{SystemRsyncConfig, SystemRsyncFetcher}; +use rpki::policy::Policy; +use rpki::storage::RocksStore; +use rpki::sync::rrdp::Fetcher; +use rpki::validation::run_tree_from_tal::{ + run_tree_from_tal_and_ta_der_payload_delta_replay_step_serial_audit, + run_tree_from_tal_and_ta_der_payload_replay_serial_audit, + run_tree_from_tal_and_ta_der_serial_audit, +}; +use rpki::validation::tree::TreeRunConfig; +use std::fs; +use std::path::{Path, PathBuf}; +use std::time::{Duration, Instant}; +use time::format_description::well_known::Rfc3339; + +#[derive(Debug, Default, PartialEq, Eq)] +struct Args { + rir: 
Option, + out_dir: Option, + tal_path: Option, + ta_path: Option, + base_validation_time: Option, + delta_count: usize, + delta_interval_secs: u64, + http_timeout_secs: u64, + rsync_timeout_secs: u64, + rsync_mirror_root: Option, + max_depth: Option, + max_instances: Option, + trust_anchor: Option, + keep_db: bool, + capture_inputs_only: bool, +} + +fn usage() -> &'static str { + "Usage: replay_bundle_capture_sequence --rir --out-dir --tal-path --ta-path [--base-validation-time ] [--delta-count ] [--delta-interval-secs ] [--http-timeout-secs ] [--rsync-timeout-secs ] [--rsync-mirror-root ] [--max-depth ] [--max-instances ] [--trust-anchor ] [--keep-db] [--capture-inputs-only]" +} + +fn parse_args(argv: &[String]) -> Result { + let mut args = Args { + delta_count: 5, + delta_interval_secs: 600, + http_timeout_secs: 20, + rsync_timeout_secs: 60, + ..Args::default() + }; + let mut i = 1usize; + while i < argv.len() { + match argv[i].as_str() { + "--help" | "-h" => return Err(usage().to_string()), + "--rir" => { + i += 1; + args.rir = Some(argv.get(i).ok_or("--rir requires a value")?.clone()); + } + "--out-dir" => { + i += 1; + args.out_dir = Some(PathBuf::from(argv.get(i).ok_or("--out-dir requires a value")?)); + } + "--tal-path" => { + i += 1; + args.tal_path = Some(PathBuf::from(argv.get(i).ok_or("--tal-path requires a value")?)); + } + "--ta-path" => { + i += 1; + args.ta_path = Some(PathBuf::from(argv.get(i).ok_or("--ta-path requires a value")?)); + } + "--base-validation-time" => { + i += 1; + let value = argv.get(i).ok_or("--base-validation-time requires a value")?; + args.base_validation_time = Some( + time::OffsetDateTime::parse(value, &Rfc3339) + .map_err(|e| format!("invalid --base-validation-time: {e}"))?, + ); + } + "--delta-count" => { + i += 1; + args.delta_count = argv + .get(i) + .ok_or("--delta-count requires a value")? 
+ .parse() + .map_err(|e| format!("invalid --delta-count: {e}"))?; + } + "--delta-interval-secs" => { + i += 1; + args.delta_interval_secs = argv + .get(i) + .ok_or("--delta-interval-secs requires a value")? + .parse() + .map_err(|e| format!("invalid --delta-interval-secs: {e}"))?; + } + "--http-timeout-secs" => { + i += 1; + args.http_timeout_secs = argv + .get(i) + .ok_or("--http-timeout-secs requires a value")? + .parse() + .map_err(|e| format!("invalid --http-timeout-secs: {e}"))?; + } + "--rsync-timeout-secs" => { + i += 1; + args.rsync_timeout_secs = argv + .get(i) + .ok_or("--rsync-timeout-secs requires a value")? + .parse() + .map_err(|e| format!("invalid --rsync-timeout-secs: {e}"))?; + } + "--rsync-mirror-root" => { + i += 1; + args.rsync_mirror_root = + Some(PathBuf::from(argv.get(i).ok_or("--rsync-mirror-root requires a value")?)); + } + "--max-depth" => { + i += 1; + args.max_depth = Some( + argv.get(i) + .ok_or("--max-depth requires a value")? + .parse() + .map_err(|e| format!("invalid --max-depth: {e}"))?, + ); + } + "--max-instances" => { + i += 1; + args.max_instances = Some( + argv.get(i) + .ok_or("--max-instances requires a value")? 
+ .parse() + .map_err(|e| format!("invalid --max-instances: {e}"))?, + ); + } + "--trust-anchor" => { + i += 1; + args.trust_anchor = Some(argv.get(i).ok_or("--trust-anchor requires a value")?.clone()); + } + "--keep-db" => args.keep_db = true, + "--capture-inputs-only" => args.capture_inputs_only = true, + other => return Err(format!("unknown argument: {other}\n{}", usage())), + } + i += 1; + } + + if args.rir.is_none() { + return Err(format!("--rir is required\n{}", usage())); + } + if args.out_dir.is_none() { + return Err(format!("--out-dir is required\n{}", usage())); + } + if args.tal_path.is_none() { + return Err(format!("--tal-path is required\n{}", usage())); + } + if args.ta_path.is_none() { + return Err(format!("--ta-path is required\n{}", usage())); + } + Ok(args) +} + +fn write_v2_top_readme(path: &Path, rir: &str, delta_count: usize, delta_interval_secs: u64) -> Result<(), String> { + if let Some(parent) = path.parent() { + fs::create_dir_all(parent) + .map_err(|e| format!("create parent failed: {}: {e}", parent.display()))?; + } + fs::write( + path, + format!( + "# Ours Multi-Delta Replay Bundle\n\n- RIR: `{rir}`\n- Schema: `20260401-v2`\n- Configured delta steps: `{delta_count}`\n- Configured interval seconds: `{delta_interval_secs}`\n" + ), + ) + .map_err(|e| format!("write readme failed: {}: {e}", path.display())) +} + +fn write_v2_rir_readme( + path: &Path, + rir: &str, + base_validation_time: &str, + delta_count: usize, + delta_interval_secs: u64, +) -> Result<(), String> { + if let Some(parent) = path.parent() { + fs::create_dir_all(parent) + .map_err(|e| format!("create parent failed: {}: {e}", parent.display()))?; + } + fs::write( + path, + format!( + "# {rir} multi-delta live replay bundle\n\n- `base-locks.json.validationTime` = `{base_validation_time}`\n- `delta-steps/` contains `{delta_count}` ordered target steps\n- configured interval seconds = `{delta_interval_secs}`\n" + ), + ) + .map_err(|e| format!("write rir readme failed: {}: {e}", 
path.display())) +} + +fn ensure_recorded_target_snapshots_for_locks( + store: &RocksStore, + previous_locks_path: &Path, + http: &RecordingHttpFetcher, +) -> Result<(), String> { + let previous_locks: serde_json::Value = serde_json::from_slice( + &fs::read(previous_locks_path) + .map_err(|e| format!("read previous locks failed: {}: {e}", previous_locks_path.display()))?, + ) + .map_err(|e| format!("parse previous locks failed: {}: {e}", previous_locks_path.display()))?; + let previous_rrdp = previous_locks + .get("rrdp") + .and_then(|v| v.as_object()) + .cloned() + .unwrap_or_default(); + + for (notify_uri, base_lock) in previous_rrdp { + let Some(base_transport) = base_lock.get("transport").and_then(|v| v.as_str()) else { + continue; + }; + if base_transport != "rrdp" { + continue; + } + let Some(base_session) = base_lock.get("session").and_then(|v| v.as_str()) else { + continue; + }; + let Some(base_serial) = base_lock.get("serial").and_then(|v| v.as_u64()) else { + continue; + }; + let Some(record) = store + .get_rrdp_source_record(¬ify_uri) + .map_err(|e| format!("read rrdp source record failed for {notify_uri}: {e}"))? 
+ else { + continue; + }; + let Some(target_session) = record.last_session_id.as_deref() else { + continue; + }; + let Some(target_serial) = record.last_serial else { + continue; + }; + if target_session != base_session || target_serial <= base_serial { + continue; + } + let Some(snapshot_uri) = record.last_snapshot_uri.as_deref() else { + continue; + }; + if step_http_has_snapshot(http, snapshot_uri) { + continue; + } + if let Err(err) = http.fetch(snapshot_uri) { + eprintln!( + "[sequence] warning: fetch target snapshot failed notify_uri={} snapshot_uri={} err={}", + notify_uri, + snapshot_uri, + err + ); + } + } + Ok(()) +} + +fn step_http_has_snapshot( + http: &RecordingHttpFetcher, + snapshot_uri: &str, +) -> bool { + http.snapshot_responses().contains_key(snapshot_uri) +} + +fn run(args: Args) -> Result { + let rir = args.rir.as_ref().unwrap(); + let rir_normalized = rir.to_ascii_lowercase(); + let trust_anchor = args + .trust_anchor + .clone() + .unwrap_or_else(|| rir_normalized.clone()); + let out_root = args.out_dir.as_ref().unwrap(); + if out_root.exists() { + fs::remove_dir_all(out_root) + .map_err(|e| format!("remove old out dir failed: {}: {e}", out_root.display()))?; + } + let rir_dir = out_root.join(&rir_normalized); + let delta_steps_root = rir_dir.join("delta-steps"); + fs::create_dir_all(&delta_steps_root) + .map_err(|e| format!("create delta steps dir failed: {}: {e}", delta_steps_root.display()))?; + + let tal_bytes = fs::read(args.tal_path.as_ref().unwrap()) + .map_err(|e| format!("read tal failed: {e}"))?; + let ta_bytes = fs::read(args.ta_path.as_ref().unwrap()) + .map_err(|e| format!("read ta failed: {e}"))?; + fs::write(rir_dir.join("tal.tal"), &tal_bytes).map_err(|e| format!("write tal failed: {e}"))?; + fs::write(rir_dir.join("ta.cer"), &ta_bytes).map_err(|e| format!("write ta failed: {e}"))?; + + let base_validation_time = args.base_validation_time.unwrap_or_else(time::OffsetDateTime::now_utc); + + let work_db_dir = 
out_root.join(".tmp").join(format!("{rir}-sequence-work-db")); + let base_self_replay_dir = out_root.join(".tmp").join(format!("{rir}-sequence-base-self-replay-db")); + let _ = fs::remove_dir_all(&work_db_dir); + let _ = fs::remove_dir_all(&base_self_replay_dir); + if let Some(parent) = work_db_dir.parent() { + fs::create_dir_all(parent) + .map_err(|e| format!("create tmp dir failed: {}: {e}", parent.display()))?; + } + + let store = RocksStore::open(&work_db_dir).map_err(|e| format!("open work db failed: {e}"))?; + let base_http = RecordingHttpFetcher::new( + BlockingHttpFetcher::new(HttpFetcherConfig { + timeout: Duration::from_secs(args.http_timeout_secs), + ..HttpFetcherConfig::default() + }) + .map_err(|e| format!("create base http fetcher failed: {e}"))?, + ); + let base_rsync_inner = SystemRsyncFetcher::new(SystemRsyncConfig { + timeout: Duration::from_secs(args.rsync_timeout_secs), + mirror_root: args.rsync_mirror_root.clone(), + ..SystemRsyncConfig::default() + }); + let base_rsync = if args.capture_inputs_only { + RecordingRsyncFetcher::new_without_objects(base_rsync_inner) + } else { + RecordingRsyncFetcher::new(base_rsync_inner) + }; + eprintln!("[sequence] base live run start rir={rir_normalized}"); + let started = Instant::now(); + let base_out = run_tree_from_tal_and_ta_der_serial_audit( + &store, + &Policy::default(), + &tal_bytes, + &ta_bytes, + None, + &base_http, + &base_rsync, + base_validation_time, + &TreeRunConfig { + max_depth: args.max_depth, + max_instances: args.max_instances, + }, + ) + .map_err(|e| format!("live base run failed: {e}"))?; + let base_duration = started.elapsed(); + eprintln!( + "[sequence] base live run done rir={} duration_s={:.3}", + rir_normalized, + base_duration.as_secs_f64() + ); + + eprintln!("[sequence] base input materialization start rir={rir_normalized}"); + let base_capture = write_live_base_replay_bundle_inputs( + &rir_dir, + &rir_normalized, + base_validation_time, + &base_out.publication_points, + &store, + 
&base_http.snapshot_responses(), + &base_rsync.snapshot_fetches(), + )?; + eprintln!( + "[sequence] base input materialization done rir={} rrdp_repos={} rsync_modules={}", + rir_normalized, + base_capture.rrdp_repo_count, + base_capture.rsync_module_count + ); + let base_ccr_path = rir_dir.join("base.ccr"); + let base_vrps_path = rir_dir.join("base-vrps.csv"); + let base_vaps_path = rir_dir.join("base-vaps.csv"); + let (base_ccr_sha256, base_vrp_rows, base_vap_rows, base_verify, base_self_replay_ok) = + if args.capture_inputs_only { + eprintln!("[sequence] base output generation skipped rir={rir_normalized}"); + ( + String::new(), + std::collections::BTreeSet::::new(), + std::collections::BTreeSet::::new(), + CcrVerifySummary { + content_type_oid: String::new(), + version: 0, + produced_at_rfc3339_utc: String::new(), + state_hashes_ok: false, + manifest_instances: 0, + roa_payload_sets: 0, + roa_vrp_count: 0, + aspa_payload_sets: 0, + trust_anchor_ski_count: 0, + router_key_sets: 0, + router_key_count: 0, + }, + false, + ) + } else { + eprintln!("[sequence] base CCR/self-replay start rir={rir_normalized}"); + let base_ccr = build_ccr_from_run( + &store, + &[base_out.discovery.trust_anchor.clone()], + &base_out.tree.vrps, + &base_out.tree.aspas, + &base_out.tree.router_keys, + base_validation_time, + ) + .map_err(|e| format!("build base ccr failed: {e}"))?; + write_ccr_file(&base_ccr_path, &base_ccr) + .map_err(|e| format!("write base ccr failed: {e}"))?; + let base_ccr_bytes = fs::read(&base_ccr_path) + .map_err(|e| format!("read base ccr failed: {}: {e}", base_ccr_path.display()))?; + let base_decoded = decode_content_info(&base_ccr_bytes) + .map_err(|e| format!("decode base ccr failed: {e}"))?; + let base_verify = + verify_content_info(&base_decoded).map_err(|e| format!("verify base ccr failed: {e}"))?; + let base_vrp_rows = build_vrp_compare_rows(&base_out.tree.vrps, &trust_anchor); + let base_vap_rows = build_vap_compare_rows(&base_out.tree.aspas, 
&trust_anchor); + let (base_ccr_vrps, base_ccr_vaps) = + rpki::bundle::decode_ccr_compare_views(&base_decoded, &trust_anchor)?; + if base_vrp_rows != base_ccr_vrps { + return Err("base-vrps compare view does not match base.ccr".to_string()); + } + if base_vap_rows != base_ccr_vaps { + return Err("base-vaps compare view does not match base.ccr".to_string()); + } + write_vrp_csv(&base_vrps_path, &base_vrp_rows)?; + write_vap_csv(&base_vaps_path, &base_vap_rows)?; + let base_replay_store = RocksStore::open(&base_self_replay_dir) + .map_err(|e| format!("open base self replay db failed: {e}"))?; + let base_replay_out = run_tree_from_tal_and_ta_der_payload_replay_serial_audit( + &base_replay_store, + &Policy::default(), + &tal_bytes, + &ta_bytes, + None, + &rir_dir.join("base-payload-archive"), + &rir_dir.join("base-locks.json"), + base_validation_time, + &TreeRunConfig { + max_depth: args.max_depth, + max_instances: args.max_instances, + }, + ) + .map_err(|e| format!("base self replay failed: {e}"))?; + if build_vrp_compare_rows(&base_replay_out.tree.vrps, &trust_anchor) != base_vrp_rows { + return Err("base self replay VRP compare view mismatch".to_string()); + } + if build_vap_compare_rows(&base_replay_out.tree.aspas, &trust_anchor) != base_vap_rows { + return Err("base self replay VAP compare view mismatch".to_string()); + } + let output = ( + sha256_hex(&base_ccr_bytes), + base_vrp_rows, + base_vap_rows, + base_verify, + true, + ); + eprintln!("[sequence] base CCR/self-replay done rir={rir_normalized}"); + output + }; + fs::create_dir_all(rir_dir.join("timings")) + .map_err(|e| format!("create timings dir failed: {e}"))?; + write_json( + &rir_dir.join("timings").join("base-produce.json"), + &serde_json::json!({ + "mode": "base", + "validationTime": base_validation_time.format(&Rfc3339).map_err(|e| format!("format base validation time failed: {e}"))?, + "durationSeconds": base_duration.as_secs_f64(), + }), + )?; + + let mut steps_json = Vec::new(); + let mut 
delta_steps = Vec::new(); + let mut previous_locks_path = rir_dir.join("base-locks.json"); + let mut previous_ref = "base".to_string(); + let sequence_self_replay_dir = out_root.join(".tmp").join(format!("{rir}-sequence-self-replay-db")); + let _ = fs::remove_dir_all(&sequence_self_replay_dir); + let sequence_replay_store = if args.capture_inputs_only { + None + } else { + let store = RocksStore::open(&sequence_self_replay_dir) + .map_err(|e| format!("open sequence self replay db failed: {e}"))?; + let _base_replay = run_tree_from_tal_and_ta_der_payload_replay_serial_audit( + &store, + &Policy::default(), + &tal_bytes, + &ta_bytes, + None, + &rir_dir.join("base-payload-archive"), + &rir_dir.join("base-locks.json"), + base_validation_time, + &TreeRunConfig { + max_depth: args.max_depth, + max_instances: args.max_instances, + }, + ) + .map_err(|e| format!("sequence base self replay failed: {e}"))?; + Some(store) + }; + let mut all_steps_self_replay_ok = true; + + for step_index in 1..=args.delta_count { + if step_index > 1 && args.delta_interval_secs > 0 { + std::thread::sleep(Duration::from_secs(args.delta_interval_secs)); + } + let step_id = format!("step-{step_index:03}"); + let step_dir = delta_steps_root.join(&step_id); + fs::create_dir_all(&step_dir) + .map_err(|e| format!("create step dir failed: {}: {e}", step_dir.display()))?; + let step_validation_time = time::OffsetDateTime::now_utc(); + eprintln!("[sequence] step live run start rir={} step={}", rir_normalized, step_id); + let step_http = RecordingHttpFetcher::new( + BlockingHttpFetcher::new(HttpFetcherConfig { + timeout: Duration::from_secs(args.http_timeout_secs), + ..HttpFetcherConfig::default() + }) + .map_err(|e| format!("create step http fetcher failed: {e}"))?, + ); + let step_rsync_inner = SystemRsyncFetcher::new(SystemRsyncConfig { + timeout: Duration::from_secs(args.rsync_timeout_secs), + mirror_root: args.rsync_mirror_root.clone(), + ..SystemRsyncConfig::default() + }); + let step_rsync = if 
args.capture_inputs_only { + RecordingRsyncFetcher::new_without_objects(step_rsync_inner) + } else { + RecordingRsyncFetcher::new(step_rsync_inner) + }; + let started = Instant::now(); + let step_out = run_tree_from_tal_and_ta_der_serial_audit( + &store, + &Policy::default(), + &tal_bytes, + &ta_bytes, + None, + &step_http, + &step_rsync, + step_validation_time, + &TreeRunConfig { + max_depth: args.max_depth, + max_instances: args.max_instances, + }, + ) + .map_err(|e| format!("live delta step {step_id} failed: {e}"))?; + let step_duration = started.elapsed(); + eprintln!( + "[sequence] step live run done rir={} step={} duration_s={:.3}", + rir_normalized, + step_id, + step_duration.as_secs_f64() + ); + ensure_recorded_target_snapshots_for_locks(&store, &previous_locks_path, &step_http)?; + + eprintln!("[sequence] step output generation phase start rir={} step={}", rir_normalized, step_id); + let delta_ccr_path = step_dir.join("delta.ccr"); + let delta_vrps_path = step_dir.join("record-delta.csv"); + let delta_vaps_path = step_dir.join("record-delta-vaps.csv"); + let (delta_ccr_sha256, delta_vrp_rows, delta_vap_rows, delta_verify, step_self_replay_ok) = + if args.capture_inputs_only { + eprintln!( + "[sequence] step CCR/self-replay skipped rir={} step={}", + rir_normalized, + step_id + ); + ( + String::new(), + std::collections::BTreeSet::::new(), + std::collections::BTreeSet::::new(), + CcrVerifySummary { + content_type_oid: String::new(), + version: 0, + produced_at_rfc3339_utc: String::new(), + state_hashes_ok: false, + manifest_instances: 0, + roa_payload_sets: 0, + roa_vrp_count: 0, + aspa_payload_sets: 0, + trust_anchor_ski_count: 0, + router_key_sets: 0, + router_key_count: 0, + }, + false, + ) + } else { + eprintln!("[sequence] step CCR/self-replay start rir={} step={}", rir_normalized, step_id); + let delta_ccr = build_ccr_from_run( + &store, + &[step_out.discovery.trust_anchor.clone()], + &step_out.tree.vrps, + &step_out.tree.aspas, + 
&step_out.tree.router_keys, + step_validation_time, + ) + .map_err(|e| format!("build step ccr failed for {step_id}: {e}"))?; + write_ccr_file(&delta_ccr_path, &delta_ccr) + .map_err(|e| format!("write step ccr failed for {step_id}: {e}"))?; + let delta_ccr_bytes = fs::read(&delta_ccr_path).map_err(|e| { + format!("read step ccr failed: {}: {e}", delta_ccr_path.display()) + })?; + let delta_decoded = decode_content_info(&delta_ccr_bytes) + .map_err(|e| format!("decode step ccr failed for {step_id}: {e}"))?; + let delta_verify = verify_content_info(&delta_decoded) + .map_err(|e| format!("verify step ccr failed for {step_id}: {e}"))?; + let delta_vrp_rows = build_vrp_compare_rows(&step_out.tree.vrps, &trust_anchor); + let delta_vap_rows = build_vap_compare_rows(&step_out.tree.aspas, &trust_anchor); + let (ccr_vrps, ccr_vaps) = + rpki::bundle::decode_ccr_compare_views(&delta_decoded, &trust_anchor)?; + if delta_vrp_rows != ccr_vrps { + return Err(format!("{step_id} VRP compare view does not match delta.ccr")); + } + if delta_vap_rows != ccr_vaps { + return Err(format!("{step_id} VAP compare view does not match delta.ccr")); + } + write_vrp_csv(&delta_vrps_path, &delta_vrp_rows)?; + write_vap_csv(&delta_vaps_path, &delta_vap_rows)?; + let step_replay_out = run_tree_from_tal_and_ta_der_payload_delta_replay_step_serial_audit( + sequence_replay_store.as_ref().expect("sequence replay store"), + &Policy::default(), + &tal_bytes, + &ta_bytes, + None, + &step_dir.join("payload-delta-archive"), + &previous_locks_path, + &step_dir.join("locks-delta.json"), + step_validation_time, + &TreeRunConfig { + max_depth: args.max_depth, + max_instances: args.max_instances, + }, + ) + .map_err(|e| format!("sequence self replay failed for {step_id}: {e}"))?; + let step_self_replay_ok = + build_vrp_compare_rows(&step_replay_out.tree.vrps, &trust_anchor) == delta_vrp_rows + && build_vap_compare_rows(&step_replay_out.tree.aspas, &trust_anchor) + == delta_vap_rows; + let output = ( + 
sha256_hex(&delta_ccr_bytes), + delta_vrp_rows, + delta_vap_rows, + delta_verify, + step_self_replay_ok, + ); + eprintln!("[sequence] step CCR/self-replay done rir={} step={}", rir_normalized, step_id); + output + }; + + eprintln!("[sequence] step input materialization start rir={} step={}", rir_normalized, step_id); + let delta_capture = write_live_delta_replay_step_inputs( + &step_dir, + &rir_normalized, + &previous_locks_path, + step_validation_time, + &step_out.publication_points, + &store, + &step_http.snapshot_responses(), + &step_rsync.snapshot_fetches(), + )?; + let target_lock_capture_id = format!("{rir_normalized}-target-{step_id}"); + write_current_replay_state_locks( + &step_dir.join("target-locks.json"), + &target_lock_capture_id, + step_validation_time, + &step_out.publication_points, + &store, + )?; + eprintln!( + "[sequence] step input materialization done rir={} step={} rrdp_repos={} rsync_modules={}", + rir_normalized, + step_id, + delta_capture.rrdp_repo_count, + delta_capture.rsync_module_count + ); + write_json( + &step_dir.join("verification.json"), + &serde_json::json!({ + "index": step_index, + "id": step_id, + "validationTime": step_validation_time.format(&Rfc3339).map_err(|e| format!("format validation time failed: {e}"))?, + "capture": { + "captureId": delta_capture.capture_id, + "rrdpRepoCount": delta_capture.rrdp_repo_count, + "rsyncModuleCount": delta_capture.rsync_module_count, + }, + "ccr": { + "path": "delta.ccr", + "sha256": delta_ccr_sha256.clone(), + "stateHashesOk": delta_verify.state_hashes_ok, + "manifestInstances": delta_verify.manifest_instances, + "roaVrpCount": delta_verify.roa_vrp_count, + "aspaPayloadSets": delta_verify.aspa_payload_sets, + "routerKeyCount": delta_verify.router_key_count, + }, + "compareViews": { + "vrpCount": delta_vrp_rows.len(), + "vapCount": delta_vap_rows.len(), + }, + "selfReplayOk": serde_json::Value::Null, + "timings": { + "durationSeconds": step_duration.as_secs_f64(), + } + }), + )?; + 
fs::create_dir_all(step_dir.join("timings")) + .map_err(|e| format!("create step timings dir failed: {e}"))?; + write_json( + &step_dir.join("timings").join("delta-produce.json"), + &serde_json::json!({ + "mode": "delta", + "stepIndex": step_index, + "validationTime": step_validation_time.format(&Rfc3339).map_err(|e| format!("format validation time failed: {e}"))?, + "durationSeconds": step_duration.as_secs_f64(), + }), + )?; + + delta_steps.push(DeltaStepMetadataV2 { + index: step_index, + id: step_id.clone(), + relative_path: format!("delta-steps/{step_id}"), + base_ref: previous_ref.clone(), + validation_time: step_validation_time + .format(&Rfc3339) + .map_err(|e| format!("format validation time failed: {e}"))?, + delta_ccr_sha256: delta_ccr_sha256.clone(), + vrp_count: delta_vrp_rows.len(), + vap_count: delta_vap_rows.len(), + relative_archive_path: format!("delta-steps/{step_id}/payload-delta-archive"), + relative_transition_locks_path: format!("delta-steps/{step_id}/locks-delta.json"), + relative_target_locks_path: format!("delta-steps/{step_id}/target-locks.json"), + relative_ccr_path: format!("delta-steps/{step_id}/delta.ccr"), + relative_vrps_path: format!("delta-steps/{step_id}/record-delta.csv"), + relative_vaps_path: format!("delta-steps/{step_id}/record-delta-vaps.csv"), + has_aspa: !delta_vap_rows.is_empty(), + has_router_key: delta_verify.router_key_count > 0, + }); + all_steps_self_replay_ok &= step_self_replay_ok; + + steps_json.push(serde_json::json!({ + "index": step_index, + "id": step_id, + "validationTime": step_validation_time.format(&Rfc3339).map_err(|e| format!("format validation time failed: {e}"))?, + "capture": { + "captureId": delta_capture.capture_id, + "rrdpRepoCount": delta_capture.rrdp_repo_count, + "rsyncModuleCount": delta_capture.rsync_module_count, + }, + "ccr": { + "path": format!("delta-steps/{step_id}/delta.ccr"), + "sha256": delta_ccr_sha256, + "stateHashesOk": delta_verify.state_hashes_ok, + "manifestInstances": 
delta_verify.manifest_instances, + "roaVrpCount": delta_verify.roa_vrp_count, + "aspaPayloadSets": delta_verify.aspa_payload_sets, + "routerKeyCount": delta_verify.router_key_count, + }, + "compareViews": { + "vrpCount": delta_vrp_rows.len(), + "vapCount": delta_vap_rows.len(), + }, + "selfReplayOk": if args.capture_inputs_only { serde_json::Value::Null } else { serde_json::Value::Bool(step_self_replay_ok) } + })); + + previous_locks_path = step_dir.join("target-locks.json"); + previous_ref = step_id; + } + + let metadata = RirBundleMetadataV2 { + schema_version: "20260401-v2".to_string(), + bundle_producer: "ours".to_string(), + rir: rir_normalized.clone(), + tal_sha256: sha256_hex(&tal_bytes), + ta_cert_sha256: sha256_hex(&ta_bytes), + has_any_aspa: !base_vap_rows.is_empty() || delta_steps.iter().any(|step| step.has_aspa), + has_any_router_key: base_verify.router_key_count > 0 + || delta_steps.iter().any(|step| step.has_router_key), + base: BaseBundleStateMetadataV2 { + validation_time: base_validation_time + .format(&Rfc3339) + .map_err(|e| format!("format base validation time failed: {e}"))?, + ccr_sha256: base_ccr_sha256.clone(), + vrp_count: base_vrp_rows.len(), + vap_count: base_vap_rows.len(), + relative_archive_path: "base-payload-archive".to_string(), + relative_locks_path: "base-locks.json".to_string(), + relative_ccr_path: "base.ccr".to_string(), + relative_vrps_path: "base-vrps.csv".to_string(), + relative_vaps_path: "base-vaps.csv".to_string(), + }, + delta_sequence: DeltaSequenceMetadataV2 { + configured_delta_count: args.delta_count, + configured_interval_seconds: args.delta_interval_secs, + steps: delta_steps.clone(), + }, + }; + write_json(&rir_dir.join("bundle.json"), &metadata)?; + write_json( + &rir_dir.join("verification.json"), + &serde_json::json!({ + "base": { + "validationTime": metadata.base.validation_time, + "ccr": { + "path": "base.ccr", + "sha256": metadata.base.ccr_sha256, + "stateHashesOk": base_verify.state_hashes_ok, + 
"manifestInstances": base_verify.manifest_instances, + "roaVrpCount": base_verify.roa_vrp_count, + "aspaPayloadSets": base_verify.aspa_payload_sets, + "routerKeyCount": base_verify.router_key_count, + }, + "compareViews": { + "baseVrpCount": metadata.base.vrp_count, + "baseVapCount": metadata.base.vap_count, + }, + "capture": { + "captureId": base_capture.capture_id, + "rrdpRepoCount": base_capture.rrdp_repo_count, + "rsyncModuleCount": base_capture.rsync_module_count, + "selfReplayOk": if args.capture_inputs_only { serde_json::Value::Null } else { serde_json::Value::Bool(base_self_replay_ok) }, + } + }, + "steps": steps_json, + "summary": { + "baseSelfReplayOk": if args.capture_inputs_only { serde_json::Value::Null } else { serde_json::Value::Bool(base_self_replay_ok) }, + "stepCount": args.delta_count, + "allStepsSelfReplayOk": if args.capture_inputs_only { serde_json::Value::Null } else { serde_json::Value::Bool(all_steps_self_replay_ok) }, + } + }), + )?; + write_v2_top_readme(&out_root.join("README.md"), &rir_normalized, args.delta_count, args.delta_interval_secs)?; + write_v2_rir_readme( + &rir_dir.join("README.md"), + &rir_normalized, + &metadata.base.validation_time, + args.delta_count, + args.delta_interval_secs, + )?; + + let bundle_manifest = BundleManifestV2 { + schema_version: "20260401-v2".to_string(), + bundle_producer: "ours".to_string(), + recorded_at_rfc3339_utc: time::OffsetDateTime::now_utc() + .format(&Rfc3339) + .map_err(|e| format!("format recorded_at failed: {e}"))?, + rirs: vec![rir_normalized.clone()], + per_rir_bundles: vec![BundleManifestEntryV2 { + rir: rir_normalized.clone(), + relative_path: rir_normalized, + base_validation_time: metadata.base.validation_time.clone(), + step_count: metadata.delta_sequence.steps.len(), + first_delta_validation_time: metadata + .delta_sequence + .steps + .first() + .map(|step| step.validation_time.clone()), + last_delta_validation_time: metadata + .delta_sequence + .steps + .last() + .map(|step| 
step.validation_time.clone()), + has_aspa: metadata.has_any_aspa, + }], + }; + write_json(&out_root.join("bundle-manifest.json"), &bundle_manifest)?; + + if !args.keep_db { + drop(sequence_replay_store); + drop(store); + let _ = fs::remove_dir_all(&work_db_dir); + let _ = fs::remove_dir_all(&base_self_replay_dir); + let _ = fs::remove_dir_all(&sequence_self_replay_dir); + let tmp_dir = out_root.join(".tmp"); + if tmp_dir.is_dir() { + let is_empty = fs::read_dir(&tmp_dir) + .map_err(|e| format!("read tmp dir failed: {}: {e}", tmp_dir.display()))? + .next() + .is_none(); + if is_empty { + let _ = fs::remove_dir(&tmp_dir); + } + } + } + + Ok(out_root.clone()) +} + +fn main() -> Result<(), String> { + let args = parse_args(&std::env::args().collect::>())?; + let out = run(args)?; + println!("{}", out.display()); + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parse_args_defaults_delta_sequence_parameters() { + let argv = vec![ + "replay_bundle_capture_sequence".to_string(), + "--rir".to_string(), + "apnic".to_string(), + "--out-dir".to_string(), + "out".to_string(), + "--tal-path".to_string(), + "tal".to_string(), + "--ta-path".to_string(), + "ta".to_string(), + ]; + let args = parse_args(&argv).expect("parse"); + assert_eq!(args.delta_count, 5); + assert_eq!(args.delta_interval_secs, 600); + assert!(!args.keep_db); + } + + #[test] + fn parse_args_accepts_overrides_and_keep_db() { + let argv = vec![ + "replay_bundle_capture_sequence".to_string(), + "--rir".to_string(), + "apnic".to_string(), + "--out-dir".to_string(), + "out".to_string(), + "--tal-path".to_string(), + "tal".to_string(), + "--ta-path".to_string(), + "ta".to_string(), + "--delta-count".to_string(), + "2".to_string(), + "--delta-interval-secs".to_string(), + "0".to_string(), + "--keep-db".to_string(), + ]; + let args = parse_args(&argv).expect("parse"); + assert_eq!(args.delta_count, 2); + assert_eq!(args.delta_interval_secs, 0); + assert!(args.keep_db); + } +} diff --git 
a/src/bin/replay_bundle_refresh_sequence_outputs.rs b/src/bin/replay_bundle_refresh_sequence_outputs.rs new file mode 100644 index 0000000..10d2bd6 --- /dev/null +++ b/src/bin/replay_bundle_refresh_sequence_outputs.rs @@ -0,0 +1,1009 @@ +use std::collections::{BTreeMap, BTreeSet}; +use std::fs; +use std::path::{Path, PathBuf}; + +use rpki::bundle::{ + build_vap_compare_rows, build_vrp_compare_rows, sha256_hex, write_json, write_vap_csv, + write_vrp_csv, +}; +use rpki::ccr::{build_ccr_from_run, decode_content_info, verify_content_info, write_ccr_file}; +use rpki::policy::Policy; +use rpki::replay::archive::canonical_rsync_module; +use rpki::storage::RocksStore; +use rpki::validation::run_tree_from_tal::{ + run_tree_from_tal_and_ta_der_payload_delta_replay_step_serial_audit, + run_tree_from_tal_and_ta_der_payload_replay_serial_audit, +}; +use rpki::validation::tree::TreeRunConfig; +use serde::{Deserialize, Serialize}; +use time::format_description::well_known::Rfc3339; + +fn usage() -> &'static str { + "Usage: replay_bundle_refresh_sequence_outputs --rir-dir [--keep-db]" +} + +#[derive(Default)] +struct Args { + rir_dir: Option, + keep_db: bool, +} + +fn parse_args() -> Result { + let mut args = Args::default(); + let argv: Vec = std::env::args().skip(1).collect(); + let mut i = 0; + while i < argv.len() { + match argv[i].as_str() { + "--rir-dir" => { + i += 1; + args.rir_dir = Some(PathBuf::from( + argv.get(i).ok_or("--rir-dir requires a value")?, + )); + } + "--keep-db" => { + args.keep_db = true; + } + "--help" | "-h" => { + return Err(usage().to_string()); + } + other => return Err(format!("unknown argument: {other}\n{}", usage())), + } + i += 1; + } + if args.rir_dir.is_none() { + return Err(format!("--rir-dir is required\n{}", usage())); + } + Ok(args) +} + +#[derive(Debug, Deserialize, Serialize)] +#[serde(rename_all = "camelCase")] +struct RirBundleMetadataV2Serde { + schema_version: String, + bundle_producer: String, + rir: String, + tal_sha256: String, + 
ta_cert_sha256: String, + has_any_aspa: bool, + has_any_router_key: bool, + base: BaseBundleStateMetadataV2Serde, + delta_sequence: DeltaSequenceMetadataV2Serde, +} + +#[derive(Debug, Deserialize, Serialize)] +#[serde(rename_all = "camelCase")] +struct BaseBundleStateMetadataV2Serde { + validation_time: String, + ccr_sha256: String, + vrp_count: usize, + vap_count: usize, + relative_archive_path: String, + relative_locks_path: String, + relative_ccr_path: String, + relative_vrps_path: String, + relative_vaps_path: String, +} + +#[derive(Debug, Deserialize, Serialize)] +#[serde(rename_all = "camelCase")] +struct DeltaSequenceMetadataV2Serde { + configured_delta_count: usize, + configured_interval_seconds: u64, + steps: Vec, +} + +#[derive(Debug, Deserialize, Serialize)] +#[serde(rename_all = "camelCase")] +struct DeltaStepMetadataV2Serde { + index: usize, + id: String, + relative_path: String, + base_ref: String, + validation_time: String, + delta_ccr_sha256: String, + vrp_count: usize, + vap_count: usize, + relative_archive_path: String, + relative_transition_locks_path: String, + relative_target_locks_path: String, + relative_ccr_path: String, + relative_vrps_path: String, + relative_vaps_path: String, + has_aspa: bool, + has_router_key: bool, +} + +#[derive(Debug, Deserialize, Serialize)] +#[serde(rename_all = "camelCase")] +struct VerificationV2 { + base: serde_json::Value, + steps: Vec, + summary: serde_json::Value, +} + +fn parse_time(value: &str) -> Result { + time::OffsetDateTime::parse(value, &Rfc3339) + .map_err(|e| format!("invalid RFC3339 time `{value}`: {e}")) +} + +fn path_join(root: &Path, relative: &str) -> PathBuf { + root.join(relative) +} + +fn is_failed_fetch_source(source: &str) -> bool { + source == "failed_fetch_no_cache" +} + +fn current_module_objects_from_store( + store: &RocksStore, + module_uri: &str, +) -> Result>, String> { + let entries = store + .list_repository_view_entries_with_prefix(module_uri) + .map_err(|e| format!("list 
repository view failed for {module_uri}: {e}"))?; + let mut out = BTreeMap::new(); + for entry in entries { + if entry.state != rpki::storage::RepositoryViewState::Present { + continue; + } + let bytes = store + .load_current_object_bytes_by_uri(&entry.rsync_uri) + .map_err(|e| format!("load current object failed for {}: {e}", entry.rsync_uri))? + .ok_or_else(|| format!("current object missing for {}", entry.rsync_uri))?; + out.insert(entry.rsync_uri, bytes); + } + Ok(out) +} + +fn rsync_bucket_dir(capture_root: &Path, module_uri: &str) -> PathBuf { + capture_root + .join("rsync") + .join("modules") + .join(sha256_hex(module_uri.as_bytes())) +} + +fn materialize_rsync_module_from_store( + capture_root: &Path, + module_uri: &str, + objects: &BTreeMap>, +) -> Result, String> { + let bucket_dir = rsync_bucket_dir(capture_root, module_uri); + let tree_root = bucket_dir.join("tree"); + if tree_root.exists() { + fs::remove_dir_all(&tree_root) + .map_err(|e| format!("remove old rsync tree failed: {}: {e}", tree_root.display()))?; + } + let relative_root = module_uri + .strip_prefix("rsync://") + .ok_or_else(|| format!("invalid rsync module uri: {module_uri}"))? 
+ .trim_end_matches('/'); + fs::create_dir_all(tree_root.join(relative_root)) + .map_err(|e| format!("create rsync tree root failed: {}: {e}", tree_root.join(relative_root).display()))?; + for (uri, bytes) in objects { + let rel = uri + .strip_prefix(module_uri) + .ok_or_else(|| format!("object uri {uri} does not belong to module {module_uri}"))?; + let path = tree_root.join(relative_root).join(rel); + if let Some(parent) = path.parent() { + fs::create_dir_all(parent) + .map_err(|e| format!("create rsync object parent failed: {}: {e}", parent.display()))?; + } + fs::write(&path, bytes) + .map_err(|e| format!("write rsync object failed: {}: {e}", path.display()))?; + } + Ok(objects.keys().cloned().collect()) +} + +fn copy_dir_all(src: &Path, dst: &Path) -> Result<(), String> { + fs::create_dir_all(dst) + .map_err(|e| format!("create directory failed: {}: {e}", dst.display()))?; + for entry in fs::read_dir(src) + .map_err(|e| format!("read directory failed: {}: {e}", src.display()))? + { + let entry = entry.map_err(|e| format!("read entry failed: {}: {e}", src.display()))?; + let file_type = entry + .file_type() + .map_err(|e| format!("read file type failed: {}: {e}", entry.path().display()))?; + let target = dst.join(entry.file_name()); + if file_type.is_dir() { + copy_dir_all(&entry.path(), &target)?; + } else if file_type.is_file() { + fs::copy(entry.path(), &target).map_err(|e| { + format!( + "copy file failed: {} -> {}: {e}", + entry.path().display(), + target.display() + ) + })?; + } + } + Ok(()) +} + +fn load_json(path: &Path) -> Result { + serde_json::from_slice( + &fs::read(path).map_err(|e| format!("read json failed: {}: {e}", path.display()))?, + ) + .map_err(|e| format!("parse json failed: {}: {e}", path.display())) +} + +fn write_json_value(path: &Path, value: &serde_json::Value) -> Result<(), String> { + if let Some(parent) = path.parent() { + fs::create_dir_all(parent) + .map_err(|e| format!("create json parent failed: {}: {e}", parent.display()))?; + 
} + fs::write( + path, + serde_json::to_vec_pretty(value).map_err(|e| format!("serialize json failed: {e}"))?, + ) + .map_err(|e| format!("write json failed: {}: {e}", path.display())) +} + +fn base_capture_root_from_locks(archive_root: &Path, locks_path: &Path) -> Result { + let value = load_json(locks_path)?; + let capture = value + .get("capture") + .and_then(|v| v.as_str()) + .ok_or_else(|| format!("missing capture in {}", locks_path.display()))?; + Ok(archive_root.join("v1").join("captures").join(capture)) +} + +fn keep_rsync_module(pp: &rpki::audit::PublicationPointAudit) -> Result, String> { + if is_failed_fetch_source(&pp.source) { + return Ok(None); + } + let module_uri = canonical_rsync_module(&pp.rsync_base_uri) + .map_err(|e| format!("canonicalize rsync module failed for {}: {e}", pp.rsync_base_uri))?; + if pp.rrdp_notification_uri.is_none() || pp.repo_sync_source.as_deref() == Some("rsync") { + return Ok(Some(module_uri)); + } + Ok(None) +} + +fn repair_base_inputs( + archive_root: &Path, + locks_path: &Path, + publication_points: &[rpki::audit::PublicationPointAudit], + store: &RocksStore, + verification: &mut VerificationV2, +) -> Result<(), String> { + let capture_root = base_capture_root_from_locks(archive_root, locks_path)?; + let mut locks = load_json(locks_path)?; + + let candidate_modules: BTreeSet = publication_points + .iter() + .filter_map(|pp| keep_rsync_module(pp).transpose()) + .collect::, _>>()? + .into_iter() + .collect(); + + let old_modules: Vec = locks + .get("rsync") + .and_then(|v| v.as_object()) + .map(|m| m.keys().cloned().collect()) + .unwrap_or_default(); + + if let Some(rrdp_obj) = locks.get_mut("rrdp").and_then(|v| v.as_object_mut()) { + for pp in publication_points { + let Some(notify_uri) = pp.rrdp_notification_uri.as_deref() else { + continue; + }; + let lock_value = match store + .get_rrdp_source_record(notify_uri) + .map_err(|e| format!("read rrdp source record failed for {notify_uri}: {e}"))? 
+ { + Some(record) + if record.last_session_id.is_some() + && record.last_serial.is_some() + && record.last_snapshot_uri.is_some() + && record.last_snapshot_hash.is_some() => + { + serde_json::json!({ + "transport": "rrdp", + "session": record.last_session_id, + "serial": record.last_serial, + }) + } + _ => serde_json::json!({ + "transport": "rsync", + "session": null, + "serial": null, + }), + }; + rrdp_obj.insert(notify_uri.to_string(), lock_value); + } + let repos_dir = capture_root.join("rrdp").join("repos"); + if repos_dir.exists() { + for entry in fs::read_dir(&repos_dir) + .map_err(|e| format!("scan rrdp repo dir failed: {}: {e}", repos_dir.display()))? + { + let entry = entry + .map_err(|e| format!("read rrdp repo entry failed: {}: {e}", repos_dir.display()))?; + let meta = entry.path().join("meta.json"); + if !meta.exists() { + continue; + } + let meta_value = load_json(&meta)?; + let notify_uri = match meta_value.get("rpkiNotify").and_then(|v| v.as_str()) { + Some(value) => value.to_string(), + None => continue, + }; + if rrdp_obj.contains_key(¬ify_uri) { + continue; + } + let lock_value = match store + .get_rrdp_source_record(¬ify_uri) + .map_err(|e| format!("read rrdp source record failed for {notify_uri}: {e}"))? 
+ { + Some(record) + if record.last_session_id.is_some() + && record.last_serial.is_some() + && record.last_snapshot_uri.is_some() + && record.last_snapshot_hash.is_some() => + { + serde_json::json!({ + "transport": "rrdp", + "session": record.last_session_id, + "serial": record.last_serial, + }) + } + _ => serde_json::json!({ + "transport": "rsync", + "session": null, + "serial": null, + }), + }; + rrdp_obj.insert(notify_uri, lock_value); + } + } + } + + let mut final_modules = serde_json::Map::new(); + for module_uri in candidate_modules { + let objects = current_module_objects_from_store(store, &module_uri)?; + if objects.is_empty() { + continue; + } + let _files = materialize_rsync_module_from_store(&capture_root, &module_uri, &objects)?; + final_modules.insert( + module_uri, + serde_json::json!({ + "transport": "rsync" + }), + ); + } + + for module_uri in old_modules { + if !final_modules.contains_key(&module_uri) { + let bucket_dir = rsync_bucket_dir(&capture_root, &module_uri); + let _ = fs::remove_dir_all(bucket_dir); + } + } + + if let Some(rsync_value) = locks.get_mut("rsync") { + *rsync_value = serde_json::Value::Object(final_modules.clone()); + } + write_json_value(locks_path, &locks)?; + verification.base["capture"]["rrdpRepoCount"] = serde_json::Value::from( + locks.get("rrdp") + .and_then(|v| v.as_object()) + .map(|m| m.len()) + .unwrap_or(0), + ); + verification.base["capture"]["rsyncModuleCount"] = + serde_json::Value::from(final_modules.len()); + Ok(()) +} + +fn repair_target_locks( + locks_path: &Path, + previous_locks_path: &Path, + publication_points: &[rpki::audit::PublicationPointAudit], + store: &RocksStore, +) -> Result<(), String> { + let mut locks = load_json(locks_path)?; + let previous_locks = load_json(previous_locks_path)?; + + if let Some(rrdp_obj) = locks.get_mut("rrdp").and_then(|v| v.as_object_mut()) { + for pp in publication_points { + let Some(notify_uri) = pp.rrdp_notification_uri.as_deref() else { + continue; + }; + let mut 
lock_value = match store + .get_rrdp_source_record(notify_uri) + .map_err(|e| format!("read rrdp source record failed for {notify_uri}: {e}"))? + { + Some(record) + if record.last_session_id.is_some() + && record.last_serial.is_some() + && record.last_snapshot_uri.is_some() + && record.last_snapshot_hash.is_some() => + { + serde_json::json!({ + "transport": "rrdp", + "session": record.last_session_id, + "serial": record.last_serial, + }) + } + _ => serde_json::json!({ + "transport": "rsync", + "session": null, + "serial": null, + }), + }; + let previous_transport = previous_locks + .get("rrdp") + .and_then(|v| v.get(notify_uri)) + .and_then(|v| v.get("transport")) + .and_then(|v| v.as_str()); + if previous_transport != Some("rrdp") { + lock_value = serde_json::json!({ + "transport": "rsync", + "session": null, + "serial": null, + }); + } + rrdp_obj.insert(notify_uri.to_string(), lock_value); + } + } + + let candidate_modules: BTreeSet = publication_points + .iter() + .filter_map(|pp| keep_rsync_module(pp).transpose()) + .collect::, _>>()? 
+ .into_iter() + .collect(); + let mut final_modules = serde_json::Map::new(); + for module_uri in candidate_modules { + let objects = current_module_objects_from_store(store, &module_uri)?; + if objects.is_empty() { + continue; + } + final_modules.insert( + module_uri, + serde_json::json!({ + "transport": "rsync" + }), + ); + } + if let Some(rsync_value) = locks.get_mut("rsync") { + *rsync_value = serde_json::Value::Object(final_modules); + } + + write_json_value(locks_path, &locks) +} + +fn repair_delta_step_inputs( + step_dir: &Path, + base_archive_root: &Path, + base_locks_path: &Path, + previous_locks_path: &Path, + publication_points: &[rpki::audit::PublicationPointAudit], + store: &RocksStore, + step_verification: &mut serde_json::Value, +) -> Result<(), String> { + let locks_path = step_dir.join("locks-delta.json"); + let mut locks = load_json(&locks_path)?; + let previous_locks = load_json(previous_locks_path)?; + let capture = locks + .get("capture") + .and_then(|v| v.as_str()) + .ok_or_else(|| format!("missing capture in {}", locks_path.display()))? + .to_string(); + let capture_root = step_dir + .join("payload-delta-archive") + .join("v1") + .join("captures") + .join(capture); + + if let Some(rrdp_obj) = locks.get_mut("rrdp").and_then(|v| v.as_object_mut()) { + let repos_dir = capture_root.join("rrdp").join("repos"); + if repos_dir.exists() { + for entry in fs::read_dir(&repos_dir) + .map_err(|e| format!("scan rrdp repo dir failed: {}: {e}", repos_dir.display()))? 
+ { + let entry = entry + .map_err(|e| format!("read rrdp repo entry failed: {}: {e}", repos_dir.display()))?; + let meta = entry.path().join("meta.json"); + if !meta.exists() { + continue; + } + let meta_value = load_json(&meta)?; + let notify_uri = match meta_value.get("rpkiNotify").and_then(|v| v.as_str()) { + Some(value) => value.to_string(), + None => continue, + }; + if rrdp_obj.contains_key(¬ify_uri) { + continue; + } + let transition_path = entry.path().join("transition.json"); + let lock_value = if transition_path.exists() { + let transition = load_json(&transition_path)?; + serde_json::json!({ + "kind": transition.get("kind").cloned().unwrap_or(serde_json::Value::String("fallback-rsync".to_string())), + "base": transition.get("base").cloned().unwrap_or(serde_json::json!({ + "transport": "rsync", + "session": null, + "serial": null + })), + "target": transition.get("target").cloned().unwrap_or(serde_json::json!({ + "transport": "rsync", + "session": null, + "serial": null + })), + "delta_count": transition.get("delta_count").cloned().unwrap_or(serde_json::Value::from(0)), + "deltas": transition.get("deltas").cloned().unwrap_or(serde_json::Value::Array(vec![])), + }) + } else { + serde_json::json!({ + "kind": "fallback-rsync", + "base": {"transport":"rsync","session":null,"serial":null}, + "target": {"transport":"rsync","session":null,"serial":null}, + "delta_count": 0, + "deltas": [] + }) + }; + rrdp_obj.insert(notify_uri, lock_value); + } + } + for (notify_uri, entry) in rrdp_obj.iter_mut() { + let previous_transport = previous_locks + .get("rrdp") + .and_then(|v| v.get(notify_uri)) + .and_then(|v| v.get("transport")) + .and_then(|v| v.as_str()); + if previous_transport != Some("rrdp") { + let fallback = serde_json::json!({ + "kind": "fallback-rsync", + "base": {"transport":"rsync","session":null,"serial":null}, + "target": {"transport":"rsync","session":null,"serial":null}, + "delta_count": 0, + "deltas": [] + }); + *entry = fallback.clone(); + let 
bucket_dir = capture_root + .join("rrdp") + .join("repos") + .join(sha256_hex(notify_uri.as_bytes())); + if bucket_dir.exists() { + write_json(&bucket_dir.join("transition.json"), &fallback)?; + } + } + } + } + + let candidate_modules: BTreeSet = publication_points + .iter() + .filter_map(|pp| keep_rsync_module(pp).transpose()) + .collect::, _>>()? + .into_iter() + .collect(); + + let old_modules: Vec = locks + .get("rsync") + .and_then(|v| v.as_object()) + .map(|m| m.keys().cloned().collect()) + .unwrap_or_default(); + + let mut final_modules = serde_json::Map::new(); + for module_uri in candidate_modules { + let objects = current_module_objects_from_store(store, &module_uri)?; + if objects.is_empty() { + continue; + } + let files = materialize_rsync_module_from_store(&capture_root, &module_uri, &objects)?; + let bucket_dir = rsync_bucket_dir(&capture_root, &module_uri); + write_json( + &bucket_dir.join("files.json"), + &serde_json::json!({ + "version": 1, + "module": module_uri, + "fileCount": files.len(), + "files": files, + }), + )?; + final_modules.insert( + module_uri, + serde_json::json!({ + "file_count": objects.len(), + "overlay_only": true + }), + ); + } + + for module_uri in old_modules { + if !final_modules.contains_key(&module_uri) { + let bucket_dir = rsync_bucket_dir(&capture_root, &module_uri); + let _ = fs::remove_dir_all(bucket_dir); + } + } + + if let Some(rsync_value) = locks.get_mut("rsync") { + *rsync_value = serde_json::Value::Object(final_modules.clone()); + } + + let base_capture_root = base_capture_root_from_locks(base_archive_root, base_locks_path)?; + if let Some(rrdp_obj) = locks.get("rrdp").and_then(|v| v.as_object()) { + for (notify_uri, entry) in rrdp_obj { + let kind = entry.get("kind").and_then(|v| v.as_str()).unwrap_or(""); + if kind != "unchanged" { + continue; + } + let session = entry + .get("target") + .and_then(|v| v.get("session")) + .and_then(|v| v.as_str()) + .or_else(|| { + entry.get("base") + .and_then(|v| 
v.get("session")) + .and_then(|v| v.as_str()) + }); + let Some(session) = session else { continue }; + let bucket_hash = sha256_hex(notify_uri.as_bytes()); + let bucket_dir = capture_root.join("rrdp").join("repos").join(&bucket_hash); + let session_dir = bucket_dir.join(session); + if session_dir.exists() { + continue; + } + let base_bucket_dir = base_capture_root.join("rrdp").join("repos").join(&bucket_hash); + let base_session_dir = base_bucket_dir.join(session); + if !base_session_dir.exists() { + continue; + } + fs::create_dir_all(&bucket_dir).map_err(|e| { + format!( + "create delta rrdp repo dir failed: {}: {e}", + bucket_dir.display() + ) + })?; + let base_meta = base_bucket_dir.join("meta.json"); + if !bucket_dir.join("meta.json").exists() && base_meta.exists() { + fs::copy(&base_meta, bucket_dir.join("meta.json")).map_err(|e| { + format!( + "copy base repo meta failed: {} -> {}: {e}", + base_meta.display(), + bucket_dir.join("meta.json").display() + ) + })?; + } + copy_dir_all(&base_session_dir, &session_dir)?; + } + } + + write_json_value(&locks_path, &locks)?; + + step_verification["capture"]["rrdpRepoCount"] = serde_json::Value::from( + locks.get("rrdp") + .and_then(|v| v.as_object()) + .map(|m| m.len()) + .unwrap_or(0), + ); + step_verification["capture"]["rsyncModuleCount"] = + serde_json::Value::from(final_modules.len()); + Ok(()) +} + +fn rewrite_delta_base_hash(step_dir: &Path, previous_locks_path: &Path) -> Result<(), String> { + let previous_locks_bytes = fs::read(previous_locks_path).map_err(|e| { + format!( + "read previous locks failed for delta base hash rewrite: {}: {e}", + previous_locks_path.display() + ) + })?; + let previous_locks_sha256 = sha256_hex(&previous_locks_bytes); + let locks_path = step_dir.join("locks-delta.json"); + let mut locks = load_json(&locks_path)?; + let previous_locks = serde_json::from_slice::(&previous_locks_bytes) + .map_err(|e| format!("parse previous locks failed: {}: {e}", previous_locks_path.display()))?; + 
locks["baseLocksSha256"] = serde_json::Value::String(previous_locks_sha256.clone()); + let capture = locks + .get("capture") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()) + .ok_or_else(|| format!("missing capture in {}", locks_path.display()))?; + write_json_value(&locks_path, &locks)?; + + let base_meta_path = step_dir + .join("payload-delta-archive") + .join("v1") + .join("captures") + .join(&capture) + .join("base.json"); + let mut base_meta = load_json(&base_meta_path)?; + base_meta["baseLocksSha256"] = serde_json::Value::String(previous_locks_sha256); + write_json_value(&base_meta_path, &base_meta)?; + + if let Some(rrdp_obj) = locks.get_mut("rrdp").and_then(|v| v.as_object_mut()) { + for (notify_uri, entry) in rrdp_obj.iter_mut() { + let previous_transport = previous_locks + .get("rrdp") + .and_then(|v| v.get(notify_uri)) + .and_then(|v| v.get("transport")) + .and_then(|v| v.as_str()); + if previous_transport != Some("rrdp") { + let fallback = serde_json::json!({ + "kind": "fallback-rsync", + "base": {"transport":"rsync","session":null,"serial":null}, + "target": {"transport":"rsync","session":null,"serial":null}, + "delta_count": 0, + "deltas": [] + }); + *entry = fallback.clone(); + let bucket_dir = step_dir + .join("payload-delta-archive") + .join("v1") + .join("captures") + .join(&capture) + .join("rrdp") + .join("repos") + .join(sha256_hex(notify_uri.as_bytes())); + if bucket_dir.exists() { + write_json(&bucket_dir.join("transition.json"), &fallback)?; + } + } + } + } + write_json_value(&locks_path, &locks)?; + Ok(()) +} + +fn main() { + if let Err(err) = real_main() { + eprintln!("{err}"); + std::process::exit(1); + } +} + +fn real_main() -> Result<(), String> { + let args = parse_args()?; + let rir_dir = args.rir_dir.unwrap(); + let bundle_json_path = rir_dir.join("bundle.json"); + let verification_path = rir_dir.join("verification.json"); + + let mut bundle: RirBundleMetadataV2Serde = serde_json::from_slice( + &fs::read(&bundle_json_path) + 
.map_err(|e| format!("read bundle.json failed: {}: {e}", bundle_json_path.display()))?, + ) + .map_err(|e| format!("parse bundle.json failed: {}: {e}", bundle_json_path.display()))?; + + let mut verification: VerificationV2 = serde_json::from_slice( + &fs::read(&verification_path) + .map_err(|e| format!("read verification.json failed: {}: {e}", verification_path.display()))?, + ) + .map_err(|e| format!("parse verification.json failed: {}: {e}", verification_path.display()))?; + + let tal_bytes = fs::read(rir_dir.join("tal.tal")) + .map_err(|e| format!("read tal.tal failed: {}: {e}", rir_dir.display()))?; + let ta_bytes = fs::read(rir_dir.join("ta.cer")) + .map_err(|e| format!("read ta.cer failed: {}: {e}", rir_dir.display()))?; + + let tmp_root = rir_dir.parent().unwrap_or(&rir_dir).join(".tmp-refresh"); + let work_db = tmp_root.join(format!("{}-work-db", bundle.rir)); + if work_db.exists() { + fs::remove_dir_all(&work_db) + .map_err(|e| format!("remove old refresh db failed: {}: {e}", work_db.display()))?; + } + if let Some(parent) = work_db.parent() { + fs::create_dir_all(parent) + .map_err(|e| format!("create refresh db parent failed: {}: {e}", parent.display()))?; + } + let store = RocksStore::open(&work_db).map_err(|e| format!("open refresh rocksdb failed: {e}"))?; + + let base_archive = path_join(&rir_dir, &bundle.base.relative_archive_path); + let base_locks = path_join(&rir_dir, &bundle.base.relative_locks_path); + let base_ccr = path_join(&rir_dir, &bundle.base.relative_ccr_path); + let base_vrps = path_join(&rir_dir, &bundle.base.relative_vrps_path); + let base_vaps = path_join(&rir_dir, &bundle.base.relative_vaps_path); + let base_validation_time = parse_time(&bundle.base.validation_time)?; + + let base_out = run_tree_from_tal_and_ta_der_payload_replay_serial_audit( + &store, + &Policy::default(), + &tal_bytes, + &ta_bytes, + None, + &base_archive, + &base_locks, + base_validation_time, + &TreeRunConfig { + max_depth: None, + max_instances: None, + }, + 
) + .map_err(|e| format!("base replay failed: {e}"))?; + + let base_ccr_content = build_ccr_from_run( + &store, + &[base_out.discovery.trust_anchor.clone()], + &base_out.tree.vrps, + &base_out.tree.aspas, + &base_out.tree.router_keys, + base_validation_time, + ) + .map_err(|e| format!("build base ccr failed: {e}"))?; + write_ccr_file(&base_ccr, &base_ccr_content) + .map_err(|e| format!("write base ccr failed: {}: {e}", base_ccr.display()))?; + let base_ccr_bytes = + fs::read(&base_ccr).map_err(|e| format!("read base ccr failed: {}: {e}", base_ccr.display()))?; + let base_decoded = + decode_content_info(&base_ccr_bytes).map_err(|e| format!("decode base ccr failed: {e}"))?; + let base_verify = + verify_content_info(&base_decoded).map_err(|e| format!("verify base ccr failed: {e}"))?; + let base_vrp_rows = build_vrp_compare_rows(&base_out.tree.vrps, &bundle.rir); + let base_vap_rows = build_vap_compare_rows(&base_out.tree.aspas, &bundle.rir); + write_vrp_csv(&base_vrps, &base_vrp_rows)?; + write_vap_csv(&base_vaps, &base_vap_rows)?; + bundle.base.ccr_sha256 = sha256_hex(&base_ccr_bytes); + bundle.base.vrp_count = base_vrp_rows.len(); + bundle.base.vap_count = base_vap_rows.len(); + + verification.base["ccr"]["sha256"] = serde_json::Value::String(bundle.base.ccr_sha256.clone()); + verification.base["ccr"]["stateHashesOk"] = serde_json::Value::Bool(base_verify.state_hashes_ok); + verification.base["ccr"]["manifestInstances"] = + serde_json::Value::from(base_verify.manifest_instances); + verification.base["ccr"]["roaVrpCount"] = serde_json::Value::from(base_vrp_rows.len()); + verification.base["ccr"]["aspaPayloadSets"] = serde_json::Value::from(base_vap_rows.len()); + verification.base["ccr"]["routerKeyCount"] = + serde_json::Value::from(base_verify.router_key_count); + verification.base["compareViews"]["baseVrpCount"] = serde_json::Value::from(base_vrp_rows.len()); + verification.base["compareViews"]["baseVapCount"] = serde_json::Value::from(base_vap_rows.len()); + 
verification.base["capture"]["selfReplayOk"] = serde_json::Value::Bool(true); + repair_base_inputs( + &base_archive, + &base_locks, + &base_out.publication_points, + &store, + &mut verification, + )?; + + let mut previous_locks_path = base_locks.clone(); + let mut any_aspa = !base_vap_rows.is_empty(); + let mut all_steps_self_replay_ok = true; + for (idx, step) in bundle.delta_sequence.steps.iter_mut().enumerate() { + let step_dir = path_join(&rir_dir, &step.relative_path); + rewrite_delta_base_hash(&step_dir, &previous_locks_path)?; + let delta_archive = path_join(&rir_dir, &step.relative_archive_path); + let delta_locks = path_join(&rir_dir, &step.relative_transition_locks_path); + let delta_ccr = path_join(&rir_dir, &step.relative_ccr_path); + let delta_vrps = path_join(&rir_dir, &step.relative_vrps_path); + let delta_vaps = path_join(&rir_dir, &step.relative_vaps_path); + let target_locks = path_join(&rir_dir, &step.relative_target_locks_path); + let delta_validation_time = parse_time(&step.validation_time)?; + + let delta_out = run_tree_from_tal_and_ta_der_payload_delta_replay_step_serial_audit( + &store, + &Policy::default(), + &tal_bytes, + &ta_bytes, + None, + &delta_archive, + &previous_locks_path, + &delta_locks, + delta_validation_time, + &TreeRunConfig { + max_depth: None, + max_instances: None, + }, + ) + .map_err(|e| format!("delta step {} replay failed: {e}", step.id))?; + + let delta_ccr_content = build_ccr_from_run( + &store, + &[delta_out.discovery.trust_anchor.clone()], + &delta_out.tree.vrps, + &delta_out.tree.aspas, + &delta_out.tree.router_keys, + delta_validation_time, + ) + .map_err(|e| format!("build delta ccr failed for {}: {e}", step.id))?; + write_ccr_file(&delta_ccr, &delta_ccr_content) + .map_err(|e| format!("write delta ccr failed: {}: {e}", delta_ccr.display()))?; + let delta_ccr_bytes = fs::read(&delta_ccr) + .map_err(|e| format!("read delta ccr failed: {}: {e}", delta_ccr.display()))?; + let delta_decoded = 
decode_content_info(&delta_ccr_bytes) + .map_err(|e| format!("decode delta ccr failed for {}: {e}", step.id))?; + let delta_verify = verify_content_info(&delta_decoded) + .map_err(|e| format!("verify delta ccr failed for {}: {e}", step.id))?; + let delta_vrp_rows = build_vrp_compare_rows(&delta_out.tree.vrps, &bundle.rir); + let delta_vap_rows = build_vap_compare_rows(&delta_out.tree.aspas, &bundle.rir); + write_vrp_csv(&delta_vrps, &delta_vrp_rows)?; + write_vap_csv(&delta_vaps, &delta_vap_rows)?; + step.delta_ccr_sha256 = sha256_hex(&delta_ccr_bytes); + step.vrp_count = delta_vrp_rows.len(); + step.vap_count = delta_vap_rows.len(); + step.has_aspa = !delta_vap_rows.is_empty(); + any_aspa |= step.has_aspa; + + if let Some(step_verification) = verification.steps.get_mut(idx) { + step_verification["ccr"]["sha256"] = + serde_json::Value::String(step.delta_ccr_sha256.clone()); + step_verification["ccr"]["stateHashesOk"] = + serde_json::Value::Bool(delta_verify.state_hashes_ok); + step_verification["ccr"]["manifestInstances"] = + serde_json::Value::from(delta_verify.manifest_instances); + step_verification["ccr"]["roaVrpCount"] = serde_json::Value::from(delta_vrp_rows.len()); + step_verification["ccr"]["aspaPayloadSets"] = serde_json::Value::from(delta_vap_rows.len()); + step_verification["ccr"]["routerKeyCount"] = + serde_json::Value::from(delta_verify.router_key_count); + step_verification["compareViews"]["vrpCount"] = + serde_json::Value::from(delta_vrp_rows.len()); + step_verification["compareViews"]["vapCount"] = + serde_json::Value::from(delta_vap_rows.len()); + step_verification["selfReplayOk"] = serde_json::Value::Bool(true); + } + let step_verification_path = path_join(&rir_dir, &step.relative_path).join("verification.json"); + let mut step_verification_json: serde_json::Value = serde_json::from_slice( + &fs::read(&step_verification_path).map_err(|e| { + format!( + "read step verification failed: {}: {e}", + step_verification_path.display() + ) + })?, + ) + 
.map_err(|e| { + format!( + "parse step verification failed: {}: {e}", + step_verification_path.display() + ) + })?; + step_verification_json["ccr"]["sha256"] = + serde_json::Value::String(step.delta_ccr_sha256.clone()); + step_verification_json["ccr"]["stateHashesOk"] = + serde_json::Value::Bool(delta_verify.state_hashes_ok); + step_verification_json["ccr"]["manifestInstances"] = + serde_json::Value::from(delta_verify.manifest_instances); + step_verification_json["ccr"]["roaVrpCount"] = serde_json::Value::from(delta_vrp_rows.len()); + step_verification_json["ccr"]["aspaPayloadSets"] = + serde_json::Value::from(delta_vap_rows.len()); + step_verification_json["ccr"]["routerKeyCount"] = + serde_json::Value::from(delta_verify.router_key_count); + step_verification_json["compareViews"]["vrpCount"] = + serde_json::Value::from(delta_vrp_rows.len()); + step_verification_json["compareViews"]["vapCount"] = + serde_json::Value::from(delta_vap_rows.len()); + step_verification_json["selfReplayOk"] = serde_json::Value::Bool(true); + repair_delta_step_inputs( + &step_dir, + &base_archive, + &base_locks, + &previous_locks_path, + &delta_out.publication_points, + &store, + &mut step_verification_json, + )?; + write_json(&step_verification_path, &step_verification_json)?; + all_steps_self_replay_ok &= true; + + repair_target_locks( + &target_locks, + &previous_locks_path, + &delta_out.publication_points, + &store, + )?; + + previous_locks_path = target_locks; + } + + bundle.has_any_aspa = any_aspa; + verification.summary["baseSelfReplayOk"] = serde_json::Value::Bool(true); + verification.summary["allStepsSelfReplayOk"] = + serde_json::Value::Bool(all_steps_self_replay_ok); + write_json(&bundle_json_path, &bundle)?; + write_json(&verification_path, &verification)?; + + if !args.keep_db && work_db.exists() { + fs::remove_dir_all(&work_db) + .map_err(|e| format!("remove refresh db failed: {}: {e}", work_db.display()))?; + if tmp_root.exists() + && fs::read_dir(&tmp_root) + 
.map_err(|e| format!("read_dir failed: {}: {e}", tmp_root.display()))? + .next() + .is_none() + { + let _ = fs::remove_dir(&tmp_root); + } + } + + println!("{}", rir_dir.display()); + Ok(()) +} diff --git a/src/bin/trace_arin_missing_vrps.rs b/src/bin/trace_arin_missing_vrps.rs new file mode 100644 index 0000000..26dd9e2 --- /dev/null +++ b/src/bin/trace_arin_missing_vrps.rs @@ -0,0 +1,100 @@ +use rpki::audit_trace::trace_rule_to_root; +use rpki::storage::{AuditRuleKind, RocksStore, VcirOutputType}; +use serde_json::Value; +use std::env; +use std::path::Path; + +fn main() { + let args: Vec = env::args().collect(); + if args.len() < 3 { + eprintln!("usage: trace_arin_missing_vrps [ ...]"); + std::process::exit(2); + } + + let store = RocksStore::open(Path::new(&args[1])).expect("open db"); + let vcirs = store.list_vcirs().expect("list vcirs"); + + for row in &args[2..] { + let parts: Vec<&str> = row.split(',').collect(); + if parts.len() != 4 { + println!("ROW {row}"); + println!("ERROR invalid compare row"); + println!(); + continue; + } + let asn: u32 = parts[0] + .trim_start_matches("AS") + .parse() + .expect("parse asn"); + let prefix = parts[1].to_string(); + let max_length: u8 = parts[2].parse().expect("parse max length"); + let mut found = false; + + println!("ROW {row}"); + for vcir in &vcirs { + for output in &vcir.local_outputs { + if output.output_type != VcirOutputType::Vrp { + continue; + } + let payload: Value = match serde_json::from_str(&output.payload_json) { + Ok(value) => value, + Err(_) => continue, + }; + let payload_asn = payload + .get("asn") + .and_then(|v| v.as_u64()) + .map(|v| v as u32); + let payload_prefix = payload + .get("prefix") + .and_then(|v| v.as_str()) + .map(|v| v.to_string()); + let payload_max = payload + .get("max_length") + .and_then(|v| v.as_u64()) + .map(|v| v as u8); + if payload_asn == Some(asn) + && payload_prefix.as_ref() == Some(&prefix) + && payload_max == Some(max_length) + { + found = true; + 
println!("manifest_rsync_uri={}", vcir.manifest_rsync_uri); + println!("source_object_uri={}", output.source_object_uri); + println!("source_object_hash={}", output.source_object_hash); + println!("source_ee_cert_hash={}", output.source_ee_cert_hash); + println!("rule_hash={}", output.rule_hash); + println!("validation_path_hint={:?}", output.validation_path_hint); + + if let Some(trace) = trace_rule_to_root( + &store, + AuditRuleKind::Roa, + &output.rule_hash, + ) + .expect("trace rule") + { + println!( + "trace_leaf_manifest={}", + trace + .chain_leaf_to_root + .first() + .map(|node| node.manifest_rsync_uri.as_str()) + .unwrap_or("") + ); + println!("trace_source_object_uri={}", trace.resolved_output.source_object_uri); + println!("trace_chain_len={}", trace.chain_leaf_to_root.len()); + for (idx, node) in trace.chain_leaf_to_root.iter().enumerate() { + println!("chain[{idx}].manifest={}", node.manifest_rsync_uri); + println!("chain[{idx}].current_manifest={}", node.current_manifest_rsync_uri); + println!("chain[{idx}].current_crl={}", node.current_crl_rsync_uri); + } + } + println!(); + } + } + } + + if !found { + println!("NOT_FOUND"); + println!(); + } + } +} diff --git a/src/bundle/live_capture.rs b/src/bundle/live_capture.rs index 9ce3a7f..0bc8418 100644 --- a/src/bundle/live_capture.rs +++ b/src/bundle/live_capture.rs @@ -68,6 +68,7 @@ impl Fetcher for RecordingHttpFetcher { pub struct RecordingRsyncFetcher { inner: F, fetches: Arc>>, + capture_objects: bool, } impl RecordingRsyncFetcher { @@ -75,6 +76,15 @@ impl RecordingRsyncFetcher { Self { inner, fetches: Arc::new(Mutex::new(BTreeMap::new())), + capture_objects: true, + } + } + + pub fn new_without_objects(inner: F) -> Self { + Self { + inner, + fetches: Arc::new(Mutex::new(BTreeMap::new())), + capture_objects: false, } } @@ -102,6 +112,39 @@ impl RsyncFetcher for RecordingRsyncFetcher { ); Ok(objects) } + + fn visit_objects( + &self, + rsync_base_uri: &str, + visitor: &mut dyn FnMut(String, Vec) -> 
Result<(), String>, + ) -> Result<(usize, u64), RsyncFetchError> { + let fetched_at_rfc3339_utc = time::OffsetDateTime::now_utc() + .format(&Rfc3339) + .unwrap_or_else(|_| "".to_string()); + + if self.capture_objects { + let mut recorded = Vec::new(); + let result = self.inner.visit_objects(rsync_base_uri, &mut |uri, bytes| { + recorded.push((uri.clone(), bytes.clone())); + visitor(uri, bytes) + })?; + self.fetches.lock().expect("rsync recorder lock").insert( + rsync_base_uri.to_string(), + RecordedRsyncFetch { + requested_base_uri: rsync_base_uri.to_string(), + objects: recorded, + fetched_at_rfc3339_utc, + }, + ); + Ok(result) + } else { + self.inner.visit_objects(rsync_base_uri, visitor) + } + } + + fn dedup_key(&self, rsync_base_uri: &str) -> String { + self.inner.dedup_key(rsync_base_uri) + } } #[derive(Clone, Debug, PartialEq, Eq)] @@ -301,6 +344,87 @@ fn rrdp_repo_is_replayable(record: &RrdpSourceRecord) -> bool { && record.last_snapshot_hash.is_some() } +fn collect_current_state_locks( + publication_points: &[PublicationPointAudit], + store: &RocksStore, +) -> Result<(BTreeMap, BTreeMap), String> { + let mut rrdp_locks = BTreeMap::new(); + let mut rsync_locks = BTreeMap::new(); + let mut seen_modules = BTreeSet::new(); + let mut seen_rrdp = BTreeSet::new(); + + for pp in publication_points { + if pp.source == "failed_fetch_no_cache" { + continue; + } + let module_uri = canonical_rsync_module(&pp.rsync_base_uri) + .map_err(|e| format!("canonicalize rsync module failed for {}: {e}", pp.rsync_base_uri))?; + if let Some(notify_uri) = pp.rrdp_notification_uri.as_deref() { + if !seen_rrdp.insert(notify_uri.to_string()) { + continue; + } + if let Some(source_record) = rrdp_repo_lock_and_record(store, notify_uri)? 
{ + if rrdp_repo_is_replayable(&source_record) { + rrdp_locks.insert( + notify_uri.to_string(), + RrdpLockJson { + transport: TransportJson::Rrdp, + session: source_record.last_session_id.clone(), + serial: source_record.last_serial, + }, + ); + continue; + } + } + rrdp_locks.insert( + notify_uri.to_string(), + RrdpLockJson { + transport: TransportJson::Rsync, + session: None, + serial: None, + }, + ); + if seen_modules.insert(module_uri.clone()) { + rsync_locks.insert( + module_uri.clone(), + RsyncLockJson { + transport: TransportJson::Rsync, + }, + ); + } + } else if seen_modules.insert(module_uri.clone()) { + rsync_locks.insert( + module_uri.clone(), + RsyncLockJson { + transport: TransportJson::Rsync, + }, + ); + } + } + + Ok((rrdp_locks, rsync_locks)) +} + +pub fn write_current_replay_state_locks( + output_path: &Path, + capture_id: &str, + validation_time: time::OffsetDateTime, + publication_points: &[PublicationPointAudit], + store: &RocksStore, +) -> Result<(), String> { + let (rrdp_locks, rsync_locks) = collect_current_state_locks(publication_points, store)?; + let locks = ReplayLocksJson { + version: 1, + capture: capture_id.to_string(), + validation_time: validation_time + .format(&Rfc3339) + .map_err(|e| format!("format validation time failed: {e}"))?, + rrdp: rrdp_locks, + rsync: rsync_locks, + }; + write_json(output_path, &locks) +} + fn materialize_rrdp_repo( capture_root: &Path, record: &RrdpSourceRecord, @@ -430,6 +554,9 @@ pub fn write_live_base_replay_bundle_inputs( let mut seen_rrdp = BTreeSet::new(); for pp in publication_points { + if pp.source == "failed_fetch_no_cache" { + continue; + } let module_uri = canonical_rsync_module(&pp.rsync_base_uri) .map_err(|e| format!("canonicalize rsync module failed for {}: {e}", pp.rsync_base_uri))?; if let Some(notify_uri) = pp.rrdp_notification_uri.as_deref() { @@ -624,6 +751,316 @@ fn notification_deltas_after_serial( .collect()) } +pub fn write_live_delta_replay_step_inputs( + step_dir: &Path, + rir: 
&str, + previous_locks_path: &Path, + target_validation_time: time::OffsetDateTime, + publication_points: &[PublicationPointAudit], + store: &RocksStore, + http_records: &BTreeMap, + rsync_records: &BTreeMap, +) -> Result { + let previous_locks: crate::replay::archive::ReplayLocks = serde_json::from_slice( + &fs::read(previous_locks_path) + .map_err(|e| format!("read previous locks failed: {}: {e}", previous_locks_path.display()))?, + ) + .map_err(|e| format!("parse previous locks failed: {}: {e}", previous_locks_path.display()))?; + let previous_locks_bytes = fs::read(previous_locks_path) + .map_err(|e| format!("read previous locks bytes failed: {}: {e}", previous_locks_path.display()))?; + let previous_locks_sha256 = sha256_hex(&previous_locks_bytes); + + let recorded_at = time::OffsetDateTime::now_utc(); + let capture_id = format!( + "{rir}-delta-{}", + recorded_at + .format(&Rfc3339) + .unwrap_or_else(|_| "1970-01-01T00:00:00Z".to_string()) + .replace(':', "") + .replace('-', "") + .replace('+', "_") + ); + let archive_root = step_dir.join("payload-delta-archive"); + let capture_root = archive_root.join("v1").join("captures").join(&capture_id); + write_json( + &capture_root.join("capture.json"), + &CaptureMetaJson { + version: 1, + capture_id: capture_id.clone(), + created_at: recorded_at + .format(&Rfc3339) + .map_err(|e| format!("format createdAt failed: {e}"))?, + notes: format!("recorded by ours live delta recorder for {rir}"), + }, + )?; + write_json( + &capture_root.join("base.json"), + &DeltaBaseMetaJson { + version: 1, + base_capture: previous_locks.capture.clone(), + base_locks_sha256: previous_locks_sha256.clone(), + created_at: recorded_at + .format(&Rfc3339) + .map_err(|e| format!("format createdAt failed: {e}"))?, + }, + )?; + + let mut rsync_objects_by_module: BTreeMap>> = BTreeMap::new(); + let mut rsync_times_by_module: BTreeMap = BTreeMap::new(); + for fetch in rsync_records.values() { + let module_uri = 
canonical_rsync_module(&fetch.requested_base_uri) + .map_err(|e| format!("canonicalize requested rsync module failed: {e}"))?; + let objects = rsync_objects_by_module.entry(module_uri.clone()).or_default(); + let times = rsync_times_by_module + .entry(module_uri) + .or_insert_with(|| (fetch.fetched_at_rfc3339_utc.clone(), fetch.fetched_at_rfc3339_utc.clone())); + if fetch.fetched_at_rfc3339_utc < times.0 { + times.0 = fetch.fetched_at_rfc3339_utc.clone(); + } + if fetch.fetched_at_rfc3339_utc > times.1 { + times.1 = fetch.fetched_at_rfc3339_utc.clone(); + } + for (uri, bytes) in &fetch.objects { + objects.insert(uri.clone(), bytes.clone()); + } + } + + let mut delta_rrdp_locks = BTreeMap::new(); + let mut delta_rsync_locks = BTreeMap::new(); + let mut seen_notifications = BTreeSet::new(); + let mut needed_modules = BTreeSet::new(); + + for pp in publication_points { + if pp.source == "failed_fetch_no_cache" { + continue; + } + let module_uri = canonical_rsync_module(&pp.rsync_base_uri) + .map_err(|e| format!("canonicalize rsync module failed for {}: {e}", pp.rsync_base_uri))?; + if let Some(notify_uri) = pp.rrdp_notification_uri.as_deref() { + if !seen_notifications.insert(notify_uri.to_string()) { + continue; + } + let base_lock = previous_locks.rrdp.get(notify_uri); + let target_record = store + .get_rrdp_source_record(notify_uri) + .map_err(|e| format!("read target rrdp source record failed for {notify_uri}: {e}"))?; + + let bucket_hash = sha256_hex(notify_uri.as_bytes()); + let bucket_dir = capture_root.join("rrdp").join("repos").join(&bucket_hash); + let (created_at, last_seen_at) = target_record + .as_ref() + .map(|record| (record.first_seen_at.rfc3339_utc.clone(), record.last_seen_at.rfc3339_utc.clone())) + .unwrap_or_else(|| { + let now = recorded_at + .format(&Rfc3339) + .unwrap_or_else(|_| "1970-01-01T00:00:00Z".to_string()); + (now.clone(), now) + }); + write_delta_repo_meta(&bucket_dir, notify_uri, &created_at, &last_seen_at)?; + + let fallback_entry = 
|| DeltaRrdpEntryJson { + kind: DeltaKindJson::FallbackRsync, + base: fallback_rsync_state(), + target: fallback_rsync_state(), + delta_count: 0, + deltas: Vec::new(), + }; + + let entry = if let (Some(base_lock), Some(target_record), Some(target_state)) = ( + base_lock, + target_record.as_ref(), + target_record.as_ref().and_then(target_rrdp_state_from_record), + ) { + if base_lock.transport == ReplayTransport::Rrdp + && base_lock.session.as_deref() == target_record.last_session_id.as_deref() + && target_record.last_serial == base_lock.serial + { + let transition = DeltaTransitionJson { + kind: DeltaKindJson::Unchanged, + base: delta_state_from_base_lock(Some(base_lock)), + target: target_state.clone(), + delta_count: 0, + deltas: Vec::new(), + }; + write_delta_transition(&bucket_dir, &transition)?; + DeltaRrdpEntryJson { + kind: DeltaKindJson::Unchanged, + base: transition.base, + target: transition.target, + delta_count: 0, + deltas: Vec::new(), + } + } else if base_lock.transport == ReplayTransport::Rrdp + && base_lock.session.as_deref() == target_record.last_session_id.as_deref() + && target_record + .last_serial + .zip(base_lock.serial) + .is_some_and(|(target, base)| target > base) + { + let notification_bytes = http_records + .get(notify_uri) + .map(|record| record.bytes.as_slice()) + .ok_or_else(|| format!("missing recorded target notification body for {notify_uri}"))?; + let base_serial = base_lock.serial.expect("checked above"); + let target_serial = target_record.last_serial.expect("checked above"); + let deltas = notification_deltas_after_serial(notification_bytes, base_serial, target_serial)?; + let mut all_present = true; + let session = target_record + .last_session_id + .as_deref() + .ok_or_else(|| format!("missing target session for {notify_uri}"))?; + let session_dir = bucket_dir.join(session); + let notification_path = + session_dir.join(format!("notification-target-{target_serial}.xml")); + write_bytes(¬ification_path, notification_bytes)?; + 
let target_notification = parse_notification(notification_bytes) + .map_err(|e| format!("parse target notification failed for {notify_uri}: {e}"))?; + let snapshot_hash_hex = hex::encode(target_notification.snapshot_hash_sha256); + if let Some(snapshot_bytes) = http_records + .get(&target_notification.snapshot_uri) + .map(|record| record.bytes.as_slice()) + { + let snapshot_path = session_dir.join(format!( + "snapshot-target-{target_serial}-{snapshot_hash_hex}.xml" + )); + write_bytes(&snapshot_path, snapshot_bytes)?; + } + let deltas_dir = session_dir.join("deltas"); + let mut delta_serials = Vec::new(); + for dref in &deltas { + if let Some(delta_bytes) = http_records.get(&dref.uri).map(|record| record.bytes.as_slice()) { + let hash = hex::encode(dref.hash_sha256); + let path = deltas_dir.join(format!("delta-{}-{}.xml", dref.serial, hash)); + write_bytes(&path, delta_bytes)?; + delta_serials.push(dref.serial); + } else { + all_present = false; + break; + } + } + if all_present && !delta_serials.is_empty() { + let transition = DeltaTransitionJson { + kind: DeltaKindJson::Delta, + base: delta_state_from_base_lock(Some(base_lock)), + target: target_state.clone(), + delta_count: delta_serials.len(), + deltas: delta_serials.clone(), + }; + write_delta_transition(&bucket_dir, &transition)?; + DeltaRrdpEntryJson { + kind: DeltaKindJson::Delta, + base: transition.base, + target: transition.target, + delta_count: transition.delta_count, + deltas: transition.deltas, + } + } else { + needed_modules.insert(module_uri.clone()); + let transition = DeltaTransitionJson { + kind: DeltaKindJson::FallbackRsync, + base: fallback_rsync_state(), + target: fallback_rsync_state(), + delta_count: 0, + deltas: Vec::new(), + }; + write_delta_transition(&bucket_dir, &transition)?; + fallback_entry() + } + } else { + needed_modules.insert(module_uri.clone()); + let transition = DeltaTransitionJson { + kind: DeltaKindJson::FallbackRsync, + base: fallback_rsync_state(), + target: 
fallback_rsync_state(), + delta_count: 0, + deltas: Vec::new(), + }; + write_delta_transition(&bucket_dir, &transition)?; + fallback_entry() + } + } else { + needed_modules.insert(module_uri.clone()); + let transition = DeltaTransitionJson { + kind: DeltaKindJson::FallbackRsync, + base: fallback_rsync_state(), + target: fallback_rsync_state(), + delta_count: 0, + deltas: Vec::new(), + }; + write_delta_transition(&bucket_dir, &transition)?; + fallback_entry() + }; + delta_rrdp_locks.insert(notify_uri.to_string(), entry); + } else { + needed_modules.insert(module_uri); + } + } + + for module_uri in needed_modules { + let owned_objects; + let objects = if let Some(objects) = rsync_objects_by_module.get(&module_uri) { + objects + } else { + owned_objects = current_module_objects_from_store(store, &module_uri)?; + &owned_objects + }; + let (created_at, last_seen_at) = rsync_times_by_module + .get(&module_uri) + .cloned() + .unwrap_or_else(|| { + let now = recorded_at + .format(&Rfc3339) + .unwrap_or_else(|_| "1970-01-01T00:00:00Z".to_string()); + (now.clone(), now) + }); + let bucket_hash = sha256_hex(module_uri.as_bytes()); + let bucket_dir = capture_root.join("rsync").join("modules").join(&bucket_hash); + materialize_rsync_module(&capture_root, &module_uri, objects, &created_at, &last_seen_at)?; + let files = objects.keys().cloned().collect::>(); + write_json( + &bucket_dir.join("files.json"), + &ReplayDeltaRsyncFilesJson { + version: 1, + module: module_uri.clone(), + file_count: files.len(), + files: files.clone(), + }, + )?; + delta_rsync_locks.insert( + module_uri, + DeltaRsyncEntryJson { + file_count: files.len(), + overlay_only: true, + }, + ); + } + + let locks = ReplayDeltaLocksJson { + version: 1, + capture: capture_id.clone(), + base_capture: previous_locks.capture.clone(), + base_locks_sha256: previous_locks_sha256, + validation_time: target_validation_time + .format(&Rfc3339) + .map_err(|e| format!("format validation time failed: {e}"))?, + rrdp: 
delta_rrdp_locks, + rsync: delta_rsync_locks, + }; + let locks_path = step_dir.join("locks-delta.json"); + write_json(&locks_path, &locks)?; + + crate::replay::delta_archive::ReplayDeltaArchiveIndex::load(&archive_root, &locks_path) + .map_err(|e| format!("delta archive self-validate failed: {e}"))?; + + Ok(LiveDeltaCaptureSummary { + archive_root, + locks_path, + capture_id, + rrdp_repo_count: locks.rrdp.len(), + rsync_module_count: locks.rsync.len(), + }) +} + pub fn write_live_delta_replay_bundle_inputs( rir_dir: &Path, rir: &str, @@ -1042,6 +1479,9 @@ mod tests { publication_point_rsync_uri: "rsync://rsync.example.test/repo/".to_string(), rrdp_notification_uri: Some(notify_uri.to_string()), source: "fresh".to_string(), + repo_sync_source: None, + repo_sync_duration_ms: None, + repo_sync_error: None, this_update_rfc3339_utc: "2026-03-30T00:00:00Z".to_string(), next_update_rfc3339_utc: "2026-03-30T01:00:00Z".to_string(), verified_at_rfc3339_utc: "2026-03-30T00:00:02Z".to_string(), @@ -1057,6 +1497,9 @@ mod tests { publication_point_rsync_uri: "rsync://rsync-only.example.test/repo/".to_string(), rrdp_notification_uri: None, source: "fresh".to_string(), + repo_sync_source: None, + repo_sync_duration_ms: None, + repo_sync_error: None, this_update_rfc3339_utc: "2026-03-30T00:00:00Z".to_string(), next_update_rfc3339_utc: "2026-03-30T01:00:00Z".to_string(), verified_at_rfc3339_utc: "2026-03-30T00:00:02Z".to_string(), diff --git a/src/bundle/mod.rs b/src/bundle/mod.rs index bf6a4f1..13777b1 100644 --- a/src/bundle/mod.rs +++ b/src/bundle/mod.rs @@ -1,5 +1,6 @@ pub mod compare_view; pub mod live_capture; +pub mod record_io; pub mod spec; pub use compare_view::{ @@ -9,6 +10,16 @@ pub use compare_view::{ pub use live_capture::{ LiveBaseCaptureSummary, LiveDeltaCaptureSummary, RecordedHttpResponse, RecordedRsyncFetch, RecordingHttpFetcher, RecordingRsyncFetcher, write_live_base_replay_bundle_inputs, - write_live_delta_replay_bundle_inputs, + 
write_live_delta_replay_bundle_inputs, write_live_delta_replay_step_inputs, + write_current_replay_state_locks, +}; +pub use record_io::{ + build_single_rir_bundle_manifest, copy_dir_all, load_validation_time, sha256_hex, + write_bytes, write_json, write_live_bundle_rir_readme, write_live_bundle_top_readme, + write_timing_json, }; pub use spec::{BundleManifest, BundleManifestEntry, RirBundleMetadata}; +pub use spec::{ + BaseBundleStateMetadataV2, BundleManifestEntryV2, BundleManifestV2, DeltaSequenceMetadataV2, + DeltaStepMetadataV2, RirBundleMetadataV2, +}; diff --git a/src/bundle/record_io.rs b/src/bundle/record_io.rs new file mode 100644 index 0000000..44717cf --- /dev/null +++ b/src/bundle/record_io.rs @@ -0,0 +1,270 @@ +use std::fs; +use std::path::Path; + +use serde::Serialize; +use sha2::Digest; +use time::format_description::well_known::Rfc3339; + +use super::{BundleManifest, BundleManifestEntry}; + +pub fn sha256_hex(bytes: &[u8]) -> String { + hex::encode(sha2::Sha256::digest(bytes)) +} + +pub fn write_json(path: &Path, value: &impl Serialize) -> Result<(), String> { + if let Some(parent) = path.parent() { + fs::create_dir_all(parent) + .map_err(|e| format!("create parent failed: {}: {e}", parent.display()))?; + } + let bytes = serde_json::to_vec_pretty(value).map_err(|e| e.to_string())?; + fs::write(path, bytes).map_err(|e| format!("write json failed: {}: {e}", path.display())) +} + +pub fn write_timing_json( + path: &Path, + mode: &str, + validation_time: &time::OffsetDateTime, + duration: std::time::Duration, +) -> Result<(), String> { + write_json( + path, + &serde_json::json!({ + "mode": mode, + "validationTime": validation_time + .format(&Rfc3339) + .map_err(|e| format!("format validation time failed: {e}"))?, + "durationSeconds": duration.as_secs_f64(), + }), + ) +} + +pub fn write_live_bundle_top_readme(path: &Path, rir: &str) -> Result<(), String> { + if let Some(parent) = path.parent() { + fs::create_dir_all(parent) + .map_err(|e| 
format!("create parent failed: {}: {e}", parent.display()))?; + } + fs::write( + path, + format!( + "# Ours Live Replay Bundle\n\nThis run contains one per-RIR bundle recorded online by `ours`.\n\n- RIR: `{rir}`\n- Reference result format: `CCR`\n" + ), + ) + .map_err(|e| format!("write readme failed: {}: {e}", path.display())) +} + +pub fn write_live_bundle_rir_readme( + path: &Path, + rir: &str, + base_validation_time: &str, +) -> Result<(), String> { + if let Some(parent) = path.parent() { + fs::create_dir_all(parent) + .map_err(|e| format!("create parent failed: {}: {e}", parent.display()))?; + } + fs::write( + path, + format!( + "# {rir} live replay bundle\n\n- `tal.tal` and `ta.cer` are the actual live run inputs.\n- `base-locks.json.validationTime` = `{base_validation_time}`.\n- `base.ccr` is the authoritative reference result.\n- `base-vrps.csv` and `base-vaps.csv` are compare views derived from `base.ccr`.\n" + ), + ) + .map_err(|e| format!("write rir readme failed: {}: {e}", path.display())) +} + +pub fn write_bytes(path: &Path, bytes: &[u8]) -> Result<(), String> { + if let Some(parent) = path.parent() { + fs::create_dir_all(parent) + .map_err(|e| format!("create parent failed: {}: {e}", parent.display()))?; + } + fs::write(path, bytes).map_err(|e| format!("write file failed: {}: {e}", path.display())) +} + +pub fn copy_dir_all(src: &Path, dst: &Path) -> Result<(), String> { + fs::create_dir_all(dst) + .map_err(|e| format!("create directory failed: {}: {e}", dst.display()))?; + for entry in fs::read_dir(src).map_err(|e| format!("read_dir failed: {}: {e}", src.display()))? 
{ + let entry = entry.map_err(|e| format!("read_dir entry failed: {}: {e}", src.display()))?; + let ty = entry + .file_type() + .map_err(|e| format!("file_type failed: {}: {e}", entry.path().display()))?; + let to = dst.join(entry.file_name()); + if ty.is_dir() { + copy_dir_all(&entry.path(), &to)?; + } else if ty.is_file() { + if let Some(parent) = to.parent() { + fs::create_dir_all(parent) + .map_err(|e| format!("create parent failed: {}: {e}", parent.display()))?; + } + fs::copy(entry.path(), &to) + .map_err(|e| format!("copy failed: {} -> {}: {e}", entry.path().display(), to.display()))?; + } + } + Ok(()) +} + +pub fn load_validation_time(path: &Path) -> Result { + let json: serde_json::Value = serde_json::from_slice( + &fs::read(path).map_err(|e| format!("read json failed: {}: {e}", path.display()))?, + ) + .map_err(|e| format!("parse json failed: {}: {e}", path.display()))?; + let value = json + .get("validationTime") + .or_else(|| json.get("validation_time")) + .and_then(|v| v.as_str()) + .ok_or_else(|| format!("validationTime missing in {}", path.display()))?; + time::OffsetDateTime::parse(value, &Rfc3339) + .map_err(|e| format!("invalid validationTime in {}: {e}", path.display())) +} + +pub fn build_single_rir_bundle_manifest( + schema_version: &str, + bundle_producer: &str, + rir: &str, + base_validation_time: &time::OffsetDateTime, + delta_validation_time: Option<&time::OffsetDateTime>, + has_aspa: bool, +) -> Result { + Ok(BundleManifest { + schema_version: schema_version.to_string(), + bundle_producer: bundle_producer.to_string(), + recorded_at_rfc3339_utc: time::OffsetDateTime::now_utc() + .format(&Rfc3339) + .map_err(|e| format!("format recorded_at failed: {e}"))?, + rirs: vec![rir.to_string()], + per_rir_bundles: vec![BundleManifestEntry { + rir: rir.to_string(), + relative_path: rir.to_string(), + base_validation_time: base_validation_time + .format(&Rfc3339) + .map_err(|e| format!("format base validation time failed: {e}"))?, + 
delta_validation_time: match delta_validation_time { + Some(value) => Some( + value + .format(&Rfc3339) + .map_err(|e| format!("format delta validation time failed: {e}"))?, + ), + None => None, + }, + has_aspa, + }], + }) +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::tempdir; + + #[test] + fn load_validation_time_reads_validation_time_field() { + let td = tempdir().expect("tempdir"); + let path = td.path().join("locks.json"); + fs::write(&path, r#"{"validationTime":"2026-04-01T00:00:00Z"}"#).expect("write"); + let parsed = load_validation_time(&path).expect("load"); + assert_eq!( + parsed.format(&Rfc3339).expect("format"), + "2026-04-01T00:00:00Z" + ); + } + + #[test] + fn copy_dir_all_copies_nested_files() { + let td = tempdir().expect("tempdir"); + let src = td.path().join("src"); + let dst = td.path().join("dst"); + fs::create_dir_all(src.join("nested")).expect("mkdir"); + fs::write(src.join("root.txt"), b"root").expect("write root"); + fs::write(src.join("nested/child.txt"), b"child").expect("write child"); + copy_dir_all(&src, &dst).expect("copy"); + assert_eq!(fs::read(dst.join("root.txt")).expect("read root"), b"root"); + assert_eq!( + fs::read(dst.join("nested/child.txt")).expect("read child"), + b"child" + ); + } + + #[test] + fn build_single_rir_bundle_manifest_formats_times() { + let base = time::OffsetDateTime::parse("2026-04-01T00:00:00Z", &Rfc3339).expect("base"); + let delta = time::OffsetDateTime::parse("2026-04-01T00:10:00Z", &Rfc3339).expect("delta"); + let manifest = build_single_rir_bundle_manifest( + "20260330-v1", + "ours", + "apnic", + &base, + Some(&delta), + true, + ) + .expect("manifest"); + assert_eq!(manifest.schema_version, "20260330-v1"); + assert_eq!(manifest.rirs, vec!["apnic".to_string()]); + assert_eq!(manifest.per_rir_bundles[0].base_validation_time, "2026-04-01T00:00:00Z"); + assert_eq!( + manifest.per_rir_bundles[0].delta_validation_time.as_deref(), + Some("2026-04-01T00:10:00Z") + ); + } + + #[test] + fn 
write_json_and_write_bytes_create_parent_directories() { + let td = tempdir().expect("tempdir"); + let json_path = td.path().join("nested/meta/data.json"); + write_json(&json_path, &serde_json::json!({"ok": true})).expect("write json"); + let json: serde_json::Value = + serde_json::from_slice(&fs::read(&json_path).expect("read json")).expect("parse"); + assert_eq!(json["ok"], true); + + let bytes_path = td.path().join("nested/raw/file.bin"); + write_bytes(&bytes_path, b"payload").expect("write bytes"); + assert_eq!(fs::read(&bytes_path).expect("read bytes"), b"payload"); + } + + #[test] + fn write_timing_and_readmes_emit_expected_text() { + let td = tempdir().expect("tempdir"); + let timing_path = td.path().join("timings/base-produce.json"); + let validation_time = + time::OffsetDateTime::parse("2026-04-01T00:00:00Z", &Rfc3339).expect("time"); + write_timing_json( + &timing_path, + "base", + &validation_time, + std::time::Duration::from_secs_f64(1.25), + ) + .expect("write timing"); + let timing: serde_json::Value = + serde_json::from_slice(&fs::read(&timing_path).expect("read timing")).expect("parse"); + assert_eq!(timing["mode"], "base"); + assert_eq!(timing["validationTime"], "2026-04-01T00:00:00Z"); + assert_eq!(timing["durationSeconds"], 1.25); + + let top_readme = td.path().join("README.md"); + write_live_bundle_top_readme(&top_readme, "apnic").expect("write top readme"); + let top_text = fs::read_to_string(&top_readme).expect("read top readme"); + assert!(top_text.contains("RIR: `apnic`")); + assert!(top_text.contains("Reference result format: `CCR`")); + + let rir_readme = td.path().join("apnic/README.md"); + write_live_bundle_rir_readme(&rir_readme, "apnic", "2026-04-01T00:00:00Z") + .expect("write rir readme"); + let rir_text = fs::read_to_string(&rir_readme).expect("read rir readme"); + assert!(rir_text.contains("base-locks.json.validationTime")); + assert!(rir_text.contains("base-vrps.csv")); + assert!(rir_text.contains("base-vaps.csv")); + } + + 
#[test] + fn build_single_rir_bundle_manifest_supports_none_delta_time() { + let base = time::OffsetDateTime::parse("2026-04-01T00:00:00Z", &Rfc3339).expect("base"); + let manifest = build_single_rir_bundle_manifest( + "20260330-v1", + "ours", + "afrinic", + &base, + None, + false, + ) + .expect("manifest"); + assert_eq!(manifest.per_rir_bundles[0].delta_validation_time, None); + assert!(!manifest.per_rir_bundles[0].has_aspa); + } +} diff --git a/src/bundle/spec.rs b/src/bundle/spec.rs index 4083c36..d308f4a 100644 --- a/src/bundle/spec.rs +++ b/src/bundle/spec.rs @@ -57,3 +57,117 @@ pub struct RirBundleMetadata { #[serde(rename = "deltaVapCount", skip_serializing_if = "Option::is_none")] pub delta_vap_count: Option, } + +#[derive(Clone, Debug, PartialEq, Eq, Serialize)] +pub struct BundleManifestV2 { + #[serde(rename = "schemaVersion")] + pub schema_version: String, + #[serde(rename = "bundleProducer")] + pub bundle_producer: String, + #[serde(rename = "recordedAt")] + pub recorded_at_rfc3339_utc: String, + pub rirs: Vec, + #[serde(rename = "perRirBundles")] + pub per_rir_bundles: Vec, +} + +#[derive(Clone, Debug, PartialEq, Eq, Serialize)] +pub struct BundleManifestEntryV2 { + pub rir: String, + pub relative_path: String, + #[serde(rename = "baseValidationTime")] + pub base_validation_time: String, + #[serde(rename = "stepCount")] + pub step_count: usize, + #[serde(rename = "firstDeltaValidationTime", skip_serializing_if = "Option::is_none")] + pub first_delta_validation_time: Option, + #[serde(rename = "lastDeltaValidationTime", skip_serializing_if = "Option::is_none")] + pub last_delta_validation_time: Option, + #[serde(rename = "hasAspa")] + pub has_aspa: bool, +} + +#[derive(Clone, Debug, PartialEq, Eq, Serialize)] +pub struct BaseBundleStateMetadataV2 { + #[serde(rename = "validationTime")] + pub validation_time: String, + #[serde(rename = "ccrSha256")] + pub ccr_sha256: String, + #[serde(rename = "vrpCount")] + pub vrp_count: usize, + #[serde(rename = 
"vapCount")] + pub vap_count: usize, + #[serde(rename = "relativeArchivePath")] + pub relative_archive_path: String, + #[serde(rename = "relativeLocksPath")] + pub relative_locks_path: String, + #[serde(rename = "relativeCcrPath")] + pub relative_ccr_path: String, + #[serde(rename = "relativeVrpsPath")] + pub relative_vrps_path: String, + #[serde(rename = "relativeVapsPath")] + pub relative_vaps_path: String, +} + +#[derive(Clone, Debug, PartialEq, Eq, Serialize)] +pub struct DeltaStepMetadataV2 { + pub index: usize, + pub id: String, + #[serde(rename = "relativePath")] + pub relative_path: String, + #[serde(rename = "baseRef")] + pub base_ref: String, + #[serde(rename = "validationTime")] + pub validation_time: String, + #[serde(rename = "deltaCcrSha256")] + pub delta_ccr_sha256: String, + #[serde(rename = "vrpCount")] + pub vrp_count: usize, + #[serde(rename = "vapCount")] + pub vap_count: usize, + #[serde(rename = "relativeArchivePath")] + pub relative_archive_path: String, + #[serde(rename = "relativeTransitionLocksPath")] + pub relative_transition_locks_path: String, + #[serde(rename = "relativeTargetLocksPath")] + pub relative_target_locks_path: String, + #[serde(rename = "relativeCcrPath")] + pub relative_ccr_path: String, + #[serde(rename = "relativeVrpsPath")] + pub relative_vrps_path: String, + #[serde(rename = "relativeVapsPath")] + pub relative_vaps_path: String, + #[serde(rename = "hasAspa")] + pub has_aspa: bool, + #[serde(rename = "hasRouterKey")] + pub has_router_key: bool, +} + +#[derive(Clone, Debug, PartialEq, Eq, Serialize)] +pub struct DeltaSequenceMetadataV2 { + #[serde(rename = "configuredDeltaCount")] + pub configured_delta_count: usize, + #[serde(rename = "configuredIntervalSeconds")] + pub configured_interval_seconds: u64, + pub steps: Vec, +} + +#[derive(Clone, Debug, PartialEq, Eq, Serialize)] +pub struct RirBundleMetadataV2 { + #[serde(rename = "schemaVersion")] + pub schema_version: String, + #[serde(rename = "bundleProducer")] + pub 
bundle_producer: String, + pub rir: String, + #[serde(rename = "talSha256")] + pub tal_sha256: String, + #[serde(rename = "taCertSha256")] + pub ta_cert_sha256: String, + #[serde(rename = "hasAnyAspa")] + pub has_any_aspa: bool, + #[serde(rename = "hasAnyRouterKey")] + pub has_any_router_key: bool, + pub base: BaseBundleStateMetadataV2, + #[serde(rename = "deltaSequence")] + pub delta_sequence: DeltaSequenceMetadataV2, +} diff --git a/src/fetch/http.rs b/src/fetch/http.rs index 55ce3c7..f0c8768 100644 --- a/src/fetch/http.rs +++ b/src/fetch/http.rs @@ -1,12 +1,16 @@ use std::time::Duration; use reqwest::blocking::Client; +use reqwest::header::HeaderMap; use crate::sync::rrdp::Fetcher; #[derive(Clone, Debug)] pub struct HttpFetcherConfig { + /// Short timeout used for connection establishment and small metadata objects. pub timeout: Duration, + /// Larger timeout used for RRDP snapshot / delta bodies. + pub large_body_timeout: Duration, pub user_agent: String, } @@ -14,6 +18,7 @@ impl Default for HttpFetcherConfig { fn default() -> Self { Self { timeout: Duration::from_secs(20), + large_body_timeout: Duration::from_secs(180), user_agent: "rpki-dev/0.1 (stage2)".to_string(), } } @@ -26,34 +31,154 @@ impl Default for HttpFetcherConfig { /// - fetching RRDP notification/snapshot files (RFC 8182 §3.4) #[derive(Clone, Debug)] pub struct BlockingHttpFetcher { - client: Client, + short_client: Client, + large_body_client: Client, + short_timeout: Duration, + large_body_timeout: Duration, } impl BlockingHttpFetcher { pub fn new(config: HttpFetcherConfig) -> Result { - let client = Client::builder() + let short_timeout = config.timeout; + let large_body_timeout = std::cmp::max(config.large_body_timeout, config.timeout); + let short_client = Client::builder() + .connect_timeout(config.timeout) .timeout(config.timeout) + .user_agent(config.user_agent.clone()) + .build() + .map_err(|e| e.to_string())?; + let large_body_client = Client::builder() + 
.connect_timeout(config.timeout) + .timeout(large_body_timeout) .user_agent(config.user_agent) .build() .map_err(|e| e.to_string())?; - Ok(Self { client }) + Ok(Self { + short_client, + large_body_client, + short_timeout, + large_body_timeout, + }) } pub fn fetch_bytes(&self, uri: &str) -> Result, String> { - let resp = self - .client + let started = std::time::Instant::now(); + let (client, timeout_profile, timeout_value) = self.client_for_uri(uri); + let resp = client .get(uri) .send() - .map_err(|e| format!("http request failed: {e}"))?; + .map_err(|e| { + let msg = format!("http request failed: {e}"); + crate::progress_log::emit( + "http_fetch_failed", + serde_json::json!({ + "uri": uri, + "stage": "request", + "timeout_profile": timeout_profile, + "request_timeout_ms": timeout_value.as_millis() as u64, + "duration_ms": started.elapsed().as_millis() as u64, + "error": msg, + }), + ); + msg + })?; let status = resp.status(); + let headers = resp.headers().clone(); if !status.is_success() { - return Err(format!("http status {status}")); + let body_preview = resp + .text() + .ok() + .map(|text| text.chars().take(160).collect::()); + let body_prefix = body_preview + .clone() + .unwrap_or_else(|| "".to_string()); + let msg = format!( + "http status {status}; content_type={}; content_encoding={}; content_length={}; transfer_encoding={}; body_prefix={}", + header_value(&headers, "content-type"), + header_value(&headers, "content-encoding"), + header_value(&headers, "content-length"), + header_value(&headers, "transfer-encoding"), + body_prefix, + ); + crate::progress_log::emit( + "http_fetch_failed", + serde_json::json!({ + "uri": uri, + "stage": "status", + "timeout_profile": timeout_profile, + "request_timeout_ms": timeout_value.as_millis() as u64, + "duration_ms": started.elapsed().as_millis() as u64, + "status": status.as_u16(), + "content_type": header_value_opt(&headers, "content-type"), + "content_encoding": header_value_opt(&headers, "content-encoding"), + 
"content_length": header_value_opt(&headers, "content-length"), + "transfer_encoding": header_value_opt(&headers, "transfer-encoding"), + "body_prefix": body_preview, + "error": msg, + }), + ); + return Err(msg); } - resp.bytes() - .map(|b| b.to_vec()) - .map_err(|e| format!("http read body failed: {e}")) + match resp.bytes() { + Ok(bytes) => { + let duration_ms = started.elapsed().as_millis() as u64; + if (duration_ms as f64) / 1000.0 >= crate::progress_log::slow_threshold_secs() { + crate::progress_log::emit( + "http_fetch_slow", + serde_json::json!({ + "uri": uri, + "status": status.as_u16(), + "timeout_profile": timeout_profile, + "request_timeout_ms": timeout_value.as_millis() as u64, + "duration_ms": duration_ms, + "bytes": bytes.len(), + "content_type": header_value_opt(&headers, "content-type"), + "content_encoding": header_value_opt(&headers, "content-encoding"), + "content_length": header_value_opt(&headers, "content-length"), + "transfer_encoding": header_value_opt(&headers, "transfer-encoding"), + }), + ); + } + Ok(bytes.to_vec()) + } + Err(e) => { + let msg = format!( + "http read body failed: {e}; status={}; content_type={}; content_encoding={}; content_length={}; transfer_encoding={}", + status, + header_value(&headers, "content-type"), + header_value(&headers, "content-encoding"), + header_value(&headers, "content-length"), + header_value(&headers, "transfer-encoding"), + ); + crate::progress_log::emit( + "http_fetch_failed", + serde_json::json!({ + "uri": uri, + "stage": "read_body", + "timeout_profile": timeout_profile, + "request_timeout_ms": timeout_value.as_millis() as u64, + "duration_ms": started.elapsed().as_millis() as u64, + "status": status.as_u16(), + "content_type": header_value_opt(&headers, "content-type"), + "content_encoding": header_value_opt(&headers, "content-encoding"), + "content_length": header_value_opt(&headers, "content-length"), + "transfer_encoding": header_value_opt(&headers, "transfer-encoding"), + "error": msg, + }), + 
); + Err(msg) + } + } + } + + fn client_for_uri(&self, uri: &str) -> (&Client, &'static str, Duration) { + if uses_large_body_timeout(uri) { + (&self.large_body_client, "large_body", self.large_body_timeout) + } else { + (&self.short_client, "short", self.short_timeout) + } } } @@ -63,12 +188,30 @@ impl Fetcher for BlockingHttpFetcher { } } +fn header_value(headers: &HeaderMap, name: &str) -> String { + header_value_opt(headers, name).unwrap_or_else(|| "".to_string()) +} + +fn header_value_opt(headers: &HeaderMap, name: &str) -> Option { + headers + .get(name) + .and_then(|v| v.to_str().ok()) + .map(|v| v.to_string()) +} + +fn uses_large_body_timeout(uri: &str) -> bool { + uri.starts_with("https://") + && uri.ends_with(".xml") + && !uri.ends_with("notification.xml") +} + #[cfg(test)] mod tests { use super::*; use std::io::{Read, Write}; use std::net::TcpListener; use std::thread; + use std::time::Duration as StdDuration; fn spawn_one_shot_http_server(status_line: &'static str, body: &'static [u8]) -> String { let listener = TcpListener::bind(("127.0.0.1", 0)).expect("bind"); @@ -110,4 +253,40 @@ mod tests { let err = http.fetch_bytes(&url).unwrap_err(); assert!(err.contains("http status"), "{err}"); } + + #[test] + fn fetch_bytes_times_out_on_idle_body_read() { + let listener = TcpListener::bind(("127.0.0.1", 0)).expect("bind"); + let addr = listener.local_addr().expect("addr"); + thread::spawn(move || { + let (mut stream, _) = listener.accept().expect("accept"); + let mut buf = [0u8; 1024]; + let _ = stream.read(&mut buf); + stream + .write_all(b"HTTP/1.1 200 OK\r\nContent-Length: 5\r\nConnection: close\r\n\r\nh") + .expect("write partial body"); + std::thread::sleep(StdDuration::from_secs(2)); + let _ = stream.write_all(b"ello"); + }); + let url = format!("http://{}/", addr); + let http = BlockingHttpFetcher::new(HttpFetcherConfig { + timeout: Duration::from_secs(1), + ..HttpFetcherConfig::default() + }) + .expect("http"); + let err = 
http.fetch_bytes(&url).unwrap_err(); + assert!(err.contains("http read body failed"), "{err}"); + } + + #[test] + fn uses_large_body_timeout_selects_rrdp_snapshot_and_delta_not_notification() { + assert!(!uses_large_body_timeout("https://rrdp.example.test/notification.xml")); + assert!(uses_large_body_timeout( + "https://rrdp.example.test/session/123/snapshot.xml" + )); + assert!(uses_large_body_timeout( + "https://rrdp.example.test/session/123/delta-42.xml" + )); + assert!(!uses_large_body_timeout("https://tal.example.test/example.tal")); + } } diff --git a/src/fetch/rsync.rs b/src/fetch/rsync.rs index 1d85f3d..ce8c628 100644 --- a/src/fetch/rsync.rs +++ b/src/fetch/rsync.rs @@ -8,6 +8,14 @@ pub enum RsyncFetchError { pub type RsyncFetchResult = Result; +pub fn normalize_rsync_base_uri(s: &str) -> String { + if s.ends_with('/') { + s.to_string() + } else { + format!("{s}/") + } +} + /// Fetch repository objects from a publication point. /// /// v1: this is intentionally abstract so unit tests can use a mock, and later we can @@ -15,6 +23,33 @@ pub type RsyncFetchResult = Result; pub trait RsyncFetcher { /// Return a list of objects as `(rsync_uri, bytes)` pairs. fn fetch_objects(&self, rsync_base_uri: &str) -> RsyncFetchResult)>>; + + /// Stream fetched objects to a visitor without requiring callers to materialize the + /// full result vector in memory. + fn visit_objects( + &self, + rsync_base_uri: &str, + visitor: &mut dyn FnMut(String, Vec) -> Result<(), String>, + ) -> RsyncFetchResult<(usize, u64)> { + let objects = self.fetch_objects(rsync_base_uri)?; + let mut count = 0usize; + let mut bytes_total = 0u64; + for (uri, bytes) in objects { + bytes_total += bytes.len() as u64; + count += 1; + visitor(uri, bytes).map_err(RsyncFetchError::Fetch)?; + } + Ok((count, bytes_total)) + } + + /// Return the deduplication key used by orchestration layers. + /// + /// By default this is the normalized publication point base URI. 
Fetchers that + /// intentionally widen their fetch scope (for example to a full rsync module) + /// should override this so callers can safely deduplicate at the same scope. + fn dedup_key(&self, rsync_base_uri: &str) -> String { + normalize_rsync_base_uri(rsync_base_uri) + } } /// A simple "rsync" implementation backed by a local directory. @@ -73,14 +108,6 @@ fn walk_dir_collect( Ok(()) } -fn normalize_rsync_base_uri(s: &str) -> String { - if s.ends_with('/') { - s.to_string() - } else { - format!("{s}/") - } -} - #[cfg(test)] mod tests { use super::*; @@ -115,4 +142,14 @@ mod tests { RsyncFetchError::Fetch(msg) => assert!(!msg.is_empty()), } } + + #[test] + fn default_dedup_key_is_normalized_base_uri() { + let tmp = tempfile::tempdir().expect("tempdir"); + let fetcher = LocalDirRsyncFetcher::new(tmp.path()); + assert_eq!( + fetcher.dedup_key("rsync://example.net/repo"), + "rsync://example.net/repo/" + ); + } } diff --git a/src/fetch/rsync_system.rs b/src/fetch/rsync_system.rs index 7bcbf33..5ac33c8 100644 --- a/src/fetch/rsync_system.rs +++ b/src/fetch/rsync_system.rs @@ -5,7 +5,9 @@ use std::time::Duration; use sha2::Digest; use uuid::Uuid; -use crate::fetch::rsync::{RsyncFetchError, RsyncFetchResult, RsyncFetcher}; +use crate::fetch::rsync::{ + RsyncFetchError, RsyncFetchResult, RsyncFetcher, normalize_rsync_base_uri, +}; #[derive(Clone, Debug)] pub struct SystemRsyncConfig { @@ -94,30 +96,55 @@ impl SystemRsyncFetcher { } Ok(()) } + + fn module_fetch_uri(&self, rsync_base_uri: &str) -> String { + rsync_capture_scope_uri(rsync_base_uri).unwrap_or_else(|| normalize_rsync_base_uri(rsync_base_uri)) + } } impl RsyncFetcher for SystemRsyncFetcher { fn fetch_objects(&self, rsync_base_uri: &str) -> RsyncFetchResult)>> { - let base = normalize_rsync_base_uri(rsync_base_uri); + let mut out = Vec::new(); + self.visit_objects(rsync_base_uri, &mut |uri, bytes| { + out.push((uri, bytes)); + Ok(()) + })?; + Ok(out) + } + + fn visit_objects( + &self, + rsync_base_uri: &str, 
+ visitor: &mut dyn FnMut(String, Vec) -> Result<(), String>, + ) -> RsyncFetchResult<(usize, u64)> { + let base = self.module_fetch_uri(rsync_base_uri); + let mut count = 0usize; + let mut bytes_total = 0u64; + let mut wrapped = |uri: String, bytes: Vec| -> Result<(), String> { + bytes_total += bytes.len() as u64; + count += 1; + visitor(uri, bytes) + }; + if let Some(dst) = self .mirror_dst_dir(&base) .map_err(|e| RsyncFetchError::Fetch(e.to_string()))? { self.run_rsync(&base, &dst) .map_err(RsyncFetchError::Fetch)?; - let mut out = Vec::new(); - walk_dir_collect(&dst, &dst, &base, &mut out).map_err(RsyncFetchError::Fetch)?; - return Ok(out); + walk_dir_visit(&dst, &dst, &base, &mut wrapped).map_err(RsyncFetchError::Fetch)?; + return Ok((count, bytes_total)); } let tmp = TempDir::new().map_err(|e| RsyncFetchError::Fetch(e.to_string()))?; self.run_rsync(&base, tmp.path()) .map_err(RsyncFetchError::Fetch)?; + walk_dir_visit(tmp.path(), tmp.path(), &base, &mut wrapped).map_err(RsyncFetchError::Fetch)?; + Ok((count, bytes_total)) + } - let mut out = Vec::new(); - walk_dir_collect(tmp.path(), tmp.path(), &base, &mut out) - .map_err(RsyncFetchError::Fetch)?; - Ok(out) + fn dedup_key(&self, rsync_base_uri: &str) -> String { + self.module_fetch_uri(rsync_base_uri) } } @@ -144,12 +171,20 @@ impl Drop for TempDir { } } -fn normalize_rsync_base_uri(s: &str) -> String { - if s.ends_with('/') { - s.to_string() - } else { - format!("{s}/") +fn rsync_capture_scope_uri(s: &str) -> Option { + let normalized = normalize_rsync_base_uri(s); + let rest = normalized.strip_prefix("rsync://")?; + let mut host_and_path = rest.splitn(2, '/'); + let authority = host_and_path.next()?; + let path = host_and_path.next()?; + let mut segments: Vec<&str> = path.split('/').filter(|segment| !segment.is_empty()).collect(); + if segments.is_empty() { + return None; } + if segments.len() >= 4 { + segments.pop(); + } + Some(format!("rsync://{authority}/{}/", segments.join("/"))) } fn walk_dir_collect( 
@@ -182,6 +217,36 @@ fn walk_dir_collect( Ok(()) } +fn walk_dir_visit( + root: &Path, + current: &Path, + rsync_base_uri: &str, + visitor: &mut dyn FnMut(String, Vec) -> Result<(), String>, +) -> Result<(), String> { + let rd = std::fs::read_dir(current).map_err(|e| e.to_string())?; + for entry in rd { + let entry = entry.map_err(|e| e.to_string())?; + let path = entry.path(); + let meta = entry.metadata().map_err(|e| e.to_string())?; + if meta.is_dir() { + walk_dir_visit(root, &path, rsync_base_uri, visitor)?; + continue; + } + if !meta.is_file() { + continue; + } + let rel = path + .strip_prefix(root) + .map_err(|e| e.to_string())? + .to_string_lossy() + .replace('\\', "/"); + let uri = format!("{rsync_base_uri}{rel}"); + let bytes = std::fs::read(&path).map_err(|e| e.to_string())?; + visitor(uri, bytes)?; + } + Ok(()) +} + #[cfg(test)] mod tests { use super::*; @@ -217,6 +282,32 @@ mod tests { assert_eq!(out[1].1, b"x"); } + #[test] + fn rsync_capture_scope_uri_widens_only_deep_publication_points() { + assert_eq!( + rsync_capture_scope_uri("rsync://example.net/repo/ta/ca/publication-point/"), + Some("rsync://example.net/repo/ta/ca/".to_string()) + ); + assert_eq!( + rsync_capture_scope_uri("rsync://example.net/repo/ta/"), + Some("rsync://example.net/repo/ta/".to_string()) + ); + assert_eq!( + rsync_capture_scope_uri("rsync://example.net/repo/"), + Some("rsync://example.net/repo/".to_string()) + ); + assert_eq!(rsync_capture_scope_uri("https://example.net/repo"), None); + } + + #[test] + fn system_rsync_dedup_key_uses_capture_scope() { + let fetcher = SystemRsyncFetcher::new(SystemRsyncConfig::default()); + assert_eq!( + fetcher.dedup_key("rsync://example.net/repo/ta/ca/publication-point/"), + "rsync://example.net/repo/ta/ca/" + ); + } + #[test] fn system_rsync_fetcher_reports_spawn_and_exit_errors() { let dst = tempfile::tempdir().expect("tempdir"); diff --git a/src/lib.rs b/src/lib.rs index b343bfb..c6f0365 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -18,6 
+18,8 @@ pub mod fetch; #[cfg(feature = "full")] pub mod policy; #[cfg(feature = "full")] +pub mod progress_log; +#[cfg(feature = "full")] pub mod replay; #[cfg(feature = "full")] pub mod report; diff --git a/src/progress_log.rs b/src/progress_log.rs new file mode 100644 index 0000000..4f23ca1 --- /dev/null +++ b/src/progress_log.rs @@ -0,0 +1,40 @@ +use serde_json::Value; +use time::format_description::well_known::Rfc3339; + +fn progress_enabled() -> bool { + std::env::var("RPKI_PROGRESS_LOG") + .ok() + .map(|v| matches!(v.as_str(), "1" | "true" | "TRUE" | "yes" | "YES")) + .unwrap_or(false) +} + +pub fn slow_threshold_secs() -> f64 { + std::env::var("RPKI_PROGRESS_SLOW_SECS") + .ok() + .and_then(|v| v.parse::().ok()) + .filter(|v| *v >= 0.0) + .unwrap_or(30.0) +} + +pub fn emit(kind: &str, payload: Value) { + if !progress_enabled() { + return; + } + let ts = time::OffsetDateTime::now_utc() + .format(&Rfc3339) + .unwrap_or_else(|_| "1970-01-01T00:00:00Z".to_string()); + let mut map = serde_json::Map::new(); + map.insert("ts".to_string(), Value::String(ts)); + map.insert("kind".to_string(), Value::String(kind.to_string())); + match payload { + Value::Object(obj) => { + for (k, v) in obj { + map.insert(k, v); + } + } + other => { + map.insert("value".to_string(), other); + } + } + eprintln!("[progress] {}", Value::Object(map)); +} diff --git a/src/replay/delta_fetch_rsync.rs b/src/replay/delta_fetch_rsync.rs index c685f41..51face7 100644 --- a/src/replay/delta_fetch_rsync.rs +++ b/src/replay/delta_fetch_rsync.rs @@ -6,6 +6,7 @@ use std::sync::Arc; use crate::fetch::rsync::{RsyncFetchError, RsyncFetchResult, RsyncFetcher}; use crate::replay::archive::{ReplayArchiveIndex, canonical_rsync_module}; use crate::replay::delta_archive::ReplayDeltaArchiveIndex; +use crate::storage::{RepositoryViewState, RocksStore}; #[derive(Clone, Debug)] pub struct PayloadDeltaReplayRsyncFetcher { @@ -33,6 +34,21 @@ impl PayloadDeltaReplayRsyncFetcher { } } +pub struct 
PayloadDeltaReplayCurrentStoreRsyncFetcher<'a> { + store: &'a RocksStore, + delta_index: Arc, +} + +impl<'a> PayloadDeltaReplayCurrentStoreRsyncFetcher<'a> { + pub fn new(store: &'a RocksStore, delta_index: Arc) -> Self { + Self { store, delta_index } + } + + pub fn delta_index(&self) -> &ReplayDeltaArchiveIndex { + self.delta_index.as_ref() + } +} + impl RsyncFetcher for PayloadDeltaReplayRsyncFetcher { fn fetch_objects(&self, rsync_base_uri: &str) -> RsyncFetchResult)>> { let module_uri = canonical_rsync_module(rsync_base_uri) @@ -107,6 +123,86 @@ impl RsyncFetcher for PayloadDeltaReplayRsyncFetcher { } } +impl RsyncFetcher for PayloadDeltaReplayCurrentStoreRsyncFetcher<'_> { + fn fetch_objects(&self, rsync_base_uri: &str) -> RsyncFetchResult)>> { + let module_uri = canonical_rsync_module(rsync_base_uri) + .map_err(|e| RsyncFetchError::Fetch(e.to_string()))?; + let normalized_base = if rsync_base_uri.ends_with('/') { + rsync_base_uri.to_string() + } else { + format!("{rsync_base_uri}/") + }; + + let mut merged: BTreeMap> = BTreeMap::new(); + let mut saw_base = false; + let overlay_only = self + .delta_index + .rsync_module(&module_uri) + .map(|module| module.overlay_only) + .unwrap_or(false); + + if !overlay_only { + let entries = self + .store + .list_repository_view_entries_with_prefix(&module_uri) + .map_err(|e| RsyncFetchError::Fetch(format!("list repository view failed: {e}")))?; + for entry in entries { + if entry.state != RepositoryViewState::Present { + continue; + } + let bytes = self + .store + .load_current_object_bytes_by_uri(&entry.rsync_uri) + .map_err(|e| { + RsyncFetchError::Fetch(format!( + "load current object failed for {}: {e}", + entry.rsync_uri + )) + })? 
+ .ok_or_else(|| { + RsyncFetchError::Fetch(format!( + "current object missing for {}", + entry.rsync_uri + )) + })?; + merged.insert(entry.rsync_uri, bytes); + saw_base = true; + } + } + + let mut saw_overlay = false; + if let Some(delta_module) = self.delta_index.rsync_module(&module_uri) { + for (uri, path) in &delta_module.overlay_files { + let bytes = fs::read(path).map_err(|e| { + RsyncFetchError::Fetch(format!( + "read delta rsync overlay failed: {}: {e}", + path.display() + )) + })?; + merged.insert(uri.clone(), bytes); + saw_overlay = true; + } + } + + if !saw_base && !saw_overlay { + return Err(RsyncFetchError::Fetch(format!( + "delta replay current-store base not found and no delta overlay exists for module: {module_uri}" + ))); + } + + let filtered: Vec<(String, Vec)> = merged + .into_iter() + .filter(|(uri, _)| uri.starts_with(&normalized_base)) + .collect(); + if filtered.is_empty() { + return Err(RsyncFetchError::Fetch(format!( + "delta replay rsync subtree not found: {normalized_base}" + ))); + } + Ok(filtered) + } +} + fn module_tree_root(module_uri: &str, tree_dir: &Path) -> Result { let rest = module_uri .strip_prefix("rsync://") @@ -161,6 +257,7 @@ mod tests { use super::*; use crate::replay::archive::ReplayArchiveIndex; use crate::replay::delta_archive::ReplayDeltaArchiveIndex; + use crate::storage::{RawByHashEntry, RepositoryViewEntry, RepositoryViewState, RocksStore}; fn build_base_and_delta_rsync_fixture() -> (tempfile::TempDir, PathBuf, PathBuf, PathBuf, PathBuf) { @@ -201,6 +298,46 @@ mod tests { (temp, base_archive, base_locks, delta_archive, delta_locks) } + #[test] + fn delta_rsync_current_store_fetcher_merges_current_store_and_overlay() { + let (_temp, _base_archive, _base_locks, delta_archive, delta_locks) = + build_base_and_delta_rsync_fixture(); + let temp = tempfile::tempdir().expect("tempdir"); + let store = RocksStore::open(temp.path()).expect("open rocksdb"); + store + .put_raw_by_hash_entry(&RawByHashEntry { + sha256_hex: 
crate::replay::archive::sha256_hex(b"base-a"), + bytes: b"base-a".to_vec(), + origin_uris: vec!["rsync://rsync.example.test/repo/a.roa".to_string()], + object_type: Some("roa".to_string()), + encoding: None, + }) + .expect("put raw by hash"); + store + .put_repository_view_entry(&RepositoryViewEntry { + rsync_uri: "rsync://rsync.example.test/repo/a.roa".to_string(), + current_hash: Some(crate::replay::archive::sha256_hex(b"base-a")), + repository_source: Some("rsync".to_string()), + object_type: Some("roa".to_string()), + state: RepositoryViewState::Present, + }) + .expect("put repository view"); + + let delta = Arc::new( + ReplayDeltaArchiveIndex::load(&delta_archive, &delta_locks).expect("load delta index"), + ); + let fetcher = PayloadDeltaReplayCurrentStoreRsyncFetcher::new(&store, delta); + let mut objects = fetcher + .fetch_objects("rsync://rsync.example.test/repo/") + .expect("fetch current-store objects"); + objects.sort_by(|a, b| a.0.cmp(&b.0)); + assert_eq!(objects.len(), 2); + assert_eq!(objects[0].0, "rsync://rsync.example.test/repo/a.roa"); + assert_eq!(objects[0].1, b"base-a"); + assert_eq!(objects[1].0, "rsync://rsync.example.test/repo/sub/b.cer"); + assert_eq!(objects[1].1, b"delta-b"); + } + #[test] fn delta_rsync_fetcher_uses_base_only_when_delta_has_no_module_entry() { let (_temp, base_archive, base_locks, delta_archive, delta_locks) = diff --git a/src/sync/repo.rs b/src/sync/repo.rs index 3f8bb30..b0b8946 100644 --- a/src/sync/repo.rs +++ b/src/sync/repo.rs @@ -6,15 +6,15 @@ use crate::policy::{Policy, SyncPreference}; use crate::replay::archive::{ReplayArchiveIndex, ReplayTransport}; use crate::replay::delta_archive::{ReplayDeltaArchiveIndex, ReplayDeltaRrdpKind}; use crate::report::{RfcRef, Warning}; -use crate::storage::RocksStore; +use crate::storage::{RawByHashEntry, RocksStore}; use crate::sync::rrdp::sync_from_notification_with_timing_and_download_log; use crate::sync::rrdp::{Fetcher as HttpFetcher, RrdpSyncError, load_rrdp_local_state}; 
use crate::sync::store_projection::{ build_repository_view_present_entry, build_repository_view_withdrawn_entry, - prepare_raw_by_hash_evidence_batch, + compute_sha256_hex, infer_object_type_from_uri, }; -use std::collections::HashSet; +use std::collections::{BTreeMap, HashSet}; use std::thread; use std::time::Duration; @@ -90,6 +90,13 @@ pub fn sync_publication_point( t.record_count("repo_sync_rrdp_ok_total", 1); t.record_count("repo_sync_rrdp_objects_written_total", written as u64); } + crate::progress_log::emit( + "repo_sync_rrdp_ok", + serde_json::json!({ + "notify_uri": notification_uri, + "objects_written": written, + }), + ); Ok(RepoSyncResult { source: RepoSyncSource::Rrdp, objects_written: written, @@ -100,6 +107,14 @@ pub fn sync_publication_point( if let Some(t) = timing.as_ref() { t.record_count("repo_sync_rrdp_failed_total", 1); } + crate::progress_log::emit( + "rrdp_fallback_rsync", + serde_json::json!({ + "notify_uri": notification_uri, + "rsync_base_uri": rsync_base_uri, + "rrdp_error": err.to_string(), + }), + ); let warnings = vec![ Warning::new(format!("RRDP failed; falling back to rsync: {err}")) .with_rfc_refs(&[RfcRef("RFC 8182 §3.4.5")]) @@ -132,6 +147,13 @@ pub fn sync_publication_point( timing, download_log, )?; + crate::progress_log::emit( + "repo_sync_rsync_direct", + serde_json::json!({ + "rsync_base_uri": rsync_base_uri, + "objects_written": written, + }), + ); if let Some(t) = timing.as_ref() { t.record_count("repo_sync_rsync_direct_total", 1); t.record_count("repo_sync_rsync_objects_written_total", written as u64); @@ -447,12 +469,27 @@ fn try_rrdp_sync_with_retry( loop { attempt += 1; + crate::progress_log::emit( + "rrdp_sync_attempt", + serde_json::json!({ + "notify_uri": notification_uri, + "attempt": attempt, + }), + ); if let Some(t) = timing.as_ref() { t.record_count("rrdp_retry_attempt_total", 1); } match try_rrdp_sync(store, notification_uri, http_fetcher, timing, download_log) { Ok(written) => { + crate::progress_log::emit( + 
"rrdp_sync_success", + serde_json::json!({ + "notify_uri": notification_uri, + "attempt": attempt, + "objects_written": written, + }), + ); if attempt > 1 { if let Some(t) = timing.as_ref() { t.record_count("rrdp_retry_success_total", 1); @@ -467,6 +504,14 @@ fn try_rrdp_sync_with_retry( }; if retryable && attempt < max_attempts { + crate::progress_log::emit( + "rrdp_sync_retry", + serde_json::json!({ + "notify_uri": notification_uri, + "attempt": attempt, + "error": err.to_string(), + }), + ); if let Some(t) = timing.as_ref() { t.record_count("rrdp_retry_sleep_total", 1); } @@ -480,6 +525,15 @@ fn try_rrdp_sync_with_retry( continue; } + crate::progress_log::emit( + "rrdp_sync_failed", + serde_json::json!({ + "notify_uri": notification_uri, + "attempt": attempt, + "retryable": retryable, + "error": err.to_string(), + }), + ); if let Some(t) = timing.as_ref() { match &err { RrdpSyncError::Fetch(_) => t.record_count("rrdp_failed_fetch_total", 1), @@ -503,18 +557,47 @@ fn rsync_sync_into_current_store( timing: Option<&TimingHandle>, download_log: Option<&DownloadLogHandle>, ) -> Result { + let started = std::time::Instant::now(); + crate::progress_log::emit( + "rsync_sync_start", + serde_json::json!({ + "rsync_base_uri": rsync_base_uri, + "dedup_key": rsync_fetcher.dedup_key(rsync_base_uri), + }), + ); let _s = timing .as_ref() .map(|t| t.span_rrdp_repo_step(rsync_base_uri, "rsync_fetch_objects")); let _p = timing.as_ref().map(|t| t.span_phase("rsync_fetch_total")); let mut dl_span = download_log.map(|dl| dl.span_download(AuditDownloadKind::Rsync, rsync_base_uri)); - let objects = match rsync_fetcher.fetch_objects(rsync_base_uri) { + let mut new_set: HashSet = HashSet::new(); + let mut uri_to_hash: BTreeMap = BTreeMap::new(); + let mut pending_raw: BTreeMap = BTreeMap::new(); + let (object_count, bytes_total) = match rsync_fetcher.visit_objects(rsync_base_uri, &mut |uri, bytes| { + let sha256_hex = compute_sha256_hex(&bytes); + new_set.insert(uri.clone()); + 
uri_to_hash.insert(uri.clone(), sha256_hex.clone()); + let entry = pending_raw + .entry(sha256_hex.clone()) + .or_insert_with(|| RawByHashEntry::from_bytes(sha256_hex.clone(), bytes.clone())); + if entry.bytes != bytes { + return Err(format!( + "raw_by_hash collision for {uri}: same sha256 maps to different bytes" + )); + } + if !entry.origin_uris.iter().any(|existing| existing == &uri) { + entry.origin_uris.push(uri.clone()); + } + if entry.object_type.is_none() { + entry.object_type = infer_object_type_from_uri(&uri); + } + Ok(()) + }) { Ok(v) => { - let bytes_total: u64 = v.iter().map(|(_u, b)| b.len() as u64).sum::(); if let Some(s) = dl_span.as_mut() { - s.set_objects(v.len() as u64, bytes_total); - s.set_bytes(bytes_total); + s.set_objects(v.0 as u64, v.1); + s.set_bytes(v.1); s.set_ok(); } v @@ -526,9 +609,17 @@ fn rsync_sync_into_current_store( return Err(e.into()); } }; + crate::progress_log::emit( + "rsync_sync_fetch_done", + serde_json::json!({ + "rsync_base_uri": rsync_base_uri, + "object_count": object_count, + "bytes_total": bytes_total, + "duration_ms": started.elapsed().as_millis() as u64, + }), + ); if let Some(t) = timing.as_ref() { - t.record_count("rsync_objects_fetched_total", objects.len() as u64); - let bytes_total: u64 = objects.iter().map(|(_u, b)| b.len() as u64).sum::(); + t.record_count("rsync_objects_fetched_total", object_count as u64); t.record_count("rsync_objects_bytes_total", bytes_total); } drop(_p); @@ -536,16 +627,47 @@ fn rsync_sync_into_current_store( let existing_view = store .list_repository_view_entries_with_prefix(rsync_base_uri) .map_err(|e| RepoSyncError::Storage(e.to_string()))?; - let new_set: HashSet<&str> = objects.iter().map(|(uri, _)| uri.as_str()).collect(); let _proj = timing .as_ref() .map(|t| t.span_phase("rsync_write_current_store_total")); - let prepared_raw = prepare_raw_by_hash_evidence_batch(store, &objects) - .map_err(RepoSyncError::Storage)?; + let hashes: Vec = pending_raw.keys().cloned().collect(); + 
let existing_entries = store + .get_raw_by_hash_entries_batch(&hashes) + .map_err(|e| RepoSyncError::Storage(e.to_string()))?; + let mut entries_to_write = Vec::new(); + for (hash, existing_opt) in hashes.into_iter().zip(existing_entries.into_iter()) { + let mut pending_entry = pending_raw + .remove(&hash) + .ok_or_else(|| RepoSyncError::Storage(format!("missing pending raw entry for {hash}")))?; + match existing_opt { + Some(mut existing) => { + if existing.bytes != pending_entry.bytes { + return Err(RepoSyncError::Storage(format!( + "raw_by_hash collision for hash {hash}: same sha256 maps to different bytes" + ))); + } + let mut changed = false; + for uri in pending_entry.origin_uris.drain(..) { + if !existing.origin_uris.iter().any(|existing_uri| existing_uri == &uri) { + existing.origin_uris.push(uri); + changed = true; + } + } + if existing.object_type.is_none() && pending_entry.object_type.is_some() { + existing.object_type = pending_entry.object_type; + changed = true; + } + if changed { + entries_to_write.push(existing); + } + } + None => entries_to_write.push(pending_entry), + } + } let mut repository_view_entries = Vec::new(); for entry in existing_view { - if !new_set.contains(entry.rsync_uri.as_str()) { + if !new_set.contains(&entry.rsync_uri) { repository_view_entries.push(build_repository_view_withdrawn_entry( rsync_base_uri, &entry.rsync_uri, @@ -554,9 +676,8 @@ fn rsync_sync_into_current_store( } } - for (uri, _bytes) in &objects { - let current_hash = prepared_raw - .uri_to_hash + for uri in &new_set { + let current_hash = uri_to_hash .get(uri) .cloned() .ok_or_else(|| RepoSyncError::Storage(format!("missing raw_by_hash mapping for {uri}")))?; @@ -568,13 +689,35 @@ fn rsync_sync_into_current_store( } store - .put_raw_by_hash_entries_batch_unchecked(&prepared_raw.entries_to_write) + .put_raw_by_hash_entries_batch_unchecked(&entries_to_write) .map_err(|e| RepoSyncError::Storage(e.to_string()))?; store .put_projection_batch(&repository_view_entries, 
&[], &[]) .map_err(|e| RepoSyncError::Storage(e.to_string()))?; - Ok(objects.len()) + let total_duration_ms = started.elapsed().as_millis() as u64; + crate::progress_log::emit( + "rsync_sync_done", + serde_json::json!({ + "rsync_base_uri": rsync_base_uri, + "object_count": object_count, + "bytes_total": bytes_total, + "duration_ms": total_duration_ms, + }), + ); + if (total_duration_ms as f64) / 1000.0 >= crate::progress_log::slow_threshold_secs() { + crate::progress_log::emit( + "rsync_sync_slow", + serde_json::json!({ + "rsync_base_uri": rsync_base_uri, + "object_count": object_count, + "bytes_total": bytes_total, + "duration_ms": total_duration_ms, + }), + ); + } + + Ok(object_count) } #[cfg(test)] diff --git a/src/validation/ca_instance.rs b/src/validation/ca_instance.rs index fbca776..ca72d27 100644 --- a/src/validation/ca_instance.rs +++ b/src/validation/ca_instance.rs @@ -3,11 +3,15 @@ use crate::data_model::rc::{ResourceCertKind, ResourceCertificate, SubjectInfoAc #[derive(Clone, Debug, PartialEq, Eq)] pub struct CaInstanceUris { - /// CA repository base rsync URI (must end with `/`). + /// rsync sync base URI used for live/replay publication point fetches (must end with `/`). + /// + /// This is the parent directory of `manifest_rsync_uri`, not necessarily the full + /// `id-ad-caRepository` URI. Using the manifest parent keeps sync scope narrow while + /// `publication_point_rsync_uri` preserves the full RFC 9286 publication-point value. pub rsync_base_uri: String, /// rsync URI for the manifest object (`.mft`). pub manifest_rsync_uri: String, - /// Publication point rsync URI (RFC 9286 terminology). In v1 this equals `rsync_base_uri`. + /// Publication point rsync URI (RFC 9286 terminology). pub publication_point_rsync_uri: String, /// Optional RRDP notification URI (https). 
pub rrdp_notification_uri: Option, @@ -104,8 +108,6 @@ pub fn ca_instance_uris_from_ca_certificate( publication_point_rsync_uri.push('/'); } - let rsync_base_uri = publication_point_rsync_uri.clone(); - let manifest_rsync_uri = manifest.ok_or(CaInstanceUrisError::MissingRpkiManifest)?; if !manifest_rsync_uri.starts_with(&publication_point_rsync_uri) { return Err(CaInstanceUrisError::ManifestNotUnderPublicationPoint { @@ -113,9 +115,16 @@ pub fn ca_instance_uris_from_ca_certificate( publication_point_rsync_uri, }); } + let manifest_parent = manifest_rsync_uri + .rsplit_once('/') + .map(|(parent, _)| format!("{parent}/")) + .ok_or_else(|| CaInstanceUrisError::ManifestNotUnderPublicationPoint { + manifest_rsync_uri: manifest_rsync_uri.clone(), + publication_point_rsync_uri: publication_point_rsync_uri.clone(), + })?; Ok(CaInstanceUris { - rsync_base_uri, + rsync_base_uri: manifest_parent, manifest_rsync_uri, publication_point_rsync_uri, rrdp_notification_uri: notify, diff --git a/src/validation/run_tree_from_tal.rs b/src/validation/run_tree_from_tal.rs index 6129dc1..cc64823 100644 --- a/src/validation/run_tree_from_tal.rs +++ b/src/validation/run_tree_from_tal.rs @@ -7,7 +7,9 @@ use crate::data_model::ta::TrustAnchor; use crate::replay::archive::ReplayArchiveIndex; use crate::replay::delta_archive::ReplayDeltaArchiveIndex; use crate::replay::delta_fetch_http::PayloadDeltaReplayHttpFetcher; -use crate::replay::delta_fetch_rsync::PayloadDeltaReplayRsyncFetcher; +use crate::replay::delta_fetch_rsync::{ + PayloadDeltaReplayCurrentStoreRsyncFetcher, PayloadDeltaReplayRsyncFetcher, +}; use crate::replay::fetch_http::PayloadReplayHttpFetcher; use crate::replay::fetch_rsync::PayloadReplayRsyncFetcher; use crate::sync::rrdp::Fetcher; @@ -629,6 +631,33 @@ fn build_payload_delta_replay_runner<'a>( } } +fn build_payload_delta_replay_current_store_runner<'a>( + store: &'a crate::storage::RocksStore, + policy: &'a crate::policy::Policy, + delta_index: Arc, + http_fetcher: &'a 
PayloadDeltaReplayHttpFetcher, + rsync_fetcher: &'a PayloadDeltaReplayCurrentStoreRsyncFetcher<'a>, + validation_time: time::OffsetDateTime, + timing: Option, + download_log: Option, +) -> Rpkiv1PublicationPointRunner<'a> { + Rpkiv1PublicationPointRunner { + store, + policy, + http_fetcher, + rsync_fetcher, + validation_time, + timing, + download_log, + replay_archive_index: None, + replay_delta_index: Some(delta_index), + rrdp_dedup: true, + rrdp_repo_cache: Mutex::new(HashMap::new()), + rsync_dedup: true, + rsync_repo_cache: Mutex::new(HashMap::new()), + } +} + fn run_payload_delta_replay_audit_inner( store: &crate::storage::RocksStore, policy: &crate::policy::Policy, @@ -808,6 +837,110 @@ pub fn run_tree_from_tal_and_ta_der_payload_delta_replay_serial_audit_with_timin ) } +fn run_payload_delta_replay_step_audit_inner( + store: &crate::storage::RocksStore, + policy: &crate::policy::Policy, + discovery: DiscoveredRootCaInstance, + delta_payload_archive_root: &std::path::Path, + previous_locks_path: &std::path::Path, + delta_locks_path: &std::path::Path, + validation_time: time::OffsetDateTime, + config: &TreeRunConfig, + timing: Option, +) -> Result { + let delta_index = Arc::new( + ReplayDeltaArchiveIndex::load(delta_payload_archive_root, delta_locks_path) + .map_err(|e| RunTreeFromTalError::Replay(e.to_string()))?, + ); + delta_index + .validate_base_locks_sha256_file(previous_locks_path) + .map_err(|e| RunTreeFromTalError::Replay(e.to_string()))?; + + let root = root_handle_from_trust_anchor( + &discovery.trust_anchor, + derive_tal_id(&discovery), + None, + &discovery.ca_instance, + ); + + let delta_http_fetcher = PayloadDeltaReplayHttpFetcher::from_index(delta_index.clone()) + .map_err(|e| RunTreeFromTalError::Replay(e.to_string()))?; + let delta_rsync_fetcher = + PayloadDeltaReplayCurrentStoreRsyncFetcher::new(store, delta_index.clone()); + let download_log = DownloadLogHandle::new(); + + let (tree, publication_points) = if let Some(t) = timing.as_ref() { + 
let _phase = t.span_phase("payload_delta_replay_step_total"); + let delta_runner = build_payload_delta_replay_current_store_runner( + store, + policy, + delta_index, + &delta_http_fetcher, + &delta_rsync_fetcher, + validation_time, + Some(t.clone()), + Some(download_log.clone()), + ); + let TreeRunAuditOutput { + tree, + publication_points, + } = run_tree_serial_audit(root, &delta_runner, config)?; + (tree, publication_points) + } else { + let delta_runner = build_payload_delta_replay_current_store_runner( + store, + policy, + delta_index, + &delta_http_fetcher, + &delta_rsync_fetcher, + validation_time, + None, + Some(download_log.clone()), + ); + let TreeRunAuditOutput { + tree, + publication_points, + } = run_tree_serial_audit(root, &delta_runner, config)?; + (tree, publication_points) + }; + let downloads = download_log.snapshot_events(); + let download_stats = DownloadLogHandle::stats_from_events(&downloads); + Ok(RunTreeFromTalAuditOutput { + discovery, + tree, + publication_points, + downloads, + download_stats, + }) +} + +pub fn run_tree_from_tal_and_ta_der_payload_delta_replay_step_serial_audit( + store: &crate::storage::RocksStore, + policy: &crate::policy::Policy, + tal_bytes: &[u8], + ta_der: &[u8], + resolved_ta_uri: Option<&Url>, + delta_payload_archive_root: &std::path::Path, + previous_locks_path: &std::path::Path, + delta_locks_path: &std::path::Path, + validation_time: time::OffsetDateTime, + config: &TreeRunConfig, +) -> Result { + let discovery = + discover_root_ca_instance_from_tal_and_ta_der(tal_bytes, ta_der, resolved_ta_uri)?; + run_payload_delta_replay_step_audit_inner( + store, + policy, + discovery, + delta_payload_archive_root, + previous_locks_path, + delta_locks_path, + validation_time, + config, + None, + ) +} + #[cfg(test)] mod replay_api_tests { use super::*; diff --git a/src/validation/tree_runner.rs b/src/validation/tree_runner.rs index 22a4af7..b73da1f 100644 --- a/src/validation/tree_runner.rs +++ b/src/validation/tree_runner.rs 
@@ -83,6 +83,7 @@ impl<'a> PublicationPointRunner for Rpkiv1PublicationPointRunner<'a> { &self, ca: &CaInstanceHandle, ) -> Result { + let publication_point_started = std::time::Instant::now(); let _pp_total = self .timing .as_ref() @@ -97,6 +98,15 @@ impl<'a> PublicationPointRunner for Rpkiv1PublicationPointRunner<'a> { } let mut warnings: Vec = Vec::new(); + crate::progress_log::emit( + "publication_point_start", + serde_json::json!({ + "manifest_rsync_uri": ca.manifest_rsync_uri, + "publication_point_rsync_uri": ca.publication_point_rsync_uri, + "rsync_base_uri": ca.rsync_base_uri, + "rrdp_notification_uri": ca.rrdp_notification_uri, + }), + ); let attempted_rrdp = self.policy.sync_preference == crate::policy::SyncPreference::RrdpThenRsync; @@ -134,7 +144,7 @@ impl<'a> PublicationPointRunner for Rpkiv1PublicationPointRunner<'a> { } if !skip_sync_due_to_dedup && effective_notification_uri.is_none() && self.rsync_dedup { - let base = normalize_rsync_base_uri(&ca.rsync_base_uri); + let base = self.rsync_fetcher.dedup_key(&ca.rsync_base_uri); let hit_ok = self .rsync_repo_cache .lock() @@ -153,8 +163,14 @@ impl<'a> PublicationPointRunner for Rpkiv1PublicationPointRunner<'a> { } } - let (repo_sync_ok, repo_sync_err): (bool, Option) = if skip_sync_due_to_dedup { - (true, None) + let repo_sync_started = std::time::Instant::now(); + let (repo_sync_ok, repo_sync_err, repo_sync_source): (bool, Option, Option) = if skip_sync_due_to_dedup { + let source = if effective_notification_uri.is_some() { + Some("rrdp_dedup_skip".to_string()) + } else { + Some("rsync_dedup_skip".to_string()) + }; + (true, None, source) } else { let repo_key = effective_notification_uri.unwrap_or_else(|| ca.rsync_base_uri.as_str()); let _repo_total = self @@ -199,7 +215,7 @@ impl<'a> PublicationPointRunner for Rpkiv1PublicationPointRunner<'a> { } { Ok(res) => { if self.rsync_dedup && res.source == crate::sync::repo::RepoSyncSource::Rsync { - let base = normalize_rsync_base_uri(&ca.rsync_base_uri); + 
let base = self.rsync_fetcher.dedup_key(&ca.rsync_base_uri); self.rsync_repo_cache .lock() .expect("rsync_repo_cache lock") @@ -229,7 +245,7 @@ impl<'a> PublicationPointRunner for Rpkiv1PublicationPointRunner<'a> { } warnings.extend(res.warnings); - (true, None) + (true, None, Some(repo_sync_source_label(res.source).to_string())) } Err(e) => { if attempted_rrdp && self.rrdp_dedup { @@ -247,10 +263,22 @@ impl<'a> PublicationPointRunner for Rpkiv1PublicationPointRunner<'a> { .with_rfc_refs(&[RfcRef("RFC 8182 §3.4.5"), RfcRef("RFC 9286 §6.6")]) .with_context(&ca.rsync_base_uri), ); - (false, Some(e.to_string())) + (false, Some(e.to_string()), None) } } }; + let repo_sync_duration_ms = repo_sync_started.elapsed().as_millis() as u64; + crate::progress_log::emit( + "publication_point_repo_sync_done", + serde_json::json!({ + "manifest_rsync_uri": ca.manifest_rsync_uri, + "publication_point_rsync_uri": ca.publication_point_rsync_uri, + "repo_sync_ok": repo_sync_ok, + "repo_sync_source": repo_sync_source, + "repo_sync_error": repo_sync_err, + "repo_sync_duration_ms": repo_sync_duration_ms, + }), + ); let fresh_publication_point = { let _manifest_total = self @@ -334,23 +362,71 @@ impl<'a> PublicationPointRunner for Rpkiv1PublicationPointRunner<'a> { let audit = build_publication_point_audit_from_snapshot( ca, PublicationPointSource::Fresh, + repo_sync_source.as_deref(), + Some(repo_sync_duration_ms), + repo_sync_err.as_deref(), &pack, &warnings, &objects, &child_audits, ); - - Ok(PublicationPointRunResult { + let result = PublicationPointRunResult { source: PublicationPointSource::Fresh, snapshot: Some(pack), warnings, objects, audit, discovered_children, - }) + }; + let total_duration_ms = publication_point_started.elapsed().as_millis() as u64; + crate::progress_log::emit( + "publication_point_finish", + serde_json::json!({ + "manifest_rsync_uri": ca.manifest_rsync_uri, + "publication_point_rsync_uri": ca.publication_point_rsync_uri, + "source": "fresh", + 
"repo_sync_source": repo_sync_source, + "repo_sync_duration_ms": repo_sync_duration_ms, + "total_duration_ms": total_duration_ms, + "warning_count": result.warnings.len(), + "vrp_count": result.objects.vrps.len(), + "vap_count": result.objects.aspas.len(), + "router_key_count": result.objects.router_keys.len(), + "child_count": result.discovered_children.len(), + }), + ); + if (total_duration_ms as f64) / 1000.0 >= crate::progress_log::slow_threshold_secs() { + crate::progress_log::emit( + "publication_point_slow", + serde_json::json!({ + "manifest_rsync_uri": ca.manifest_rsync_uri, + "publication_point_rsync_uri": ca.publication_point_rsync_uri, + "source": "fresh", + "repo_sync_source": repo_sync_source, + "repo_sync_duration_ms": repo_sync_duration_ms, + "total_duration_ms": total_duration_ms, + }), + ); + } + Ok(result) } Err(fresh_err) => match self.policy.ca_failed_fetch_policy { - crate::policy::CaFailedFetchPolicy::StopAllOutput => Err(format!("{fresh_err}")), + crate::policy::CaFailedFetchPolicy::StopAllOutput => { + let total_duration_ms = publication_point_started.elapsed().as_millis() as u64; + crate::progress_log::emit( + "publication_point_finish", + serde_json::json!({ + "manifest_rsync_uri": ca.manifest_rsync_uri, + "publication_point_rsync_uri": ca.publication_point_rsync_uri, + "source": "error", + "repo_sync_source": repo_sync_source, + "repo_sync_duration_ms": repo_sync_duration_ms, + "total_duration_ms": total_duration_ms, + "error": fresh_err.to_string(), + }), + ); + Err(format!("{fresh_err}")) + } crate::policy::CaFailedFetchPolicy::ReuseCurrentInstanceVcir => { let projection = project_current_instance_vcir_on_failed_fetch( self.store, @@ -363,34 +439,60 @@ impl<'a> PublicationPointRunner for Rpkiv1PublicationPointRunner<'a> { let audit = build_publication_point_audit_from_vcir( ca, projection.source, + repo_sync_source.as_deref(), + Some(repo_sync_duration_ms), + repo_sync_err.as_deref(), projection.vcir.as_ref(), 
projection.snapshot.as_ref(), &warnings, &projection.objects, &projection.child_audits, ); - Ok(PublicationPointRunResult { + let result = PublicationPointRunResult { source: projection.source, snapshot: projection.snapshot, warnings, objects: projection.objects, audit, discovered_children: projection.discovered_children, - }) + }; + let total_duration_ms = publication_point_started.elapsed().as_millis() as u64; + crate::progress_log::emit( + "publication_point_finish", + serde_json::json!({ + "manifest_rsync_uri": ca.manifest_rsync_uri, + "publication_point_rsync_uri": ca.publication_point_rsync_uri, + "source": source_label(result.source), + "repo_sync_source": repo_sync_source, + "repo_sync_duration_ms": repo_sync_duration_ms, + "total_duration_ms": total_duration_ms, + "warning_count": result.warnings.len(), + "vrp_count": result.objects.vrps.len(), + "vap_count": result.objects.aspas.len(), + "router_key_count": result.objects.router_keys.len(), + "child_count": result.discovered_children.len(), + }), + ); + if (total_duration_ms as f64) / 1000.0 >= crate::progress_log::slow_threshold_secs() { + crate::progress_log::emit( + "publication_point_slow", + serde_json::json!({ + "manifest_rsync_uri": ca.manifest_rsync_uri, + "publication_point_rsync_uri": ca.publication_point_rsync_uri, + "source": source_label(result.source), + "repo_sync_source": repo_sync_source, + "repo_sync_duration_ms": repo_sync_duration_ms, + "total_duration_ms": total_duration_ms, + }), + ); + } + Ok(result) } }, } } } -fn normalize_rsync_base_uri(s: &str) -> String { - if s.ends_with('/') { - s.to_string() - } else { - format!("{s}/") - } -} - struct ChildDiscoveryOutput { children: Vec, audits: Vec, @@ -1044,6 +1146,13 @@ fn source_label(source: PublicationPointSource) -> String { } } +fn repo_sync_source_label(source: crate::sync::repo::RepoSyncSource) -> &'static str { + match source { + crate::sync::repo::RepoSyncSource::Rrdp => "rrdp", + crate::sync::repo::RepoSyncSource::Rsync => 
"rsync", + } +} + fn kind_from_vcir_artifact_kind(kind: VcirArtifactKind) -> AuditObjectKind { match kind { VcirArtifactKind::Mft => AuditObjectKind::Manifest, @@ -1068,6 +1177,9 @@ fn audit_result_from_vcir_status(status: VcirArtifactValidationStatus) -> AuditO fn build_publication_point_audit_from_snapshot( ca: &CaInstanceHandle, source: PublicationPointSource, + repo_sync_source: Option<&str>, + repo_sync_duration_ms: Option, + repo_sync_error: Option<&str>, pack: &PublicationPointSnapshot, runner_warnings: &[Warning], objects: &crate::validation::objects::ObjectsOutput, @@ -1158,6 +1270,9 @@ fn build_publication_point_audit_from_snapshot( publication_point_rsync_uri: ca.publication_point_rsync_uri.clone(), rrdp_notification_uri: ca.rrdp_notification_uri.clone(), source: source_label(source), + repo_sync_source: repo_sync_source.map(ToString::to_string), + repo_sync_duration_ms, + repo_sync_error: repo_sync_error.map(ToString::to_string), this_update_rfc3339_utc: pack.this_update.rfc3339_utc.clone(), next_update_rfc3339_utc: pack.next_update.rfc3339_utc.clone(), verified_at_rfc3339_utc: pack.verified_at.rfc3339_utc.clone(), @@ -1169,6 +1284,9 @@ fn build_publication_point_audit_from_snapshot( fn build_publication_point_audit_from_vcir( ca: &CaInstanceHandle, source: PublicationPointSource, + repo_sync_source: Option<&str>, + repo_sync_duration_ms: Option, + repo_sync_error: Option<&str>, vcir: Option<&ValidatedCaInstanceResult>, pack: Option<&PublicationPointSnapshot>, runner_warnings: &[Warning], @@ -1179,6 +1297,9 @@ fn build_publication_point_audit_from_vcir( return build_publication_point_audit_from_snapshot( ca, source, + repo_sync_source, + repo_sync_duration_ms, + repo_sync_error, pack, runner_warnings, objects, @@ -1200,6 +1321,9 @@ fn build_publication_point_audit_from_vcir( publication_point_rsync_uri: ca.publication_point_rsync_uri.clone(), rrdp_notification_uri: ca.rrdp_notification_uri.clone(), source: source_label(source), + repo_sync_source: 
repo_sync_source.map(ToString::to_string), + repo_sync_duration_ms, + repo_sync_error: repo_sync_error.map(ToString::to_string), this_update_rfc3339_utc: String::new(), next_update_rfc3339_utc: String::new(), verified_at_rfc3339_utc: String::new(), @@ -1278,6 +1402,9 @@ fn build_publication_point_audit_from_vcir( publication_point_rsync_uri: ca.publication_point_rsync_uri.clone(), rrdp_notification_uri: ca.rrdp_notification_uri.clone(), source: source_label(source), + repo_sync_source: repo_sync_source.map(ToString::to_string), + repo_sync_duration_ms, + repo_sync_error: repo_sync_error.map(ToString::to_string), this_update_rfc3339_utc: vcir .validated_manifest_meta .validated_manifest_this_update @@ -3896,6 +4023,112 @@ authorityKeyIdentifier = keyid:always ); } + #[test] + fn runner_rsync_dedup_skips_second_sync_for_same_module_scope() { + let fixture_dir = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("tests/fixtures/repository/rpki.cernet.net/repo/cernet/0"); + assert!(fixture_dir.is_dir(), "fixture directory must exist"); + + let first_base_uri = "rsync://rpki.cernet.net/repo/cernet/0/".to_string(); + let second_base_uri = "rsync://rpki.cernet.net/repo/cernet/0/sub/".to_string(); + let manifest_file = "05FC9C5B88506F7C0D3F862C8895BED67E9F8EBA.mft"; + let manifest_rsync_uri = format!("{first_base_uri}{manifest_file}"); + + let fixture_manifest_bytes = + std::fs::read(fixture_dir.join(manifest_file)).expect("read manifest fixture"); + let fixture_manifest = + crate::data_model::manifest::ManifestObject::decode_der(&fixture_manifest_bytes) + .expect("decode manifest fixture"); + let validation_time = fixture_manifest.manifest.this_update + time::Duration::seconds(60); + + let store_dir = tempfile::tempdir().expect("store dir"); + let store = RocksStore::open(store_dir.path()).expect("open rocksdb"); + let policy = Policy { + sync_preference: crate::policy::SyncPreference::RsyncOnly, + ..Policy::default() + }; + + let issuer_ca_der = std::fs::read( 
+ std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")).join( + "tests/fixtures/repository/rpki.apnic.net/repository/B527EF581D6611E2BB468F7C72FD1FF2/BfycW4hQb3wNP4YsiJW-1n6fjro.cer", + ), + ) + .expect("read issuer ca fixture"); + let issuer_ca = ResourceCertificate::decode_der(&issuer_ca_der).expect("decode issuer ca"); + + let handle = CaInstanceHandle { + depth: 0, + tal_id: "test-tal".to_string(), + parent_manifest_rsync_uri: None, + ca_certificate_der: issuer_ca_der, + ca_certificate_rsync_uri: Some("rsync://rpki.apnic.net/repository/B527EF581D6611E2BB468F7C72FD1FF2/BfycW4hQb3wNP4YsiJW-1n6fjro.cer".to_string()), + effective_ip_resources: issuer_ca.tbs.extensions.ip_resources.clone(), + effective_as_resources: issuer_ca.tbs.extensions.as_resources.clone(), + rsync_base_uri: first_base_uri.clone(), + manifest_rsync_uri: manifest_rsync_uri.clone(), + publication_point_rsync_uri: first_base_uri.clone(), + rrdp_notification_uri: None, + }; + let second_handle = CaInstanceHandle { + rsync_base_uri: second_base_uri.clone(), + publication_point_rsync_uri: second_base_uri.clone(), + ..handle.clone() + }; + + struct ModuleScopeRsyncFetcher { + inner: LocalDirRsyncFetcher, + calls: Arc, + } + impl RsyncFetcher for ModuleScopeRsyncFetcher { + fn fetch_objects( + &self, + rsync_base_uri: &str, + ) -> Result)>, RsyncFetchError> { + self.calls.fetch_add(1, Ordering::SeqCst); + self.inner.fetch_objects(rsync_base_uri) + } + + fn dedup_key(&self, _rsync_base_uri: &str) -> String { + "rsync://rpki.cernet.net/repo/".to_string() + } + } + + let calls = Arc::new(AtomicUsize::new(0)); + let rsync = ModuleScopeRsyncFetcher { + inner: LocalDirRsyncFetcher::new(&fixture_dir), + calls: calls.clone(), + }; + + let runner = Rpkiv1PublicationPointRunner { + store: &store, + policy: &policy, + http_fetcher: &NeverHttpFetcher, + rsync_fetcher: &rsync, + validation_time, + timing: None, + download_log: None, + replay_archive_index: None, + replay_delta_index: None, + rrdp_dedup: false, + 
rrdp_repo_cache: Mutex::new(HashMap::new()), + rsync_dedup: true, + rsync_repo_cache: Mutex::new(HashMap::new()), + }; + + let first = runner.run_publication_point(&handle).expect("first run ok"); + assert_eq!(first.source, PublicationPointSource::Fresh); + + let second = runner + .run_publication_point(&second_handle) + .expect("second run ok"); + assert!(matches!( + second.source, + PublicationPointSource::Fresh | PublicationPointSource::VcirCurrentInstance + )); + + assert_eq!(calls.load(Ordering::SeqCst), 1, "module-scope dedup should skip second sync"); + } + #[test] fn runner_when_repo_sync_fails_uses_current_instance_vcir_and_keeps_children_empty_for_fixture() { @@ -4035,6 +4268,9 @@ authorityKeyIdentifier = keyid:always let audit = build_publication_point_audit_from_snapshot( &ca, pp.source, + None, + None, + None, &pp.snapshot, &[], &objects, @@ -4103,6 +4339,9 @@ authorityKeyIdentifier = keyid:always let audit = build_publication_point_audit_from_snapshot( &issuer, pp.source, + None, + None, + None, &pp.snapshot, &[], &objects, @@ -4857,6 +5096,9 @@ authorityKeyIdentifier = keyid:always let audit = build_publication_point_audit_from_vcir( &ca, PublicationPointSource::VcirCurrentInstance, + None, + None, + None, Some(&vcir), None, &runner_warnings, @@ -4916,6 +5158,9 @@ authorityKeyIdentifier = keyid:always PublicationPointSource::FailedFetchNoCache, None, None, + None, + None, + None, &[Warning::new("runner warning")], &crate::validation::objects::ObjectsOutput { vrps: Vec::new(), @@ -5155,7 +5400,7 @@ authorityKeyIdentifier = keyid:always .expect("rsync dedup run"); assert_eq!(fourth.source, PublicationPointSource::Fresh); assert_eq!( - normalize_rsync_base_uri("rsync://example.test/repo"), + crate::fetch::rsync::normalize_rsync_base_uri("rsync://example.test/repo"), "rsync://example.test/repo/" ); } diff --git a/tests/test_apnic_rrdp_delta_live_20260226.rs b/tests/test_apnic_rrdp_delta_live_20260226.rs index 6ef5132..ea8979f 100644 --- 
a/tests/test_apnic_rrdp_delta_live_20260226.rs +++ b/tests/test_apnic_rrdp_delta_live_20260226.rs @@ -28,6 +28,7 @@ fn live_http_fetcher() -> BlockingHttpFetcher { .unwrap_or(15 * 60); BlockingHttpFetcher::new(HttpFetcherConfig { timeout: Duration::from_secs(timeout_secs), + large_body_timeout: Duration::from_secs(timeout_secs), user_agent: "rpki-dev/0.1 (stage2 live rrdp delta test)".to_string(), }) .expect("http fetcher")