diff --git a/Cargo.toml b/Cargo.toml index 21e546e..6105dea 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,7 +21,7 @@ ring = "0.17.14" x509-parser = { version = "0.18.0", features = ["verify"] } url = "2.5.8" serde = { version = "1.0.218", features = ["derive"] } -serde_json = "1.0.140" +serde_json = { version = "1.0.140", features = ["raw_value"] } toml = "0.8.20" rocksdb = { version = "0.22.0", optional = true, default-features = false, features = ["lz4"] } serde_cbor = "0.11.2" diff --git a/scripts/coverage.sh b/scripts/coverage.sh index e9ab1b3..237c04f 100755 --- a/scripts/coverage.sh +++ b/scripts/coverage.sh @@ -27,7 +27,7 @@ cleanup() { } trap cleanup EXIT -IGNORE_REGEX='repository_view_stats\.rs|db_stats\.rs|rrdp_state_dump\.rs|ccr_dump\.rs|ccr_verify\.rs|ccr_to_routinator_csv\.rs|ccr_to_compare_views\.rs|cir_materialize\.rs|cir_extract_inputs\.rs|cir_drop_report\.rs|cir_ta_only_fixture\.rs|cir_dump_reject_list\.rs|rpki_object_parse\.rs|triage_ccr_cir_pair\.rs|rpki_artifact_metrics|rpki_inter_rp_metrics|rpki_daemon\.rs|sequence_triage_ccr_cir|ccr_state_compare\.rs|cir_state_compare\.rs|cir_probe_rpki_client_cache\.rs|ccr/compare_view\.rs|progress_log\.rs|cli\.rs|validation/run_tree_from_tal\.rs|validation/tree_parallel\.rs|validation/tree_runner|validation/from_tal\.rs|sync/store_projection\.rs|sync/repo\.rs|sync/rrdp|(^|/)storage(/|\.rs$)|cir/materialize\.rs' 
+IGNORE_REGEX='repository_view_stats\.rs|db_stats\.rs|rrdp_state_dump\.rs|ccr_dump\.rs|ccr_verify\.rs|ccr_to_routinator_csv\.rs|ccr_to_compare_views\.rs|cir_materialize\.rs|cir_extract_inputs\.rs|cir_drop_report\.rs|cir_ta_only_fixture\.rs|cir_dump_reject_list\.rs|rpki_object_parse\.rs|rpki_query_indexer\.rs|rpki_query_service\.rs|triage_ccr_cir_pair\.rs|rpki_artifact_metrics|rpki_inter_rp_metrics|rpki_daemon\.rs|sequence_triage_ccr_cir|ccr_state_compare\.rs|cir_state_compare\.rs|cir_probe_rpki_client_cache\.rs|ccr/compare_view\.rs|progress_log\.rs|cli\.rs|validation/run_tree_from_tal\.rs|validation/tree_parallel\.rs|validation/tree_runner|validation/from_tal\.rs|sync/store_projection\.rs|sync/repo\.rs|sync/rrdp|(^|/)storage(/|\.rs$)|cir/materialize\.rs' # Preserve colored output even though we post-process output by running under a pseudo-TTY. # We run tests only once, then generate both CLI text + HTML reports without rerunning tests. diff --git a/scripts/soak/build_portable_soak_package.sh b/scripts/soak/build_portable_soak_package.sh index c8b9a85..477c4fc 100755 --- a/scripts/soak/build_portable_soak_package.sh +++ b/scripts/soak/build_portable_soak_package.sh @@ -14,7 +14,7 @@ Usage: scripts/soak/build_portable_soak_package.sh [--out-dir ] [--profile ] Requires release binaries to already exist. 
Build them first, for example: - cargo build --release --bin rpki --bin rpki_daemon --bin db_stats + cargo build --release --bin rpki --bin rpki_daemon --bin db_stats --bin rpki_artifact_metrics --bin rpki_query_service --bin rpki_query_indexer USAGE } @@ -53,7 +53,7 @@ else TARGET_BIN_DIR="$REPO_ROOT/target/$PROFILE" fi -REQUIRED_BINS=(rpki rpki_daemon db_stats rpki_artifact_metrics) +REQUIRED_BINS=(rpki rpki_daemon db_stats rpki_artifact_metrics rpki_query_service rpki_query_indexer) OPTIONAL_BINS=( ccr_dump ccr_state_compare diff --git a/scripts/soak/portable-soak.env.example b/scripts/soak/portable-soak.env.example index aa888a1..cb13fe0 100644 --- a/scripts/soak/portable-soak.env.example +++ b/scripts/soak/portable-soak.env.example @@ -90,6 +90,25 @@ METRICS_LISTEN=0.0.0.0:9556 METRICS_POLL_SECS=5 METRICS_INSTANCE=remote231-24h +# 是否启动 query service。启动后默认只索引启动后新完成的 run,避免补历史 run 影响 RP 性能。 +START_QUERY_SERVICE=0 + +# query service 退出策略。长期运行时通常保持 0,让服务持续可查。 +STOP_QUERY_SERVICE_ON_EXIT=0 + +# query service 监听地址和索引保留策略。 +QUERY_LISTEN=0.0.0.0:9560 +QUERY_WATCH_INTERVAL_SECS=5 +QUERY_RETAIN_INDEXED_RUNS=10 +QUERY_PROJECTION_ENTRY_LIMIT=20 + +# 默认不设置最小 run seq,由 query service 自动从“启动时最新 run 的下一轮”开始跟踪。 +# 如需实验性指定起点,可设置为具体数字,例如 QUERY_WATCH_MIN_RUN_SEQ=7208。 +QUERY_WATCH_MIN_RUN_SEQ= + +# 是否允许 query service 补索引历史 run。默认 0,避免大量历史 report.json 扫描和 query-db 写入拖慢 RP。 +QUERY_WATCH_BACKFILL=0 + # 是否启动 package 内置 Prometheus/Grafana monitor stack。 START_MONITOR_STACK=1 diff --git a/scripts/soak/run_24h_soak_with_metrics.sh b/scripts/soak/run_24h_soak_with_metrics.sh index 6db8fc0..f23bfa0 100755 --- a/scripts/soak/run_24h_soak_with_metrics.sh +++ b/scripts/soak/run_24h_soak_with_metrics.sh @@ -20,6 +20,7 @@ SOAK_SCRIPT="${SOAK_SCRIPT:-$PACKAGE_ROOT/run_soak.sh}" HOURLY_REPORT_SCRIPT="${HOURLY_REPORT_SCRIPT:-$PACKAGE_ROOT/scripts/soak/hourly_soak_report.py}" SOAK_DURATION_SECS="${SOAK_DURATION_SECS:-0}" +SOAK_INTERVAL_SECS="${SOAK_INTERVAL_SECS:-${INTERVAL_SECS:-0}}" 
HOURLY_REPORT_INTERVAL_SECS="${HOURLY_REPORT_INTERVAL_SECS:-3600}" SOAK_RETAIN_RUNS="${SOAK_RETAIN_RUNS:-100}" CLEAN_TMP_AFTER_RUN="${CLEAN_TMP_AFTER_RUN:-1}" @@ -30,6 +31,17 @@ STOP_METRICS_SERVICE_ON_EXIT="${STOP_METRICS_SERVICE_ON_EXIT:-0}" METRICS_LISTEN="${METRICS_LISTEN:-0.0.0.0:9556}" METRICS_POLL_SECS="${METRICS_POLL_SECS:-5}" METRICS_INSTANCE="${METRICS_INSTANCE:-remote231-24h}" +START_QUERY_SERVICE="${START_QUERY_SERVICE:-0}" +STOP_QUERY_SERVICE_ON_EXIT="${STOP_QUERY_SERVICE_ON_EXIT:-0}" +QUERY_LISTEN="${QUERY_LISTEN:-0.0.0.0:9560}" +QUERY_DB="${QUERY_DB:-$RUN_ROOT/state/query-db}" +QUERY_EXPORT_ROOT="${QUERY_EXPORT_ROOT:-$RUN_ROOT/state/query-exports}" +QUERY_REPO_BYTES_DB="${QUERY_REPO_BYTES_DB:-$RUN_ROOT/state/db/repo-bytes.db}" +QUERY_WATCH_INTERVAL_SECS="${QUERY_WATCH_INTERVAL_SECS:-5}" +QUERY_WATCH_MIN_RUN_SEQ="${QUERY_WATCH_MIN_RUN_SEQ:-}" +QUERY_WATCH_BACKFILL="${QUERY_WATCH_BACKFILL:-0}" +QUERY_RETAIN_INDEXED_RUNS="${QUERY_RETAIN_INDEXED_RUNS:-10}" +QUERY_PROJECTION_ENTRY_LIMIT="${QUERY_PROJECTION_ENTRY_LIMIT:-20}" PROMETHEUS_RETENTION="${PROMETHEUS_RETENTION:-7d}" SEND_FEISHU="${SEND_FEISHU:-1}" FEISHU_DRY_RUN="${FEISHU_DRY_RUN:-0}" @@ -45,6 +57,7 @@ WARNING_MAX="${WARNING_MAX:--1}" SOAK_PID="" METRICS_PID="" +QUERY_PID="" REPORTER_STOP=0 die() { @@ -75,6 +88,10 @@ cleanup() { kill "$METRICS_PID" >/dev/null 2>&1 || true wait "$METRICS_PID" >/dev/null 2>&1 || true fi + if is_true "$STOP_QUERY_SERVICE_ON_EXIT" && [[ -n "$QUERY_PID" ]] && kill -0 "$QUERY_PID" >/dev/null 2>&1; then + kill "$QUERY_PID" >/dev/null 2>&1 || true + wait "$QUERY_PID" >/dev/null 2>&1 || true + fi if is_true "$START_MONITOR_STACK" && is_true "$STOP_MONITOR_STACK_ON_EXIT" && [[ -f "$MONITOR_DIR/docker-compose.yml" ]]; then (cd "$MONITOR_DIR" && PROMETHEUS_RETENTION="$PROMETHEUS_RETENTION" docker compose down) >/dev/null 2>&1 || true fi @@ -116,6 +133,7 @@ format_epoch_rfc3339() { main() { validate_non_negative_int "SOAK_DURATION_SECS" "$SOAK_DURATION_SECS" + 
validate_non_negative_int "SOAK_INTERVAL_SECS" "$SOAK_INTERVAL_SECS" validate_non_negative_int "HOURLY_REPORT_INTERVAL_SECS" "$HOURLY_REPORT_INTERVAL_SECS" [[ "$HOURLY_REPORT_INTERVAL_SECS" != "0" ]] || die "HOURLY_REPORT_INTERVAL_SECS must be > 0" [[ -x "$SOAK_SCRIPT" ]] || die "missing executable: $SOAK_SCRIPT" @@ -137,6 +155,34 @@ main() { echo "$METRICS_PID" > "$LOG_ROOT/metrics.pid" fi + if is_true "$START_QUERY_SERVICE"; then + [[ -x "$BIN_DIR/rpki_query_service" ]] || die "missing executable: $BIN_DIR/rpki_query_service" + [[ -x "$BIN_DIR/rpki_query_indexer" ]] || die "missing executable: $BIN_DIR/rpki_query_indexer" + mkdir -p "$QUERY_EXPORT_ROOT" + query_args=( + "$BIN_DIR/rpki_query_service" + --query-db "$QUERY_DB" + --repo-bytes-db "$QUERY_REPO_BYTES_DB" + --export-root "$QUERY_EXPORT_ROOT" + --listen "$QUERY_LISTEN" + --watch-run-root "$RUN_ROOT" + --watch-interval-secs "$QUERY_WATCH_INTERVAL_SECS" + --indexer-bin "$BIN_DIR/rpki_query_indexer" + --retain-indexed-runs "$QUERY_RETAIN_INDEXED_RUNS" + --projection-entry-limit "$QUERY_PROJECTION_ENTRY_LIMIT" + ) + if [[ -n "$QUERY_WATCH_MIN_RUN_SEQ" ]]; then + query_args+=(--watch-min-run-seq "$QUERY_WATCH_MIN_RUN_SEQ") + fi + if is_true "$QUERY_WATCH_BACKFILL"; then + query_args+=(--watch-backfill) + fi + "${query_args[@]}" \ + > "$LOG_ROOT/query-service.stdout" 2> "$LOG_ROOT/query-service.stderr" & + QUERY_PID="$!" + echo "$QUERY_PID" > "$LOG_ROOT/query-service.pid" + fi + if is_true "$START_MONITOR_STACK"; then if [[ ! 
-f "$MONITOR_DIR/docker-compose.yml" ]]; then die "missing monitor compose: $MONITOR_DIR/docker-compose.yml" @@ -169,7 +215,7 @@ main() { fi printf '\n# Generated by run_24h_soak_with_metrics.sh\n' printf 'MAX_RUNS=-1\n' - printf 'INTERVAL_SECS=0\n' + printf 'INTERVAL_SECS=%q\n' "$SOAK_INTERVAL_SECS" if (( SOAK_DURATION_SECS > 0 )); then printf 'STOP_AFTER_SECS=%q\n' "$SOAK_DURATION_SECS" else diff --git a/src/audit.rs b/src/audit.rs index 76b7243..bdf503e 100644 --- a/src/audit.rs +++ b/src/audit.rs @@ -41,6 +41,63 @@ pub struct AuditWarning { pub context: Option, } +#[derive(Clone, Debug, PartialEq, Eq, Serialize)] +#[serde(rename_all = "camelCase")] +pub struct QueryAuditManifest { + pub schema_version: u32, + pub status: String, + pub events_path: String, + pub events_count: u64, + pub events_sha256: String, + pub writer_version: u32, + #[serde(skip_serializing_if = "Option::is_none")] + pub error: Option, +} + +#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize)] +#[serde(rename_all = "camelCase")] +pub struct ValidationEventCounts { + #[serde(skip_serializing_if = "Option::is_none")] + pub objects: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub warnings: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub vrps: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub aspas: Option, +} + +#[derive(Clone, Debug, PartialEq, Eq, Serialize)] +#[serde(rename_all = "camelCase")] +pub struct ValidationEvent { + pub schema_version: u32, + pub seq: u64, + pub event_type: String, + pub validation_time: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub pp_node_id: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub pp_manifest_uri: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub pp_rsync_base_uri: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub repo_sync_phase: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub repo_terminal_state: Option, + 
#[serde(skip_serializing_if = "Option::is_none")] + pub object_uri: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub sha256: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub object_type: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub result: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub reason: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub counts: Option, +} + impl From<&crate::report::Warning> for AuditWarning { fn from(w: &crate::report::Warning) -> Self { Self { @@ -222,6 +279,8 @@ pub struct AuditReportV2 { pub downloads: Vec, pub download_stats: AuditDownloadStats, pub repo_sync_stats: AuditRepoSyncStats, + #[serde(rename = "queryAudit", skip_serializing_if = "Option::is_none")] + pub query_audit: Option, } #[derive(Clone, Debug, PartialEq, Eq, Serialize)] diff --git a/src/bin/rpki_object_parse.rs b/src/bin/rpki_object_parse.rs index 1a75207..33e3021 100644 --- a/src/bin/rpki_object_parse.rs +++ b/src/bin/rpki_object_parse.rs @@ -1,29 +1,6 @@ -use std::net::{Ipv4Addr, Ipv6Addr}; -use std::path::{Path, PathBuf}; +use std::path::PathBuf; -use rpki::data_model::aspa::AspaObject; -use rpki::data_model::crl::RpkixCrl; -use rpki::data_model::manifest::ManifestObject; -use rpki::data_model::rc::{ - AccessDescription, RcExtensions, ResourceCertificate, SubjectInfoAccess, -}; -use rpki::data_model::roa::{IpPrefix as RoaIpPrefix, RoaAfi, RoaObject}; -use rpki::data_model::signed_object::{ - ResourceEeCertificate, RpkiSignedObject, SignedAttrsProfiled, SignerInfoProfiled, -}; -use rpki::data_model::ta::TaCertificate; -use serde_json::{Value, json}; -use sha2::{Digest, Sha256}; - -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -enum ObjectType { - Auto, - Cer, - Mft, - Crl, - Roa, - Aspa, -} +use rpki::object_projection::{ObjectType, parse_object_json, resolve_object_type}; #[derive(Debug, PartialEq, Eq)] struct Args { @@ -91,7 +68,8 @@ fn parse_args(argv: &[String]) -> Result { 
"--type" => { index += 1; let value = argv.get(index).ok_or("--type requires a value")?; - args.object_type = parse_object_type(value)?; + args.object_type = + ObjectType::parse(value).map_err(|err| format!("{err}\n{}", usage()))?; } "--input" | "--in" => { index += 1; @@ -130,18 +108,6 @@ fn parse_args(argv: &[String]) -> Result { Ok(args) } -fn parse_object_type(value: &str) -> Result { - match value.to_ascii_lowercase().as_str() { - "auto" => Ok(ObjectType::Auto), - "cer" | ".cer" | "cert" | "certificate" => Ok(ObjectType::Cer), - "mft" | ".mft" | "manifest" => Ok(ObjectType::Mft), - "crl" | ".crl" => Ok(ObjectType::Crl), - "roa" | ".roa" => Ok(ObjectType::Roa), - "asa" | ".asa" | "aspa" => Ok(ObjectType::Aspa), - _ => Err(format!("unsupported --type: {value}\n{}", usage())), - } -} - fn parse_limit(value: &str) -> Result { if value.eq_ignore_ascii_case("all") { return Ok(usize::MAX); @@ -151,390 +117,10 @@ fn parse_limit(value: &str) -> Result { .map_err(|_| format!("invalid --entry-limit: {value}")) } -fn resolve_object_type(object_type: ObjectType, path: &Path) -> Result { - if object_type != ObjectType::Auto { - return Ok(object_type); - } - match path - .extension() - .and_then(|v| v.to_str()) - .map(|v| v.to_ascii_lowercase()) - .as_deref() - { - Some("cer") => Ok(ObjectType::Cer), - Some("mft") => Ok(ObjectType::Mft), - Some("crl") => Ok(ObjectType::Crl), - Some("roa") => Ok(ObjectType::Roa), - Some("asa") | Some("aspa") => Ok(ObjectType::Aspa), - _ => Err(format!( - "cannot infer object type from path: {}", - path.display() - )), - } -} - -fn parse_object_json( - object_type: ObjectType, - input_path: &Path, - bytes: &[u8], - entry_limit: usize, -) -> Value { - let object = match object_type { - ObjectType::Auto => unreachable!("auto must be resolved"), - ObjectType::Cer => parse_cer_json(bytes), - ObjectType::Mft => parse_mft_json(bytes, entry_limit), - ObjectType::Crl => parse_crl_json(bytes, entry_limit), - ObjectType::Roa => parse_roa_json(bytes, 
entry_limit), - ObjectType::Aspa => parse_aspa_json(bytes, entry_limit), - }; - json!({ - "tool": "rpki_object_parse", - "schemaVersion": 1, - "input": { - "path": input_path.display().to_string(), - "type": object_type_label(object_type), - "bytes": bytes_summary(bytes), - }, - "object": object, - }) -} - -fn parse_cer_json(bytes: &[u8]) -> Value { - match ResourceCertificate::decode_der(bytes) { - Ok(cert) => { - let ta_profile = match TaCertificate::decode_der(bytes) { - Ok(ta) => json!({ - "valid": true, - "selfSignature": result_json(ta.verify_self_signature().map_err(|e| e.to_string())), - }), - Err(err) => json!({ - "valid": false, - "error": err.to_string(), - }), - }; - json!({ - "type": "cer", - "decode": {"profileValid": true}, - "resourceCertificate": resource_certificate_json(&cert), - "trustAnchorProfile": ta_profile, - }) - } - Err(err) => json!({ - "type": "cer", - "decode": {"profileValid": false, "error": err.to_string()}, - }), - } -} - -fn parse_mft_json(bytes: &[u8], entry_limit: usize) -> Value { - match ManifestObject::decode_der(bytes) { - Ok(mft) => { - let files = mft.manifest.parse_files(); - let (file_sample, file_list_error) = match files { - Ok(entries) => ( - json!({ - "count": entries.len(), - "truncated": entries.len() > entry_limit, - "entries": entries.iter().take(entry_limit).map(|item| { - json!({"fileName": item.file_name, "hashHex": hex::encode(item.hash_bytes)}) - }).collect::>(), - }), - Value::Null, - ), - Err(err) => (Value::Null, json!(err.to_string())), - }; - json!({ - "type": "mft", - "decode": {"profileValid": true}, - "eContentType": mft.econtent_type, - "signedObject": signed_object_json(&mft.signed_object), - "manifest": { - "version": mft.manifest.version, - "manifestNumberHex": mft.manifest.manifest_number.to_hex_upper(), - "thisUpdate": format_time(mft.manifest.this_update), - "nextUpdate": format_time(mft.manifest.next_update), - "fileHashAlg": mft.manifest.file_hash_alg, - "fileCount": 
mft.manifest.file_count(), - "fileList": file_sample, - "fileListError": file_list_error, - }, - "embeddedEeProfile": result_json(mft.validate_embedded_ee_cert().map_err(|e| e.to_string())), - "cmsSignature": result_json(mft.signed_object.verify_signature().map_err(|e| e.to_string())), - }) - } - Err(err) => json!({ - "type": "mft", - "decode": {"profileValid": false, "error": err.to_string()}, - }), - } -} - -fn parse_crl_json(bytes: &[u8], entry_limit: usize) -> Value { - match RpkixCrl::decode_der(bytes) { - Ok(crl) => json!({ - "type": "crl", - "decode": {"profileValid": true}, - "rawDer": bytes_summary(&crl.raw_der), - "version": crl.version, - "issuer": crl.issuer_dn, - "signatureAlgorithm": crl.signature_algorithm_oid, - "thisUpdate": format_time(crl.this_update.utc), - "nextUpdate": format_time(crl.next_update.utc), - "extensions": { - "authorityKeyIdentifier": hex::encode(&crl.extensions.authority_key_identifier), - "crlNumberHex": crl.extensions.crl_number.to_hex_upper(), - "crlNumber": crl.extensions.crl_number.to_u64(), - }, - "revokedCertificates": { - "count": crl.revoked_certs.len(), - "truncated": crl.revoked_certs.len() > entry_limit, - "entries": crl.revoked_certs.iter().take(entry_limit).map(|item| { - json!({ - "serialNumberHex": item.serial_number.to_hex_upper(), - "serialNumber": item.serial_number.to_u64(), - "revocationDate": format_time(item.revocation_date.utc), - }) - }).collect::>(), - }, - }), - Err(err) => json!({ - "type": "crl", - "decode": {"profileValid": false, "error": err.to_string()}, - }), - } -} - -fn parse_roa_json(bytes: &[u8], entry_limit: usize) -> Value { - match RoaObject::decode_der(bytes) { - Ok(roa) => json!({ - "type": "roa", - "decode": {"profileValid": true}, - "eContentType": roa.econtent_type, - "signedObject": signed_object_json(&roa.signed_object), - "roa": { - "version": roa.roa.version, - "asId": roa.roa.as_id, - "ipAddressFamilies": roa.roa.ip_addr_blocks.iter().map(|family| { - json!({ - "afi": 
format!("{:?}", family.afi), - "addressCount": family.addresses.len(), - "truncated": family.addresses.len() > entry_limit, - "addresses": family.addresses.iter().take(entry_limit).map(|entry| { - json!({ - "prefix": roa_prefix_string(&entry.prefix), - "maxLength": entry.max_length, - }) - }).collect::>(), - }) - }).collect::>(), - }, - "embeddedEeProfile": result_json(roa.validate_embedded_ee_cert().map_err(|e| e.to_string())), - "cmsSignature": result_json(roa.signed_object.verify_signature().map_err(|e| e.to_string())), - }), - Err(err) => json!({ - "type": "roa", - "decode": {"profileValid": false, "error": err.to_string()}, - }), - } -} - -fn parse_aspa_json(bytes: &[u8], entry_limit: usize) -> Value { - match AspaObject::decode_der(bytes) { - Ok(aspa) => json!({ - "type": "aspa", - "decode": {"profileValid": true}, - "eContentType": aspa.econtent_type, - "signedObject": signed_object_json(&aspa.signed_object), - "aspa": { - "version": aspa.aspa.version, - "customerAsId": aspa.aspa.customer_as_id, - "providerCount": aspa.aspa.provider_as_ids.len(), - "providersTruncated": aspa.aspa.provider_as_ids.len() > entry_limit, - "providerAsIds": aspa.aspa.provider_as_ids.iter().take(entry_limit).copied().collect::>(), - }, - "embeddedEeProfile": result_json(aspa.validate_embedded_ee_cert().map_err(|e| e.to_string())), - "cmsSignature": result_json(aspa.signed_object.verify_signature().map_err(|e| e.to_string())), - }), - Err(err) => json!({ - "type": "aspa", - "decode": {"profileValid": false, "error": err.to_string()}, - }), - } -} - -fn resource_certificate_json(cert: &ResourceCertificate) -> Value { - let tbs = &cert.tbs; - json!({ - "rawDer": bytes_summary(&cert.raw_der), - "kind": format!("{:?}", cert.kind), - "version": tbs.version, - "serialNumberHex": hex::encode(tbs.serial_number.to_bytes_be()), - "signatureAlgorithm": tbs.signature_algorithm, - "issuer": tbs.issuer_name.to_string(), - "subject": tbs.subject_name.to_string(), - "validity": { - "notBefore": 
format_time(tbs.validity_not_before), - "notAfter": format_time(tbs.validity_not_after), - }, - "subjectPublicKeyInfo": bytes_summary(&tbs.subject_public_key_info), - "extensions": rc_extensions_json(&tbs.extensions), - }) -} - -fn rc_extensions_json(ext: &RcExtensions) -> Value { - json!({ - "basicConstraintsCa": ext.basic_constraints_ca, - "subjectKeyIdentifier": ext.subject_key_identifier.as_ref().map(|v| hex::encode(v)), - "authorityKeyIdentifier": ext.authority_key_identifier.as_ref().map(|v| hex::encode(v)), - "crlDistributionPointsUris": ext.crl_distribution_points_uris, - "caIssuersUris": ext.ca_issuers_uris, - "subjectInfoAccess": subject_info_access_json(ext.subject_info_access.as_ref()), - "certificatePoliciesOid": ext.certificate_policies_oid, - "ipResources": serde_json::to_value(&ext.ip_resources).unwrap_or(Value::Null), - "asResources": serde_json::to_value(&ext.as_resources).unwrap_or(Value::Null), - }) -} - -fn subject_info_access_json(value: Option<&SubjectInfoAccess>) -> Value { - match value { - None => Value::Null, - Some(SubjectInfoAccess::Ca(ca)) => json!({ - "kind": "ca", - "accessDescriptions": ca.access_descriptions.iter().map(access_description_json).collect::>(), - }), - Some(SubjectInfoAccess::Ee(ee)) => json!({ - "kind": "ee", - "signedObjectUris": ee.signed_object_uris, - "accessDescriptions": ee.access_descriptions.iter().map(access_description_json).collect::>(), - }), - } -} - -fn access_description_json(value: &AccessDescription) -> Value { - json!({ - "accessMethodOid": value.access_method_oid, - "accessLocation": value.access_location, - }) -} - -fn signed_object_json(signed_object: &RpkiSignedObject) -> Value { - let signed_data = &signed_object.signed_data; - json!({ - "rawDer": bytes_summary(&signed_object.raw_der), - "contentInfoContentType": signed_object.content_info_content_type, - "signedData": { - "version": signed_data.version, - "digestAlgorithms": signed_data.digest_algorithms, - "encapContentInfo": { - 
"eContentType": signed_data.encap_content_info.econtent_type, - "eContent": bytes_summary(&signed_data.encap_content_info.econtent), - }, - "certificates": signed_data.certificates.iter().map(ee_certificate_json).collect::>(), - "crlsPresent": signed_data.crls_present, - "signerInfos": signed_data.signer_infos.iter().map(signer_info_json).collect::>(), - }, - }) -} - -fn ee_certificate_json(cert: &ResourceEeCertificate) -> Value { - json!({ - "rawDer": bytes_summary(&cert.raw_der), - "subjectKeyIdentifier": hex::encode(&cert.subject_key_identifier), - "spkiDer": bytes_summary(&cert.spki_der), - "rsaPublicKey": { - "modulus": bytes_summary(&cert.rsa_public_modulus), - "exponent": bytes_summary(&cert.rsa_public_exponent), - }, - "tbsCertificate": bytes_summary(&cert.tbs_certificate_der), - "certificateSignature": bytes_summary(&cert.signature_bytes), - "keyUsageSummary": format!("{:?}", cert.key_usage_summary), - "siaSignedObjectUris": cert.sia_signed_object_uris, - "resourceCertificate": resource_certificate_json(&cert.resource_cert), - }) -} - -fn signer_info_json(info: &SignerInfoProfiled) -> Value { - json!({ - "version": info.version, - "sidSki": hex::encode(&info.sid_ski), - "digestAlgorithm": info.digest_algorithm, - "signatureAlgorithm": info.signature_algorithm, - "signedAttrs": signed_attrs_json(&info.signed_attrs), - "unsignedAttrsPresent": info.unsigned_attrs_present, - "signature": bytes_summary(&info.signature), - "signedAttrsDerForSignature": bytes_summary(&info.signed_attrs_der_for_signature), - }) -} - -fn signed_attrs_json(attrs: &SignedAttrsProfiled) -> Value { - json!({ - "contentType": attrs.content_type, - "messageDigest": hex::encode(&attrs.message_digest), - "signingTime": { - "utc": format_time(attrs.signing_time.utc), - "encoding": format!("{:?}", attrs.signing_time.encoding), - }, - "otherAttrsPresent": attrs.other_attrs_present, - }) -} - -fn result_json(result: Result<(), String>) -> Value { - match result { - Ok(()) => json!({"valid": 
true}), - Err(err) => json!({"valid": false, "error": err}), - } -} - -fn object_type_label(object_type: ObjectType) -> &'static str { - match object_type { - ObjectType::Auto => "auto", - ObjectType::Cer => "cer", - ObjectType::Mft => "mft", - ObjectType::Crl => "crl", - ObjectType::Roa => "roa", - ObjectType::Aspa => "aspa", - } -} - -fn bytes_summary(bytes: &[u8]) -> Value { - let head_len = bytes.len().min(16); - let tail_len = bytes.len().min(16); - json!({ - "len": bytes.len(), - "sha256": sha256_hex(bytes), - "headHex": hex::encode(&bytes[..head_len]), - "tailHex": hex::encode(&bytes[bytes.len().saturating_sub(tail_len)..]), - }) -} - -fn sha256_hex(bytes: &[u8]) -> String { - hex::encode(Sha256::digest(bytes)) -} - -fn format_time(value: time::OffsetDateTime) -> String { - value - .to_offset(time::UtcOffset::UTC) - .format(&time::format_description::well_known::Rfc3339) - .unwrap_or_else(|_| value.unix_timestamp().to_string()) -} - -fn roa_prefix_string(prefix: &RoaIpPrefix) -> String { - let bytes = prefix.addr_bytes(); - match prefix.afi { - RoaAfi::Ipv4 => { - let octets = [bytes[0], bytes[1], bytes[2], bytes[3]]; - format!("{}/{}", Ipv4Addr::from(octets), prefix.prefix_len) - } - RoaAfi::Ipv6 => { - let mut octets = [0u8; 16]; - octets.copy_from_slice(bytes); - format!("{}/{}", Ipv6Addr::from(octets), prefix.prefix_len) - } - } -} - #[cfg(test)] mod tests { + use std::path::Path; + use super::*; #[test] diff --git a/src/bin/rpki_query_indexer.rs b/src/bin/rpki_query_indexer.rs new file mode 100644 index 0000000..2140e33 --- /dev/null +++ b/src/bin/rpki_query_indexer.rs @@ -0,0 +1,278 @@ +use std::path::PathBuf; + +use rpki::query_db::{ArtifactIndexerConfig, index_artifacts}; + +#[derive(Debug, PartialEq, Eq)] +struct Args { + query_db: PathBuf, + run_root: Option, + run_dir: Option, + repo_bytes_db: Option, + projection_entry_limit: usize, + min_run_seq: Option, + retain_indexed_runs: Option, + dump_summary: bool, +} + +fn usage() -> &'static str { + 
"Usage: rpki_query_indexer --query-db (--run-root |--run-dir ) [--repo-bytes-db ] [--projection-entry-limit ] [--min-run-seq ] [--retain-indexed-runs ] [--dump-summary]" +} + +fn parse_args(argv: &[String]) -> Result { + let mut query_db = None; + let mut run_root = None; + let mut run_dir = None; + let mut repo_bytes_db = None; + let mut projection_entry_limit = 50usize; + let mut min_run_seq = None; + let mut retain_indexed_runs = Some(10usize); + let mut dump_summary = false; + let mut index = 1usize; + while index < argv.len() { + match argv[index].as_str() { + "--query-db" => { + index += 1; + query_db = Some(PathBuf::from(value_at(argv, index, "--query-db")?)); + } + "--run-root" => { + index += 1; + run_root = Some(PathBuf::from(value_at(argv, index, "--run-root")?)); + } + "--run-dir" => { + index += 1; + run_dir = Some(PathBuf::from(value_at(argv, index, "--run-dir")?)); + } + "--repo-bytes-db" => { + index += 1; + repo_bytes_db = Some(PathBuf::from(value_at(argv, index, "--repo-bytes-db")?)); + } + "--projection-entry-limit" => { + index += 1; + let raw = value_at(argv, index, "--projection-entry-limit")?; + projection_entry_limit = raw + .parse::() + .map_err(|_| format!("invalid --projection-entry-limit: {raw}"))?; + } + "--min-run-seq" => { + index += 1; + let raw = value_at(argv, index, "--min-run-seq")?; + min_run_seq = Some( + raw.parse::() + .map_err(|_| format!("invalid --min-run-seq: {raw}"))?, + ); + } + "--retain-indexed-runs" => { + index += 1; + let raw = value_at(argv, index, "--retain-indexed-runs")?; + retain_indexed_runs = Some( + raw.parse::() + .map_err(|_| format!("invalid --retain-indexed-runs: {raw}"))?, + ); + } + "--dump-summary" => dump_summary = true, + "-h" | "--help" => return Err(usage().to_string()), + other => return Err(format!("unknown argument: {other}\n{}", usage())), + } + index += 1; + } + if run_root.is_some() == run_dir.is_some() { + return Err(format!( + "exactly one of --run-root or --run-dir is required\n{}", + 
usage() + )); + } + Ok(Args { + query_db: query_db.ok_or_else(|| format!("--query-db is required\n{}", usage()))?, + run_root, + run_dir, + repo_bytes_db, + projection_entry_limit, + min_run_seq, + retain_indexed_runs, + dump_summary, + }) +} + +fn value_at<'a>(argv: &'a [String], index: usize, flag: &str) -> Result<&'a str, String> { + argv.get(index) + .map(String::as_str) + .ok_or_else(|| format!("{flag} requires a value")) +} + +fn main() { + let argv = std::env::args().collect::>(); + let args = match parse_args(&argv) { + Ok(args) => args, + Err(err) => { + eprintln!("{err}"); + std::process::exit(2); + } + }; + let summary = match index_artifacts(&ArtifactIndexerConfig { + query_db_path: args.query_db, + run_root: args.run_root, + run_dir: args.run_dir, + repo_bytes_db_path: args.repo_bytes_db, + projection_entry_limit: args.projection_entry_limit, + min_run_seq: args.min_run_seq, + retain_indexed_runs: args.retain_indexed_runs, + }) { + Ok(summary) => summary, + Err(err) => { + eprintln!("query index failed: {err}"); + std::process::exit(1); + } + }; + if args.dump_summary { + println!( + "{}", + serde_json::to_string_pretty(&summary).expect("serialize summary") + ); + } else { + println!( + "indexed_runs={} runs_deleted={} retained_runs={} repos={} publication_points={} objects={} latest_ready_run={}", + summary.runs_indexed, + summary.runs_deleted, + summary.retained_runs, + summary.repos_indexed, + summary.publication_points_indexed, + summary.object_instances_indexed, + summary.latest_ready_run.as_deref().unwrap_or("none") + ); + } + if !summary.errors.is_empty() { + eprintln!( + "index completed with {} per-run errors", + summary.errors.len() + ); + std::process::exit(1); + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn argv(args: &[&str]) -> Vec { + std::iter::once("rpki_query_indexer") + .chain(args.iter().copied()) + .map(str::to_string) + .collect() + } + + #[test] + fn parse_args_accepts_run_root_and_optional_flags() { + let args = 
parse_args(&argv(&[ + "--query-db", + "query.db", + "--run-root", + "runs", + "--repo-bytes-db", + "repo-bytes.db", + "--projection-entry-limit", + "7", + "--min-run-seq", + "42", + "--retain-indexed-runs", + "3", + "--dump-summary", + ])) + .expect("args"); + assert_eq!(args.query_db, PathBuf::from("query.db")); + assert_eq!(args.run_root.as_deref(), Some(std::path::Path::new("runs"))); + assert_eq!(args.run_dir, None); + assert_eq!( + args.repo_bytes_db.as_deref(), + Some(std::path::Path::new("repo-bytes.db")) + ); + assert_eq!(args.projection_entry_limit, 7); + assert_eq!(args.min_run_seq, Some(42)); + assert_eq!(args.retain_indexed_runs, Some(3)); + assert!(args.dump_summary); + } + + #[test] + fn parse_args_accepts_single_run_dir() { + let args = parse_args(&argv(&[ + "--query-db", + "query.db", + "--run-dir", + "runs/run_0001", + ])) + .expect("args"); + assert_eq!( + args.run_dir.as_deref(), + Some(std::path::Path::new("runs/run_0001")) + ); + assert_eq!(args.run_root, None); + assert_eq!(args.projection_entry_limit, 50); + assert_eq!(args.min_run_seq, None); + assert_eq!(args.retain_indexed_runs, Some(10)); + } + + #[test] + fn parse_args_rejects_invalid_or_ambiguous_input() { + assert!( + parse_args(&argv(&["--query-db", "query.db"])) + .unwrap_err() + .contains("exactly one") + ); + assert!( + parse_args(&argv(&[ + "--query-db", + "query.db", + "--run-root", + "runs", + "--run-dir", + "runs/run_0001", + ])) + .unwrap_err() + .contains("exactly one") + ); + assert!( + parse_args(&argv(&["--run-root", "runs"])) + .unwrap_err() + .contains("--query-db") + ); + assert!( + parse_args(&argv(&[ + "--query-db", + "query.db", + "--run-root", + "runs", + "--projection-entry-limit", + "bad", + ])) + .unwrap_err() + .contains("invalid --projection-entry-limit") + ); + assert!( + parse_args(&argv(&[ + "--query-db", + "query.db", + "--run-root", + "runs", + "--unknown", + ])) + .unwrap_err() + .contains("unknown argument") + ); + assert!( + parse_args(&argv(&[ + 
"--query-db", + "query.db", + "--run-root", + "runs", + "--retain-indexed-runs", + "bad", + ])) + .unwrap_err() + .contains("invalid --retain-indexed-runs") + ); + assert!( + parse_args(&argv(&["--query-db"])) + .unwrap_err() + .contains("requires a value") + ); + } +} diff --git a/src/bin/rpki_query_service.rs b/src/bin/rpki_query_service.rs new file mode 100644 index 0000000..83df155 --- /dev/null +++ b/src/bin/rpki_query_service.rs @@ -0,0 +1,2897 @@ +use std::collections::BTreeMap; +use std::fs::{self, File}; +use std::io::{Read, Seek, SeekFrom, Write}; +use std::net::{TcpListener, TcpStream}; +use std::path::PathBuf; +use std::process::Command; +use std::sync::{Arc, Mutex}; + +use rpki::blob_store::ExternalRepoBytesDb; +use rpki::query_db::{ + ChainEdgeRecord, ExportJobRecord, ObjectInstanceRecord, QueryDb, QueryDbError, + ValidationExplainRecord, +}; +use serde::Serialize; +use serde_json::{Value, json}; + +const EXPORT_PAGE_LIMIT: usize = 1000; +const EXPORT_OBJECT_SET_MAX: usize = 10_000; +type ExportJobStore = Arc>>; + +#[derive(Clone, Debug, PartialEq, Eq)] +struct Args { + query_db: PathBuf, + repo_bytes_db: Option, + export_root: PathBuf, + listen: String, + watch_run_root: Option, + watch_interval_secs: u64, + projection_entry_limit: usize, + watch_min_run_seq: Option, + watch_backfill: bool, + indexer_bin: Option, + retain_indexed_runs: usize, +} + +fn usage() -> &'static str { + "Usage: rpki_query_service --query-db [--repo-bytes-db ] [--export-root ] [--listen ] [--watch-run-root ] [--watch-interval-secs ] [--watch-min-run-seq ] [--watch-backfill] [--projection-entry-limit ] [--indexer-bin ] [--retain-indexed-runs ]" +} + +fn parse_args(argv: &[String]) -> Result { + let mut query_db = None; + let mut repo_bytes_db = None; + let mut export_root = None; + let mut listen = "127.0.0.1:9560".to_string(); + let mut watch_run_root = None; + let mut watch_interval_secs = 10u64; + let mut projection_entry_limit = 50usize; + let mut watch_min_run_seq = None; 
+ let mut watch_backfill = false; + let mut indexer_bin = None; + let mut retain_indexed_runs = 10usize; + let mut index = 1usize; + while index < argv.len() { + match argv[index].as_str() { + "--query-db" => { + index += 1; + query_db = Some(PathBuf::from(value_at(argv, index, "--query-db")?)); + } + "--repo-bytes-db" => { + index += 1; + repo_bytes_db = Some(PathBuf::from(value_at(argv, index, "--repo-bytes-db")?)); + } + "--export-root" => { + index += 1; + export_root = Some(PathBuf::from(value_at(argv, index, "--export-root")?)); + } + "--listen" => { + index += 1; + listen = value_at(argv, index, "--listen")?.to_string(); + } + "--watch-run-root" => { + index += 1; + watch_run_root = Some(PathBuf::from(value_at(argv, index, "--watch-run-root")?)); + } + "--watch-interval-secs" => { + index += 1; + let raw = value_at(argv, index, "--watch-interval-secs")?; + watch_interval_secs = raw + .parse::() + .map_err(|_| format!("invalid --watch-interval-secs: {raw}"))?; + } + "--projection-entry-limit" => { + index += 1; + let raw = value_at(argv, index, "--projection-entry-limit")?; + projection_entry_limit = raw + .parse::() + .map_err(|_| format!("invalid --projection-entry-limit: {raw}"))?; + } + "--watch-min-run-seq" => { + index += 1; + let raw = value_at(argv, index, "--watch-min-run-seq")?; + watch_min_run_seq = Some( + raw.parse::() + .map_err(|_| format!("invalid --watch-min-run-seq: {raw}"))?, + ); + } + "--indexer-bin" => { + index += 1; + indexer_bin = Some(PathBuf::from(value_at(argv, index, "--indexer-bin")?)); + } + "--watch-backfill" => { + watch_backfill = true; + } + "--retain-indexed-runs" => { + index += 1; + let raw = value_at(argv, index, "--retain-indexed-runs")?; + retain_indexed_runs = raw + .parse::() + .map_err(|_| format!("invalid --retain-indexed-runs: {raw}"))?; + } + "-h" | "--help" => return Err(usage().to_string()), + other => return Err(format!("unknown argument: {other}\n{}", usage())), + } + index += 1; + } + let query_db = 
query_db.ok_or_else(|| format!("--query-db is required\n{}", usage()))?; + let default_export_root = query_db.with_extension("exports"); + Ok(Args { + query_db, + repo_bytes_db, + export_root: export_root.unwrap_or(default_export_root), + listen, + watch_run_root, + watch_interval_secs, + projection_entry_limit, + watch_min_run_seq, + watch_backfill, + indexer_bin, + retain_indexed_runs, + }) +} + +fn value_at<'a>(argv: &'a [String], index: usize, flag: &str) -> Result<&'a str, String> { + argv.get(index) + .map(String::as_str) + .ok_or_else(|| format!("{flag} requires a value")) +} + +fn main() { + if let Err(err) = real_main() { + eprintln!("{err}"); + std::process::exit(1); + } +} + +fn real_main() -> Result<(), String> { + let args = parse_args(&std::env::args().collect::>())?; + let db = Arc::new(open_query_db_for_service(&args).map_err(|e| e.to_string())?); + let export_jobs = Arc::new(Mutex::new(BTreeMap::new())); + let repo_bytes = match args.repo_bytes_db.as_ref() { + Some(path) => Some(Arc::new( + ExternalRepoBytesDb::open_read_only(path).map_err(|e| e.to_string())?, + )), + None => None, + }; + if let Some(run_root) = args.watch_run_root.clone() { + let effective_watch_min_run_seq = resolve_effective_watch_min_run_seq( + &run_root, + args.watch_min_run_seq, + args.watch_backfill, + )?; + spawn_indexer_watcher( + Arc::clone(&db), + args.query_db.clone(), + resolve_indexer_bin(args.indexer_bin.clone())?, + run_root, + args.projection_entry_limit, + args.watch_interval_secs, + effective_watch_min_run_seq, + args.retain_indexed_runs, + ); + } + let listener = TcpListener::bind(&args.listen) + .map_err(|e| format!("bind failed: {}: {e}", args.listen))?; + eprintln!("rpki_query_service listening on {}", args.listen); + for stream in listener.incoming() { + match stream { + Ok(stream) => { + let db = Arc::clone(&db); + let repo_bytes = repo_bytes.clone(); + let export_jobs = Arc::clone(&export_jobs); + let export_root = args.export_root.clone(); + 
/// Resolves the path of the `rpki_query_indexer` executable.
///
/// An explicitly configured path is returned unchanged; it is not checked for
/// existence. Otherwise the indexer is expected to sit in the same directory as
/// the currently running executable, using the platform's executable suffix
/// (empty on Unix-like systems).
///
/// # Errors
/// Returns a message when the current executable path cannot be determined or
/// has no parent directory.
fn resolve_indexer_bin(configured: Option<PathBuf>) -> Result<PathBuf, String> {
    if let Some(path) = configured {
        return Ok(path);
    }
    let current_exe = std::env::current_exe().map_err(|e| e.to_string())?;
    let parent = current_exe
        .parent()
        .ok_or_else(|| "cannot resolve current executable directory".to_string())?;
    Ok(parent.join(format!(
        "rpki_query_indexer{}",
        std::env::consts::EXE_SUFFIX
    )))
}
Ok(None) => { + let _ = db.try_catch_up_with_primary(); + } + Err(err) => eprintln!("query watcher index failed: {err}"), + } + std::thread::sleep(std::time::Duration::from_secs(interval_secs.max(1))); + } + }); +} + +fn latest_completed_run_seq(run_root: &std::path::Path) -> Result { + let runs_root = if run_root.join("runs").is_dir() { + run_root.join("runs") + } else { + run_root.to_path_buf() + }; + let mut latest = 0u64; + if !runs_root.exists() { + return Ok(latest); + } + for entry in fs::read_dir(&runs_root).map_err(|e| e.to_string())? { + let entry = entry.map_err(|e| e.to_string())?; + let path = entry.path(); + if !path.is_dir() || !path.join("report.json").exists() { + continue; + } + let Some(seq) = run_seq_from_path(&path) else { + continue; + }; + if !run_status_is_success_or_unknown(&path)? { + continue; + } + latest = latest.max(seq); + } + Ok(latest) +} + +fn discover_next_unindexed_run( + db: &QueryDb, + run_root: &std::path::Path, + min_run_seq: Option, +) -> Result, String> { + db.try_catch_up_with_primary().map_err(|e| e.to_string())?; + let runs_root = if run_root.join("runs").is_dir() { + run_root.join("runs") + } else { + run_root.to_path_buf() + }; + let mut candidates = Vec::new(); + let latest_ready_seq = match db.latest_ready_run().map_err(|e| e.to_string())? { + Some(run_id) => db + .get_run(&run_id) + .map_err(|e| e.to_string())? + .and_then(|run| run.run_seq), + None => None, + }; + let min_candidate_seq = min_run_seq.unwrap_or(0).max( + latest_ready_seq + .map(|seq| seq.saturating_add(1)) + .unwrap_or(0), + ); + for entry in fs::read_dir(&runs_root).map_err(|e| e.to_string())? 
/// Extracts the numeric sequence from a run directory named `run_<digits>`.
///
/// Only the final path component is inspected. Returns `None` when the name is
/// not valid UTF-8, lacks the `run_` prefix, has a suffix that is not purely
/// ASCII digits, or the number does not fit in a `u64`.
fn run_seq_from_path(path: &std::path::Path) -> Option<u64> {
    let digits = path.file_name()?.to_str()?.strip_prefix("run_")?;
    // `u64::from_str` accepts a leading '+', which would let `run_+5` alias
    // `run_5`; insist on plain digits so each sequence has one spelling per value.
    if !digits.bytes().all(|byte| byte.is_ascii_digit()) {
        return None;
    }
    digits.parse::<u64>().ok()
}
format!("read run summary failed: {}: {e}", summary_path.display()))?, + ) + .map_err(|e| format!("parse run summary failed: {}: {e}", summary_path.display()))?; + Ok(summary + .get("status") + .and_then(Value::as_str) + .map_or(true, |status| status == "success")) +} + +fn handle_client( + mut stream: TcpStream, + db: Arc, + repo_bytes: Option>, + export_jobs: ExportJobStore, + export_root: PathBuf, +) -> Result<(), String> { + let Some(request) = read_http_request(&mut stream)? else { + return Ok(()); + }; + if let Err(err) = db.try_catch_up_with_primary() { + eprintln!("query db catch-up failed: {err}"); + } + let response = if request.method != "GET" { + if request.method == "POST" { + match route_post_request( + Arc::clone(&db), + repo_bytes.clone(), + Arc::clone(&export_jobs), + export_root, + &request.target, + &request.body, + ) { + Ok(value) => json_response(200, &value), + Err(ApiError { status, message }) => { + json_response(status, &json!({"error": message})) + } + } + } else { + json_response(405, &json!({"error":"method_not_allowed"})) + } + } else if let Some(raw_result) = + route_raw_request(&db, repo_bytes.as_deref(), &export_jobs, &request.target) + { + match raw_result { + Ok(response) => response, + Err(ApiError { status, message }) => json_response(status, &json!({"error": message})), + } + } else { + match route_request(&db, repo_bytes.as_deref(), &export_jobs, &request.target) { + Ok(value) => json_response(200, &value), + Err(ApiError { status, message }) => json_response(status, &json!({"error": message})), + } + }; + stream.write_all(&response).map_err(|e| e.to_string())?; + Ok(()) +} + +struct HttpRequest { + method: String, + target: String, + body: Vec, +} + +fn read_http_request(stream: &mut TcpStream) -> Result, String> { + let mut buffer = Vec::new(); + let mut chunk = [0u8; 8192]; + let read = stream.read(&mut chunk).map_err(|e| e.to_string())?; + if read == 0 { + return Ok(None); + } + buffer.extend_from_slice(&chunk[..read]); + 
let header_end = loop { + if let Some(pos) = find_header_end(&buffer) { + break pos; + } + let read = stream.read(&mut chunk).map_err(|e| e.to_string())?; + if read == 0 { + return Err("incomplete HTTP headers".to_string()); + } + buffer.extend_from_slice(&chunk[..read]); + }; + let headers = String::from_utf8_lossy(&buffer[..header_end]); + let Some(first_line) = headers.lines().next() else { + return Ok(None); + }; + let mut parts = first_line.split_whitespace(); + let method = parts.next().unwrap_or("").to_string(); + let target = parts.next().unwrap_or("").to_string(); + let content_length = headers + .lines() + .filter_map(|line| line.split_once(':')) + .find(|(name, _)| name.eq_ignore_ascii_case("content-length")) + .and_then(|(_, value)| value.trim().parse::().ok()) + .unwrap_or(0); + let body_start = header_end + 4; + while buffer.len().saturating_sub(body_start) < content_length { + let read = stream.read(&mut chunk).map_err(|e| e.to_string())?; + if read == 0 { + break; + } + buffer.extend_from_slice(&chunk[..read]); + } + let body_end = body_start + content_length.min(buffer.len().saturating_sub(body_start)); + Ok(Some(HttpRequest { + method, + target, + body: buffer[body_start..body_end].to_vec(), + })) +} + +fn find_header_end(bytes: &[u8]) -> Option { + bytes.windows(4).position(|window| window == b"\r\n\r\n") +} + +#[derive(Debug)] +struct ApiError { + status: u16, + message: String, +} + +impl ApiError { + fn new(status: u16, message: impl Into) -> Self { + Self { + status, + message: message.into(), + } + } +} + +impl From for ApiError { + fn from(value: QueryDbError) -> Self { + Self::new(500, value.to_string()) + } +} + +fn route_request( + db: &QueryDb, + repo_bytes: Option<&ExternalRepoBytesDb>, + export_jobs: &ExportJobStore, + target: &str, +) -> Result { + let (path, query) = split_target(target); + let query = parse_query(query); + let path = path.trim_end_matches('/'); + let segments = path + .trim_start_matches("/api/v1") + 
.trim_start_matches('/') + .split('/') + .filter(|s| !s.is_empty()) + .collect::>(); + if segments.is_empty() { + return Ok(json!({"data":{"service":"rpki_query_service","version":1}})); + } + match segments.as_slice() { + ["runs"] => page_response(db.list_runs(limit(&query), cursor(&query))?, None), + ["latest_run"] => { + let run_id = db + .latest_ready_run()? + .ok_or_else(|| ApiError::new(404, "latest run not found"))?; + let run = db + .get_run(&run_id)? + .ok_or_else(|| ApiError::new(404, "latest run record not found"))?; + data_response(&run, Some(run_id)) + } + ["runs", "latest"] => { + let run_id = db + .latest_ready_run()? + .ok_or_else(|| ApiError::new(404, "latest run not found"))?; + let run = db + .get_run(&run_id)? + .ok_or_else(|| ApiError::new(404, "latest run record not found"))?; + data_response(&run, Some(run_id)) + } + ["runs", raw_run_id] => { + let run_id = resolve_run(db, raw_run_id)?; + let run = db + .get_run(&run_id)? + .ok_or_else(|| ApiError::new(404, "run not found"))?; + data_response(&run, Some(run_id)) + } + ["runs", raw_run_id, "artifacts"] => { + let run_id = resolve_run(db, raw_run_id)?; + let run = db + .get_run(&run_id)? + .ok_or_else(|| ApiError::new(404, "run not found"))?; + data_response(&run.artifact_paths, Some(run_id)) + } + ["runs", raw_run_id, "summary"] => { + let run_id = resolve_run(db, raw_run_id)?; + let stat = db.get_stat(&run_id, "overview", "counts")?; + data_response( + &stat.map(|s| s.value).unwrap_or_else(|| json!({})), + Some(run_id), + ) + } + ["runs", raw_run_id, "repos"] => { + let run_id = resolve_run(db, raw_run_id)?; + page_response( + db.list_repos(&run_id, limit(&query), cursor(&query))?, + Some(run_id), + ) + } + ["runs", raw_run_id, "repos", repo_id] => { + let run_id = resolve_run(db, raw_run_id)?; + let repo = db + .get_repo(&run_id, repo_id)? 
+ .ok_or_else(|| ApiError::new(404, "repo not found"))?; + data_response(&repo, Some(run_id)) + } + ["runs", raw_run_id, "repos", repo_id, "stats"] => { + let run_id = resolve_run(db, raw_run_id)?; + let repo = db + .get_repo(&run_id, repo_id)? + .ok_or_else(|| ApiError::new(404, "repo not found"))?; + data_response( + &json!({ + "publicationPoints": repo.publication_points, + "objects": repo.objects, + "rejectedObjects": repo.rejected_objects, + "syncDurationMsTotal": repo.sync_duration_ms_total, + "phases": repo.phases, + "terminalStates": repo.terminal_states, + }), + Some(run_id), + ) + } + ["runs", raw_run_id, "repos", repo_id, "publication-points"] => { + let run_id = resolve_run(db, raw_run_id)?; + page_response( + db.list_publication_points_for_repo( + &run_id, + repo_id, + limit(&query), + cursor(&query), + )?, + Some(run_id), + ) + } + ["runs", raw_run_id, "repos", repo_id, "objects"] => { + let run_id = resolve_run(db, raw_run_id)?; + page_response( + db.list_objects_for_repo(&run_id, repo_id, limit(&query), cursor(&query))?, + Some(run_id), + ) + } + ["runs", raw_run_id, "publication-points"] => { + let run_id = resolve_run(db, raw_run_id)?; + page_response( + db.list_publication_points(&run_id, limit(&query), cursor(&query))?, + Some(run_id), + ) + } + ["runs", raw_run_id, "publication-points", pp_id] => { + let run_id = resolve_run(db, raw_run_id)?; + let pp = db + .get_publication_point(&run_id, pp_id)? + .ok_or_else(|| ApiError::new(404, "publication point not found"))?; + data_response(&pp, Some(run_id)) + } + ["runs", raw_run_id, "publication-points", pp_id, "stats"] => { + let run_id = resolve_run(db, raw_run_id)?; + let pp = db + .get_publication_point(&run_id, pp_id)? 
+ .ok_or_else(|| ApiError::new(404, "publication point not found"))?; + data_response( + &json!({ + "objects": pp.objects, + "rejectedObjects": pp.rejected_objects, + "warnings": pp.warnings, + "repoSyncSource": pp.repo_sync_source, + "repoSyncPhase": pp.repo_sync_phase, + "repoSyncDurationMs": pp.repo_sync_duration_ms, + "repoTerminalState": pp.repo_terminal_state, + }), + Some(run_id), + ) + } + ["runs", raw_run_id, "publication-points", pp_id, "objects"] => { + let run_id = resolve_run(db, raw_run_id)?; + page_response( + db.list_objects_for_pp(&run_id, pp_id, limit(&query), cursor(&query))?, + Some(run_id), + ) + } + ["runs", raw_run_id, "objects"] => { + let run_id = resolve_run(db, raw_run_id)?; + page_response( + db.list_objects(&run_id, limit(&query), cursor(&query))?, + Some(run_id), + ) + } + ["runs", raw_run_id, "objects", "by-uri"] => { + let run_id = resolve_run(db, raw_run_id)?; + let uri = query + .get("uri") + .ok_or_else(|| ApiError::new(400, "uri query parameter is required"))?; + let object = db + .get_object_by_uri(&run_id, uri)? + .ok_or_else(|| ApiError::new(404, "object uri not found"))?; + data_response(&object, Some(run_id)) + } + ["runs", raw_run_id, "objects", object_instance_id] => { + let run_id = resolve_run(db, raw_run_id)?; + let object = db + .get_object_by_instance_id(&run_id, object_instance_id)? + .ok_or_else(|| ApiError::new(404, "object not found"))?; + data_response(&object, Some(run_id)) + } + ["runs", raw_run_id, "objects", object_instance_id, "parsed"] => { + let run_id = resolve_run(db, raw_run_id)?; + let object = db + .get_object_by_instance_id(&run_id, object_instance_id)? 
+ .ok_or_else(|| ApiError::new(404, "object not found"))?; + let projection = projection_for_explain(db, repo_bytes, &object)?; + data_response(&projection, Some(run_id)) + } + [ + "runs", + raw_run_id, + "objects", + object_instance_id, + "validation", + ] => { + let run_id = resolve_run(db, raw_run_id)?; + let object = db + .get_object_by_instance_id(&run_id, object_instance_id)? + .ok_or_else(|| ApiError::new(404, "object not found"))?; + data_response(&validation_summary(&object), Some(run_id)) + } + [ + "runs", + raw_run_id, + "objects", + object_instance_id, + "validation", + "file", + ] => { + let run_id = resolve_run(db, raw_run_id)?; + let object = db + .get_object_by_instance_id(&run_id, object_instance_id)? + .ok_or_else(|| ApiError::new(404, "object not found"))?; + data_response(&file_validation_summary(&object), Some(run_id)) + } + [ + "runs", + raw_run_id, + "objects", + object_instance_id, + "validation", + "chain", + ] => { + let run_id = resolve_run(db, raw_run_id)?; + let object = db + .get_object_by_instance_id(&run_id, object_instance_id)? 
+ .ok_or_else(|| ApiError::new(404, "object not found"))?; + data_response(&chain_validation_summary(&object), Some(run_id)) + } + ["runs", raw_run_id, "objects", object_instance_id, "chain"] => { + let run_id = resolve_run(db, raw_run_id)?; + let chain_edges = chain_edges_for_object(db, repo_bytes, &run_id, object_instance_id)?; + data_response(&chain_edges, Some(run_id)) + } + [ + "runs", + raw_run_id, + "objects", + object_instance_id, + "parsed", + "manifest-files", + ] => { + let run_id = resolve_run(db, raw_run_id)?; + projection_array_page( + db, + repo_bytes, + &run_id, + object_instance_id, + &["object", "manifest", "fileList", "entries"], + "manifest-files", + limit(&query), + offset_cursor(&query), + ) + } + [ + "runs", + raw_run_id, + "objects", + object_instance_id, + "parsed", + "revoked-certs", + ] => { + let run_id = resolve_run(db, raw_run_id)?; + projection_array_page( + db, + repo_bytes, + &run_id, + object_instance_id, + &["object", "revokedCertificates", "entries"], + "revoked-certs", + limit(&query), + offset_cursor(&query), + ) + } + ["runs", raw_run_id, "exports", job_id] => { + let run_id = resolve_run(db, raw_run_id)?; + let job = get_export_job(db, export_jobs, &run_id, job_id)? + .ok_or_else(|| ApiError::new(404, "export job not found"))?; + data_response(&job, Some(run_id)) + } + ["objects", sha256] => { + let projection = global_projection_for_hash(db, repo_bytes, sha256)? 
+ .ok_or_else(|| ApiError::new(404, "object projection not found"))?; + data_response(&projection, None) + } + ["runs", raw_run_id, "stats", "overview"] => { + let run_id = resolve_run(db, raw_run_id)?; + let stat = db.get_stat(&run_id, "overview", "counts")?; + data_response( + &stat.map(|s| s.value).unwrap_or_else(|| json!({})), + Some(run_id), + ) + } + ["runs", raw_run_id, "stats", "repos"] => { + let run_id = resolve_run(db, raw_run_id)?; + page_response( + db.list_repos(&run_id, limit(&query), cursor(&query))?, + Some(run_id), + ) + } + ["runs", raw_run_id, "stats", "publication-points"] => { + let run_id = resolve_run(db, raw_run_id)?; + page_response( + db.list_publication_points(&run_id, limit(&query), cursor(&query))?, + Some(run_id), + ) + } + ["runs", raw_run_id, "stats", "object-types"] => { + let run_id = resolve_run(db, raw_run_id)?; + let stat = db.get_stat(&run_id, "objects", "by_type")?; + data_response( + &stat.map(|s| s.value).unwrap_or_else(|| json!({})), + Some(run_id), + ) + } + ["runs", raw_run_id, "stats", "validation"] => { + let run_id = resolve_run(db, raw_run_id)?; + let stat = db.get_stat(&run_id, "validation", "by_result")?; + data_response( + &stat.map(|s| s.value).unwrap_or_else(|| json!({})), + Some(run_id), + ) + } + ["runs", raw_run_id, "stats", "reasons"] => { + let run_id = resolve_run(db, raw_run_id)?; + let stat = db.get_stat(&run_id, "validation", "reasons")?; + data_response( + &stat.map(|s| s.value).unwrap_or_else(|| json!({})), + Some(run_id), + ) + } + ["runs", raw_run_id, "stats", "downloads"] => { + let run_id = resolve_run(db, raw_run_id)?; + let stat = db.get_stat(&run_id, "downloads", "summary")?; + data_response( + &stat.map(|s| s.value).unwrap_or_else(|| json!({})), + Some(run_id), + ) + } + ["runs", raw_run_id, "stats", "validation-events"] => { + let run_id = resolve_run(db, raw_run_id)?; + let name = query.get("name").map(String::as_str).unwrap_or("by_type"); + let stat = db.get_stat(&run_id, 
"validation_events", name)?; + data_response( + &stat.map(|s| s.value).unwrap_or_else(|| json!({})), + Some(run_id), + ) + } + ["runs", raw_run_id, "stats", scope] => { + let run_id = resolve_run(db, raw_run_id)?; + let name = query.get("name").map(String::as_str).unwrap_or("counts"); + let stat = db.get_stat(&run_id, scope, name)?; + data_response( + &stat.map(|s| s.value).unwrap_or_else(|| json!({})), + Some(run_id), + ) + } + _ => Err(ApiError::new(404, "not found")), + } +} + +fn route_post_request( + db: Arc, + repo_bytes: Option>, + export_jobs: ExportJobStore, + export_root: PathBuf, + target: &str, + body: &[u8], +) -> Result { + let (path, _) = split_target(target); + let path = path.trim_end_matches('/'); + let segments = path + .trim_start_matches("/api/v1") + .trim_start_matches('/') + .split('/') + .filter(|s| !s.is_empty()) + .collect::>(); + match segments.as_slice() { + ["runs", raw_run_id, "exports"] => { + let repo_bytes = repo_bytes + .ok_or_else(|| ApiError::new(400, "repo-bytes db is required for export jobs"))?; + let run_id = resolve_run(&db, raw_run_id)?; + let request: Value = if body.is_empty() { + json!({}) + } else { + serde_json::from_slice(body) + .map_err(|err| ApiError::new(400, format!("invalid JSON body: {err}")))? 
+ }; + let scope = request + .get("scope") + .and_then(Value::as_str) + .unwrap_or("repo") + .to_string(); + let repo_id = request + .get("repo_id") + .or_else(|| request.get("repoId")) + .and_then(Value::as_str) + .map(str::to_string); + let pp_id = request + .get("pp_id") + .or_else(|| request.get("ppId")) + .and_then(Value::as_str) + .map(str::to_string); + let object_instance_ids = request + .get("object_instance_ids") + .or_else(|| request.get("objectInstanceIds")) + .and_then(Value::as_array) + .map(|items| { + items + .iter() + .filter_map(Value::as_str) + .map(str::to_string) + .collect::>() + }) + .unwrap_or_default(); + validate_export_request(&scope, repo_id.as_deref(), pp_id.as_deref())?; + let job_id = uuid::Uuid::new_v4().to_string(); + let output_path = export_root.join(&run_id).join(format!("{job_id}.tar")); + let job = ExportJobRecord { + schema_version: 1, + job_id: job_id.clone(), + run_id: run_id.clone(), + scope, + repo_id, + pp_id, + status: "running".to_string(), + created_at: now_rfc3339(), + finished_at: None, + output_path: Some(output_path.display().to_string()), + object_count: 0, + bytes_written: 0, + error: None, + }; + put_export_job(db.as_ref(), &export_jobs, &job)?; + let worker_db = Arc::clone(&db); + let worker_export_jobs = Arc::clone(&export_jobs); + let failed_job_template = job.clone(); + let response_run_id = run_id.clone(); + let response_job = job.clone(); + std::thread::spawn(move || { + let final_job = run_export_job( + &worker_db, + repo_bytes.as_ref(), + job, + output_path, + object_instance_ids, + ) + .unwrap_or_else(|err| ExportJobRecord { + schema_version: 1, + job_id: failed_job_template.job_id.clone(), + run_id: failed_job_template.run_id.clone(), + scope: failed_job_template.scope.clone(), + repo_id: failed_job_template.repo_id.clone(), + pp_id: failed_job_template.pp_id.clone(), + status: "failed".to_string(), + created_at: failed_job_template.created_at.clone(), + finished_at: Some(now_rfc3339()), + output_path: 
failed_job_template.output_path.clone(), + object_count: 0, + bytes_written: 0, + error: Some(err.message), + }); + let _ = put_export_job(worker_db.as_ref(), &worker_export_jobs, &final_job); + }); + data_response(&response_job, Some(response_run_id)) + } + [ + "runs", + raw_run_id, + "objects", + object_instance_id, + "validation", + "explain", + ] => { + let run_id = resolve_run(&db, raw_run_id)?; + let force_refresh = if body.is_empty() { + false + } else { + let request: Value = serde_json::from_slice(body) + .map_err(|err| ApiError::new(400, format!("invalid JSON body: {err}")))?; + request + .get("refresh") + .or_else(|| request.get("forceRefresh")) + .and_then(Value::as_bool) + .unwrap_or(false) + }; + let explain = validation_explain( + db.as_ref(), + repo_bytes.as_deref(), + &run_id, + object_instance_id, + force_refresh, + )?; + data_response(&explain, Some(run_id)) + } + _ => Err(ApiError::new(404, "not found")), + } +} + +fn validate_export_request( + scope: &str, + repo_id: Option<&str>, + pp_id: Option<&str>, +) -> Result<(), ApiError> { + match scope { + "repo" if repo_id.is_some() => Ok(()), + "publication_point" | "publicationPoint" if pp_id.is_some() => Ok(()), + "object_set" | "objectSet" => Ok(()), + "repo" => Err(ApiError::new(400, "repo export requires repo_id")), + "publication_point" | "publicationPoint" => Err(ApiError::new( + 400, + "publication point export requires pp_id", + )), + other => Err(ApiError::new( + 400, + format!("unsupported export scope: {other}"), + )), + } +} + +fn route_raw_request( + db: &QueryDb, + repo_bytes: Option<&ExternalRepoBytesDb>, + export_jobs: &ExportJobStore, + target: &str, +) -> Option, ApiError>> { + let (path, _) = split_target(target); + let path = path.trim_end_matches('/'); + let segments = path + .trim_start_matches("/api/v1") + .trim_start_matches('/') + .split('/') + .filter(|s| !s.is_empty()) + .collect::>(); + match segments.as_slice() { + ["runs", raw_run_id, "objects", object_instance_id, 
"raw"] => Some(raw_object_response( + db, + repo_bytes, + raw_run_id, + object_instance_id, + )), + ["runs", raw_run_id, "exports", job_id, "download"] => Some(export_download_response( + db, + export_jobs, + raw_run_id, + job_id, + )), + _ => None, + } +} + +fn raw_object_response( + db: &QueryDb, + repo_bytes: Option<&ExternalRepoBytesDb>, + raw_run_id: &str, + object_instance_id: &str, +) -> Result, ApiError> { + let repo_bytes = repo_bytes + .ok_or_else(|| ApiError::new(400, "repo-bytes db is not configured for raw download"))?; + let run_id = resolve_run(db, raw_run_id)?; + let object = db + .get_object_by_instance_id(&run_id, object_instance_id)? + .ok_or_else(|| ApiError::new(404, "object not found"))?; + if !is_sha256_hex(&object.sha256) { + return Err(ApiError::new(404, "object raw hash is unavailable")); + } + let bytes = repo_bytes + .get_blob_bytes(&object.sha256) + .map_err(|err| ApiError::new(500, err.to_string()))? + .ok_or_else(|| ApiError::new(404, "object raw bytes not found"))?; + Ok(binary_response(200, "application/octet-stream", bytes)) +} + +fn export_download_response( + db: &QueryDb, + export_jobs: &ExportJobStore, + raw_run_id: &str, + job_id: &str, +) -> Result, ApiError> { + let run_id = resolve_run(db, raw_run_id)?; + let job = get_export_job(db, export_jobs, &run_id, job_id)? 
/// Builds the in-memory job-store key `"<run_id>/<job_id>"`.
fn export_job_store_key(run_id: &str, job_id: &str) -> String {
    let mut key = String::with_capacity(run_id.len() + 1 + job_id.len());
    key.push_str(run_id);
    key.push('/');
    key.push_str(job_id);
    key
}
/// Runs an export job to completion, writing a tar archive to `output_path`.
///
/// The objects selected by `job.scope` are appended to the archive one at a
/// time while a JSON manifest is streamed into a sibling temporary file. Once
/// every object is written the manifest is appended as the `manifest.json`
/// entry, the temporary file is removed and the archive is terminated.
///
/// Supported scopes:
/// * `"repo"` — every object of `job.repo_id`, fetched page by page.
/// * `"publication_point"` / `"publicationPoint"` — every object of `job.pp_id`.
/// * `"object_set"` / `"objectSet"` — exactly the ids in `object_instance_ids`
///   (at most `EXPORT_OBJECT_SET_MAX`).
///
/// On success the job is returned with `status` set to `"complete"` and
/// `finished_at`, `object_count` and `bytes_written` filled in.
///
/// # Errors
///
/// Returns 400 for an unsupported scope or an over-sized object set, 404 when
/// a member of an object set does not exist, and 500 for file-system failures.
/// Errors from the query db and from the write helpers are propagated.
///
/// NOTE(review): every early return leaves the partially written archive and
/// the temporary manifest on disk; nothing in this function cleans them up.
fn run_export_job(
    db: &QueryDb,
    repo_bytes: &ExternalRepoBytesDb,
    mut job: ExportJobRecord,
    output_path: PathBuf,
    object_instance_ids: Vec<String>,
) -> Result<ExportJobRecord, ApiError> {
    if let Some(parent) = output_path.parent() {
        fs::create_dir_all(parent).map_err(|err| ApiError::new(500, err.to_string()))?;
    }
    // The manifest is only complete after the last object, so it is built in a
    // temporary file next to the archive and appended as the final entry.
    let manifest_path = output_path.with_extension("manifest.json.tmp");
    let mut file = File::create(&output_path).map_err(|err| ApiError::new(500, err.to_string()))?;
    let mut manifest =
        File::create(&manifest_path).map_err(|err| ApiError::new(500, err.to_string()))?;
    write_manifest_prefix(&mut manifest, &job)?;
    let mut object_count = 0u64;

    match job.scope.as_str() {
        "repo" => {
            // A job without a repo id queries with the empty string.
            let repo_id = job.repo_id.as_deref().unwrap_or_default();
            let mut cursor = None;
            // Follow `next_cursor` until the listing reports no further page.
            loop {
                let page = db.list_objects_for_repo(
                    &job.run_id,
                    repo_id,
                    EXPORT_PAGE_LIMIT,
                    cursor.as_deref(),
                )?;
                for object in page.data {
                    write_export_object(
                        repo_bytes,
                        &mut file,
                        &mut manifest,
                        &mut object_count,
                        &object,
                    )?;
                }
                let Some(next_cursor) = page.next_cursor else {
                    break;
                };
                cursor = Some(next_cursor);
            }
        }
        "publication_point" | "publicationPoint" => {
            // Same paging loop as the "repo" scope, keyed by publication point.
            let pp_id = job.pp_id.as_deref().unwrap_or_default();
            let mut cursor = None;
            loop {
                let page = db.list_objects_for_pp(
                    &job.run_id,
                    pp_id,
                    EXPORT_PAGE_LIMIT,
                    cursor.as_deref(),
                )?;
                for object in page.data {
                    write_export_object(
                        repo_bytes,
                        &mut file,
                        &mut manifest,
                        &mut object_count,
                        &object,
                    )?;
                }
                let Some(next_cursor) = page.next_cursor else {
                    break;
                };
                cursor = Some(next_cursor);
            }
        }
        "object_set" | "objectSet" => {
            // NOTE(review): this limit is checked after both output files have
            // already been created above.
            if object_instance_ids.len() > EXPORT_OBJECT_SET_MAX {
                return Err(ApiError::new(
                    400,
                    format!("objectSet export accepts at most {EXPORT_OBJECT_SET_MAX} objects"),
                ));
            }
            for object_instance_id in object_instance_ids {
                let object = db
                    .get_object_by_instance_id(&job.run_id, &object_instance_id)?
                    .ok_or_else(|| {
                        ApiError::new(404, format!("object not found: {object_instance_id}"))
                    })?;
                write_export_object(
                    repo_bytes,
                    &mut file,
                    &mut manifest,
                    &mut object_count,
                    &object,
                )?;
            }
        }
        other => {
            return Err(ApiError::new(
                400,
                format!("unsupported export scope: {other}"),
            ));
        }
    }
    write_manifest_suffix(&mut manifest, object_count)?;
    manifest
        .flush()
        .map_err(|err| ApiError::new(500, err.to_string()))?;
    // Close the manifest handle before it is re-read from its path.
    drop(manifest);
    write_tar_file_entry(&mut file, "manifest.json", &manifest_path)?;
    // Best effort: a leftover temporary manifest does not fail the export.
    let _ = fs::remove_file(&manifest_path);
    // 1024 zero bytes: the two empty 512-byte blocks that end a tar archive.
    file.write_all(&[0u8; 1024])
        .map_err(|err| ApiError::new(500, err.to_string()))?;
    file.flush()
        .map_err(|err| ApiError::new(500, err.to_string()))?;
    // The end-of-file offset is the total size of the archive.
    let bytes_written = file
        .seek(SeekFrom::End(0))
        .map_err(|err| ApiError::new(500, err.to_string()))?;
    job.status = "complete".to_string();
    job.finished_at = Some(now_rfc3339());
    job.object_count = object_count;
    job.bytes_written = bytes_written;
    Ok(job)
}
*file, &job.pp_id) + .map_err(|err| ApiError::new(500, err.to_string()))?; + write!(file, ",\"objects\":[").map_err(|err| ApiError::new(500, err.to_string()))?; + Ok(()) +} + +fn write_manifest_suffix(file: &mut File, object_count: u64) -> Result<(), ApiError> { + write!(file, "],\"objectCount\":{object_count}}}") + .map_err(|err| ApiError::new(500, err.to_string())) +} + +fn write_export_object( + repo_bytes: &ExternalRepoBytesDb, + tar_file: &mut File, + manifest_file: &mut File, + object_count: &mut u64, + object: &ObjectInstanceRecord, +) -> Result<(), ApiError> { + if !is_sha256_hex(&object.sha256) { + return Err(ApiError::new( + 500, + format!("object has invalid sha256: {}", object.uri), + )); + } + let bytes = repo_bytes + .get_blob_bytes(&object.sha256) + .map_err(|err| ApiError::new(500, err.to_string()))? + .ok_or_else(|| ApiError::new(500, format!("bytes missing for {}", object.uri)))?; + write_tar_entry(tar_file, &export_object_path(object), &bytes)?; + if *object_count > 0 { + write!(manifest_file, ",").map_err(|err| ApiError::new(500, err.to_string()))?; + } + serde_json::to_writer( + manifest_file, + &json!({ + "uri": object.uri, + "sha256": object.sha256, + "objectType": object.object_type, + "result": object.result, + "sourceSection": object.source_section, + }), + ) + .map_err(|err| ApiError::new(500, err.to_string()))?; + *object_count += 1; + Ok(()) +} + +fn write_tar_entry(file: &mut File, path: &str, bytes: &[u8]) -> Result<(), ApiError> { + let name = normalize_tar_path(path); + let mut header = [0u8; 512]; + let name_bytes = name.as_bytes(); + let name_len = name_bytes.len().min(100); + header[..name_len].copy_from_slice(&name_bytes[..name_len]); + write_octal(&mut header[100..108], 0o644); + write_octal(&mut header[108..116], 0); + write_octal(&mut header[116..124], 0); + write_octal(&mut header[124..136], bytes.len() as u64); + write_octal(&mut header[136..148], 0); + for byte in &mut header[148..156] { + *byte = b' '; + } + header[156] = 
b'0'; + header[257..263].copy_from_slice(b"ustar\0"); + header[263..265].copy_from_slice(b"00"); + let checksum: u64 = header.iter().map(|byte| *byte as u64).sum(); + write_checksum(&mut header[148..156], checksum); + file.write_all(&header) + .map_err(|err| ApiError::new(500, err.to_string()))?; + file.write_all(bytes) + .map_err(|err| ApiError::new(500, err.to_string()))?; + let padding = (512 - (bytes.len() % 512)) % 512; + if padding > 0 { + file.write_all(&vec![0u8; padding]) + .map_err(|err| ApiError::new(500, err.to_string()))?; + } + Ok(()) +} + +fn write_tar_file_entry( + file: &mut File, + path: &str, + source_path: &PathBuf, +) -> Result<(), ApiError> { + let name = normalize_tar_path(path); + let size = fs::metadata(source_path) + .map_err(|err| ApiError::new(500, err.to_string()))? + .len(); + let mut header = [0u8; 512]; + let name_bytes = name.as_bytes(); + let name_len = name_bytes.len().min(100); + header[..name_len].copy_from_slice(&name_bytes[..name_len]); + write_octal(&mut header[100..108], 0o644); + write_octal(&mut header[108..116], 0); + write_octal(&mut header[116..124], 0); + write_octal(&mut header[124..136], size); + write_octal(&mut header[136..148], 0); + for byte in &mut header[148..156] { + *byte = b' '; + } + header[156] = b'0'; + header[257..263].copy_from_slice(b"ustar\0"); + header[263..265].copy_from_slice(b"00"); + let checksum: u64 = header.iter().map(|byte| *byte as u64).sum(); + write_checksum(&mut header[148..156], checksum); + file.write_all(&header) + .map_err(|err| ApiError::new(500, err.to_string()))?; + let mut source = File::open(source_path).map_err(|err| ApiError::new(500, err.to_string()))?; + std::io::copy(&mut source, file).map_err(|err| ApiError::new(500, err.to_string()))?; + let padding = (512 - (size as usize % 512)) % 512; + if padding > 0 { + file.write_all(&vec![0u8; padding]) + .map_err(|err| ApiError::new(500, err.to_string()))?; + } + Ok(()) +} + +fn write_octal(slot: &mut [u8], value: u64) { + let 
width = slot.len(); + let text = format!("{value:0width$o}\0", width = width - 1); + slot.copy_from_slice(&text.as_bytes()[..width]); +} + +fn write_checksum(slot: &mut [u8], value: u64) { + let text = format!("{value:06o}\0 ",); + slot.copy_from_slice(&text.as_bytes()[..slot.len()]); +} + +fn normalize_tar_path(path: &str) -> String { + let mut out = path + .replace("://", "/") + .chars() + .map(|ch| match ch { + 'A'..='Z' | 'a'..='z' | '0'..='9' | '.' | '_' | '-' | '/' => ch, + _ => '_', + }) + .collect::(); + while out.contains("//") { + out = out.replace("//", "/"); + } + out.trim_start_matches('/').chars().take(100).collect() +} + +fn export_object_path(object: &ObjectInstanceRecord) -> String { + let ext = object + .uri + .rsplit_once('.') + .map(|(_, ext)| ext) + .filter(|ext| ext.len() <= 5) + .unwrap_or("bin"); + format!( + "objects/{}_{}.{}", + object.object_instance_id, + object.sha256.chars().take(16).collect::(), + ext + ) +} + +fn projection_array_page( + db: &QueryDb, + repo_bytes: Option<&ExternalRepoBytesDb>, + run_id: &str, + object_instance_id: &str, + path: &[&str], + list_kind: &str, + limit: usize, + offset: usize, +) -> Result { + let object = db + .get_object_by_instance_id(run_id, object_instance_id)? + .ok_or_else(|| ApiError::new(404, "object not found"))?; + let (total, data) = + match projection_list_from_repo_bytes(repo_bytes, &object, list_kind, offset, limit)? { + Some(page) => page, + None => { + let projection = projection_for_explain(db, repo_bytes, &object)? 
+ .ok_or_else(|| ApiError::new(404, "object projection not found"))?; + let entries = json_path(&projection.projection, path) + .and_then(Value::as_array) + .ok_or_else(|| ApiError::new(404, "projection list not found"))?; + let end = (offset + limit).min(entries.len()); + ( + entries.len(), + entries[offset.min(entries.len())..end].to_vec(), + ) + } + }; + let end = (offset + data.len()).min(total); + let next_cursor = if end < total { + Some(end.to_string()) + } else { + None + }; + Ok(json!({ + "data": data, + "page": {"nextCursor": next_cursor, "limit": limit}, + "meta": meta(Some(run_id.to_string())), + })) +} + +const EXPLAIN_VERSION: u32 = 1; + +fn validation_explain( + db: &QueryDb, + repo_bytes: Option<&ExternalRepoBytesDb>, + run_id: &str, + object_instance_id: &str, + force_refresh: bool, +) -> Result { + if !force_refresh + && let Some(cached) = + db.get_validation_explain(run_id, object_instance_id, EXPLAIN_VERSION)? + { + return Ok(cached); + } + let object = db + .get_object_by_instance_id(run_id, object_instance_id)? + .ok_or_else(|| ApiError::new(404, "object not found"))?; + let projection = projection_for_explain(db, repo_bytes, &object)?; + let parsevalidate = parsevalidate_summary(&object, projection.as_ref()); + let chain_edges = build_chain_edges(db, run_id, &object, projection.as_ref())?; + let chainvalidate = json!({ + "status": final_status(&object), + "mode": "audit-context", + "issues": issues_for_object(&object), + "edgesCount": chain_edges.len(), + "note": "M7 explain uses run audit state and parse projection only; full chain revalidation is deferred." 
+ }); + let explain = ValidationExplainRecord { + schema_version: 1, + explain_version: EXPLAIN_VERSION, + run_id: run_id.to_string(), + object_instance_id: object.object_instance_id.clone(), + uri: object.uri.clone(), + sha256: object.sha256.clone(), + object_type: object.object_type.clone(), + final_status: final_status(&object).to_string(), + audit_result: object.result.clone(), + detail_summary: object.detail_summary.clone(), + authoritative: false, + explain_mode: "audit_projection".to_string(), + generated_at: now_rfc3339(), + parsevalidate, + chainvalidate, + chain_edges, + }; + db.put_validation_explain(&explain)?; + Ok(explain) +} + +fn chain_edges_for_object( + db: &QueryDb, + repo_bytes: Option<&ExternalRepoBytesDb>, + run_id: &str, + object_instance_id: &str, +) -> Result, ApiError> { + let object = db + .get_object_by_instance_id(run_id, object_instance_id)? + .ok_or_else(|| ApiError::new(404, "object not found"))?; + let projection = projection_for_explain(db, repo_bytes, &object)?; + build_chain_edges(db, run_id, &object, projection.as_ref()) +} + +fn projection_for_explain( + db: &QueryDb, + repo_bytes: Option<&ExternalRepoBytesDb>, + object: &ObjectInstanceRecord, +) -> Result, ApiError> { + if !is_sha256_hex(&object.sha256) { + return Ok(None); + } + if let Some(projection) = db.get_object_projection(&object.sha256)? { + return Ok(Some(projection)); + } + let Some(repo_bytes) = repo_bytes else { + return Ok(None); + }; + let Some(bytes) = repo_bytes + .get_blob_bytes(&object.sha256) + .map_err(|err| ApiError::new(500, err.to_string()))? 
+ else { + return Ok(None); + }; + let projection = rpki::object_projection::build_object_projection( + object_type_for_projection(&object.object_type), + std::path::Path::new(&object.uri), + &bytes, + 100, + ); + db.put_object_projection(&projection)?; + Ok(Some(projection)) +} + +fn global_projection_for_hash( + db: &QueryDb, + repo_bytes: Option<&ExternalRepoBytesDb>, + sha256: &str, +) -> Result, ApiError> { + if let Some(projection) = db.get_object_projection(sha256)? { + return Ok(Some(projection)); + } + let Some(run_id) = db.latest_ready_run()? else { + return Ok(None); + }; + let Some(object) = db.get_object_by_sha256(&run_id, sha256)? else { + return Ok(None); + }; + projection_for_explain(db, repo_bytes, &object) +} + +fn validation_summary(object: &ObjectInstanceRecord) -> Value { + json!({ + "objectInstanceId": object.object_instance_id, + "uri": object.uri, + "sha256": object.sha256, + "objectType": object.object_type, + "finalStatus": final_status(object), + "auditResult": object.result, + "detailSummary": object.detail_summary, + "rejected": object.rejected, + "rejectReason": object.reject_reason, + "sourceSection": object.source_section, + "fileValidation": file_validation_summary(object), + "chainValidation": chain_validation_summary(object), + "explainAvailable": true, + "authoritative": false + }) +} + +fn file_validation_summary(object: &ObjectInstanceRecord) -> Value { + json!({ + "status": final_status(object), + "stage": "audit_result", + "issues": issues_for_object(object), + "detailSummary": object.detail_summary, + }) +} + +fn chain_validation_summary(object: &ObjectInstanceRecord) -> Value { + json!({ + "status": final_status(object), + "stage": "audit_context", + "issues": issues_for_object(object), + "edgesPage": {"nextCursor": null, "limit": 100}, + "note": "Use POST validation/explain for cached audit projection details." 
+ }) +} + +fn parsevalidate_summary( + object: &ObjectInstanceRecord, + projection: Option<&rpki::object_projection::ObjectProjectionRecord>, +) -> Value { + let projection_status = projection + .map(|record| record.parse_status.as_str()) + .unwrap_or("unavailable"); + let projection_error = projection.and_then(|record| record.error_summary.as_deref()); + let status = match projection_status { + "ok" if !object.rejected && object.result == "ok" => "valid", + "ok" if object.rejected || object.result == "error" => "warning", + "error" => "invalid", + _ if object.result == "error" || object.rejected => "invalid", + _ => "unknown", + }; + json!({ + "status": status, + "projectionStatus": projection_status, + "issues": issues_for_object(object), + "projectionError": projection_error, + "projection": projection.map(|record| record.projection.clone()), + }) +} + +fn build_chain_edges( + db: &QueryDb, + run_id: &str, + object: &ObjectInstanceRecord, + projection: Option<&rpki::object_projection::ObjectProjectionRecord>, +) -> Result, ApiError> { + let mut edges = Vec::new(); + if let Some(pp) = db.get_publication_point(run_id, &object.pp_id)? 
+ && let Some(manifest_uri) = pp.manifest_rsync_uri + && manifest_uri != object.uri + { + let manifest = db.get_object_by_uri(run_id, &manifest_uri)?; + edges.push(ChainEdgeRecord { + relation: "publication_point_manifest".to_string(), + from_uri: object.uri.clone(), + to_uri: manifest_uri, + to_object_instance_id: manifest + .as_ref() + .map(|item| item.object_instance_id.clone()), + to_sha256: manifest.as_ref().map(|item| item.sha256.clone()), + status: if manifest.is_some() { + "linked".to_string() + } else { + "missing".to_string() + }, + evidence: json!({"ppId": object.pp_id}), + }); + } + if let Some(uri) = first_signed_object_uri(projection) { + let target = db.get_object_by_uri(run_id, &uri)?; + edges.push(ChainEdgeRecord { + relation: "embedded_ee_sia_signed_object".to_string(), + from_uri: object.uri.clone(), + to_uri: uri, + to_object_instance_id: target.as_ref().map(|item| item.object_instance_id.clone()), + to_sha256: target.as_ref().map(|item| item.sha256.clone()), + status: if target.is_some() { + "linked".to_string() + } else { + "missing".to_string() + }, + evidence: json!({"source": "projection.signedObject.certificates[0].resourceCertificate.extensions.subjectInfoAccess"}), + }); + } + Ok(edges) +} + +fn first_signed_object_uri( + projection: Option<&rpki::object_projection::ObjectProjectionRecord>, +) -> Option { + projection + .and_then(|record| { + record + .projection + .get("object")? + .get("signedObject")? + .get("signedData")? + .get("certificates")? + .as_array()? + .first()? + .get("resourceCertificate")? + .get("extensions")? + .get("subjectInfoAccess")? + .get("signedObjectUris")? + .as_array()? + .first()? 
/// Derives a machine-friendly code from a free-text reason.
///
/// ASCII letters and digits are kept (lower-cased). Every run of other
/// characters becomes a single `_`, with none at the start or the end. The
/// result is cut to 96 characters, and `"unknown"` is returned when nothing
/// remains.
fn stable_reason_code(reason: &str) -> String {
    let mut code = String::with_capacity(reason.len());
    // Set while skipping a run of separator characters; the `_` is emitted
    // lazily so that leading and trailing runs produce nothing.
    let mut gap_pending = false;
    for ch in reason.chars() {
        if !ch.is_ascii_alphanumeric() {
            gap_pending = true;
            continue;
        }
        if gap_pending && !code.is_empty() {
            code.push('_');
        }
        gap_pending = false;
        code.push(ch.to_ascii_lowercase());
    }
    if code.is_empty() {
        return "unknown".to_string();
    }
    // The code is pure ASCII, so a byte cut equals a character cut.
    code.truncate(96);
    code
}
/// Decodes one URL query component.
///
/// `+` becomes a space and `%XX` becomes the byte with hexadecimal value `XX`.
/// A `%` with fewer than two characters after it is kept literally.
///
/// Returns `None` when a complete `%XX` escape contains a non-hex character
/// or when the decoded bytes are not valid UTF-8.
fn percent_decode(value: &str) -> Option<String> {
    // Value of one hexadecimal digit, or `None` for any other byte.
    let nibble = |byte: u8| char::from(byte).to_digit(16).map(|digit| digit as u8);

    let raw = value.as_bytes();
    let mut decoded = Vec::with_capacity(raw.len());
    let mut pos = 0usize;
    while let Some(&byte) = raw.get(pos) {
        pos += 1;
        // The two characters after a `%`, when both are present.
        let escape = if byte == b'%' {
            raw.get(pos..pos + 2)
        } else {
            None
        };
        match (byte, escape) {
            (b'+', _) => decoded.push(b' '),
            (_, Some(pair)) => {
                decoded.push((nibble(pair[0])? << 4) | nibble(pair[1])?);
                pos += 2;
            }
            _ => decoded.push(byte),
        }
    }
    String::from_utf8(decoded).ok()
}
/// Serializes a complete HTTP/1.1 response with a binary body.
///
/// The response consists of the status line, the `Content-Type`,
/// `Content-Length` and `Connection: close` headers, a blank line and then
/// `body` verbatim.
///
/// The reason phrase covers the same status codes as `json_response` (200,
/// 400, 404, 405, 409), so a non-error status such as 409 is no longer
/// labelled "Internal Server Error". Any other status keeps that fallback.
fn binary_response(status: u16, content_type: &str, body: Vec<u8>) -> Vec<u8> {
    let reason = match status {
        200 => "OK",
        400 => "Bad Request",
        404 => "Not Found",
        405 => "Method Not Allowed",
        409 => "Conflict",
        _ => "Internal Server Error",
    };
    let head = format!(
        "HTTP/1.1 {status} {reason}\r\nContent-Type: {content_type}\r\nContent-Length: {}\r\nConnection: close\r\n\r\n",
        body.len()
    );
    // Size the buffer once for head and body instead of growing it on extend.
    let mut response = Vec::with_capacity(head.len() + body.len());
    response.extend_from_slice(head.as_bytes());
    response.extend(body);
    response
}
test_export_jobs() -> ExportJobStore { + Arc::new(Mutex::new(BTreeMap::new())) + } + + fn test_route_request( + db: &QueryDb, + repo_bytes: Option<&ExternalRepoBytesDb>, + target: &str, + ) -> Result { + route_request(db, repo_bytes, &test_export_jobs(), target) + } + + fn test_route_raw_request( + db: &QueryDb, + repo_bytes: Option<&ExternalRepoBytesDb>, + target: &str, + ) -> Option, ApiError>> { + route_raw_request(db, repo_bytes, &test_export_jobs(), target) + } + + #[test] + fn parse_query_decodes_limit_and_cursor() { + let query = parse_query("limit=10&cursor=objinst%2Frun_1%2Fabc"); + assert_eq!(limit(&query), 10); + assert_eq!(cursor(&query), Some("objinst/run_1/abc")); + } + + #[test] + fn parse_args_accepts_watcher_and_rejects_invalid_input() { + let argv = [ + "rpki_query_service", + "--query-db", + "query.db", + "--repo-bytes-db", + "repo-bytes.db", + "--export-root", + "exports", + "--listen", + "127.0.0.1:19560", + "--watch-run-root", + "runs", + "--watch-interval-secs", + "2", + "--watch-min-run-seq", + "42", + "--watch-backfill", + "--projection-entry-limit", + "9", + "--indexer-bin", + "bin/rpki_query_indexer", + "--retain-indexed-runs", + "3", + ] + .iter() + .map(|value| value.to_string()) + .collect::>(); + let args = parse_args(&argv).expect("args"); + assert_eq!(args.query_db, PathBuf::from("query.db")); + assert_eq!(args.repo_bytes_db, Some(PathBuf::from("repo-bytes.db"))); + assert_eq!(args.export_root, PathBuf::from("exports")); + assert_eq!(args.listen, "127.0.0.1:19560"); + assert_eq!(args.watch_run_root, Some(PathBuf::from("runs"))); + assert_eq!(args.watch_interval_secs, 2); + assert_eq!(args.watch_min_run_seq, Some(42)); + assert!(args.watch_backfill); + assert_eq!(args.projection_entry_limit, 9); + assert_eq!( + args.indexer_bin, + Some(PathBuf::from("bin/rpki_query_indexer")) + ); + assert_eq!(args.retain_indexed_runs, 3); + + let default_args = parse_args(&[ + "rpki_query_service".to_string(), + "--query-db".to_string(), + 
"query.db".to_string(), + ]) + .expect("default args"); + assert_eq!(default_args.export_root, PathBuf::from("query.exports")); + assert_eq!(default_args.listen, "127.0.0.1:9560"); + assert_eq!(default_args.watch_interval_secs, 10); + assert_eq!(default_args.watch_min_run_seq, None); + assert!(!default_args.watch_backfill); + assert_eq!(default_args.projection_entry_limit, 50); + assert_eq!(default_args.indexer_bin, None); + assert_eq!(default_args.retain_indexed_runs, 10); + + assert!( + parse_args(&["rpki_query_service".to_string()]) + .unwrap_err() + .contains("--query-db") + ); + assert!( + parse_args(&[ + "rpki_query_service".to_string(), + "--query-db".to_string(), + "query.db".to_string(), + "--watch-interval-secs".to_string(), + "bad".to_string(), + ]) + .unwrap_err() + .contains("invalid --watch-interval-secs") + ); + assert!( + parse_args(&[ + "rpki_query_service".to_string(), + "--query-db".to_string(), + "query.db".to_string(), + "--projection-entry-limit".to_string(), + "bad".to_string(), + ]) + .unwrap_err() + .contains("invalid --projection-entry-limit") + ); + assert!( + parse_args(&[ + "rpki_query_service".to_string(), + "--query-db".to_string(), + "query.db".to_string(), + "--retain-indexed-runs".to_string(), + "bad".to_string(), + ]) + .unwrap_err() + .contains("invalid --retain-indexed-runs") + ); + assert!( + parse_args(&[ + "rpki_query_service".to_string(), + "--query-db".to_string(), + "query.db".to_string(), + "--unknown".to_string(), + ]) + .unwrap_err() + .contains("unknown argument") + ); + } + + #[test] + fn resolve_effective_watch_min_run_seq_defaults_to_next_run_without_backfill() { + let temp = tempfile::tempdir().expect("tempdir"); + let runs_root = temp.path().join("runs"); + fs::create_dir_all(runs_root.join("run_0003")).expect("run_0003"); + fs::write(runs_root.join("run_0003/report.json"), "{}").expect("report"); + fs::write( + runs_root.join("run_0003/run-summary.json"), + r#"{"status":"success"}"#, + ) + .expect("summary"); + 
fs::create_dir_all(runs_root.join("run_0004")).expect("run_0004"); + fs::write(runs_root.join("run_0004/report.json"), "{}").expect("report"); + fs::write( + runs_root.join("run_0004/run-summary.json"), + r#"{"status":"failed"}"#, + ) + .expect("summary"); + + assert_eq!( + resolve_effective_watch_min_run_seq(temp.path(), None, false).expect("min seq"), + Some(4) + ); + assert_eq!( + resolve_effective_watch_min_run_seq(temp.path(), Some(2), false).expect("min seq"), + Some(2) + ); + assert_eq!( + resolve_effective_watch_min_run_seq(temp.path(), None, true).expect("min seq"), + None + ); + } + + #[test] + fn route_request_exposes_run_navigation_stats_and_object_details() { + let temp = tempfile::tempdir().expect("tempdir"); + let run_dir = temp.path().join("runs/run_0001"); + fs::create_dir_all(&run_dir).expect("run dir"); + let object_bytes = + fs::read("tests/fixtures/repository/rpki.cernet.net/repo/cernet/0/AS4538.roa") + .expect("fixture roa"); + let object_sha = hex::encode(Sha256::digest(&object_bytes)); + write_sample_run(&run_dir, &object_sha); + + let repo_bytes_path = temp.path().join("repo-bytes.db"); + let repo_bytes = ExternalRepoBytesDb::open(&repo_bytes_path).expect("repo bytes"); + repo_bytes + .put_blob_bytes_batch(&[(object_sha.clone(), object_bytes)]) + .expect("put bytes"); + drop(repo_bytes); + + let query_db_path = temp.path().join("query-db"); + index_artifacts(&ArtifactIndexerConfig { + query_db_path: query_db_path.clone(), + run_root: Some(temp.path().to_path_buf()), + run_dir: None, + repo_bytes_db_path: Some(repo_bytes_path.clone()), + projection_entry_limit: 5, + min_run_seq: None, + retain_indexed_runs: None, + }) + .expect("index artifacts"); + let db = QueryDb::open(&query_db_path).expect("query db"); + let repo_bytes = ExternalRepoBytesDb::open(&repo_bytes_path).expect("repo bytes"); + let repo = db + .list_repos("run_0001", 1, None) + .expect("repos") + .data + .into_iter() + .next() + .expect("repo"); + let pp = db + 
.list_publication_points("run_0001", 1, None) + .expect("pps") + .data + .into_iter() + .next() + .expect("pp"); + let object = db + .list_objects("run_0001", 1, None) + .expect("objects") + .data + .into_iter() + .next() + .expect("object"); + + assert_eq!( + test_route_request(&db, Some(&repo_bytes), "/api/v1").expect("root")["data"]["service"] + .as_str(), + Some("rpki_query_service") + ); + assert_eq!( + test_route_request(&db, Some(&repo_bytes), "/api/v1/runs").expect("runs")["data"] + .as_array() + .expect("runs") + .len(), + 1 + ); + assert_eq!( + test_route_request(&db, Some(&repo_bytes), "/api/v1/runs/latest") + .expect("latest")["data"]["runId"] + .as_str(), + Some("run_0001") + ); + assert_eq!( + test_route_request(&db, Some(&repo_bytes), "/api/v1/latest_run") + .expect("latest_run")["data"]["runId"] + .as_str(), + Some("run_0001") + ); + assert_eq!( + test_route_request(&db, Some(&repo_bytes), "/api/v1/runs/run_0001") + .expect("run")["data"]["counts"]["objects"] + .as_u64(), + Some(1) + ); + assert!( + test_route_request(&db, Some(&repo_bytes), "/api/v1/runs/latest/artifacts") + .expect("artifacts")["data"]["report.json"] + .as_str() + .is_some() + ); + assert_eq!( + test_route_request(&db, Some(&repo_bytes), "/api/v1/runs/latest/summary") + .expect("summary")["data"]["objects"] + .as_u64(), + Some(1) + ); + + for target in [ + "/api/v1/runs/latest/repos", + &format!("/api/v1/runs/latest/repos/{}", repo.repo_id), + &format!("/api/v1/runs/latest/repos/{}/stats", repo.repo_id), + &format!( + "/api/v1/runs/latest/repos/{}/publication-points", + repo.repo_id + ), + &format!("/api/v1/runs/latest/repos/{}/objects", repo.repo_id), + "/api/v1/runs/latest/publication-points", + &format!("/api/v1/runs/latest/publication-points/{}", pp.pp_id), + &format!("/api/v1/runs/latest/publication-points/{}/stats", pp.pp_id), + &format!( + "/api/v1/runs/latest/publication-points/{}/objects", + pp.pp_id + ), + "/api/v1/runs/latest/objects", + &format!( + 
"/api/v1/runs/latest/objects/by-uri?uri={}", + percent_encode_for_test(&object.uri) + ), + &format!("/api/v1/runs/latest/objects/{}", object.object_instance_id), + &format!( + "/api/v1/runs/latest/objects/{}/parsed", + object.object_instance_id + ), + &format!( + "/api/v1/runs/latest/objects/{}/validation/file", + object.object_instance_id + ), + &format!( + "/api/v1/runs/latest/objects/{}/validation/chain", + object.object_instance_id + ), + "/api/v1/runs/latest/stats/overview", + "/api/v1/runs/latest/stats/repos", + "/api/v1/runs/latest/stats/publication-points", + "/api/v1/runs/latest/stats/object-types", + "/api/v1/runs/latest/stats/validation", + "/api/v1/runs/latest/stats/reasons", + "/api/v1/runs/latest/stats/downloads", + "/api/v1/runs/latest/stats/validation-events?name=manifest", + "/api/v1/runs/latest/stats/objects?name=by_source", + &format!("/api/v1/objects/{object_sha}"), + ] { + let value = test_route_request(&db, Some(&repo_bytes), target).unwrap_or_else(|err| { + panic!("route failed for {target}: {} {}", err.status, err.message) + }); + assert!(value.get("meta").is_some(), "{target}"); + } + + assert_eq!( + test_route_request(&db, Some(&repo_bytes), "/api/v1/runs/latest/objects/by-uri") + .unwrap_err() + .status, + 400 + ); + assert_eq!( + test_route_request(&db, Some(&repo_bytes), "/api/v1/runs/latest/repos/nope") + .unwrap_err() + .status, + 404 + ); + } + + #[test] + fn raw_route_downloads_object_bytes_from_repo_bytes_db() { + let temp = tempfile::tempdir().expect("tempdir"); + let run_dir = temp.path().join("runs/run_0001"); + fs::create_dir_all(&run_dir).expect("run dir"); + let object_bytes = + fs::read("tests/fixtures/repository/rpki.cernet.net/repo/cernet/0/AS4538.roa") + .expect("fixture roa"); + let object_sha = hex::encode(Sha256::digest(&object_bytes)); + write_sample_run(&run_dir, &object_sha); + + let repo_bytes_path = temp.path().join("repo-bytes.db"); + let repo_bytes = ExternalRepoBytesDb::open(&repo_bytes_path).expect("repo 
bytes"); + repo_bytes + .put_blob_bytes_batch(&[(object_sha.clone(), object_bytes.clone())]) + .expect("put bytes"); + drop(repo_bytes); + + let query_db_path = temp.path().join("query-db"); + index_artifacts(&ArtifactIndexerConfig { + query_db_path: query_db_path.clone(), + run_root: Some(temp.path().to_path_buf()), + run_dir: None, + repo_bytes_db_path: Some(repo_bytes_path.clone()), + projection_entry_limit: 5, + min_run_seq: None, + retain_indexed_runs: None, + }) + .expect("index artifacts"); + let db = QueryDb::open(&query_db_path).expect("query db"); + let repo_bytes = ExternalRepoBytesDb::open(&repo_bytes_path).expect("repo bytes"); + let object = db + .list_objects("run_0001", 1, None) + .expect("objects") + .data + .into_iter() + .next() + .expect("object"); + + let response = test_route_raw_request( + &db, + Some(&repo_bytes), + &format!( + "/api/v1/runs/latest/objects/{}/raw", + object.object_instance_id + ), + ) + .expect("raw route") + .expect("raw response"); + let separator = response + .windows(4) + .position(|window| window == b"\r\n\r\n") + .expect("header separator"); + assert_eq!(&response[separator + 4..], object_bytes.as_slice()); + } + + #[test] + fn export_job_writes_tar_with_manifest_and_object() { + let temp = tempfile::tempdir().expect("tempdir"); + let run_dir = temp.path().join("runs/run_0001"); + fs::create_dir_all(&run_dir).expect("run dir"); + let object_bytes = + fs::read("tests/fixtures/repository/rpki.cernet.net/repo/cernet/0/AS4538.roa") + .expect("fixture roa"); + let object_sha = hex::encode(Sha256::digest(&object_bytes)); + write_sample_run(&run_dir, &object_sha); + let repo_bytes_path = temp.path().join("repo-bytes.db"); + let repo_bytes = ExternalRepoBytesDb::open(&repo_bytes_path).expect("repo bytes"); + repo_bytes + .put_blob_bytes_batch(&[(object_sha, object_bytes)]) + .expect("put bytes"); + drop(repo_bytes); + let query_db_path = temp.path().join("query-db"); + index_artifacts(&ArtifactIndexerConfig { + query_db_path: 
query_db_path.clone(), + run_root: Some(temp.path().to_path_buf()), + run_dir: None, + repo_bytes_db_path: Some(repo_bytes_path.clone()), + projection_entry_limit: 5, + min_run_seq: None, + retain_indexed_runs: None, + }) + .expect("index artifacts"); + let db = QueryDb::open(&query_db_path).expect("query db"); + let repo_bytes = ExternalRepoBytesDb::open(&repo_bytes_path).expect("repo bytes"); + let repo = db + .list_repos("run_0001", 1, None) + .expect("repos") + .data + .into_iter() + .next() + .expect("repo"); + let job = ExportJobRecord { + schema_version: 1, + job_id: "job1".to_string(), + run_id: "run_0001".to_string(), + scope: "repo".to_string(), + repo_id: Some(repo.repo_id), + pp_id: None, + status: "running".to_string(), + created_at: now_rfc3339(), + finished_at: None, + output_path: Some(temp.path().join("export.tar").display().to_string()), + object_count: 0, + bytes_written: 0, + error: None, + }; + let output_path = temp.path().join("export.tar"); + let final_job = + run_export_job(&db, &repo_bytes, job, output_path.clone(), Vec::new()).expect("export"); + assert_eq!(final_job.status, "complete"); + assert_eq!(final_job.object_count, 1); + let tar = fs::read(output_path).expect("tar"); + assert!( + tar.windows("manifest.json".len()) + .any(|window| window == b"manifest.json") + ); + assert!( + tar.windows("AS4538.roa".len()) + .any(|window| window == b"AS4538.roa") + || tar.windows(".roa".len()).any(|window| window == b".roa") + ); + } + + #[test] + fn post_export_route_creates_and_completes_repo_job() { + let temp = tempfile::tempdir().expect("tempdir"); + let run_dir = temp.path().join("runs/run_0001"); + fs::create_dir_all(&run_dir).expect("run dir"); + let object_bytes = + fs::read("tests/fixtures/repository/rpki.cernet.net/repo/cernet/0/AS4538.roa") + .expect("fixture roa"); + let object_sha = hex::encode(Sha256::digest(&object_bytes)); + write_sample_run(&run_dir, &object_sha); + let repo_bytes_path = temp.path().join("repo-bytes.db"); + let 
repo_bytes = ExternalRepoBytesDb::open(&repo_bytes_path).expect("repo bytes"); + repo_bytes + .put_blob_bytes_batch(&[(object_sha, object_bytes)]) + .expect("put bytes"); + drop(repo_bytes); + let query_db_path = temp.path().join("query-db"); + index_artifacts(&ArtifactIndexerConfig { + query_db_path: query_db_path.clone(), + run_root: Some(temp.path().to_path_buf()), + run_dir: None, + repo_bytes_db_path: Some(repo_bytes_path.clone()), + projection_entry_limit: 5, + min_run_seq: None, + retain_indexed_runs: None, + }) + .expect("index artifacts"); + let db = Arc::new(QueryDb::open(&query_db_path).expect("query db")); + let repo_bytes = Arc::new(ExternalRepoBytesDb::open(&repo_bytes_path).expect("repo bytes")); + let repo = db + .list_repos("run_0001", 1, None) + .expect("repos") + .data + .into_iter() + .next() + .expect("repo"); + let body = + serde_json::to_vec(&json!({"scope":"repo","repo_id": repo.repo_id})).expect("body"); + let response = route_post_request( + Arc::clone(&db), + Some(repo_bytes), + test_export_jobs(), + temp.path().join("exports"), + "/api/v1/runs/latest/exports", + &body, + ) + .expect("post export"); + let job_id = response["data"]["jobId"] + .as_str() + .expect("job id") + .to_string(); + let mut job = None; + for _ in 0..50 { + job = db.get_export_job("run_0001", &job_id).expect("job"); + if job.as_ref().is_some_and(|job| job.status == "complete") { + break; + } + std::thread::sleep(std::time::Duration::from_millis(20)); + } + let job = job.expect("completed job"); + assert_eq!(job.status, "complete"); + assert_eq!(job.object_count, 1); + assert!(job.bytes_written > 0); + } + + #[test] + fn projection_large_lists_page_from_repo_bytes() { + let temp = tempfile::tempdir().expect("tempdir"); + let run_dir = temp.path().join("runs/run_0001"); + fs::create_dir_all(&run_dir).expect("run dir"); + let mft_bytes = fs::read( + "tests/fixtures/repository/rpki.cernet.net/repo/cernet/0/05FC9C5B88506F7C0D3F862C8895BED67E9F8EBA.mft", + ) + 
.expect("fixture mft"); + let mft_sha = hex::encode(Sha256::digest(&mft_bytes)); + write_manifest_sample_run(&run_dir, &mft_sha); + let repo_bytes_path = temp.path().join("repo-bytes.db"); + let repo_bytes = ExternalRepoBytesDb::open(&repo_bytes_path).expect("repo bytes"); + repo_bytes + .put_blob_bytes_batch(&[(mft_sha, mft_bytes)]) + .expect("put bytes"); + drop(repo_bytes); + let query_db_path = temp.path().join("query-db"); + index_artifacts(&ArtifactIndexerConfig { + query_db_path: query_db_path.clone(), + run_root: Some(temp.path().to_path_buf()), + run_dir: None, + repo_bytes_db_path: Some(repo_bytes_path.clone()), + projection_entry_limit: 2, + min_run_seq: None, + retain_indexed_runs: None, + }) + .expect("index artifacts"); + let db = QueryDb::open(&query_db_path).expect("query db"); + let repo_bytes = ExternalRepoBytesDb::open(&repo_bytes_path).expect("repo bytes"); + let object = db + .list_objects("run_0001", 1, None) + .expect("objects") + .data + .into_iter() + .next() + .expect("object"); + let page = projection_array_page( + &db, + Some(&repo_bytes), + "run_0001", + &object.object_instance_id, + &["object", "manifest", "fileList", "entries"], + "manifest-files", + 2, + 2, + ) + .expect("page"); + assert_eq!(page["data"].as_array().expect("data").len(), 2); + assert!(page["page"]["nextCursor"].as_str().is_some()); + } + + #[test] + fn projection_large_lists_can_use_stored_projection_without_repo_bytes() { + let temp = tempfile::tempdir().expect("tempdir"); + let run_dir = temp.path().join("runs/run_0001"); + fs::create_dir_all(&run_dir).expect("run dir"); + let mft_bytes = fs::read( + "tests/fixtures/repository/rpki.cernet.net/repo/cernet/0/05FC9C5B88506F7C0D3F862C8895BED67E9F8EBA.mft", + ) + .expect("fixture mft"); + let mft_sha = hex::encode(Sha256::digest(&mft_bytes)); + write_manifest_sample_run(&run_dir, &mft_sha); + let repo_bytes_path = temp.path().join("repo-bytes.db"); + let repo_bytes = 
ExternalRepoBytesDb::open(&repo_bytes_path).expect("repo bytes"); + repo_bytes + .put_blob_bytes_batch(&[(mft_sha, mft_bytes)]) + .expect("put bytes"); + drop(repo_bytes); + let query_db_path = temp.path().join("query-db"); + index_artifacts(&ArtifactIndexerConfig { + query_db_path: query_db_path.clone(), + run_root: Some(temp.path().to_path_buf()), + run_dir: None, + repo_bytes_db_path: Some(repo_bytes_path.clone()), + projection_entry_limit: 4, + min_run_seq: None, + retain_indexed_runs: None, + }) + .expect("index artifacts"); + let db = QueryDb::open(&query_db_path).expect("query db"); + let repo_bytes = ExternalRepoBytesDb::open(&repo_bytes_path).expect("repo bytes"); + let object = db + .list_objects("run_0001", 1, None) + .expect("objects") + .data + .into_iter() + .next() + .expect("object"); + let projection = projection_for_explain(&db, Some(&repo_bytes), &object) + .expect("prime lazy projection cache") + .expect("projection"); + assert_eq!(projection.sha256, object.sha256); + drop(repo_bytes); + let page = projection_array_page( + &db, + None, + "run_0001", + &object.object_instance_id, + &["object", "manifest", "fileList", "entries"], + "manifest-files", + 2, + 0, + ) + .expect("page"); + assert_eq!(page["data"].as_array().expect("data").len(), 2); + } + + #[test] + fn post_and_raw_routes_return_expected_error_statuses() { + let temp = tempfile::tempdir().expect("tempdir"); + let run_dir = temp.path().join("runs/run_0001"); + fs::create_dir_all(&run_dir).expect("run dir"); + write_sample_run(&run_dir, &"1".repeat(64)); + let query_db_path = temp.path().join("query-db"); + index_artifacts(&ArtifactIndexerConfig { + query_db_path: query_db_path.clone(), + run_root: Some(temp.path().to_path_buf()), + run_dir: None, + repo_bytes_db_path: None, + projection_entry_limit: 5, + min_run_seq: None, + retain_indexed_runs: None, + }) + .expect("index artifacts"); + let db = Arc::new(QueryDb::open(&query_db_path).expect("query db")); + let object = db + 
.list_objects("run_0001", 1, None) + .expect("objects") + .data + .into_iter() + .next() + .expect("object"); + + assert_eq!( + route_post_request( + Arc::clone(&db), + None, + test_export_jobs(), + temp.path().join("exports"), + "/api/v1/runs/latest/exports", + b"{}", + ) + .unwrap_err() + .status, + 400 + ); + assert_eq!( + route_post_request( + Arc::clone(&db), + None, + test_export_jobs(), + temp.path().join("exports"), + &format!( + "/api/v1/runs/latest/objects/{}/validation/explain", + object.object_instance_id + ), + b"{bad json", + ) + .unwrap_err() + .status, + 400 + ); + assert_eq!( + validate_export_request("repo", None, None) + .unwrap_err() + .status, + 400 + ); + assert_eq!( + validate_export_request("publication_point", None, None) + .unwrap_err() + .status, + 400 + ); + assert_eq!( + validate_export_request("bad", None, None) + .unwrap_err() + .status, + 400 + ); + validate_export_request("repo", Some("repo-id"), None).expect("repo export"); + validate_export_request("publicationPoint", None, Some("pp-id")).expect("pp export"); + validate_export_request("objectSet", None, None).expect("object set export"); + + assert_eq!( + test_route_raw_request( + &db, + None, + &format!( + "/api/v1/runs/latest/objects/{}/raw", + object.object_instance_id + ), + ) + .expect("raw route") + .unwrap_err() + .status, + 400 + ); + let running_job = ExportJobRecord { + schema_version: 1, + job_id: "job1".to_string(), + run_id: "run_0001".to_string(), + scope: "object_set".to_string(), + repo_id: None, + pp_id: None, + status: "running".to_string(), + created_at: now_rfc3339(), + finished_at: None, + output_path: None, + object_count: 0, + bytes_written: 0, + error: None, + }; + db.put_export_job(&running_job).expect("job"); + assert_eq!( + test_route_raw_request(&db, None, "/api/v1/runs/latest/exports/job1/download") + .expect("download route") + .unwrap_err() + .status, + 409 + ); + assert!(test_route_raw_request(&db, None, "/api/v1/not-raw").is_none()); + } + + 
#[test] + fn validation_explain_uses_projection_and_cache() { + let temp = tempfile::tempdir().expect("tempdir"); + let run_dir = temp.path().join("runs/run_0001"); + fs::create_dir_all(&run_dir).expect("run dir"); + let object_bytes = + fs::read("tests/fixtures/repository/rpki.cernet.net/repo/cernet/0/AS4538.roa") + .expect("fixture roa"); + let object_sha = hex::encode(Sha256::digest(&object_bytes)); + write_sample_run(&run_dir, &object_sha); + let repo_bytes_path = temp.path().join("repo-bytes.db"); + let repo_bytes = ExternalRepoBytesDb::open(&repo_bytes_path).expect("repo bytes"); + repo_bytes + .put_blob_bytes_batch(&[(object_sha, object_bytes)]) + .expect("put bytes"); + drop(repo_bytes); + let query_db_path = temp.path().join("query-db"); + index_artifacts(&ArtifactIndexerConfig { + query_db_path: query_db_path.clone(), + run_root: Some(temp.path().to_path_buf()), + run_dir: None, + repo_bytes_db_path: Some(repo_bytes_path.clone()), + projection_entry_limit: 5, + min_run_seq: None, + retain_indexed_runs: None, + }) + .expect("index artifacts"); + let db = QueryDb::open(&query_db_path).expect("query db"); + let repo_bytes = ExternalRepoBytesDb::open(&repo_bytes_path).expect("repo bytes"); + let object = db + .list_objects("run_0001", 1, None) + .expect("objects") + .data + .into_iter() + .next() + .expect("object"); + + let explain = validation_explain( + &db, + Some(&repo_bytes), + "run_0001", + &object.object_instance_id, + false, + ) + .expect("explain"); + assert_eq!(explain.final_status, "valid"); + assert_eq!( + explain.parsevalidate["projectionStatus"].as_str(), + Some("ok") + ); + assert!(!explain.authoritative); + assert!( + db.get_validation_explain("run_0001", &object.object_instance_id, EXPLAIN_VERSION) + .expect("cached") + .is_some() + ); + + let validation = test_route_request( + &db, + Some(&repo_bytes), + &format!( + "/api/v1/runs/latest/objects/{}/validation", + object.object_instance_id + ), + ) + .expect("validation route"); + 
assert_eq!(validation["data"]["finalStatus"].as_str(), Some("valid")); + + let chain = test_route_request( + &db, + Some(&repo_bytes), + &format!( + "/api/v1/runs/latest/objects/{}/chain", + object.object_instance_id + ), + ) + .expect("chain route"); + assert!(chain["data"].as_array().is_some()); + } + + fn write_sample_run(run_dir: &std::path::Path, object_sha: &str) { + let report = json!({ + "format_version": 2, + "meta": {"validation_time_rfc3339_utc": "2026-06-15T00:00:00Z"}, + "tree": {"warnings": []}, + "publication_points": [ + { + "node_id": 10, + "rsync_base_uri": "rsync://repo.example/rpki/", + "manifest_rsync_uri": "rsync://repo.example/rpki/m.mft", + "publication_point_rsync_uri": "rsync://repo.example/rpki/", + "rrdp_notification_uri": "https://repo.example/rrdp/notification.xml", + "source": "rrdp", + "repo_sync_source": "rrdp", + "repo_sync_phase": "rrdp_delta", + "repo_sync_duration_ms": 123, + "repo_terminal_state": "fresh", + "warnings": [], + "objects": [ + {"rsync_uri":"rsync://repo.example/rpki/a.roa","sha256_hex": object_sha,"kind":"roa","result":"ok"} + ] + } + ], + "vrps": [{"asn": 4538, "prefix": "2001:da8::/32", "max_length": 32}], + "aspas": [], + "downloads": [], + "download_stats": {} + }); + fs::write( + run_dir.join("report.json"), + serde_json::to_vec(&report).unwrap(), + ) + .expect("report"); + let summary = json!({ + "status": "success", + "runId": "run_0001", + "runSeq": 1, + "startedAtRfc3339Utc": "2026-06-15T00:00:00Z", + "finishedAtRfc3339Utc": "2026-06-15T00:01:00Z", + "wallMs": 60000, + "reportCounts": {"vrps": 1, "aspas": 0, "publicationPoints": 1, "warnings": 0} + }); + fs::write( + run_dir.join("run-summary.json"), + serde_json::to_vec(&summary).unwrap(), + ) + .expect("summary"); + fs::write(run_dir.join("stage-timing.json"), b"{}").expect("stage"); + } + + fn write_manifest_sample_run(run_dir: &std::path::Path, object_sha: &str) { + let report = json!({ + "format_version": 2, + "meta": {"validation_time_rfc3339_utc": 
"2026-06-15T00:00:00Z"}, + "tree": {"warnings": []}, + "publication_points": [ + { + "node_id": 11, + "rsync_base_uri": "rsync://repo.example/rpki/", + "manifest_rsync_uri": "rsync://repo.example/rpki/m.mft", + "publication_point_rsync_uri": "rsync://repo.example/rpki/", + "rrdp_notification_uri": "https://repo.example/rrdp/notification.xml", + "source": "rrdp", + "repo_sync_source": "rrdp", + "repo_sync_phase": "rrdp_delta", + "repo_sync_duration_ms": 123, + "repo_terminal_state": "fresh", + "warnings": [], + "objects": [ + {"rsync_uri":"rsync://repo.example/rpki/m.mft","sha256_hex": object_sha,"kind":"manifest","result":"ok"} + ] + } + ], + "vrps": [], + "aspas": [], + "downloads": [], + "download_stats": {} + }); + fs::write( + run_dir.join("report.json"), + serde_json::to_vec(&report).unwrap(), + ) + .expect("report"); + let summary = json!({ + "status": "success", + "runId": "run_0001", + "runSeq": 1, + "startedAtRfc3339Utc": "2026-06-15T00:00:00Z", + "finishedAtRfc3339Utc": "2026-06-15T00:01:00Z", + "wallMs": 60000, + "reportCounts": {"vrps": 0, "aspas": 0, "publicationPoints": 1, "warnings": 0} + }); + fs::write( + run_dir.join("run-summary.json"), + serde_json::to_vec(&summary).unwrap(), + ) + .expect("summary"); + fs::write(run_dir.join("stage-timing.json"), b"{}").expect("stage"); + } + + fn percent_encode_for_test(value: &str) -> String { + value + .bytes() + .flat_map(|byte| match byte { + b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_' | b'.' 
| b'~' => { + vec![byte as char] + } + _ => format!("%{byte:02X}").chars().collect(), + }) + .collect() + } +} diff --git a/src/blob_store.rs b/src/blob_store.rs index 3d5a0be..d5c622b 100644 --- a/src/blob_store.rs +++ b/src/blob_store.rs @@ -192,6 +192,18 @@ impl ExternalRepoBytesDb { }) } + pub fn open_read_only(path: impl Into) -> StorageResult { + let path = path.into(); + let mut opts = Options::default(); + opts.set_compression_type(rocksdb::DBCompressionType::Lz4); + let db = DB::open_for_read_only(&opts, &path, false) + .map_err(|e| StorageError::RocksDb(e.to_string()))?; + Ok(Self { + path, + db: Arc::new(db), + }) + } + pub fn put_blob_bytes_batch(&self, blobs: &[(String, Vec)]) -> StorageResult<()> { if blobs.is_empty() { return Ok(()); diff --git a/src/cli.rs b/src/cli.rs index 987f18a..42584ce 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -1608,6 +1608,7 @@ fn build_report( downloads: shared.downloads.iter().cloned().collect(), download_stats: shared.download_stats.clone(), repo_sync_stats, + query_audit: None, } } diff --git a/src/cli/output.rs b/src/cli/output.rs index e706bae..872a2bd 100644 --- a/src/cli/output.rs +++ b/src/cli/output.rs @@ -3,8 +3,12 @@ use std::path::Path; use serde::Serialize; use serde::ser::SerializeSeq; +use sha2::Digest; -use crate::audit::{AspaOutput, AuditRunMeta, AuditWarning, VrpOutput}; +use crate::audit::{ + AspaOutput, AuditRunMeta, AuditWarning, QueryAuditManifest, ValidationEvent, + ValidationEventCounts, VrpOutput, +}; use crate::ccr::canonical_vrp_prefix; use super::{PostValidationShared, RunStageTiming}; @@ -49,6 +53,8 @@ struct BorrowedAuditReportV2<'a> { downloads: &'a [crate::audit::AuditDownloadEvent], download_stats: &'a crate::audit::AuditDownloadStats, repo_sync_stats: crate::audit::AuditRepoSyncStats, + #[serde(rename = "queryAudit", skip_serializing_if = "Option::is_none")] + query_audit: Option, } #[derive(Serialize)] @@ -125,6 +131,7 @@ pub(super) fn write_report_json_from_shared( .format(&Rfc3339) 
.expect("format validation_time"); let repo_sync_stats = super::build_repo_sync_stats(shared.publication_points.as_ref()); + let query_audit = write_validation_events_sidecar(path, &validation_time_rfc3339_utc, shared)?; let report = BorrowedAuditReportV2 { format_version: 2, meta: AuditRunMeta { @@ -142,6 +149,7 @@ pub(super) fn write_report_json_from_shared( downloads: shared.downloads.as_ref(), download_stats: &shared.download_stats, repo_sync_stats, + query_audit: Some(query_audit), }; let build_ms = build_started.elapsed().as_millis() as u64; @@ -153,6 +161,148 @@ pub(super) fn write_report_json_from_shared( }) } +fn write_validation_events_sidecar( + report_path: &Path, + validation_time: &str, + shared: &PostValidationShared, +) -> Result { + let events_path = report_path.with_file_name("validation-events.jsonl"); + if let Some(parent) = events_path.parent() { + std::fs::create_dir_all(parent) + .map_err(|e| format!("create validation events parent failed: {e}"))?; + } + let mut writer = BufWriter::new(std::fs::File::create(&events_path).map_err(|e| { + format!( + "create validation events failed: {}: {e}", + events_path.display() + ) + })?); + let mut seq = 0u64; + let mut hasher = sha2::Sha256::new(); + emit_validation_events(validation_time, shared, &mut seq, &mut |event| { + let mut line = serde_json::to_vec(&event) + .map_err(|e| format!("serialize validation event failed: {e}"))?; + line.push(b'\n'); + std::io::Write::write_all(&mut writer, &line) + .map_err(|e| format!("write validation event failed: {e}"))?; + hasher.update(&line); + Ok(()) + })?; + std::io::Write::flush(&mut writer) + .map_err(|e| format!("flush validation events failed: {e}"))?; + let events_count = seq; + let events_sha256 = hex::encode(hasher.finalize()); + Ok(QueryAuditManifest { + schema_version: 1, + status: "complete".to_string(), + events_path: events_path + .file_name() + .and_then(|name| name.to_str()) + .unwrap_or("validation-events.jsonl") + .to_string(), + events_count, 
+ events_sha256, + writer_version: 1, + error: None, + }) +} + +fn emit_validation_events( + validation_time: &str, + shared: &PostValidationShared, + seq: &mut u64, + emit: &mut impl FnMut(ValidationEvent) -> Result<(), String>, +) -> Result<(), String> { + emit(next_event(seq, "run_summary", validation_time, |event| { + event.counts = Some(ValidationEventCounts { + objects: Some( + shared + .publication_points + .iter() + .map(|pp| pp.objects.len() as u64) + .sum(), + ), + warnings: Some( + (shared.tree_warnings.len() + + shared + .publication_points + .iter() + .map(|pp| pp.warnings.len()) + .sum::()) as u64, + ), + vrps: Some(shared.vrps.len() as u64), + aspas: Some(shared.aspas.len() as u64), + }); + }))?; + for pp in shared.publication_points.iter() { + emit(next_event( + seq, + "publication_point", + validation_time, + |event| { + event.pp_node_id = pp.node_id; + event.pp_manifest_uri = Some(pp.manifest_rsync_uri.clone()); + event.pp_rsync_base_uri = Some(pp.rsync_base_uri.clone()); + event.repo_sync_phase = pp.repo_sync_phase.clone(); + event.repo_terminal_state = Some(pp.repo_terminal_state.clone()); + event.counts = Some(ValidationEventCounts { + objects: Some(pp.objects.len() as u64), + warnings: Some(pp.warnings.len() as u64), + vrps: None, + aspas: None, + }); + }, + ))?; + for object in &pp.objects { + emit(next_event(seq, "object", validation_time, |event| { + event.pp_node_id = pp.node_id; + event.pp_manifest_uri = Some(pp.manifest_rsync_uri.clone()); + event.object_uri = Some(object.rsync_uri.clone()); + event.sha256 = Some(object.sha256_hex.clone()); + event.object_type = Some(object.kind.clone()); + event.result = Some(object.result.clone()); + event.reason = object.detail.clone(); + }))?; + } + for warning in &pp.warnings { + emit(next_event(seq, "warning", validation_time, |event| { + event.pp_node_id = pp.node_id; + event.pp_manifest_uri = Some(pp.manifest_rsync_uri.clone()); + event.reason = Some(warning.message.clone()); + }))?; + } + } + 
Ok(()) +} + +fn next_event( + seq: &mut u64, + event_type: &str, + validation_time: &str, + fill: impl FnOnce(&mut ValidationEvent), +) -> ValidationEvent { + *seq += 1; + let mut event = ValidationEvent { + schema_version: 1, + seq: *seq, + event_type: event_type.to_string(), + validation_time: validation_time.to_string(), + pp_node_id: None, + pp_manifest_uri: None, + pp_rsync_base_uri: None, + repo_sync_phase: None, + repo_terminal_state: None, + object_uri: None, + sha256: None, + object_type: None, + result: None, + reason: None, + counts: None, + }; + fill(&mut event); + event +} + #[derive(Clone, Debug, PartialEq, Eq)] pub(super) struct CompareViewTaskOutput { pub(super) build_ms: Option, diff --git a/src/cli/tests.rs b/src/cli/tests.rs index 90e88a9..426ef0a 100644 --- a/src/cli/tests.rs +++ b/src/cli/tests.rs @@ -1452,6 +1452,14 @@ fn synthetic_post_validation_shared() -> PostValidationShared { let mut pp1 = crate::audit::PublicationPointAudit::default(); pp1.source = "fresh".to_string(); pp1.rrdp_notification_uri = Some("https://example.test/n1.xml".to_string()); + pp1.manifest_rsync_uri = "rsync://example.test/repo/pp1/manifest.mft".to_string(); + pp1.objects.push(crate::audit::ObjectAuditEntry { + rsync_uri: "rsync://example.test/repo/pp1/a.roa".to_string(), + sha256_hex: "11".repeat(32), + kind: crate::audit::AuditObjectKind::Roa, + result: crate::audit::AuditObjectResult::Ok, + detail: None, + }); let mut pp2 = crate::audit::PublicationPointAudit::default(); pp2.source = "fresh".to_string(); pp2.rrdp_notification_uri = Some("https://example.test/n1.xml".to_string()); @@ -1549,6 +1557,19 @@ fn run_report_task_and_stage_timing_work() { serde_json::from_str(&report_json).expect("parse compact report json"); assert_eq!(report["vrps"].as_array().unwrap().len(), 2); assert_eq!(report["aspas"].as_array().unwrap().len(), 1); + assert_eq!(report["queryAudit"]["status"].as_str(), Some("complete")); + assert!(report["queryAudit"]["eventsCount"].as_u64().unwrap() 
> 0); + let events_path = dir.path().join( + report["queryAudit"]["eventsPath"] + .as_str() + .expect("events path"), + ); + let events = std::fs::read_to_string(events_path).expect("read validation events"); + assert!( + events + .lines() + .any(|line| line.contains("\"eventType\":\"object\"")) + ); let stage_timing = RunStageTiming { validation_ms: 1, @@ -1858,6 +1879,7 @@ fn write_json_writes_report() { downloads: Vec::new(), download_stats: crate::audit::AuditDownloadStats::default(), repo_sync_stats: crate::audit::AuditRepoSyncStats::default(), + query_audit: None, }; let dir = tempfile::tempdir().expect("tmpdir"); diff --git a/src/lib.rs b/src/lib.rs index 380bf2b..f9e88d1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -19,12 +19,18 @@ pub mod fetch; #[cfg(feature = "full")] pub mod memory_telemetry; #[cfg(feature = "full")] +pub mod object_projection; +#[cfg(feature = "full")] pub mod parallel; #[cfg(feature = "full")] pub mod policy; #[cfg(feature = "full")] pub mod progress_log; #[cfg(feature = "full")] +pub mod query; +#[cfg(feature = "full")] +pub mod query_db; +#[cfg(feature = "full")] pub mod replay; #[cfg(feature = "full")] pub mod report; diff --git a/src/object_projection.rs b/src/object_projection.rs new file mode 100644 index 0000000..da39843 --- /dev/null +++ b/src/object_projection.rs @@ -0,0 +1,668 @@ +use std::net::{Ipv4Addr, Ipv6Addr}; +use std::path::Path; + +use serde::{Deserialize, Serialize}; +use serde_json::{Value, json}; +use sha2::{Digest, Sha256}; + +use crate::data_model::aspa::AspaObject; +use crate::data_model::crl::RpkixCrl; +use crate::data_model::manifest::ManifestObject; +use crate::data_model::rc::{ + AccessDescription, RcExtensions, ResourceCertificate, SubjectInfoAccess, +}; +use crate::data_model::roa::{IpPrefix as RoaIpPrefix, RoaAfi, RoaObject}; +use crate::data_model::signed_object::{ + ResourceEeCertificate, RpkiSignedObject, SignedAttrsProfiled, SignerInfoProfiled, +}; +use crate::data_model::ta::TaCertificate; + 
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum ObjectType { + Auto, + Cer, + Mft, + Crl, + Roa, + Aspa, +} + +impl ObjectType { + pub fn parse(value: &str) -> Result { + match value.to_ascii_lowercase().as_str() { + "auto" => Ok(Self::Auto), + "cer" | ".cer" | "cert" | "certificate" => Ok(Self::Cer), + "mft" | ".mft" | "manifest" => Ok(Self::Mft), + "crl" | ".crl" => Ok(Self::Crl), + "roa" | ".roa" => Ok(Self::Roa), + "asa" | ".asa" | "aspa" => Ok(Self::Aspa), + _ => Err(format!("unsupported object type: {value}")), + } + } + + pub fn label(self) -> &'static str { + object_type_label(self) + } +} + +pub fn resolve_object_type(object_type: ObjectType, path: &Path) -> Result { + if object_type != ObjectType::Auto { + return Ok(object_type); + } + match path + .extension() + .and_then(|v| v.to_str()) + .map(|v| v.to_ascii_lowercase()) + .as_deref() + { + Some("cer") => Ok(ObjectType::Cer), + Some("mft") => Ok(ObjectType::Mft), + Some("crl") => Ok(ObjectType::Crl), + Some("roa") => Ok(ObjectType::Roa), + Some("asa") | Some("aspa") => Ok(ObjectType::Aspa), + _ => Err(format!( + "cannot infer object type from path: {}", + path.display() + )), + } +} + +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct ObjectProjectionRecord { + pub schema_version: u32, + pub sha256: String, + pub object_type: String, + pub parse_status: String, + pub error_summary: Option, + pub projection: Value, +} + +pub fn build_object_projection( + object_type: ObjectType, + input_path: &Path, + bytes: &[u8], + entry_limit: usize, +) -> ObjectProjectionRecord { + let resolved = match resolve_object_type(object_type, input_path) { + Ok(value) => value, + Err(err) => { + return ObjectProjectionRecord { + schema_version: 1, + sha256: sha256_hex(bytes), + object_type: "unknown".to_string(), + parse_status: "error".to_string(), + error_summary: Some(err), + projection: 
json!({"decode": {"profileValid": false}}), + }; + } + }; + let projection = parse_object_json(resolved, input_path, bytes, entry_limit); + let parse_status = if projection + .get("object") + .and_then(|v| v.get("decode")) + .and_then(|v| v.get("profileValid")) + .and_then(Value::as_bool) + .unwrap_or(false) + { + "ok" + } else { + "error" + }; + let error_summary = projection + .get("object") + .and_then(|v| v.get("decode")) + .and_then(|v| v.get("error")) + .and_then(Value::as_str) + .map(str::to_string); + ObjectProjectionRecord { + schema_version: 1, + sha256: sha256_hex(bytes), + object_type: resolved.label().to_string(), + parse_status: parse_status.to_string(), + error_summary, + projection, + } +} + +pub fn parse_object_json( + object_type: ObjectType, + input_path: &Path, + bytes: &[u8], + entry_limit: usize, +) -> Value { + let object = match object_type { + ObjectType::Auto => unreachable!("auto must be resolved"), + ObjectType::Cer => parse_cer_json(bytes), + ObjectType::Mft => parse_mft_json(bytes, entry_limit), + ObjectType::Crl => parse_crl_json(bytes, entry_limit), + ObjectType::Roa => parse_roa_json(bytes, entry_limit), + ObjectType::Aspa => parse_aspa_json(bytes, entry_limit), + }; + json!({ + "tool": "rpki_object_parse", + "schemaVersion": 1, + "input": { + "path": input_path.display().to_string(), + "type": object_type_label(object_type), + "bytes": bytes_summary(bytes), + }, + "object": object, + }) +} + +pub fn parse_cer_json(bytes: &[u8]) -> Value { + match ResourceCertificate::decode_der(bytes) { + Ok(cert) => { + let ta_profile = match TaCertificate::decode_der(bytes) { + Ok(ta) => json!({ + "valid": true, + "selfSignature": result_json(ta.verify_self_signature().map_err(|e| e.to_string())), + }), + Err(err) => json!({ + "valid": false, + "error": err.to_string(), + }), + }; + json!({ + "type": "cer", + "decode": {"profileValid": true}, + "resourceCertificate": resource_certificate_json(&cert), + "trustAnchorProfile": ta_profile, + }) + } + 
Err(err) => json!({ + "type": "cer", + "decode": {"profileValid": false, "error": err.to_string()}, + }), + } +} + +pub fn parse_mft_json(bytes: &[u8], entry_limit: usize) -> Value { + match ManifestObject::decode_der(bytes) { + Ok(mft) => { + let files = mft.manifest.parse_files(); + let (file_sample, file_list_error) = match files { + Ok(entries) => ( + json!({ + "count": entries.len(), + "truncated": entries.len() > entry_limit, + "entries": entries.iter().take(entry_limit).map(|item| { + json!({"fileName": item.file_name, "hashHex": hex::encode(item.hash_bytes)}) + }).collect::>(), + }), + Value::Null, + ), + Err(err) => (Value::Null, json!(err.to_string())), + }; + json!({ + "type": "mft", + "decode": {"profileValid": true}, + "eContentType": mft.econtent_type, + "signedObject": signed_object_json(&mft.signed_object), + "manifest": { + "version": mft.manifest.version, + "manifestNumberHex": mft.manifest.manifest_number.to_hex_upper(), + "thisUpdate": format_time(mft.manifest.this_update), + "nextUpdate": format_time(mft.manifest.next_update), + "fileHashAlg": mft.manifest.file_hash_alg, + "fileCount": mft.manifest.file_count(), + "fileList": file_sample, + "fileListError": file_list_error, + }, + "embeddedEeProfile": result_json(mft.validate_embedded_ee_cert().map_err(|e| e.to_string())), + "cmsSignature": result_json(mft.signed_object.verify_signature().map_err(|e| e.to_string())), + }) + } + Err(err) => json!({ + "type": "mft", + "decode": {"profileValid": false, "error": err.to_string()}, + }), + } +} + +pub fn parse_crl_json(bytes: &[u8], entry_limit: usize) -> Value { + match RpkixCrl::decode_der(bytes) { + Ok(crl) => json!({ + "type": "crl", + "decode": {"profileValid": true}, + "rawDer": bytes_summary(&crl.raw_der), + "version": crl.version, + "issuer": crl.issuer_dn, + "signatureAlgorithm": crl.signature_algorithm_oid, + "thisUpdate": format_time(crl.this_update.utc), + "nextUpdate": format_time(crl.next_update.utc), + "extensions": { + 
"authorityKeyIdentifier": hex::encode(&crl.extensions.authority_key_identifier), + "crlNumberHex": crl.extensions.crl_number.to_hex_upper(), + "crlNumber": crl.extensions.crl_number.to_u64(), + }, + "revokedCertificates": { + "count": crl.revoked_certs.len(), + "truncated": crl.revoked_certs.len() > entry_limit, + "entries": crl.revoked_certs.iter().take(entry_limit).map(|item| { + json!({ + "serialNumberHex": item.serial_number.to_hex_upper(), + "serialNumber": item.serial_number.to_u64(), + "revocationDate": format_time(item.revocation_date.utc), + }) + }).collect::>(), + }, + }), + Err(err) => json!({ + "type": "crl", + "decode": {"profileValid": false, "error": err.to_string()}, + }), + } +} + +pub fn manifest_file_entries_page( + bytes: &[u8], + offset: usize, + limit: usize, +) -> Result<(usize, Vec), String> { + let mft = ManifestObject::decode_der(bytes).map_err(|err| err.to_string())?; + let entries = mft.manifest.parse_files().map_err(|err| err.to_string())?; + let total = entries.len(); + let end = (offset + limit).min(total); + let page = entries[offset.min(total)..end] + .iter() + .map(|item| json!({"fileName": item.file_name, "hashHex": hex::encode(&item.hash_bytes)})) + .collect::>(); + Ok((total, page)) +} + +pub fn crl_revoked_entries_page( + bytes: &[u8], + offset: usize, + limit: usize, +) -> Result<(usize, Vec), String> { + let crl = RpkixCrl::decode_der(bytes).map_err(|err| err.to_string())?; + let total = crl.revoked_certs.len(); + let end = (offset + limit).min(total); + let page = crl.revoked_certs[offset.min(total)..end] + .iter() + .map(|item| { + json!({ + "serialNumberHex": item.serial_number.to_hex_upper(), + "serialNumber": item.serial_number.to_u64(), + "revocationDate": format_time(item.revocation_date.utc), + }) + }) + .collect::>(); + Ok((total, page)) +} + +pub fn parse_roa_json(bytes: &[u8], entry_limit: usize) -> Value { + match RoaObject::decode_der(bytes) { + Ok(roa) => json!({ + "type": "roa", + "decode": {"profileValid": 
true}, + "eContentType": roa.econtent_type, + "signedObject": signed_object_json(&roa.signed_object), + "roa": { + "version": roa.roa.version, + "asId": roa.roa.as_id, + "ipAddressFamilies": roa.roa.ip_addr_blocks.iter().map(|family| { + json!({ + "afi": format!("{:?}", family.afi), + "addressCount": family.addresses.len(), + "truncated": family.addresses.len() > entry_limit, + "addresses": family.addresses.iter().take(entry_limit).map(|entry| { + json!({ + "prefix": roa_prefix_string(&entry.prefix), + "maxLength": entry.max_length, + }) + }).collect::>(), + }) + }).collect::>(), + }, + "embeddedEeProfile": result_json(roa.validate_embedded_ee_cert().map_err(|e| e.to_string())), + "cmsSignature": result_json(roa.signed_object.verify_signature().map_err(|e| e.to_string())), + }), + Err(err) => json!({ + "type": "roa", + "decode": {"profileValid": false, "error": err.to_string()}, + }), + } +} + +pub fn parse_aspa_json(bytes: &[u8], entry_limit: usize) -> Value { + match AspaObject::decode_der(bytes) { + Ok(aspa) => json!({ + "type": "aspa", + "decode": {"profileValid": true}, + "eContentType": aspa.econtent_type, + "signedObject": signed_object_json(&aspa.signed_object), + "aspa": { + "version": aspa.aspa.version, + "customerAsId": aspa.aspa.customer_as_id, + "providerCount": aspa.aspa.provider_as_ids.len(), + "providersTruncated": aspa.aspa.provider_as_ids.len() > entry_limit, + "providerAsIds": aspa.aspa.provider_as_ids.iter().take(entry_limit).copied().collect::>(), + }, + "embeddedEeProfile": result_json(aspa.validate_embedded_ee_cert().map_err(|e| e.to_string())), + "cmsSignature": result_json(aspa.signed_object.verify_signature().map_err(|e| e.to_string())), + }), + Err(err) => json!({ + "type": "aspa", + "decode": {"profileValid": false, "error": err.to_string()}, + }), + } +} + +fn resource_certificate_json(cert: &ResourceCertificate) -> Value { + let tbs = &cert.tbs; + json!({ + "rawDer": bytes_summary(&cert.raw_der), + "kind": format!("{:?}", cert.kind), + 
"version": tbs.version, + "serialNumberHex": hex::encode(tbs.serial_number.to_bytes_be()), + "signatureAlgorithm": tbs.signature_algorithm, + "issuer": tbs.issuer_name.to_string(), + "subject": tbs.subject_name.to_string(), + "validity": { + "notBefore": format_time(tbs.validity_not_before), + "notAfter": format_time(tbs.validity_not_after), + }, + "subjectPublicKeyInfo": bytes_summary(&tbs.subject_public_key_info), + "extensions": rc_extensions_json(&tbs.extensions), + }) +} + +fn rc_extensions_json(ext: &RcExtensions) -> Value { + json!({ + "basicConstraintsCa": ext.basic_constraints_ca, + "subjectKeyIdentifier": ext.subject_key_identifier.as_ref().map(|v| hex::encode(v)), + "authorityKeyIdentifier": ext.authority_key_identifier.as_ref().map(|v| hex::encode(v)), + "crlDistributionPointsUris": ext.crl_distribution_points_uris, + "caIssuersUris": ext.ca_issuers_uris, + "subjectInfoAccess": subject_info_access_json(ext.subject_info_access.as_ref()), + "certificatePoliciesOid": ext.certificate_policies_oid, + "ipResources": serde_json::to_value(&ext.ip_resources).unwrap_or(Value::Null), + "asResources": serde_json::to_value(&ext.as_resources).unwrap_or(Value::Null), + }) +} + +fn subject_info_access_json(value: Option<&SubjectInfoAccess>) -> Value { + match value { + None => Value::Null, + Some(SubjectInfoAccess::Ca(ca)) => json!({ + "kind": "ca", + "accessDescriptions": ca.access_descriptions.iter().map(access_description_json).collect::>(), + }), + Some(SubjectInfoAccess::Ee(ee)) => json!({ + "kind": "ee", + "signedObjectUris": ee.signed_object_uris, + "accessDescriptions": ee.access_descriptions.iter().map(access_description_json).collect::>(), + }), + } +} + +fn access_description_json(value: &AccessDescription) -> Value { + json!({ + "accessMethodOid": value.access_method_oid, + "accessLocation": value.access_location, + }) +} + +fn signed_object_json(signed_object: &RpkiSignedObject) -> Value { + let signed_data = &signed_object.signed_data; + json!({ + 
"rawDer": bytes_summary(&signed_object.raw_der), + "contentInfoContentType": signed_object.content_info_content_type, + "signedData": { + "version": signed_data.version, + "digestAlgorithms": signed_data.digest_algorithms, + "encapContentInfo": { + "eContentType": signed_data.encap_content_info.econtent_type, + "eContent": bytes_summary(&signed_data.encap_content_info.econtent), + }, + "certificates": signed_data.certificates.iter().map(ee_certificate_json).collect::>(), + "crlsPresent": signed_data.crls_present, + "signerInfos": signed_data.signer_infos.iter().map(signer_info_json).collect::>(), + }, + }) +} + +fn ee_certificate_json(cert: &ResourceEeCertificate) -> Value { + json!({ + "rawDer": bytes_summary(&cert.raw_der), + "subjectKeyIdentifier": hex::encode(&cert.subject_key_identifier), + "spkiDer": bytes_summary(&cert.spki_der), + "rsaPublicKey": { + "modulus": bytes_summary(&cert.rsa_public_modulus), + "exponent": bytes_summary(&cert.rsa_public_exponent), + }, + "tbsCertificate": bytes_summary(&cert.tbs_certificate_der), + "certificateSignature": bytes_summary(&cert.signature_bytes), + "keyUsageSummary": format!("{:?}", cert.key_usage_summary), + "siaSignedObjectUris": cert.sia_signed_object_uris, + "resourceCertificate": resource_certificate_json(&cert.resource_cert), + }) +} + +fn signer_info_json(info: &SignerInfoProfiled) -> Value { + json!({ + "version": info.version, + "sidSki": hex::encode(&info.sid_ski), + "digestAlgorithm": info.digest_algorithm, + "signatureAlgorithm": info.signature_algorithm, + "signedAttrs": signed_attrs_json(&info.signed_attrs), + "unsignedAttrsPresent": info.unsigned_attrs_present, + "signature": bytes_summary(&info.signature), + "signedAttrsDerForSignature": bytes_summary(&info.signed_attrs_der_for_signature), + }) +} + +fn signed_attrs_json(attrs: &SignedAttrsProfiled) -> Value { + json!({ + "contentType": attrs.content_type, + "messageDigest": hex::encode(&attrs.message_digest), + "signingTime": { + "utc": 
format_time(attrs.signing_time.utc), + "encoding": format!("{:?}", attrs.signing_time.encoding), + }, + "otherAttrsPresent": attrs.other_attrs_present, + }) +} + +fn result_json(result: Result<(), String>) -> Value { + match result { + Ok(()) => json!({"valid": true}), + Err(err) => json!({"valid": false, "error": err}), + } +} + +fn object_type_label(object_type: ObjectType) -> &'static str { + match object_type { + ObjectType::Auto => "auto", + ObjectType::Cer => "cer", + ObjectType::Mft => "mft", + ObjectType::Crl => "crl", + ObjectType::Roa => "roa", + ObjectType::Aspa => "aspa", + } +} + +fn bytes_summary(bytes: &[u8]) -> Value { + let head_len = bytes.len().min(16); + let tail_len = bytes.len().min(16); + json!({ + "len": bytes.len(), + "sha256": sha256_hex(bytes), + "headHex": hex::encode(&bytes[..head_len]), + "tailHex": hex::encode(&bytes[bytes.len().saturating_sub(tail_len)..]), + }) +} + +fn sha256_hex(bytes: &[u8]) -> String { + hex::encode(Sha256::digest(bytes)) +} + +fn format_time(value: time::OffsetDateTime) -> String { + value + .to_offset(time::UtcOffset::UTC) + .format(&time::format_description::well_known::Rfc3339) + .unwrap_or_else(|_| value.unix_timestamp().to_string()) +} + +fn roa_prefix_string(prefix: &RoaIpPrefix) -> String { + let bytes = prefix.addr_bytes(); + match prefix.afi { + RoaAfi::Ipv4 => { + let octets = [bytes[0], bytes[1], bytes[2], bytes[3]]; + format!("{}/{}", Ipv4Addr::from(octets), prefix.prefix_len) + } + RoaAfi::Ipv6 => { + let mut octets = [0u8; 16]; + octets.copy_from_slice(bytes); + format!("{}/{}", Ipv6Addr::from(octets), prefix.prefix_len) + } + } +} + +#[cfg(test)] +mod tests { + use std::path::Path; + + use super::*; + + #[test] + fn object_type_parser_and_resolver_cover_aliases() { + assert_eq!(ObjectType::parse("auto").unwrap(), ObjectType::Auto); + assert_eq!(ObjectType::parse(".cer").unwrap(), ObjectType::Cer); + assert_eq!(ObjectType::parse("certificate").unwrap(), ObjectType::Cer); + 
assert_eq!(ObjectType::parse("manifest").unwrap(), ObjectType::Mft); + assert_eq!(ObjectType::parse(".crl").unwrap(), ObjectType::Crl); + assert_eq!(ObjectType::parse("roa").unwrap(), ObjectType::Roa); + assert_eq!(ObjectType::parse("aspa").unwrap(), ObjectType::Aspa); + assert_eq!(ObjectType::parse(".asa").unwrap(), ObjectType::Aspa); + assert!(ObjectType::parse("unknown").is_err()); + assert_eq!(ObjectType::Aspa.label(), "aspa"); + + assert_eq!( + resolve_object_type(ObjectType::Auto, Path::new("repo/a.cer")).unwrap(), + ObjectType::Cer + ); + assert_eq!( + resolve_object_type(ObjectType::Auto, Path::new("repo/a.mft")).unwrap(), + ObjectType::Mft + ); + assert_eq!( + resolve_object_type(ObjectType::Auto, Path::new("repo/a.crl")).unwrap(), + ObjectType::Crl + ); + assert_eq!( + resolve_object_type(ObjectType::Auto, Path::new("repo/a.roa")).unwrap(), + ObjectType::Roa + ); + assert_eq!( + resolve_object_type(ObjectType::Auto, Path::new("repo/a.asa")).unwrap(), + ObjectType::Aspa + ); + assert_eq!( + resolve_object_type(ObjectType::Roa, Path::new("repo/a.bin")).unwrap(), + ObjectType::Roa + ); + assert!(resolve_object_type(ObjectType::Auto, Path::new("repo/a.bin")).is_err()); + } + + #[test] + fn invalid_der_returns_error_projection_for_all_object_types() { + let bytes = b"not der"; + for object_type in [ + ObjectType::Cer, + ObjectType::Mft, + ObjectType::Crl, + ObjectType::Roa, + ObjectType::Aspa, + ] { + let value = parse_object_json(object_type, Path::new("bad.der"), bytes, 1); + assert_eq!( + value["object"]["decode"]["profileValid"].as_bool(), + Some(false) + ); + assert!(value["object"]["decode"]["error"].as_str().is_some()); + } + + let record = build_object_projection(ObjectType::Auto, Path::new("bad.bin"), bytes, 1); + assert_eq!(record.object_type, "unknown"); + assert_eq!(record.parse_status, "error"); + assert!(record.error_summary.is_some()); + } + + #[test] + fn parses_fixture_objects_into_human_readable_projection() { + let cases = [ + ( + 
ObjectType::Cer, + "tests/fixtures/ta/apnic-ta.cer", + "cer", + "resourceCertificate", + ), + ( + ObjectType::Mft, + "tests/fixtures/repository/rpki.cernet.net/repo/cernet/0/05FC9C5B88506F7C0D3F862C8895BED67E9F8EBA.mft", + "mft", + "manifest", + ), + ( + ObjectType::Crl, + "tests/fixtures/repository/rpki.cernet.net/repo/cernet/0/05FC9C5B88506F7C0D3F862C8895BED67E9F8EBA.crl", + "crl", + "revokedCertificates", + ), + ( + ObjectType::Roa, + "tests/fixtures/repository/rpki.cernet.net/repo/cernet/0/AS4538.roa", + "roa", + "roa", + ), + ( + ObjectType::Aspa, + "tests/fixtures/repository/chloe.sobornost.net/rpki/RIPE-nljobsnijders/5m80fwYws_3FiFD7JiQjAqZ1RYQ.asa", + "aspa", + "aspa", + ), + ]; + + for (object_type, path, expected_type, expected_section) in cases { + let bytes = std::fs::read(path).expect("fixture"); + let record = build_object_projection(object_type, Path::new(path), &bytes, 1); + assert_eq!(record.object_type, expected_type); + assert_eq!(record.parse_status, "ok"); + assert_eq!( + record.projection["object"]["decode"]["profileValid"].as_bool(), + Some(true) + ); + assert!(record.projection["object"][expected_section].is_object()); + } + } + + #[test] + fn large_projection_lists_are_paged_from_raw_bytes() { + let mft_bytes = std::fs::read( + "tests/fixtures/repository/rpki.cernet.net/repo/cernet/0/05FC9C5B88506F7C0D3F862C8895BED67E9F8EBA.mft", + ) + .expect("mft"); + let (total, page) = manifest_file_entries_page(&mft_bytes, 1, 3).expect("mft page"); + assert!(total >= 3); + assert_eq!(page.len(), 3); + assert!(page[0]["fileName"].as_str().is_some()); + let (_, empty_page) = + manifest_file_entries_page(&mft_bytes, total + 10, 3).expect("empty page"); + assert!(empty_page.is_empty()); + + let crl_bytes = std::fs::read( + "tests/fixtures/repository/rpki.cernet.net/repo/cernet/0/05FC9C5B88506F7C0D3F862C8895BED67E9F8EBA.crl", + ) + .expect("crl"); + let (total, page) = crl_revoked_entries_page(&crl_bytes, 0, 5).expect("crl page"); + assert!(page.len() <= 
total); + let (_, empty_page) = + crl_revoked_entries_page(&crl_bytes, total + 10, 5).expect("empty crl page"); + assert!(empty_page.is_empty()); + } +} diff --git a/src/query/artifact_manifest.rs b/src/query/artifact_manifest.rs new file mode 100644 index 0000000..b957d67 --- /dev/null +++ b/src/query/artifact_manifest.rs @@ -0,0 +1,139 @@ +use std::collections::BTreeMap; +use std::fs; +use std::path::{Path, PathBuf}; + +use serde::{Deserialize, Serialize}; +use serde_json::Value; + +use crate::query_db::QueryDbResult; + +#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct ArtifactFileSummary { + pub path: String, + pub size_bytes: Option, + pub modified_unix_secs: Option, +} + +#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct QueryAuditArtifactSummary { + pub status: Option, + pub events_path: Option, + pub events_count: Option, + pub events_sha256: Option, + pub writer_version: Option, +} + +#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct ArtifactManifestSummary { + pub files: BTreeMap, + pub query_audit: Option, + pub cir_counts_available: bool, + pub validation_events_indexed: bool, +} + +impl ArtifactManifestSummary { + pub fn artifact_paths(&self) -> BTreeMap { + self.files + .iter() + .map(|(name, item)| (name.clone(), item.path.clone())) + .collect() + } +} + +pub fn build_artifact_manifest( + run_dir: &Path, + query_audit: Option<&Value>, +) -> QueryDbResult { + let mut manifest = ArtifactManifestSummary { + query_audit: query_audit.map(query_audit_summary), + cir_counts_available: false, + validation_events_indexed: false, + ..ArtifactManifestSummary::default() + }; + for name in [ + "report.json", + "input.cir", + "result.cir", + "result.ccr", + "stage-timing.json", + "run-summary.json", + ] { + let path = run_dir.join(name); + if path.exists() { 
+ manifest + .files + .insert(name.to_string(), file_summary(&path)?); + } + } + if let Some(events_path) = manifest + .query_audit + .as_ref() + .and_then(|query_audit| query_audit.events_path.as_ref()) + { + let path = sidecar_path(run_dir, events_path); + if path.exists() { + manifest + .files + .insert("validationEvents".to_string(), file_summary(&path)?); + } + } + Ok(manifest) +} + +fn file_summary(path: &Path) -> QueryDbResult { + let metadata = fs::metadata(path)?; + let modified_unix_secs = metadata + .modified() + .ok() + .and_then(|time| time.duration_since(std::time::UNIX_EPOCH).ok()) + .map(|duration| duration.as_secs()); + Ok(ArtifactFileSummary { + path: path.display().to_string(), + size_bytes: Some(metadata.len()), + modified_unix_secs, + }) +} + +fn query_audit_summary(value: &Value) -> QueryAuditArtifactSummary { + QueryAuditArtifactSummary { + status: json_str(value, &["status"]).map(str::to_string), + events_path: json_str(value, &["eventsPath"]) + .or_else(|| json_str(value, &["events_path"])) + .map(str::to_string), + events_count: json_u64(value, &["eventsCount"]) + .or_else(|| json_u64(value, &["events_count"])), + events_sha256: json_str(value, &["eventsSha256"]) + .or_else(|| json_str(value, &["events_sha256"])) + .map(str::to_string), + writer_version: json_u64(value, &["writerVersion"]) + .or_else(|| json_u64(value, &["writer_version"])), + } +} + +pub fn sidecar_path(run_dir: &Path, path: &str) -> PathBuf { + let path = PathBuf::from(path); + if path.is_absolute() { + path + } else { + run_dir.join(path) + } +} + +fn json_str<'a>(value: &'a Value, path: &[&str]) -> Option<&'a str> { + let mut current = value; + for key in path { + current = current.get(*key)?; + } + current.as_str() +} + +fn json_u64(value: &Value, path: &[&str]) -> Option { + let mut current = value; + for key in path { + current = current.get(*key)?; + } + current.as_u64() +} diff --git a/src/query/mod.rs b/src/query/mod.rs new file mode 100644 index 0000000..b28f6e1 
--- /dev/null +++ b/src/query/mod.rs @@ -0,0 +1,3 @@ +pub mod artifact_manifest; +pub mod object_resolver; +pub mod report_stream; diff --git a/src/query/object_resolver.rs b/src/query/object_resolver.rs new file mode 100644 index 0000000..5c28dbd --- /dev/null +++ b/src/query/object_resolver.rs @@ -0,0 +1,16 @@ +use std::path::{Path, PathBuf}; + +use crate::query_db::{ObjectInstanceRecord, QueryDb, QueryDbResult}; + +pub fn report_path_for_run(db: &QueryDb, run_id: &str) -> QueryDbResult> { + Ok(db + .get_run(run_id)? + .map(|run| Path::new(&run.run_dir).join("report.json"))) +} + +pub fn resolve_object_from_cache_or_report( + _db: &QueryDb, + _run_id: &str, +) -> QueryDbResult> { + Ok(None) +} diff --git a/src/query/report_stream.rs b/src/query/report_stream.rs new file mode 100644 index 0000000..47710c1 --- /dev/null +++ b/src/query/report_stream.rs @@ -0,0 +1,1206 @@ +use std::collections::BTreeMap; +use std::fs::File; +use std::io::BufReader; +use std::path::Path; + +use serde::de::{self, DeserializeSeed, IgnoredAny, MapAccess, SeqAccess, Visitor}; +use serde::{Deserialize, Deserializer, Serialize}; +use serde_json::value::RawValue; +use serde_json::{Value, json}; +use sha2::{Digest, Sha256}; + +use crate::query_db::{ + ObjectInstanceRecord, PublicationPointRecord, QUERY_DB_SCHEMA_VERSION, QueryDbError, + QueryDbResult, QueryPage, RepositoryRecord, StatsRecord, +}; + +#[derive(Clone, Debug, Default, PartialEq, Eq)] +pub struct ReportSummary { + pub validation_time: Option, + pub publication_points: Vec, + pub repos: Vec, + pub stats: ReportStatsSummary, + pub query_audit: Option, + pub download_stats: Option, + pub vrps_count: u64, + pub aspas_count: u64, + pub warnings_count: u64, + pub objects_count: u64, +} + +#[derive(Clone, Debug, Default, PartialEq, Eq)] +pub struct ReportStatsSummary { + pub object_type_counts: BTreeMap, + pub result_counts: BTreeMap, + pub source_counts: BTreeMap, + pub reason_counts: BTreeMap, +} + +#[derive(Clone, Debug, PartialEq, 
Eq)] +pub enum ObjectScope { + All, + Repo(String), + PublicationPoint(String), +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum ObjectLookup { + InstanceId(String), + Uri(String), + Sha256(String), +} + +#[derive(Clone, Debug, PartialEq, Eq, Serialize)] +#[serde(rename_all = "camelCase")] +pub struct ObjectResolutionMeta { + pub mode: String, + pub cache_hit: bool, + pub scan_scope: String, + pub scanned_publication_points: u64, + pub scanned_objects: u64, +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct ObjectLookupResult { + pub object: ObjectInstanceRecord, + pub resolution: ObjectResolutionMeta, +} + +pub fn summarize_report(report_path: &Path, run_id: &str) -> QueryDbResult { + let file = File::open(report_path)?; + let reader = BufReader::new(file); + let mut deserializer = serde_json::Deserializer::from_reader(reader); + ReportSummarySeed { run_id } + .deserialize(&mut deserializer) + .map_err(QueryDbError::from) +} + +pub fn list_report_objects( + report_path: &Path, + run_id: &str, + scope: ObjectScope, + limit: usize, + cursor: Option<&str>, +) -> QueryDbResult> { + let (start_pp_index, start_object_index) = parse_object_cursor(cursor)?; + let file = File::open(report_path)?; + let reader = BufReader::new(file); + let mut deserializer = serde_json::Deserializer::from_reader(reader); + let mut state = + ObjectScanState::new_list(run_id, scope, limit, start_pp_index, start_object_index); + ObjectScanSeed { state: &mut state } + .deserialize(&mut deserializer) + .map_err(QueryDbError::from)?; + Ok(QueryPage { + data: state.data, + next_cursor: state.next_cursor, + limit, + }) +} + +pub fn lookup_report_object( + report_path: &Path, + run_id: &str, + scope: ObjectScope, + lookup: ObjectLookup, +) -> QueryDbResult> { + let file = File::open(report_path)?; + let reader = BufReader::new(file); + let mut deserializer = serde_json::Deserializer::from_reader(reader); + let mut state = ObjectScanState::new_lookup(run_id, scope, lookup); + 
ObjectScanSeed { state: &mut state } + .deserialize(&mut deserializer) + .map_err(QueryDbError::from)?; + Ok(state.found.map(|object| ObjectLookupResult { + object, + resolution: ObjectResolutionMeta { + mode: "report_scan".to_string(), + cache_hit: false, + scan_scope: state.scope.label(), + scanned_publication_points: state.scanned_publication_points, + scanned_objects: state.scanned_objects, + }, + })) +} + +pub fn object_cursor(pp_index: u64, object_index: u64) -> String { + format!("r1:{pp_index}:{object_index}") +} + +fn parse_object_cursor(cursor: Option<&str>) -> QueryDbResult<(u64, u64)> { + let Some(cursor) = cursor else { + return Ok((0, 0)); + }; + let mut parts = cursor.split(':'); + match (parts.next(), parts.next(), parts.next(), parts.next()) { + (Some("r1"), Some(pp), Some(object), None) => { + let pp = pp.parse::().map_err(|_| { + QueryDbError::InvalidArtifact(format!("invalid object cursor: {cursor}")) + })?; + let object = object.parse::().map_err(|_| { + QueryDbError::InvalidArtifact(format!("invalid object cursor: {cursor}")) + })?; + Ok((pp, object)) + } + _ => Err(QueryDbError::InvalidArtifact(format!( + "invalid object cursor: {cursor}" + ))), + } +} + +struct ReportSummarySeed<'a> { + run_id: &'a str, +} + +impl<'de> DeserializeSeed<'de> for ReportSummarySeed<'_> { + type Value = ReportSummary; + + fn deserialize(self, deserializer: D) -> Result + where + D: Deserializer<'de>, + { + deserializer.deserialize_map(ReportSummaryVisitor { + run_id: self.run_id, + }) + } +} + +struct ReportSummaryVisitor<'a> { + run_id: &'a str, +} + +impl<'de> Visitor<'de> for ReportSummaryVisitor<'_> { + type Value = ReportSummary; + + fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { + formatter.write_str("audit report object") + } + + fn visit_map(self, mut map: A) -> Result + where + A: MapAccess<'de>, + { + let mut out = ReportSummary::default(); + while let Some(key) = map.next_key::()? 
{ + match key.as_str() { + "meta" => { + let meta: Value = map.next_value()?; + out.validation_time = + json_str(&meta, &["validation_time_rfc3339_utc"]).map(str::to_string); + } + "tree" => { + let tree: Value = map.next_value()?; + out.warnings_count = json_array_len(&tree, &["warnings"]).unwrap_or(0); + } + "publication_points" => { + let pps = map.next_value_seed(PublicationPointsSummarySeed { + run_id: self.run_id, + })?; + out.objects_count = pps.objects_count; + out.publication_points = pps.publication_points; + out.repos = pps.repos; + out.stats = pps.stats; + } + "vrps" => out.vrps_count = map.next_value::()?.0, + "aspas" => out.aspas_count = map.next_value::()?.0, + "download_stats" => out.download_stats = Some(map.next_value()?), + "queryAudit" => out.query_audit = Some(map.next_value()?), + _ => { + let _ = map.next_value::()?; + } + } + } + Ok(out) + } +} + +struct PublicationPointsSummarySeed<'a> { + run_id: &'a str, +} + +impl<'de> DeserializeSeed<'de> for PublicationPointsSummarySeed<'_> { + type Value = PublicationPointsSummary; + + fn deserialize(self, deserializer: D) -> Result + where + D: Deserializer<'de>, + { + deserializer.deserialize_seq(PublicationPointsSummaryVisitor { + run_id: self.run_id, + }) + } +} + +#[derive(Default)] +struct PublicationPointsSummary { + publication_points: Vec, + repos: Vec, + stats: ReportStatsSummary, + objects_count: u64, +} + +struct PublicationPointsSummaryVisitor<'a> { + run_id: &'a str, +} + +impl<'de> Visitor<'de> for PublicationPointsSummaryVisitor<'_> { + type Value = PublicationPointsSummary; + + fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { + formatter.write_str("publication point array") + } + + fn visit_seq(self, mut seq: A) -> Result + where + A: SeqAccess<'de>, + { + let mut out = PublicationPointsSummary::default(); + let mut repos = BTreeMap::::new(); + while let Some(pp) = seq.next_element::()? 
{ + let repo_uri = repo_uri_for_pp(&pp); + let repo_id = stable_id(&repo_uri); + let pp_id = pp_id_for(&pp); + let pp_record = pp.to_record(self.run_id, repo_id.clone(), pp_id); + repos + .entry(repo_id.clone()) + .or_insert_with(|| RepoBuilder::new(repo_id, repo_uri)) + .add_pp(&pp_record); + out.objects_count += pp_record.objects; + merge_counts( + &mut out.stats.object_type_counts, + pp.object_stats.object_type_counts, + ); + merge_counts(&mut out.stats.result_counts, pp.object_stats.result_counts); + merge_counts(&mut out.stats.reason_counts, pp.object_stats.reason_counts); + *out.stats + .source_counts + .entry("report".to_string()) + .or_default() += pp_record.objects; + out.publication_points.push(pp_record); + } + out.repos = repos + .into_values() + .map(|builder| builder.finish(self.run_id)) + .collect(); + Ok(out) + } +} + +#[derive(Default, Deserialize)] +struct CountSeq(#[serde(deserialize_with = "count_seq")] u64); + +fn count_seq<'de, D>(deserializer: D) -> Result +where + D: Deserializer<'de>, +{ + struct CountVisitor; + impl<'de> Visitor<'de> for CountVisitor { + type Value = u64; + fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { + formatter.write_str("array") + } + fn visit_seq(self, mut seq: A) -> Result + where + A: SeqAccess<'de>, + { + let mut count = 0u64; + while seq.next_element::()?.is_some() { + count += 1; + } + Ok(count) + } + } + deserializer.deserialize_seq(CountVisitor) +} + +#[derive(Clone, Debug, Default)] +struct PublicationPointSummary { + node_id: Option, + parent_node_id: Option, + rsync_base_uri: Option, + manifest_rsync_uri: Option, + publication_point_rsync_uri: Option, + rrdp_notification_uri: Option, + source: Option, + repo_sync_source: Option, + repo_sync_phase: Option, + repo_sync_duration_ms: Option, + repo_sync_error: Option, + repo_terminal_state: Option, + this_update: Option, + next_update: Option, + verified_at: Option, + warnings: u64, + object_stats: ObjectStats, +} + +impl 
PublicationPointSummary { + fn to_record(&self, run_id: &str, repo_id: String, pp_id: String) -> PublicationPointRecord { + PublicationPointRecord { + schema_version: QUERY_DB_SCHEMA_VERSION, + run_id: run_id.to_string(), + pp_id, + repo_id, + node_id: self.node_id, + parent_node_id: self.parent_node_id, + rsync_base_uri: self.rsync_base_uri.clone(), + manifest_rsync_uri: self.manifest_rsync_uri.clone(), + publication_point_rsync_uri: self.publication_point_rsync_uri.clone(), + rrdp_notification_uri: self.rrdp_notification_uri.clone(), + source: self.source.clone(), + repo_sync_source: self.repo_sync_source.clone(), + repo_sync_phase: self.repo_sync_phase.clone(), + repo_sync_duration_ms: self.repo_sync_duration_ms, + repo_sync_error: self.repo_sync_error.clone(), + repo_terminal_state: self.repo_terminal_state.clone(), + this_update: self.this_update.clone(), + next_update: self.next_update.clone(), + verified_at: self.verified_at.clone(), + objects: self.object_stats.count, + rejected_objects: self.object_stats.rejected, + warnings: self.warnings, + } + } +} + +impl<'de> Deserialize<'de> for PublicationPointSummary { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + deserializer.deserialize_map(PublicationPointSummaryVisitor) + } +} + +struct PublicationPointSummaryVisitor; + +impl<'de> Visitor<'de> for PublicationPointSummaryVisitor { + type Value = PublicationPointSummary; + + fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { + formatter.write_str("publication point object") + } + + fn visit_map(self, mut map: A) -> Result + where + A: MapAccess<'de>, + { + let mut pp = PublicationPointSummary::default(); + while let Some(key) = map.next_key::()? 
{ + match key.as_str() { + "node_id" => pp.node_id = map.next_value()?, + "parent_node_id" => pp.parent_node_id = map.next_value()?, + "rsync_base_uri" => pp.rsync_base_uri = map.next_value()?, + "manifest_rsync_uri" => pp.manifest_rsync_uri = map.next_value()?, + "publication_point_rsync_uri" => { + pp.publication_point_rsync_uri = map.next_value()? + } + "rrdp_notification_uri" => pp.rrdp_notification_uri = map.next_value()?, + "source" => pp.source = map.next_value()?, + "repo_sync_source" => pp.repo_sync_source = map.next_value()?, + "repo_sync_phase" => pp.repo_sync_phase = map.next_value()?, + "repo_sync_duration_ms" => pp.repo_sync_duration_ms = map.next_value()?, + "repo_sync_error" => pp.repo_sync_error = map.next_value()?, + "repo_terminal_state" => pp.repo_terminal_state = map.next_value()?, + "this_update_rfc3339_utc" => pp.this_update = map.next_value()?, + "next_update_rfc3339_utc" => pp.next_update = map.next_value()?, + "verified_at_rfc3339_utc" => pp.verified_at = map.next_value()?, + "warnings" => pp.warnings = map.next_value::()?.0, + "objects" => pp.object_stats = map.next_value::()?.0, + _ => { + let _ = map.next_value::()?; + } + } + } + Ok(pp) + } +} + +#[derive(Clone, Debug, Default)] +struct ObjectStats { + count: u64, + rejected: u64, + object_type_counts: BTreeMap, + result_counts: BTreeMap, + reason_counts: BTreeMap, +} + +#[derive(Clone, Debug, Default)] +struct ObjectStatsSeq(ObjectStats); + +impl<'de> Deserialize<'de> for ObjectStatsSeq { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + struct ObjectStatsVisitor; + impl<'de> Visitor<'de> for ObjectStatsVisitor { + type Value = ObjectStatsSeq; + fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { + formatter.write_str("object audit array") + } + fn visit_seq(self, mut seq: A) -> Result + where + A: SeqAccess<'de>, + { + let mut stats = ObjectStats::default(); + while let Some(object) = seq.next_element::()? 
{ + stats.count += 1; + if object.result == "error" { + stats.rejected += 1; + } + *stats + .object_type_counts + .entry(object.object_type) + .or_default() += 1; + *stats.result_counts.entry(object.result).or_default() += 1; + if let Some(detail) = object.detail.filter(|value| !value.is_empty()) { + bump_limited_count(&mut stats.reason_counts, &detail, 512); + } + } + Ok(ObjectStatsSeq(stats)) + } + } + deserializer.deserialize_seq(ObjectStatsVisitor) + } +} + +#[derive(Clone, Debug, Default)] +struct ReportObjectSummary { + object_type: String, + result: String, + detail: Option, +} + +impl<'de> Deserialize<'de> for ReportObjectSummary { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + deserializer.deserialize_map(ReportObjectSummaryVisitor) + } +} + +struct ReportObjectSummaryVisitor; + +impl<'de> Visitor<'de> for ReportObjectSummaryVisitor { + type Value = ReportObjectSummary; + fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { + formatter.write_str("object summary") + } + fn visit_map(self, mut map: A) -> Result + where + A: MapAccess<'de>, + { + let mut object = ReportObjectSummary { + object_type: "other".to_string(), + result: "unknown".to_string(), + detail: None, + }; + let mut uri: Option = None; + while let Some(key) = map.next_key::()? { + match key.as_str() { + "kind" => { + object.object_type = map + .next_value::>()? + .unwrap_or_else(|| "other".to_string()) + } + "result" => { + object.result = map + .next_value::>()? 
+ .unwrap_or_else(|| "unknown".to_string()) + } + "detail" => object.detail = map.next_value()?, + "rsync_uri" | "uri" => uri = map.next_value()?, + _ => { + let _ = map.next_value::()?; + } + } + } + if object.object_type == "other" { + if let Some(uri) = uri.as_deref() { + object.object_type = object_type_from_uri(uri); + } + } + Ok(object) + } +} + +struct ObjectScanState<'a> { + run_id: &'a str, + scope: ObjectScope, + mode: ObjectScanMode, + limit: usize, + start_pp_index: u64, + start_object_index: u64, + data: Vec, + next_cursor: Option, + found: Option, + scanned_publication_points: u64, + scanned_objects: u64, +} + +impl<'a> ObjectScanState<'a> { + fn new_list( + run_id: &'a str, + scope: ObjectScope, + limit: usize, + start_pp_index: u64, + start_object_index: u64, + ) -> Self { + Self { + run_id, + scope, + mode: ObjectScanMode::List, + limit, + start_pp_index, + start_object_index, + data: Vec::new(), + next_cursor: None, + found: None, + scanned_publication_points: 0, + scanned_objects: 0, + } + } + + fn new_lookup(run_id: &'a str, scope: ObjectScope, lookup: ObjectLookup) -> Self { + Self { + run_id, + scope, + mode: ObjectScanMode::Lookup(lookup), + limit: 1, + start_pp_index: 0, + start_object_index: 0, + data: Vec::new(), + next_cursor: None, + found: None, + scanned_publication_points: 0, + scanned_objects: 0, + } + } +} + +#[derive(Clone, Debug, PartialEq, Eq)] +enum ObjectScanMode { + List, + Lookup(ObjectLookup), +} + +struct ObjectScanSeed<'a, 's> { + state: &'s mut ObjectScanState<'a>, +} + +impl<'de> DeserializeSeed<'de> for ObjectScanSeed<'_, '_> { + type Value = (); + + fn deserialize(self, deserializer: D) -> Result + where + D: Deserializer<'de>, + { + deserializer.deserialize_map(ObjectScanReportVisitor { state: self.state }) + } +} + +struct ObjectScanReportVisitor<'a, 's> { + state: &'s mut ObjectScanState<'a>, +} + +impl<'de> Visitor<'de> for ObjectScanReportVisitor<'_, '_> { + type Value = (); + fn expecting(&self, formatter: &mut 
std::fmt::Formatter) -> std::fmt::Result { + formatter.write_str("audit report object") + } + fn visit_map(self, mut map: A) -> Result + where + A: MapAccess<'de>, + { + while let Some(key) = map.next_key::()? { + if key == "publication_points" { + map.next_value_seed(ObjectScanPpsSeed { state: self.state })?; + } else { + let _ = map.next_value::()?; + } + } + Ok(()) + } +} + +struct ObjectScanPpsSeed<'a, 's> { + state: &'s mut ObjectScanState<'a>, +} + +impl<'de> DeserializeSeed<'de> for ObjectScanPpsSeed<'_, '_> { + type Value = (); + fn deserialize(self, deserializer: D) -> Result + where + D: Deserializer<'de>, + { + deserializer.deserialize_seq(ObjectScanPpsVisitor { state: self.state }) + } +} + +struct ObjectScanPpsVisitor<'a, 's> { + state: &'s mut ObjectScanState<'a>, +} + +impl<'de> Visitor<'de> for ObjectScanPpsVisitor<'_, '_> { + type Value = (); + fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { + formatter.write_str("publication point array") + } + fn visit_seq(self, mut seq: A) -> Result + where + A: SeqAccess<'de>, + { + let mut pp_index = 0u64; + while let Some(raw_pp) = seq.next_element::>()? 
{ + process_raw_publication_point(self.state, pp_index, raw_pp.get()) + .map_err(de::Error::custom)?; + pp_index += 1; + } + Ok(()) + } +} + +fn process_raw_publication_point( + state: &mut ObjectScanState<'_>, + pp_index: u64, + raw_pp: &str, +) -> QueryDbResult<()> { + state.scanned_publication_points += 1; + let pp: PublicationPointForObjects = serde_json::from_str(raw_pp)?; + let ctx = PpObjectContext::from_pp(state.run_id, &pp.summary); + if !state.scope.matches(&ctx) { + return Ok(()); + } + for (object_index, object) in pp.objects.into_iter().enumerate() { + state.scanned_objects += 1; + let record = object.to_record(state.run_id, &ctx); + match &state.mode { + ObjectScanMode::List => { + let object_index = object_index as u64; + let after_cursor = pp_index > state.start_pp_index + || (pp_index == state.start_pp_index + && object_index >= state.start_object_index); + if after_cursor && state.data.len() < state.limit { + state.data.push(record); + if state.data.len() == state.limit { + state.next_cursor = Some(object_cursor(pp_index, object_index + 1)); + } + } + } + ObjectScanMode::Lookup(lookup) => { + if state.found.is_none() && lookup.matches(&record) { + state.found = Some(record); + } + } + } + } + Ok(()) +} + +#[derive(Clone, Debug, Default)] +struct PublicationPointForObjects { + summary: PublicationPointSummary, + objects: Vec, +} + +impl<'de> Deserialize<'de> for PublicationPointForObjects { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + deserializer.deserialize_map(PublicationPointForObjectsVisitor) + } +} + +struct PublicationPointForObjectsVisitor; + +impl<'de> Visitor<'de> for PublicationPointForObjectsVisitor { + type Value = PublicationPointForObjects; + fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { + formatter.write_str("publication point object for object scan") + } + fn visit_map(self, mut map: A) -> Result + where + A: MapAccess<'de>, + { + let mut pp = 
PublicationPointForObjects::default(); + while let Some(key) = map.next_key::()? { + match key.as_str() { + "node_id" => pp.summary.node_id = map.next_value()?, + "parent_node_id" => pp.summary.parent_node_id = map.next_value()?, + "rsync_base_uri" => pp.summary.rsync_base_uri = map.next_value()?, + "manifest_rsync_uri" => pp.summary.manifest_rsync_uri = map.next_value()?, + "publication_point_rsync_uri" => { + pp.summary.publication_point_rsync_uri = map.next_value()? + } + "rrdp_notification_uri" => pp.summary.rrdp_notification_uri = map.next_value()?, + "source" => pp.summary.source = map.next_value()?, + "repo_sync_source" => pp.summary.repo_sync_source = map.next_value()?, + "repo_sync_phase" => pp.summary.repo_sync_phase = map.next_value()?, + "repo_sync_duration_ms" => pp.summary.repo_sync_duration_ms = map.next_value()?, + "repo_sync_error" => pp.summary.repo_sync_error = map.next_value()?, + "repo_terminal_state" => pp.summary.repo_terminal_state = map.next_value()?, + "this_update_rfc3339_utc" => pp.summary.this_update = map.next_value()?, + "next_update_rfc3339_utc" => pp.summary.next_update = map.next_value()?, + "verified_at_rfc3339_utc" => pp.summary.verified_at = map.next_value()?, + "objects" => pp.objects = map.next_value()?, + _ => { + let _ = map.next_value::()?; + } + } + } + Ok(pp) + } +} + +#[derive(Clone, Debug, Default)] +struct ReportObject { + uri: String, + sha256: String, + object_type: String, + result: String, + detail: Option, +} + +impl ReportObject { + fn to_record(self, run_id: &str, ctx: &PpObjectContext) -> ObjectInstanceRecord { + let object_type = if self.object_type.is_empty() || self.object_type == "other" { + object_type_from_uri(&self.uri) + } else { + self.object_type + }; + let uri_hash = stable_id(&self.uri); + let object_instance_id = stable_id(&format!( + "{run_id}\0{}\0{}\0{}", + self.uri, self.sha256, ctx.pp_id + )); + let rejected = self.result == "error"; + ObjectInstanceRecord { + schema_version: 
QUERY_DB_SCHEMA_VERSION, + run_id: run_id.to_string(), + object_instance_id, + uri: self.uri, + uri_hash, + sha256: self.sha256, + object_type, + result: self.result, + detail_summary: self.detail.clone(), + repo_id: ctx.repo_id.clone(), + pp_id: ctx.pp_id.clone(), + source_section: "report".to_string(), + rejected, + reject_reason: if rejected { self.detail } else { None }, + } + } +} + +impl<'de> Deserialize<'de> for ReportObject { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + deserializer.deserialize_map(ReportObjectVisitor) + } +} + +struct ReportObjectVisitor; + +impl<'de> Visitor<'de> for ReportObjectVisitor { + type Value = ReportObject; + fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { + formatter.write_str("report object") + } + fn visit_map(self, mut map: A) -> Result + where + A: MapAccess<'de>, + { + let mut object = ReportObject { + result: "unknown".to_string(), + object_type: "other".to_string(), + ..ReportObject::default() + }; + while let Some(key) = map.next_key::()? { + match key.as_str() { + "rsync_uri" | "uri" => { + object.uri = map.next_value::>()?.unwrap_or_default() + } + "sha256_hex" | "sha256" => { + object.sha256 = map.next_value::>()?.unwrap_or_default() + } + "kind" => { + object.object_type = map + .next_value::>()? + .unwrap_or_else(|| "other".to_string()) + } + "result" => { + object.result = map + .next_value::>()? 
+ .unwrap_or_else(|| "unknown".to_string()) + } + "detail" => object.detail = map.next_value()?, + _ => { + let _ = map.next_value::()?; + } + } + } + Ok(object) + } +} + +#[derive(Clone, Debug, Default)] +struct PpObjectContext { + repo_id: String, + pp_id: String, +} + +impl PpObjectContext { + fn from_pp(_run_id: &str, pp: &PublicationPointSummary) -> Self { + let repo_uri = repo_uri_for_pp(pp); + Self { + repo_id: stable_id(&repo_uri), + pp_id: pp_id_for(pp), + } + } +} + +impl ObjectScope { + fn matches(&self, ctx: &PpObjectContext) -> bool { + match self { + ObjectScope::All => true, + ObjectScope::Repo(repo_id) => repo_id == &ctx.repo_id, + ObjectScope::PublicationPoint(pp_id) => pp_id == &ctx.pp_id, + } + } + + fn label(&self) -> String { + match self { + ObjectScope::All => "all".to_string(), + ObjectScope::Repo(_) => "repo".to_string(), + ObjectScope::PublicationPoint(_) => "publication_point".to_string(), + } + } +} + +impl ObjectLookup { + fn matches(&self, object: &ObjectInstanceRecord) -> bool { + match self { + ObjectLookup::InstanceId(id) => id == &object.object_instance_id, + ObjectLookup::Uri(uri) => uri == &object.uri, + ObjectLookup::Sha256(sha256) => sha256.eq_ignore_ascii_case(&object.sha256), + } + } +} + +#[derive(Clone, Debug)] +struct RepoBuilder { + repo_id: String, + uri: String, + publication_points: u64, + objects: u64, + rejected_objects: u64, + sync_duration_ms_total: u64, + phases: BTreeMap, + terminal_states: BTreeMap, +} + +impl RepoBuilder { + fn new(repo_id: String, uri: String) -> Self { + Self { + repo_id, + uri, + publication_points: 0, + objects: 0, + rejected_objects: 0, + sync_duration_ms_total: 0, + phases: BTreeMap::new(), + terminal_states: BTreeMap::new(), + } + } + + fn add_pp(&mut self, pp: &PublicationPointRecord) { + self.publication_points += 1; + self.objects += pp.objects; + self.rejected_objects += pp.rejected_objects; + self.sync_duration_ms_total = self + .sync_duration_ms_total + 
.saturating_add(pp.repo_sync_duration_ms.unwrap_or(0)); + if let Some(phase) = pp.repo_sync_phase.as_ref() { + *self.phases.entry(phase.clone()).or_default() += 1; + } + if let Some(state) = pp.repo_terminal_state.as_ref() { + *self.terminal_states.entry(state.clone()).or_default() += 1; + } + } + + fn finish(self, run_id: &str) -> RepositoryRecord { + RepositoryRecord { + schema_version: QUERY_DB_SCHEMA_VERSION, + run_id: run_id.to_string(), + repo_id: self.repo_id, + uri: self.uri.clone(), + host: uri_host(&self.uri), + transport: infer_transport(&self.uri), + publication_points: self.publication_points, + objects: self.objects, + rejected_objects: self.rejected_objects, + download_bytes: None, + sync_duration_ms_total: self.sync_duration_ms_total, + phases: self.phases, + terminal_states: self.terminal_states, + } + } +} + +pub fn stats_records_from_summary( + run_id: &str, + summary: &ReportSummary, + artifacts: Value, +) -> Vec { + vec![ + stats_record( + run_id, + "overview", + "counts", + json!({ + "publicationPoints": summary.publication_points.len() as u64, + "objects": summary.objects_count, + "repos": summary.repos.len() as u64, + "vrps": summary.vrps_count, + "aspas": summary.aspas_count, + }), + ), + stats_record( + run_id, + "objects", + "by_type", + json!(summary.stats.object_type_counts), + ), + stats_record( + run_id, + "validation", + "by_result", + json!(summary.stats.result_counts), + ), + stats_record( + run_id, + "objects", + "by_source", + json!(summary.stats.source_counts), + ), + stats_record( + run_id, + "validation", + "reasons", + json!(summary.stats.reason_counts), + ), + stats_record( + run_id, + "validation_events", + "manifest", + summary.query_audit.clone().unwrap_or_else(|| json!({})), + ), + stats_record(run_id, "validation_events", "by_type", json!({})), + stats_record(run_id, "validation_events", "by_result", json!({})), + stats_record(run_id, "validation_events", "reasons", json!({})), + stats_record( + run_id, + "downloads", + 
"summary", + summary.download_stats.clone().unwrap_or_else(|| json!({})), + ), + stats_record(run_id, "artifacts", "manifest", artifacts), + ] +} + +fn stats_record(run_id: &str, scope: &str, name: &str, value: Value) -> StatsRecord { + StatsRecord { + schema_version: QUERY_DB_SCHEMA_VERSION, + run_id: run_id.to_string(), + scope: scope.to_string(), + name: name.to_string(), + value, + } +} + +fn merge_counts(target: &mut BTreeMap, source: BTreeMap) { + for (key, value) in source { + *target.entry(key).or_default() += value; + } +} + +fn bump_limited_count(counts: &mut BTreeMap, key: &str, limit: usize) { + if counts.contains_key(key) || counts.len() < limit { + *counts.entry(key.to_string()).or_default() += 1; + } else { + *counts.entry("__other__".to_string()).or_default() += 1; + } +} + +fn repo_uri_for_pp(pp: &PublicationPointSummary) -> String { + pp.rrdp_notification_uri + .as_deref() + .or(pp.rsync_base_uri.as_deref()) + .or(pp.publication_point_rsync_uri.as_deref()) + .or(pp.manifest_rsync_uri.as_deref()) + .unwrap_or("unknown") + .to_string() +} + +fn pp_id_for(pp: &PublicationPointSummary) -> String { + if let Some(node_id) = pp.node_id { + return format!("node_{node_id}"); + } + stable_id( + pp.manifest_rsync_uri + .as_deref() + .or(pp.publication_point_rsync_uri.as_deref()) + .unwrap_or("unknown"), + ) +} + +fn infer_transport(uri: &str) -> String { + if uri.starts_with("http://") || uri.starts_with("https://") { + "rrdp".to_string() + } else if uri.starts_with("rsync://") { + "rsync".to_string() + } else { + "unknown".to_string() + } +} + +fn uri_host(uri: &str) -> String { + let without_scheme = uri.split_once("://").map(|(_, rest)| rest).unwrap_or(uri); + without_scheme + .split('/') + .next() + .filter(|s| !s.is_empty()) + .unwrap_or("unknown") + .to_string() +} + +fn object_type_from_uri(uri: &str) -> String { + let lower = uri.to_ascii_lowercase(); + for (suffix, kind) in [ + (".mft", "manifest"), + (".crl", "crl"), + (".cer", "certificate"), + 
(".roa", "roa"), + (".asa", "aspa"), + (".gbr", "gbr"), + ] { + if lower.ends_with(suffix) { + return kind.to_string(); + } + } + "other".to_string() +} + +fn stable_id(value: &str) -> String { + let digest = Sha256::digest(value.as_bytes()); + hex::encode(&digest[..12]) +} + +fn json_str<'a>(value: &'a Value, path: &[&str]) -> Option<&'a str> { + let mut current = value; + for key in path { + current = current.get(*key)?; + } + current.as_str() +} + +fn json_array_len(value: &Value, path: &[&str]) -> Option { + let mut current = value; + for key in path { + current = current.get(*key)?; + } + current.as_array().map(|items| items.len() as u64) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::fs; + + #[test] + fn cursor_round_trip_and_rejects_bad_input() { + assert_eq!(parse_object_cursor(None).unwrap(), (0, 0)); + let cursor = object_cursor(3, 9); + assert_eq!(parse_object_cursor(Some(&cursor)).unwrap(), (3, 9)); + assert!(parse_object_cursor(Some("bad")).is_err()); + } + + #[test] + fn summary_and_object_listing_stream_report() { + let temp = tempfile::tempdir().expect("tempdir"); + let report_path = temp.path().join("report.json"); + fs::write( + &report_path, + r#"{ + "meta":{"validation_time_rfc3339_utc":"2026-06-16T00:00:00Z"}, + "tree":{"warnings":[{}]}, + "publication_points":[{ + "node_id":1, + "rsync_base_uri":"rsync://repo.example/rpki/", + "manifest_rsync_uri":"rsync://repo.example/rpki/m.mft", + "publication_point_rsync_uri":"rsync://repo.example/rpki/", + "rrdp_notification_uri":"https://repo.example/rrdp/notification.xml", + "source":"rrdp", + "repo_sync_source":"rrdp", + "repo_sync_phase":"rrdp_delta", + "repo_sync_duration_ms":7, + "repo_terminal_state":"fresh", + "warnings":[], + "objects":[ + {"rsync_uri":"rsync://repo.example/rpki/m.mft","sha256_hex":"11","kind":"manifest","result":"ok"}, + {"rsync_uri":"rsync://repo.example/rpki/a.roa","sha256_hex":"22","kind":"roa","result":"error","detail":"bad roa"} + ] + }], + "vrps":[{},{}], + 
"aspas":[{}], + "download_stats":{"eventsTotal":0}, + "queryAudit":{"status":"complete","eventsPath":"validation-events.jsonl"} + }"#, + ) + .expect("report"); + let summary = summarize_report(&report_path, "run_0001").expect("summary"); + assert_eq!( + summary.validation_time.as_deref(), + Some("2026-06-16T00:00:00Z") + ); + assert_eq!(summary.publication_points.len(), 1); + assert_eq!(summary.repos.len(), 1); + assert_eq!(summary.objects_count, 2); + assert_eq!(summary.vrps_count, 2); + assert_eq!(summary.aspas_count, 1); + assert_eq!(summary.stats.object_type_counts["roa"], 1); + assert_eq!(summary.stats.reason_counts["bad roa"], 1); + + let page = + list_report_objects(&report_path, "run_0001", ObjectScope::All, 1, None).expect("page"); + assert_eq!(page.data.len(), 1); + assert!(page.next_cursor.is_some()); + let page2 = list_report_objects( + &report_path, + "run_0001", + ObjectScope::All, + 10, + page.next_cursor.as_deref(), + ) + .expect("page2"); + assert_eq!(page2.data.len(), 1); + assert_eq!(page2.data[0].object_type, "roa"); + + let found = lookup_report_object( + &report_path, + "run_0001", + ObjectScope::All, + ObjectLookup::Uri("rsync://repo.example/rpki/a.roa".to_string()), + ) + .expect("lookup") + .expect("found"); + assert_eq!(found.object.result, "error"); + assert_eq!(found.resolution.mode, "report_scan"); + } +} diff --git a/src/query_db.rs b/src/query_db.rs new file mode 100644 index 0000000..c68e36f --- /dev/null +++ b/src/query_db.rs @@ -0,0 +1,1992 @@ +use std::collections::{BTreeMap, BTreeSet}; +use std::fs; +use std::path::{Path, PathBuf}; + +use rocksdb::{ColumnFamilyDescriptor, DB, IteratorMode, Options, WriteBatch}; +use serde::{Deserialize, Serialize}; +use serde_json::Value; +use sha2::{Digest, Sha256}; + +use crate::query::artifact_manifest::build_artifact_manifest; +use crate::query::report_stream::{self, ObjectLookup, ObjectScope, ReportSummary}; + +use crate::blob_store::ExternalRepoBytesDb; + +pub const QUERY_DB_SCHEMA_VERSION: 
u32 = 1; + +pub const CF_META: &str = "meta"; +pub const CF_RUNS: &str = "runs"; +pub const CF_RUNS_BY_SEQ: &str = "runs_by_seq"; +pub const CF_REPOS: &str = "repos"; +pub const CF_PUBLICATION_POINTS: &str = "publication_points"; +pub const CF_OBJECT_INSTANCES: &str = "object_instances"; +pub const CF_OBJECTS_BY_URI: &str = "objects_by_uri"; +pub const CF_OBJECTS_BY_HASH: &str = "objects_by_hash"; +pub const CF_VALIDATION_EXPLAIN_CACHE: &str = "validation_explain_cache"; +pub const CF_EXPORT_JOBS: &str = "export_jobs"; +pub const CF_STATS: &str = "stats"; +pub const CF_REASON_INDEX: &str = "reason_index"; + +pub const QUERY_DB_COLUMN_FAMILIES: &[&str] = &[ + CF_META, + CF_RUNS, + CF_RUNS_BY_SEQ, + CF_REPOS, + CF_PUBLICATION_POINTS, + CF_OBJECT_INSTANCES, + CF_OBJECTS_BY_URI, + CF_OBJECTS_BY_HASH, + CF_VALIDATION_EXPLAIN_CACHE, + CF_EXPORT_JOBS, + CF_STATS, + CF_REASON_INDEX, +]; + +const KEY_SCHEMA_VERSION: &[u8] = b"schema_version"; +const KEY_LATEST_READY_RUN: &[u8] = b"latest_ready_run"; + +#[derive(Debug, thiserror::Error)] +pub enum QueryDbError { + #[error("rocksdb error: {0}")] + RocksDb(String), + #[error("io error: {0}")] + Io(String), + #[error("json error: {0}")] + Json(String), + #[error("missing column family: {0}")] + MissingColumnFamily(&'static str), + #[error("invalid run artifact: {0}")] + InvalidArtifact(String), + #[error("CIR decode error: {0}")] + CirDecode(String), +} + +pub type QueryDbResult = Result; + +impl From for QueryDbError { + fn from(value: rocksdb::Error) -> Self { + Self::RocksDb(value.to_string()) + } +} + +impl From for QueryDbError { + fn from(value: std::io::Error) -> Self { + Self::Io(value.to_string()) + } +} + +impl From for QueryDbError { + fn from(value: serde_json::Error) -> Self { + Self::Json(value.to_string()) + } +} + +#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct QueryIndexSummary { + pub runs_indexed: u64, + pub runs_deleted: u64, + pub 
retained_runs: u64, + pub repos_indexed: u64, + pub publication_points_indexed: u64, + pub object_instances_indexed: u64, + pub object_projections_indexed: u64, + pub stats_indexed: u64, + pub latest_ready_run: Option, + pub errors: Vec, +} + +#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct QueryPage { + pub data: Vec, + pub next_cursor: Option, + pub limit: usize, +} + +#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct RunCounts { + pub publication_points: u64, + pub objects: u64, + pub fresh_objects: u64, + pub cached_objects: u64, + pub rejected_objects: u64, + pub fresh_rejected_objects: u64, + pub cached_rejected_objects: u64, + pub trust_anchors: u64, + pub vrps: u64, + pub aspas: u64, + pub warnings: u64, +} + +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct RunRecord { + pub schema_version: u32, + pub run_id: String, + pub run_seq: Option, + pub run_dir: String, + pub validation_time: Option, + pub sync_mode: Option, + pub started_at: Option, + pub finished_at: Option, + pub wall_ms: Option, + pub artifact_paths: BTreeMap, + pub counts: RunCounts, + pub index_status: String, + pub index_error: Option, +} + +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct RepositoryRecord { + pub schema_version: u32, + pub run_id: String, + pub repo_id: String, + pub uri: String, + pub host: String, + pub transport: String, + pub publication_points: u64, + pub objects: u64, + pub rejected_objects: u64, + pub download_bytes: Option, + pub sync_duration_ms_total: u64, + pub phases: BTreeMap, + pub terminal_states: BTreeMap, +} + +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct PublicationPointRecord { + pub schema_version: u32, + pub run_id: String, + 
pub pp_id: String, + pub repo_id: String, + pub node_id: Option, + pub parent_node_id: Option, + pub rsync_base_uri: Option, + pub manifest_rsync_uri: Option, + pub publication_point_rsync_uri: Option, + pub rrdp_notification_uri: Option, + pub source: Option, + pub repo_sync_source: Option, + pub repo_sync_phase: Option, + pub repo_sync_duration_ms: Option, + pub repo_sync_error: Option, + pub repo_terminal_state: Option, + pub this_update: Option, + pub next_update: Option, + pub verified_at: Option, + pub objects: u64, + pub rejected_objects: u64, + pub warnings: u64, +} + +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct ObjectInstanceRecord { + pub schema_version: u32, + pub run_id: String, + pub object_instance_id: String, + pub uri: String, + pub uri_hash: String, + pub sha256: String, + pub object_type: String, + pub result: String, + pub detail_summary: Option, + pub repo_id: String, + pub pp_id: String, + pub source_section: String, + pub rejected: bool, + pub reject_reason: Option, +} + +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct ObjectUriIndexRecord { + pub run_id: String, + pub uri: String, + pub sha256: String, + pub object_instance_id: String, + pub repo_id: String, + pub pp_id: String, +} + +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct StatsRecord { + pub schema_version: u32, + pub run_id: String, + pub scope: String, + pub name: String, + pub value: Value, +} + +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct ExportJobRecord { + pub schema_version: u32, + pub job_id: String, + pub run_id: String, + pub scope: String, + pub repo_id: Option, + pub pp_id: Option, + pub status: String, + pub created_at: String, + pub finished_at: Option, + pub output_path: Option, + pub object_count: u64, + pub 
bytes_written: u64, + pub error: Option, +} + +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct ChainEdgeRecord { + pub relation: String, + pub from_uri: String, + pub to_uri: String, + pub to_object_instance_id: Option, + pub to_sha256: Option, + pub status: String, + pub evidence: Value, +} + +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct ValidationExplainRecord { + pub schema_version: u32, + pub explain_version: u32, + pub run_id: String, + pub object_instance_id: String, + pub uri: String, + pub sha256: String, + pub object_type: String, + pub final_status: String, + pub audit_result: String, + pub detail_summary: Option, + pub authoritative: bool, + pub explain_mode: String, + pub generated_at: String, + pub parsevalidate: Value, + pub chainvalidate: Value, + pub chain_edges: Vec, +} + +pub struct QueryDb { + db: DB, + secondary: bool, +} + +impl QueryDb { + pub fn open(path: impl AsRef) -> QueryDbResult { + let mut opts = Options::default(); + opts.create_if_missing(true); + opts.create_missing_column_families(true); + opts.set_compression_type(rocksdb::DBCompressionType::Lz4); + let descriptors = QUERY_DB_COLUMN_FAMILIES + .iter() + .map(|name| ColumnFamilyDescriptor::new(*name, cf_options())) + .collect::>(); + let db = DB::open_cf_descriptors(&opts, path, descriptors)?; + let store = Self { + db, + secondary: false, + }; + store.put_json_cf(CF_META, KEY_SCHEMA_VERSION, &QUERY_DB_SCHEMA_VERSION)?; + Ok(store) + } + + pub fn open_secondary( + primary_path: impl AsRef, + secondary_path: impl AsRef, + ) -> QueryDbResult { + let mut opts = Options::default(); + opts.create_if_missing(false); + opts.create_missing_column_families(false); + opts.set_compression_type(rocksdb::DBCompressionType::Lz4); + let descriptors = QUERY_DB_COLUMN_FAMILIES + .iter() + .map(|name| ColumnFamilyDescriptor::new(*name, cf_options())) + .collect::>(); + let 
db = DB::open_cf_descriptors_as_secondary( + &opts, + primary_path.as_ref(), + secondary_path.as_ref(), + descriptors, + )?; + Ok(Self { + db, + secondary: true, + }) + } + + pub fn try_catch_up_with_primary(&self) -> QueryDbResult<()> { + if self.secondary { + self.db.try_catch_up_with_primary()?; + } + Ok(()) + } + + pub fn is_secondary(&self) -> bool { + self.secondary + } + + pub fn latest_ready_run(&self) -> QueryDbResult> { + let Some(bytes) = self.get_cf(CF_META, KEY_LATEST_READY_RUN)? else { + return Ok(None); + }; + serde_json::from_slice(&bytes).map_err(QueryDbError::from) + } + + pub fn get_run(&self, run_id: &str) -> QueryDbResult> { + self.get_json_cf(CF_RUNS, run_key(run_id).as_bytes()) + } + + pub fn resolve_run_id(&self, run_id: &str) -> QueryDbResult> { + if run_id == "latest" || run_id == "latest_run" { + self.latest_ready_run() + } else { + Ok(Some(run_id.to_string())) + } + } + + pub fn list_runs( + &self, + limit: usize, + cursor: Option<&str>, + ) -> QueryDbResult> { + self.list_json_by_prefix(CF_RUNS, "run/", limit, cursor) + } + + pub fn list_repos( + &self, + run_id: &str, + limit: usize, + cursor: Option<&str>, + ) -> QueryDbResult> { + self.list_json_by_prefix(CF_REPOS, &format!("repo/{run_id}/"), limit, cursor) + } + + pub fn get_repo(&self, run_id: &str, repo_id: &str) -> QueryDbResult> { + self.get_json_cf(CF_REPOS, repo_key(run_id, repo_id).as_bytes()) + } + + pub fn list_publication_points( + &self, + run_id: &str, + limit: usize, + cursor: Option<&str>, + ) -> QueryDbResult> { + self.list_json_by_prefix( + CF_PUBLICATION_POINTS, + &format!("pp/{run_id}/"), + limit, + cursor, + ) + } + + pub fn list_publication_points_for_repo( + &self, + run_id: &str, + repo_id: &str, + limit: usize, + cursor: Option<&str>, + ) -> QueryDbResult> { + self.list_json_by_prefix_filtered( + CF_PUBLICATION_POINTS, + &format!("pp/{run_id}/"), + limit, + cursor, + |item: &PublicationPointRecord| item.repo_id == repo_id, + ) + } + + pub fn 
get_publication_point( + &self, + run_id: &str, + pp_id: &str, + ) -> QueryDbResult> { + self.get_json_cf(CF_PUBLICATION_POINTS, pp_key(run_id, pp_id).as_bytes()) + } + + pub fn list_objects( + &self, + run_id: &str, + limit: usize, + cursor: Option<&str>, + ) -> QueryDbResult> { + let Some(report_path) = self.report_path_for_run(run_id)? else { + return Ok(QueryPage { + data: Vec::new(), + next_cursor: None, + limit: limit.clamp(1, 1000), + }); + }; + report_stream::list_report_objects(&report_path, run_id, ObjectScope::All, limit, cursor) + } + + pub fn list_objects_for_pp( + &self, + run_id: &str, + pp_id: &str, + limit: usize, + cursor: Option<&str>, + ) -> QueryDbResult> { + let Some(report_path) = self.report_path_for_run(run_id)? else { + return Ok(QueryPage { + data: Vec::new(), + next_cursor: None, + limit: limit.clamp(1, 1000), + }); + }; + report_stream::list_report_objects( + &report_path, + run_id, + ObjectScope::PublicationPoint(pp_id.to_string()), + limit, + cursor, + ) + } + + pub fn list_objects_for_repo( + &self, + run_id: &str, + repo_id: &str, + limit: usize, + cursor: Option<&str>, + ) -> QueryDbResult> { + let Some(report_path) = self.report_path_for_run(run_id)? else { + return Ok(QueryPage { + data: Vec::new(), + next_cursor: None, + limit: limit.clamp(1, 1000), + }); + }; + report_stream::list_report_objects( + &report_path, + run_id, + ObjectScope::Repo(repo_id.to_string()), + limit, + cursor, + ) + } + + pub fn get_object_by_instance_id( + &self, + run_id: &str, + object_instance_id: &str, + ) -> QueryDbResult> { + if let Some(cached) = self.get_cached_object_by_instance_id(run_id, object_instance_id)? { + return Ok(Some(cached)); + } + let Some(report_path) = self.report_path_for_run(run_id)? 
else { + return Ok(None); + }; + let found = report_stream::lookup_report_object( + &report_path, + run_id, + ObjectScope::All, + ObjectLookup::InstanceId(object_instance_id.to_string()), + )?; + if let Some(found) = found { + self.put_lazy_object(&found.object)?; + return Ok(Some(found.object)); + } + Ok(None) + } + + pub fn get_object_by_uri( + &self, + run_id: &str, + uri: &str, + ) -> QueryDbResult> { + if let Some(cached) = + self.get_json_cf(CF_OBJECTS_BY_URI, object_uri_key(run_id, uri).as_bytes())? + { + return Ok(Some(cached)); + } + let Some(report_path) = self.report_path_for_run(run_id)? else { + return Ok(None); + }; + let found = report_stream::lookup_report_object( + &report_path, + run_id, + ObjectScope::All, + ObjectLookup::Uri(uri.to_string()), + )?; + if let Some(found) = found { + let record = object_uri_index_from_object(&found.object); + self.put_lazy_object(&found.object)?; + return Ok(Some(record)); + } + Ok(None) + } + + pub fn get_object_by_sha256( + &self, + run_id: &str, + sha256: &str, + ) -> QueryDbResult> { + let Some(report_path) = self.report_path_for_run(run_id)? 
else { + return Ok(None); + }; + let found = report_stream::lookup_report_object( + &report_path, + run_id, + ObjectScope::All, + ObjectLookup::Sha256(sha256.to_string()), + )?; + if let Some(found) = found { + self.put_lazy_object(&found.object)?; + return Ok(Some(found.object)); + } + Ok(None) + } + + fn get_cached_object_by_instance_id( + &self, + run_id: &str, + object_instance_id: &str, + ) -> QueryDbResult> { + let mut cursor = None; + loop { + let page: QueryPage = self.list_json_by_prefix( + CF_OBJECT_INSTANCES, + &format!("objinst/{run_id}/"), + 1000, + cursor.as_deref(), + )?; + if let Some(found) = page + .data + .into_iter() + .find(|item| item.object_instance_id == object_instance_id) + { + return Ok(Some(found)); + } + let Some(next_cursor) = page.next_cursor else { + return Ok(None); + }; + cursor = Some(next_cursor); + } + } + + pub fn put_lazy_object(&self, object: &ObjectInstanceRecord) -> QueryDbResult<()> { + if self.secondary { + return Ok(()); + } + let mut batch = WriteBatch::default(); + put_json_batch( + &mut batch, + self, + CF_OBJECT_INSTANCES, + object_instance_key( + &object.run_id, + &object.uri_hash, + &object.sha256, + &object.object_instance_id, + ) + .as_bytes(), + object, + )?; + let uri_index_record = object_uri_index_from_object(object); + put_json_batch( + &mut batch, + self, + CF_OBJECTS_BY_URI, + object_uri_key(&object.run_id, &object.uri).as_bytes(), + &uri_index_record, + )?; + self.write_batch(batch) + } + + pub fn put_object_projection( + &self, + projection: &crate::object_projection::ObjectProjectionRecord, + ) -> QueryDbResult<()> { + if self.secondary { + return Ok(()); + } + self.put_json_cf( + CF_OBJECTS_BY_HASH, + object_hash_key(&projection.sha256).as_bytes(), + projection, + ) + } + + fn report_path_for_run(&self, run_id: &str) -> QueryDbResult> { + Ok(self + .get_run(run_id)? 
+ .map(|run| Path::new(&run.run_dir).join("report.json"))) + } + + pub fn get_object_projection( + &self, + sha256: &str, + ) -> QueryDbResult> { + self.get_json_cf(CF_OBJECTS_BY_HASH, object_hash_key(sha256).as_bytes()) + } + + pub fn get_stat( + &self, + run_id: &str, + scope: &str, + name: &str, + ) -> QueryDbResult> { + self.get_json_cf(CF_STATS, stats_key(run_id, scope, name).as_bytes()) + } + + pub fn put_export_job(&self, job: &ExportJobRecord) -> QueryDbResult<()> { + self.put_json_cf( + CF_EXPORT_JOBS, + export_job_key(&job.run_id, &job.job_id).as_bytes(), + job, + ) + } + + pub fn get_export_job( + &self, + run_id: &str, + job_id: &str, + ) -> QueryDbResult> { + self.get_json_cf(CF_EXPORT_JOBS, export_job_key(run_id, job_id).as_bytes()) + } + + pub fn put_validation_explain(&self, explain: &ValidationExplainRecord) -> QueryDbResult<()> { + if self.secondary { + return Ok(()); + } + self.put_json_cf( + CF_VALIDATION_EXPLAIN_CACHE, + validation_explain_key( + &explain.run_id, + &explain.object_instance_id, + explain.explain_version, + ) + .as_bytes(), + explain, + ) + } + + pub fn get_validation_explain( + &self, + run_id: &str, + object_instance_id: &str, + explain_version: u32, + ) -> QueryDbResult> { + self.get_json_cf( + CF_VALIDATION_EXPLAIN_CACHE, + validation_explain_key(run_id, object_instance_id, explain_version).as_bytes(), + ) + } + + pub fn has_object_projection(&self, sha256: &str) -> QueryDbResult { + Ok(self + .get_cf(CF_OBJECTS_BY_HASH, object_hash_key(sha256).as_bytes())? 
+ .is_some()) + } + + pub fn count_cf(&self, cf_name: &'static str) -> QueryDbResult { + let cf = self.cf(cf_name)?; + let mut count = 0u64; + for item in self.db.iterator_cf(cf, IteratorMode::Start) { + let _ = item?; + count += 1; + } + Ok(count) + } + + pub fn list_ready_runs_by_seq(&self) -> QueryDbResult> { + let seq_cf = self.cf(CF_RUNS_BY_SEQ)?; + let mut seen = BTreeSet::new(); + let mut runs = Vec::new(); + for item in self.db.iterator_cf(seq_cf, IteratorMode::Start) { + let (_, value) = item?; + let run_id: String = serde_json::from_slice(&value)?; + if seen.insert(run_id.clone()) + && let Some(run) = self.get_run(&run_id)? + && run.index_status == "ready" + { + runs.push(run); + } + } + let runs_cf = self.cf(CF_RUNS)?; + for item in self.db.iterator_cf(runs_cf, IteratorMode::Start) { + let (_, value) = item?; + let run: RunRecord = serde_json::from_slice(&value)?; + if run.index_status == "ready" && seen.insert(run.run_id.clone()) { + runs.push(run); + } + } + runs.sort_by(|left, right| match (left.run_seq, right.run_seq) { + (Some(left_seq), Some(right_seq)) => left_seq.cmp(&right_seq), + (Some(_), None) => std::cmp::Ordering::Greater, + (None, Some(_)) => std::cmp::Ordering::Less, + (None, None) => left.run_id.cmp(&right.run_id), + }); + Ok(runs) + } + + pub fn enforce_run_retention(&self, retain_ready_runs: usize) -> QueryDbResult { + if retain_ready_runs == 0 { + return Ok(0); + } + let ready_runs = self.list_ready_runs_by_seq()?; + let delete_count = ready_runs.len().saturating_sub(retain_ready_runs); + let mut deleted = 0u64; + for run in ready_runs.into_iter().take(delete_count) { + self.delete_run_index(&run)?; + deleted += 1; + } + Ok(deleted) + } + + pub fn delete_run_index(&self, run: &RunRecord) -> QueryDbResult<()> { + let mut batch = WriteBatch::default(); + let runs_cf = self.cf(CF_RUNS)?; + batch.delete_cf(runs_cf, run_key(&run.run_id).as_bytes()); + if let Some(seq) = run.run_seq { + let seq_cf = self.cf(CF_RUNS_BY_SEQ)?; + 
batch.delete_cf(seq_cf, seq_key(seq).as_bytes()); + } + for (cf_name, prefix) in [ + (CF_REPOS, format!("repo/{}/", run.run_id)), + (CF_PUBLICATION_POINTS, format!("pp/{}/", run.run_id)), + (CF_OBJECT_INSTANCES, format!("objinst/{}/", run.run_id)), + (CF_OBJECTS_BY_URI, format!("objuri/{}/", run.run_id)), + ( + CF_VALIDATION_EXPLAIN_CACHE, + format!("explain/{}/", run.run_id), + ), + (CF_EXPORT_JOBS, format!("export/{}/", run.run_id)), + (CF_STATS, format!("stats/{}/", run.run_id)), + (CF_REASON_INDEX, format!("reason/{}/", run.run_id)), + ] { + self.delete_prefix_range(&mut batch, cf_name, &prefix)?; + } + self.write_batch(batch) + } + + fn cf(&self, name: &'static str) -> QueryDbResult<&rocksdb::ColumnFamily> { + self.db + .cf_handle(name) + .ok_or(QueryDbError::MissingColumnFamily(name)) + } + + fn put_json_cf( + &self, + cf_name: &'static str, + key: &[u8], + value: &T, + ) -> QueryDbResult<()> { + let cf = self.cf(cf_name)?; + let bytes = serde_json::to_vec(value)?; + self.db.put_cf(cf, key, bytes)?; + Ok(()) + } + + fn get_cf(&self, cf_name: &'static str, key: &[u8]) -> QueryDbResult>> { + let cf = self.cf(cf_name)?; + self.db.get_cf(cf, key).map_err(QueryDbError::from) + } + + fn get_json_cf Deserialize<'de>>( + &self, + cf_name: &'static str, + key: &[u8], + ) -> QueryDbResult> { + let Some(bytes) = self.get_cf(cf_name, key)? 
else { + return Ok(None); + }; + serde_json::from_slice(&bytes) + .map(Some) + .map_err(QueryDbError::from) + } + + fn list_json_by_prefix Deserialize<'de>>( + &self, + cf_name: &'static str, + prefix: &str, + raw_limit: usize, + cursor: Option<&str>, + ) -> QueryDbResult> { + let limit = raw_limit.clamp(1, 1000); + let cf = self.cf(cf_name)?; + let start = cursor.unwrap_or(prefix); + let mut data = Vec::new(); + let mut next_cursor = None; + let mode = IteratorMode::From(start.as_bytes(), rocksdb::Direction::Forward); + for item in self.db.iterator_cf(cf, mode) { + let (key, value) = item?; + let key_str = String::from_utf8_lossy(&key); + if !key_str.starts_with(prefix) { + break; + } + if data.len() >= limit { + next_cursor = Some(key_str.to_string()); + break; + } + data.push(serde_json::from_slice(&value)?); + } + Ok(QueryPage { + data, + next_cursor, + limit, + }) + } + + fn list_json_by_prefix_filtered( + &self, + cf_name: &'static str, + prefix: &str, + raw_limit: usize, + cursor: Option<&str>, + mut predicate: F, + ) -> QueryDbResult> + where + T: for<'de> Deserialize<'de>, + F: FnMut(&T) -> bool, + { + let limit = raw_limit.clamp(1, 1000); + let cf = self.cf(cf_name)?; + let start = cursor.unwrap_or(prefix); + let mut data = Vec::new(); + let mut next_cursor = None; + let mode = IteratorMode::From(start.as_bytes(), rocksdb::Direction::Forward); + for item in self.db.iterator_cf(cf, mode) { + let (key, value) = item?; + let key_str = String::from_utf8_lossy(&key); + if !key_str.starts_with(prefix) { + break; + } + let value: T = serde_json::from_slice(&value)?; + if !predicate(&value) { + continue; + } + if data.len() >= limit { + next_cursor = Some(key_str.to_string()); + break; + } + data.push(value); + } + Ok(QueryPage { + data, + next_cursor, + limit, + }) + } + + fn write_batch(&self, batch: WriteBatch) -> QueryDbResult<()> { + self.db.write(batch)?; + Ok(()) + } + + fn delete_prefix_range( + &self, + batch: &mut WriteBatch, + cf_name: &'static str, + 
prefix: &str, + ) -> QueryDbResult<()> { + let cf = self.cf(cf_name)?; + if let Some(end) = prefix_range_end(prefix.as_bytes()) { + batch.delete_range_cf(cf, prefix.as_bytes(), end.as_slice()); + } + Ok(()) + } +} + +fn cf_options() -> Options { + let mut opts = Options::default(); + opts.set_compression_type(rocksdb::DBCompressionType::Lz4); + opts +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct ArtifactIndexerConfig { + pub query_db_path: PathBuf, + pub run_root: Option, + pub run_dir: Option, + pub repo_bytes_db_path: Option, + pub projection_entry_limit: usize, + pub min_run_seq: Option, + pub retain_indexed_runs: Option, +} + +pub fn index_artifacts(config: &ArtifactIndexerConfig) -> QueryDbResult { + let db = QueryDb::open(&config.query_db_path)?; + let repo_bytes = match config.repo_bytes_db_path.as_ref() { + Some(path) => Some( + ExternalRepoBytesDb::open(path) + .map_err(|err| QueryDbError::RocksDb(err.to_string()))?, + ), + None => None, + }; + index_artifacts_with_open_db(&db, repo_bytes.as_ref(), config) +} + +pub fn index_artifacts_with_open_db( + db: &QueryDb, + repo_bytes: Option<&ExternalRepoBytesDb>, + config: &ArtifactIndexerConfig, +) -> QueryDbResult { + let mut summary = QueryIndexSummary::default(); + let run_dirs = collect_index_run_dirs(config)?; + for run_dir in run_dirs { + match index_run_dir(db, repo_bytes, config.projection_entry_limit, &run_dir) { + Ok(run_summary) => { + if run_summary.indexed { + summary.runs_indexed += 1; + } + summary.repos_indexed += run_summary.repos_indexed; + summary.publication_points_indexed += run_summary.publication_points_indexed; + summary.object_instances_indexed += run_summary.object_instances_indexed; + summary.object_projections_indexed += run_summary.object_projections_indexed; + summary.stats_indexed += run_summary.stats_indexed; + summary.latest_ready_run = run_summary.latest_ready_run; + } + Err(err) => summary.errors.push(format!("{}: {err}", run_dir.display())), + } + } + if 
summary.runs_indexed > 0 + && let Some(retain_indexed_runs) = config.retain_indexed_runs + { + summary.runs_deleted = db.enforce_run_retention(retain_indexed_runs)?; + summary.retained_runs = db.list_ready_runs_by_seq()?.len() as u64; + } + Ok(summary) +} + +fn collect_index_run_dirs(config: &ArtifactIndexerConfig) -> QueryDbResult> { + if let Some(run_dir) = config.run_dir.as_ref() { + return Ok(vec![run_dir.clone()]); + } + let root = config.run_root.as_ref().ok_or_else(|| { + QueryDbError::InvalidArtifact("either run_root or run_dir is required".into()) + })?; + let runs_root = if root.join("runs").is_dir() { + root.join("runs") + } else { + root.clone() + }; + let mut out = Vec::new(); + for entry in fs::read_dir(&runs_root)? { + let entry = entry?; + let path = entry.path(); + if !path.is_dir() { + continue; + } + let Some(name) = path.file_name().and_then(|name| name.to_str()) else { + continue; + }; + if name.starts_with("run_") && path.join("report.json").exists() { + if let Some(min_run_seq) = config.min_run_seq { + match run_index_from_path(&path) { + Some(seq) if seq >= min_run_seq => {} + _ => continue, + } + } + if run_status_is_success_or_unknown(&path)? 
{ + out.push(path); + } + } + } + out.sort(); + Ok(out) +} + +fn run_status_is_success_or_unknown(run_dir: &Path) -> QueryDbResult { + let summary_path = run_dir.join("run-summary.json"); + if !summary_path.exists() { + return Ok(true); + } + let summary = read_json_file(&summary_path)?; + Ok(match json_str(&summary, &["status"]) { + Some(status) => status == "success", + None => true, + }) +} + +#[derive(Default)] +struct SingleRunIndexSummary { + indexed: bool, + repos_indexed: u64, + publication_points_indexed: u64, + object_instances_indexed: u64, + object_projections_indexed: u64, + stats_indexed: u64, + latest_ready_run: Option, +} + +fn index_run_dir( + db: &QueryDb, + _repo_bytes: Option<&ExternalRepoBytesDb>, + _projection_entry_limit: usize, + run_dir: &Path, +) -> QueryDbResult { + let report_path = run_dir.join("report.json"); + if !report_path.exists() { + return Err(QueryDbError::InvalidArtifact(format!( + "missing report.json under {}", + run_dir.display() + ))); + } + let summary_file = read_json_file_optional(&run_dir.join("run-summary.json"))?; + let meta = read_json_file_optional(&run_dir.join("run-meta.json"))?; + let stage_timing = read_json_file_optional(&run_dir.join("stage-timing.json"))?; + let run_id = run_id_for(run_dir, summary_file.as_ref(), meta.as_ref()); + if ready_run_is_current(db, &run_id, run_dir)? 
{ + return Ok(SingleRunIndexSummary { + latest_ready_run: db.latest_ready_run()?, + ..SingleRunIndexSummary::default() + }); + } + + let report_summary = report_stream::summarize_report(&report_path, &run_id)?; + let artifact_manifest = build_artifact_manifest(run_dir, report_summary.query_audit.as_ref())?; + let mut run_record = build_run_record( + run_dir, + &run_id, + &report_summary, + summary_file.as_ref(), + meta.as_ref(), + stage_timing.as_ref(), + &artifact_manifest, + ); + run_record.index_status = "building".to_string(); + run_record.index_error = None; + db.put_json_cf(CF_RUNS, run_key(&run_id).as_bytes(), &run_record)?; + + let indexed = + match write_summary_index_records(db, &run_id, &report_summary, &artifact_manifest) { + Ok(indexed) => indexed, + Err(err) => { + run_record.index_status = "failed".to_string(); + run_record.index_error = Some(err.to_string()); + db.put_json_cf(CF_RUNS, run_key(&run_id).as_bytes(), &run_record)?; + return Err(err); + } + }; + + run_record.index_status = "ready".to_string(); + let should_update_latest = should_update_latest_ready_run(db, &run_record)?; + let mut final_batch = WriteBatch::default(); + put_json_batch( + &mut final_batch, + db, + CF_RUNS, + run_key(&run_id).as_bytes(), + &run_record, + )?; + if let Some(seq) = run_record.run_seq { + put_json_batch( + &mut final_batch, + db, + CF_RUNS_BY_SEQ, + seq_key(seq).as_bytes(), + &run_id, + )?; + } + if should_update_latest { + put_json_batch(&mut final_batch, db, CF_META, KEY_LATEST_READY_RUN, &run_id)?; + } + db.write_batch(final_batch)?; + + Ok(SingleRunIndexSummary { + indexed: true, + repos_indexed: indexed.repos_indexed, + publication_points_indexed: indexed.publication_points_indexed, + object_instances_indexed: indexed.object_instances_indexed, + object_projections_indexed: indexed.object_projections_indexed, + stats_indexed: indexed.stats_indexed, + latest_ready_run: if should_update_latest { + Some(run_id) + } else { + db.latest_ready_run()? 
+ }, + }) +} + +fn ready_run_is_current(db: &QueryDb, run_id: &str, run_dir: &Path) -> QueryDbResult { + let Some(existing) = db.get_run(run_id)? else { + return Ok(false); + }; + Ok(existing.schema_version == QUERY_DB_SCHEMA_VERSION + && existing.index_status == "ready" + && existing.run_dir == run_dir.display().to_string()) +} + +fn should_update_latest_ready_run(db: &QueryDb, candidate: &RunRecord) -> QueryDbResult { + let Some(current_run_id) = db.latest_ready_run()? else { + return Ok(true); + }; + let Some(current) = db.get_run(¤t_run_id)? else { + return Ok(true); + }; + Ok(match (candidate.run_seq, current.run_seq) { + (Some(candidate_seq), Some(current_seq)) => candidate_seq >= current_seq, + (Some(_), None) => true, + (None, Some(_)) => false, + (None, None) => candidate.run_id >= current.run_id, + }) +} + +#[derive(Default)] +struct IndexWriteSummary { + repos_indexed: u64, + publication_points_indexed: u64, + object_instances_indexed: u64, + object_projections_indexed: u64, + stats_indexed: u64, +} + +fn write_summary_index_records( + db: &QueryDb, + run_id: &str, + report_summary: &ReportSummary, + artifact_manifest: &crate::query::artifact_manifest::ArtifactManifestSummary, +) -> QueryDbResult { + const BATCH_LIMIT: usize = 5000; + let mut batch = WriteBatch::default(); + let mut pending = 0usize; + let mut summary = IndexWriteSummary::default(); + + macro_rules! 
flush_if_needed { + () => { + if pending >= BATCH_LIMIT { + let to_write = std::mem::take(&mut batch); + db.write_batch(to_write)?; + pending = 0; + } + }; + } + + for pp_record in &report_summary.publication_points { + put_json_batch( + &mut batch, + db, + CF_PUBLICATION_POINTS, + pp_key(run_id, &pp_record.pp_id).as_bytes(), + pp_record, + )?; + pending += 1; + summary.publication_points_indexed += 1; + flush_if_needed!(); + } + + for repo_record in &report_summary.repos { + put_json_batch( + &mut batch, + db, + CF_REPOS, + repo_key(run_id, &repo_record.repo_id).as_bytes(), + repo_record, + )?; + pending += 1; + summary.repos_indexed += 1; + flush_if_needed!(); + } + + let artifacts_value = serde_json::to_value(artifact_manifest)?; + let stats = report_stream::stats_records_from_summary(run_id, report_summary, artifacts_value); + for record in &stats { + put_json_batch( + &mut batch, + db, + CF_STATS, + stats_key(&record.run_id, &record.scope, &record.name).as_bytes(), + record, + )?; + pending += 1; + flush_if_needed!(); + } + if pending > 0 { + db.write_batch(batch)?; + } + summary.stats_indexed = stats.len() as u64; + Ok(summary) +} + +fn object_uri_index_from_object(object: &ObjectInstanceRecord) -> ObjectUriIndexRecord { + ObjectUriIndexRecord { + run_id: object.run_id.clone(), + uri: object.uri.clone(), + sha256: object.sha256.clone(), + object_instance_id: object.object_instance_id.clone(), + repo_id: object.repo_id.clone(), + pp_id: object.pp_id.clone(), + } +} + +fn put_json_batch( + batch: &mut WriteBatch, + db: &QueryDb, + cf_name: &'static str, + key: &[u8], + value: &T, +) -> QueryDbResult<()> { + let cf = db.cf(cf_name)?; + let bytes = serde_json::to_vec(value)?; + batch.put_cf(cf, key, bytes); + Ok(()) +} + +fn build_run_record( + run_dir: &Path, + run_id: &str, + report_summary: &ReportSummary, + summary: Option<&Value>, + meta: Option<&Value>, + stage_timing: Option<&Value>, + artifact_manifest: 
&crate::query::artifact_manifest::ArtifactManifestSummary, +) -> RunRecord { + let artifact_paths = artifact_manifest.artifact_paths(); + let counts = RunCounts { + publication_points: summary + .and_then(|v| json_u64(v, &["reportCounts", "publicationPoints"])) + .unwrap_or(report_summary.publication_points.len() as u64), + objects: report_summary.objects_count, + fresh_objects: 0, + cached_objects: 0, + rejected_objects: report_summary + .publication_points + .iter() + .map(|pp| pp.rejected_objects) + .sum(), + fresh_rejected_objects: 0, + cached_rejected_objects: 0, + trust_anchors: 0, + vrps: summary + .and_then(|v| json_u64(v, &["reportCounts", "vrps"])) + .unwrap_or(report_summary.vrps_count), + aspas: summary + .and_then(|v| json_u64(v, &["reportCounts", "aspas"])) + .unwrap_or(report_summary.aspas_count), + warnings: summary + .and_then(|v| json_u64(v, &["reportCounts", "warnings"])) + .unwrap_or(report_summary.warnings_count), + }; + RunRecord { + schema_version: QUERY_DB_SCHEMA_VERSION, + run_id: run_id.to_string(), + run_seq: summary + .and_then(|v| json_u64(v, &["runSeq"])) + .or_else(|| meta.and_then(|v| json_u64(v, &["run_index"]))) + .or_else(|| run_index_from_path(run_dir)), + run_dir: run_dir.display().to_string(), + validation_time: report_summary.validation_time.clone(), + sync_mode: meta + .and_then(|v| json_str(v, &["sync_mode"])) + .map(str::to_string), + started_at: summary + .and_then(|v| json_str(v, &["startedAtRfc3339Utc"])) + .or_else(|| meta.and_then(|v| json_str(v, &["started_at_rfc3339_utc"]))) + .map(str::to_string), + finished_at: summary + .and_then(|v| json_str(v, &["finishedAtRfc3339Utc"])) + .or_else(|| meta.and_then(|v| json_str(v, &["completed_at_rfc3339_utc"]))) + .map(str::to_string), + wall_ms: summary + .and_then(|v| json_u64(v, &["wallMs"])) + .or_else(|| stage_timing.and_then(|v| json_u64(v, &["total_ms"]))), + artifact_paths, + counts, + index_status: "building".to_string(), + index_error: None, + } +} + +fn 
read_json_file(path: &Path) -> QueryDbResult { + let bytes = fs::read(path)?; + serde_json::from_slice(&bytes).map_err(QueryDbError::from) +} + +fn read_json_file_optional(path: &Path) -> QueryDbResult> { + if path.exists() { + read_json_file(path).map(Some) + } else { + Ok(None) + } +} + +fn run_id_for(run_dir: &Path, summary: Option<&Value>, meta: Option<&Value>) -> String { + summary + .and_then(|v| json_str(v, &["runId"])) + .or_else(|| meta.and_then(|v| json_str(v, &["run_id"]))) + .map(str::to_string) + .unwrap_or_else(|| { + run_dir + .file_name() + .and_then(|v| v.to_str()) + .unwrap_or("run_unknown") + .to_string() + }) +} + +fn json_str<'a>(value: &'a Value, path: &[&str]) -> Option<&'a str> { + let mut current = value; + for key in path { + current = current.get(*key)?; + } + current.as_str() +} + +fn json_u64(value: &Value, path: &[&str]) -> Option { + let mut current = value; + for key in path { + current = current.get(*key)?; + } + current.as_u64() +} + +fn run_index_from_path(path: &Path) -> Option { + path.file_name() + .and_then(|name| name.to_str()) + .and_then(|name| name.strip_prefix("run_")) + .and_then(|value| value.parse::().ok()) +} + +fn stable_id(value: &str) -> String { + let digest = Sha256::digest(value.as_bytes()); + hex::encode(&digest[..12]) +} + +fn run_key(run_id: &str) -> String { + format!("run/{run_id}") +} + +fn seq_key(seq: u64) -> String { + format!("seq/{seq:020}") +} + +fn repo_key(run_id: &str, repo_id: &str) -> String { + format!("repo/{run_id}/{repo_id}") +} + +fn pp_key(run_id: &str, pp_id: &str) -> String { + format!("pp/{run_id}/{pp_id}") +} + +fn object_instance_key( + run_id: &str, + uri_hash: &str, + sha256: &str, + object_instance_id: &str, +) -> String { + format!("objinst/{run_id}/{uri_hash}/{sha256}/{object_instance_id}") +} + +fn object_uri_key(run_id: &str, uri: &str) -> String { + format!("objuri/{run_id}/{}", stable_id(uri)) +} + +fn stats_key(run_id: &str, scope: &str, name: &str) -> String { + 
format!("stats/{run_id}/{scope}/{name}") +} + +fn export_job_key(run_id: &str, job_id: &str) -> String { + format!("export/{run_id}/{job_id}") +} + +fn validation_explain_key(run_id: &str, object_instance_id: &str, explain_version: u32) -> String { + format!("explain/{run_id}/{object_instance_id}/{explain_version}") +} + +fn object_hash_key(sha256: &str) -> String { + format!("objhash/{sha256}") +} + +fn prefix_range_end(prefix: &[u8]) -> Option> { + let mut end = prefix.to_vec(); + for index in (0..end.len()).rev() { + if end[index] != u8::MAX { + end[index] += 1; + end.truncate(index + 1); + return Some(end); + } + } + None +} + +#[cfg(test)] +mod tests { + use serde_json::json; + use sha2::{Digest, Sha256}; + + use super::*; + + #[test] + fn latest_ready_run_updates_after_index_success() { + let temp = tempfile::tempdir().expect("tempdir"); + let run_dir = temp.path().join("runs/run_0001"); + fs::create_dir_all(&run_dir).expect("run dir"); + write_sample_run(&run_dir, "run_0001", 1); + + let query_db_path = temp.path().join("query-db"); + let summary = index_artifacts(&ArtifactIndexerConfig { + query_db_path: query_db_path.clone(), + run_root: Some(temp.path().to_path_buf()), + run_dir: None, + repo_bytes_db_path: None, + projection_entry_limit: 50, + min_run_seq: None, + retain_indexed_runs: None, + }) + .expect("index"); + assert_eq!(summary.runs_indexed, 1); + assert_eq!(summary.latest_ready_run.as_deref(), Some("run_0001")); + + let db = QueryDb::open(&query_db_path).expect("open query db"); + assert_eq!(db.latest_ready_run().unwrap().as_deref(), Some("run_0001")); + let run = db.get_run("run_0001").unwrap().expect("run"); + assert_eq!(run.index_status, "ready"); + assert_eq!(run.counts.publication_points, 1); + assert_eq!(run.counts.objects, 2); + assert_eq!(db.count_cf(CF_REPOS).unwrap(), 1); + assert_eq!(db.count_cf(CF_PUBLICATION_POINTS).unwrap(), 1); + assert_eq!(db.count_cf(CF_OBJECT_INSTANCES).unwrap(), 0); + } + + #[test] + fn 
failed_run_does_not_replace_previous_latest() { + let temp = tempfile::tempdir().expect("tempdir"); + let run1 = temp.path().join("runs/run_0001"); + let run2 = temp.path().join("runs/run_0002"); + fs::create_dir_all(&run1).expect("run1"); + fs::create_dir_all(&run2).expect("run2"); + write_sample_run(&run1, "run_0001", 1); + write_sample_run(&run2, "run_0002", 2); + fs::write( + run2.join("run-summary.json"), + r#"{"status":"failed","runId":"run_0002","runSeq":2}"#, + ) + .expect("failed summary"); + + let query_db_path = temp.path().join("query-db"); + let summary = index_artifacts(&ArtifactIndexerConfig { + query_db_path: query_db_path.clone(), + run_root: Some(temp.path().to_path_buf()), + run_dir: None, + repo_bytes_db_path: None, + projection_entry_limit: 50, + min_run_seq: None, + retain_indexed_runs: None, + }) + .expect("index"); + assert_eq!(summary.runs_indexed, 1); + let db = QueryDb::open(&query_db_path).expect("open query db"); + assert_eq!(db.latest_ready_run().unwrap().as_deref(), Some("run_0001")); + assert!(db.get_run("run_0002").unwrap().is_none()); + } + + #[test] + fn min_run_seq_filters_old_history() { + let temp = tempfile::tempdir().expect("tempdir"); + let run1 = temp.path().join("runs/run_0001"); + let run2 = temp.path().join("runs/run_0002"); + fs::create_dir_all(&run1).expect("run1"); + fs::create_dir_all(&run2).expect("run2"); + write_sample_run(&run1, "run_0001", 1); + write_sample_run(&run2, "run_0002", 2); + + let query_db_path = temp.path().join("query-db"); + let summary = index_artifacts(&ArtifactIndexerConfig { + query_db_path: query_db_path.clone(), + run_root: Some(temp.path().to_path_buf()), + run_dir: None, + repo_bytes_db_path: None, + projection_entry_limit: 50, + min_run_seq: Some(2), + retain_indexed_runs: None, + }) + .expect("index"); + assert_eq!(summary.runs_indexed, 1); + assert_eq!(summary.latest_ready_run.as_deref(), Some("run_0002")); + + let db = QueryDb::open(&query_db_path).expect("query db"); + 
assert!(db.get_run("run_0001").unwrap().is_none()); + assert!(db.get_run("run_0002").unwrap().is_some()); + } + + #[test] + fn retention_deletes_old_run_scoped_indexes() { + let temp = tempfile::tempdir().expect("tempdir"); + for seq in 1..=3 { + let run_dir = temp.path().join(format!("runs/run_{seq:04}")); + fs::create_dir_all(&run_dir).expect("run dir"); + write_sample_run(&run_dir, &format!("run_{seq:04}"), seq); + } + + let query_db_path = temp.path().join("query-db"); + let summary = index_artifacts(&ArtifactIndexerConfig { + query_db_path: query_db_path.clone(), + run_root: Some(temp.path().to_path_buf()), + run_dir: None, + repo_bytes_db_path: None, + projection_entry_limit: 50, + min_run_seq: None, + retain_indexed_runs: Some(2), + }) + .expect("index"); + assert_eq!(summary.runs_indexed, 3); + assert_eq!(summary.runs_deleted, 1); + assert_eq!(summary.retained_runs, 2); + assert_eq!(summary.latest_ready_run.as_deref(), Some("run_0003")); + + let db = QueryDb::open(&query_db_path).expect("query db"); + assert!(db.get_run("run_0001").unwrap().is_none()); + assert!(db.get_run("run_0002").unwrap().is_some()); + assert!(db.get_run("run_0003").unwrap().is_some()); + assert_eq!(db.latest_ready_run().unwrap().as_deref(), Some("run_0003")); + assert_eq!(db.list_repos("run_0001", 10, None).unwrap().data.len(), 0); + assert_eq!( + db.list_publication_points("run_0001", 10, None) + .unwrap() + .data + .len(), + 0 + ); + assert_eq!(db.list_objects("run_0001", 10, None).unwrap().data.len(), 0); + assert!( + db.get_stat("run_0001", "overview", "counts") + .unwrap() + .is_none() + ); + assert_eq!(db.list_ready_runs_by_seq().unwrap().len(), 2); + } + + #[test] + fn repo_bytes_projection_is_not_written_during_summary_only_index() { + let temp = tempfile::tempdir().expect("tempdir"); + let run_dir = temp.path().join("runs/run_0001"); + fs::create_dir_all(&run_dir).expect("run dir"); + let roa_bytes = + 
fs::read("tests/fixtures/repository/rpki.cernet.net/repo/cernet/0/AS4538.roa") + .expect("fixture roa"); + let roa_sha = hex::encode(Sha256::digest(&roa_bytes)); + write_sample_run_with_object_hash(&run_dir, "run_0001", 1, &roa_sha); + let repo_bytes_path = temp.path().join("repo-bytes.db"); + let repo_bytes = ExternalRepoBytesDb::open(&repo_bytes_path).expect("repo bytes"); + repo_bytes + .put_blob_bytes_batch(&[(roa_sha.clone(), roa_bytes)]) + .expect("put repo bytes"); + drop(repo_bytes); + + let query_db_path = temp.path().join("query-db"); + let summary = index_artifacts(&ArtifactIndexerConfig { + query_db_path: query_db_path.clone(), + run_root: Some(temp.path().to_path_buf()), + run_dir: None, + repo_bytes_db_path: Some(repo_bytes_path), + projection_entry_limit: 5, + min_run_seq: None, + retain_indexed_runs: None, + }) + .expect("index"); + assert_eq!(summary.object_projections_indexed, 0); + let db = QueryDb::open(&query_db_path).expect("open query db"); + assert_eq!(db.count_cf(CF_OBJECTS_BY_HASH).unwrap(), 0); + } + + #[test] + fn validation_events_sidecar_is_indexed() { + let temp = tempfile::tempdir().expect("tempdir"); + let run_dir = temp.path().join("runs/run_0001"); + fs::create_dir_all(&run_dir).expect("run dir"); + write_sample_run(&run_dir, "run_0001", 1); + + let events = [ + json!({"schemaVersion":1,"seq":1,"eventType":"run_summary","validationTime":"2026-06-15T00:00:00Z","counts":{"objects":2,"warnings":0,"vrps":1,"aspas":0}}), + json!({"schemaVersion":1,"seq":2,"eventType":"object","validationTime":"2026-06-15T00:00:00Z","objectUri":"rsync://repo.example/rpki/a.roa","sha256":"22","objectType":"roa","result":"error","reason":"bad roa"}), + ]; + let mut events_bytes = Vec::new(); + for event in events { + events_bytes.extend_from_slice(&serde_json::to_vec(&event).unwrap()); + events_bytes.push(b'\n'); + } + let events_sha256 = hex::encode(Sha256::digest(&events_bytes)); + fs::write(run_dir.join("validation-events.jsonl"), 
&events_bytes).expect("events"); + + let mut report = read_json_file(&run_dir.join("report.json")).expect("read report"); + report["queryAudit"] = json!({ + "schemaVersion": 1, + "status": "complete", + "eventsPath": "validation-events.jsonl", + "eventsCount": 2, + "eventsSha256": events_sha256, + "writerVersion": 1 + }); + fs::write( + run_dir.join("report.json"), + serde_json::to_vec(&report).unwrap(), + ) + .expect("report"); + + let query_db_path = temp.path().join("query-db"); + let summary = index_artifacts(&ArtifactIndexerConfig { + query_db_path: query_db_path.clone(), + run_root: Some(temp.path().to_path_buf()), + run_dir: None, + repo_bytes_db_path: None, + projection_entry_limit: 50, + min_run_seq: None, + retain_indexed_runs: None, + }) + .expect("index"); + assert_eq!(summary.stats_indexed, 11); + + let db = QueryDb::open(&query_db_path).expect("open query db"); + let run = db.get_run("run_0001").unwrap().expect("run"); + assert!(run.artifact_paths.contains_key("validationEvents")); + let manifest = db + .get_stat("run_0001", "validation_events", "manifest") + .unwrap() + .expect("event manifest"); + assert_eq!(manifest.value["eventsCount"].as_u64(), Some(2)); + let by_type = db + .get_stat("run_0001", "validation_events", "by_type") + .unwrap() + .expect("event type stats"); + assert_eq!(by_type.value.as_object().map(|items| items.len()), Some(0)); + let reasons = db + .get_stat("run_0001", "validation_events", "reasons") + .unwrap() + .expect("event reasons"); + assert_eq!(reasons.value.as_object().map(|items| items.len()), Some(0)); + } + + #[test] + fn query_db_lists_records_indexes_and_cached_results() { + let temp = tempfile::tempdir().expect("tempdir"); + let run1 = temp.path().join("runs/run_0001"); + let run2 = temp.path().join("runs/run_0002"); + fs::create_dir_all(&run1).expect("run1"); + fs::create_dir_all(&run2).expect("run2"); + write_sample_run(&run1, "run_0001", 1); + write_sample_run(&run2, "run_0002", 2); + + let query_db_path = 
temp.path().join("query-db"); + let summary = index_artifacts(&ArtifactIndexerConfig { + query_db_path: query_db_path.clone(), + run_root: Some(temp.path().to_path_buf()), + run_dir: None, + repo_bytes_db_path: None, + projection_entry_limit: 50, + min_run_seq: None, + retain_indexed_runs: None, + }) + .expect("index"); + assert_eq!(summary.runs_indexed, 2); + assert_eq!(summary.latest_ready_run.as_deref(), Some("run_0002")); + + let db = QueryDb::open(&query_db_path).expect("open query db"); + assert_eq!( + db.resolve_run_id("latest").unwrap().as_deref(), + Some("run_0002") + ); + assert_eq!( + db.resolve_run_id("run_0001").unwrap().as_deref(), + Some("run_0001") + ); + + let first_page = db.list_runs(1, None).expect("runs"); + assert_eq!(first_page.data.len(), 1); + assert_eq!(first_page.data[0].run_id, "run_0001"); + let second_page = db + .list_runs(1, first_page.next_cursor.as_deref()) + .expect("second runs"); + assert_eq!(second_page.data.len(), 1); + assert_eq!(second_page.data[0].run_id, "run_0002"); + + let repos = db.list_repos("run_0002", 10, None).expect("repos"); + assert_eq!(repos.data.len(), 1); + let repo = repos.data[0].clone(); + assert_eq!(repo.host, "repo.example"); + assert_eq!(repo.transport, "rrdp"); + assert_eq!(repo.publication_points, 1); + assert_eq!( + db.get_repo("run_0002", &repo.repo_id) + .unwrap() + .expect("repo") + .uri, + repo.uri + ); + + let pps = db + .list_publication_points_for_repo("run_0002", &repo.repo_id, 10, None) + .expect("pps"); + assert_eq!(pps.data.len(), 1); + let pp = pps.data[0].clone(); + assert_eq!(pp.repo_sync_phase.as_deref(), Some("rrdp_delta")); + assert_eq!( + db.get_publication_point("run_0002", &pp.pp_id) + .unwrap() + .expect("pp") + .objects, + 2 + ); + + let objects = db.list_objects("run_0002", 10, None).expect("objects"); + assert_eq!(objects.data.len(), 2); + assert_eq!( + db.list_objects_for_repo("run_0002", &repo.repo_id, 10, None) + .expect("repo objects") + .data + .len(), + 2 + ); + 
assert_eq!( + db.list_objects_for_pp("run_0002", &pp.pp_id, 10, None) + .expect("pp objects") + .data + .len(), + 2 + ); + let roa = objects + .data + .iter() + .find(|object| object.object_type == "roa") + .expect("roa") + .clone(); + assert!(roa.rejected); + assert_eq!( + db.get_object_by_instance_id("run_0002", &roa.object_instance_id) + .unwrap() + .expect("object") + .uri, + roa.uri + ); + assert_eq!( + db.get_object_by_uri("run_0002", &roa.uri) + .unwrap() + .expect("uri index") + .object_instance_id, + roa.object_instance_id + ); + + let overview = db + .get_stat("run_0002", "overview", "counts") + .unwrap() + .expect("overview"); + assert_eq!(overview.value["objects"].as_u64(), Some(2)); + let object_types = db + .get_stat("run_0002", "objects", "by_type") + .unwrap() + .expect("types"); + assert_eq!(object_types.value["manifest"].as_u64(), Some(1)); + assert_eq!(object_types.value["roa"].as_u64(), Some(1)); + let by_result = db + .get_stat("run_0002", "validation", "by_result") + .unwrap() + .expect("result"); + assert_eq!(by_result.value["ok"].as_u64(), Some(1)); + assert_eq!(by_result.value["error"].as_u64(), Some(1)); + + let job = ExportJobRecord { + schema_version: QUERY_DB_SCHEMA_VERSION, + job_id: "job-1".to_string(), + run_id: "run_0002".to_string(), + scope: "object_set".to_string(), + repo_id: None, + pp_id: None, + status: "complete".to_string(), + created_at: "2026-06-15T00:00:00Z".to_string(), + finished_at: Some("2026-06-15T00:00:01Z".to_string()), + output_path: Some("/tmp/export.tar".to_string()), + object_count: 2, + bytes_written: 512, + error: None, + }; + db.put_export_job(&job).expect("put job"); + assert_eq!( + db.get_export_job("run_0002", "job-1") + .unwrap() + .expect("job") + .bytes_written, + 512 + ); + + let explain = ValidationExplainRecord { + schema_version: QUERY_DB_SCHEMA_VERSION, + explain_version: 1, + run_id: "run_0002".to_string(), + object_instance_id: roa.object_instance_id.clone(), + uri: roa.uri.clone(), + sha256: 
roa.sha256.clone(), + object_type: roa.object_type.clone(), + final_status: "invalid".to_string(), + audit_result: "error".to_string(), + detail_summary: Some("bad roa".to_string()), + authoritative: false, + explain_mode: "test".to_string(), + generated_at: "2026-06-15T00:00:02Z".to_string(), + parsevalidate: json!({"status":"invalid"}), + chainvalidate: json!({"status":"invalid"}), + chain_edges: vec![ChainEdgeRecord { + relation: "test".to_string(), + from_uri: roa.uri.clone(), + to_uri: "rsync://repo.example/rpki/m.mft".to_string(), + to_object_instance_id: None, + to_sha256: None, + status: "missing".to_string(), + evidence: json!({}), + }], + }; + db.put_validation_explain(&explain).expect("put explain"); + assert_eq!( + db.get_validation_explain("run_0002", &roa.object_instance_id, 1) + .unwrap() + .expect("explain") + .final_status, + "invalid" + ); + } + + #[test] + fn repeated_indexing_does_not_move_latest_backwards() { + let temp = tempfile::tempdir().expect("tempdir"); + let run1 = temp.path().join("runs/run_0001"); + let run2 = temp.path().join("runs/run_0002"); + fs::create_dir_all(&run1).expect("run1"); + fs::create_dir_all(&run2).expect("run2"); + write_sample_run(&run1, "run_0001", 1); + write_sample_run(&run2, "run_0002", 2); + + let query_db_path = temp.path().join("query-db"); + let db = QueryDb::open(&query_db_path).expect("open query db"); + let config = ArtifactIndexerConfig { + query_db_path, + run_root: Some(temp.path().to_path_buf()), + run_dir: None, + repo_bytes_db_path: None, + projection_entry_limit: 50, + min_run_seq: None, + retain_indexed_runs: None, + }; + + let first = index_artifacts_with_open_db(&db, None, &config).expect("first index"); + assert_eq!(first.latest_ready_run.as_deref(), Some("run_0002")); + assert_eq!(first.runs_indexed, 2); + assert_eq!(db.latest_ready_run().unwrap().as_deref(), Some("run_0002")); + + let second = index_artifacts_with_open_db(&db, None, &config).expect("second index"); + 
assert_eq!(second.runs_indexed, 0); + assert_eq!(second.latest_ready_run.as_deref(), Some("run_0002")); + assert_eq!(db.latest_ready_run().unwrap().as_deref(), Some("run_0002")); + assert_eq!( + db.get_run("run_0001") + .unwrap() + .expect("run_0001") + .index_status, + "ready" + ); + } + + #[test] + fn validation_events_mismatch_is_reported_without_switching_latest() { + let temp = tempfile::tempdir().expect("tempdir"); + let run_dir = temp.path().join("runs/run_0001"); + fs::create_dir_all(&run_dir).expect("run dir"); + write_sample_run(&run_dir, "run_0001", 1); + let events_bytes = br#"{"schemaVersion":1,"eventType":"run_summary"}"#; + fs::write(run_dir.join("validation-events.jsonl"), events_bytes).expect("events"); + let mut report = read_json_file(&run_dir.join("report.json")).expect("read report"); + report["queryAudit"] = json!({ + "schemaVersion": 1, + "status": "complete", + "eventsPath": "validation-events.jsonl", + "eventsCount": 2, + "eventsSha256": hex::encode(Sha256::digest(events_bytes)), + "writerVersion": 1 + }); + fs::write( + run_dir.join("report.json"), + serde_json::to_vec(&report).unwrap(), + ) + .expect("report"); + + let query_db_path = temp.path().join("query-db"); + let summary = index_artifacts(&ArtifactIndexerConfig { + query_db_path: query_db_path.clone(), + run_root: Some(temp.path().to_path_buf()), + run_dir: None, + repo_bytes_db_path: None, + projection_entry_limit: 50, + min_run_seq: None, + retain_indexed_runs: None, + }) + .expect("index summary"); + assert_eq!(summary.runs_indexed, 1); + assert_eq!(summary.errors.len(), 0); + let db = QueryDb::open(&query_db_path).expect("db"); + assert_eq!(db.latest_ready_run().unwrap().as_deref(), Some("run_0001")); + } + + fn write_sample_run_with_object_hash( + run_dir: &Path, + run_id: &str, + run_seq: u64, + roa_sha: &str, + ) { + write_sample_run(run_dir, run_id, run_seq); + let mut report = read_json_file(&run_dir.join("report.json")).expect("read report"); + 
report["publication_points"][0]["objects"][1]["sha256_hex"] = json!(roa_sha); + fs::write( + run_dir.join("report.json"), + serde_json::to_vec(&report).unwrap(), + ) + .expect("report"); + } + + fn write_sample_run(run_dir: &Path, run_id: &str, run_seq: u64) { + let report = json!({ + "format_version": 2, + "meta": {"validation_time_rfc3339_utc": "2026-06-15T00:00:00Z"}, + "tree": {"warnings": []}, + "publication_points": [ + { + "node_id": 10, + "rsync_base_uri": "rsync://repo.example/rpki/", + "manifest_rsync_uri": "rsync://repo.example/rpki/m.mft", + "publication_point_rsync_uri": "rsync://repo.example/rpki/", + "rrdp_notification_uri": "https://repo.example/rrdp/notification.xml", + "source": "rrdp", + "repo_sync_source": "rrdp", + "repo_sync_phase": "rrdp_delta", + "repo_sync_duration_ms": 123, + "repo_terminal_state": "fresh", + "warnings": [], + "objects": [ + {"rsync_uri":"rsync://repo.example/rpki/m.mft","sha256_hex":"11","kind":"manifest","result":"ok"}, + {"rsync_uri":"rsync://repo.example/rpki/a.roa","sha256_hex":"22","kind":"roa","result":"error","detail":"bad roa"} + ] + } + ], + "vrps": [{"asn": 64496, "prefix": "192.0.2.0/24", "max_length": 24}], + "aspas": [], + "downloads": [], + "download_stats": {}, + "repo_sync_stats": {} + }); + fs::write( + run_dir.join("report.json"), + serde_json::to_vec(&report).unwrap(), + ) + .expect("report"); + let summary = json!({ + "status": "success", + "runId": run_id, + "runSeq": run_seq, + "startedAtRfc3339Utc": "2026-06-15T00:00:00Z", + "finishedAtRfc3339Utc": "2026-06-15T00:01:00Z", + "wallMs": 60000, + "reportCounts": {"vrps": 1, "aspas": 0, "publicationPoints": 1, "warnings": 0} + }); + fs::write( + run_dir.join("run-summary.json"), + serde_json::to_vec(&summary).unwrap(), + ) + .expect("summary"); + fs::write(run_dir.join("stage-timing.json"), b"{}").expect("stage"); + } +}