use std::collections::{BTreeMap, BTreeSet}; use std::fs; use std::path::{Path, PathBuf}; use rocksdb::{ColumnFamilyDescriptor, DB, IteratorMode, Options, WriteBatch}; use serde::{Deserialize, Serialize}; use serde_json::Value; use sha2::{Digest, Sha256}; use crate::query::artifact_manifest::build_artifact_manifest; use crate::query::report_stream::{self, ObjectLookup, ObjectScope, ReportSummary}; use crate::blob_store::ExternalRepoBytesDb; pub const QUERY_DB_SCHEMA_VERSION: u32 = 1; pub const CF_META: &str = "meta"; pub const CF_RUNS: &str = "runs"; pub const CF_RUNS_BY_SEQ: &str = "runs_by_seq"; pub const CF_REPOS: &str = "repos"; pub const CF_PUBLICATION_POINTS: &str = "publication_points"; pub const CF_OBJECT_INSTANCES: &str = "object_instances"; pub const CF_OBJECTS_BY_URI: &str = "objects_by_uri"; pub const CF_OBJECTS_BY_HASH: &str = "objects_by_hash"; pub const CF_VALIDATION_EXPLAIN_CACHE: &str = "validation_explain_cache"; pub const CF_EXPORT_JOBS: &str = "export_jobs"; pub const CF_STATS: &str = "stats"; pub const CF_REASON_INDEX: &str = "reason_index"; pub const QUERY_DB_COLUMN_FAMILIES: &[&str] = &[ CF_META, CF_RUNS, CF_RUNS_BY_SEQ, CF_REPOS, CF_PUBLICATION_POINTS, CF_OBJECT_INSTANCES, CF_OBJECTS_BY_URI, CF_OBJECTS_BY_HASH, CF_VALIDATION_EXPLAIN_CACHE, CF_EXPORT_JOBS, CF_STATS, CF_REASON_INDEX, ]; const KEY_SCHEMA_VERSION: &[u8] = b"schema_version"; const KEY_LATEST_READY_RUN: &[u8] = b"latest_ready_run"; #[derive(Debug, thiserror::Error)] pub enum QueryDbError { #[error("rocksdb error: {0}")] RocksDb(String), #[error("io error: {0}")] Io(String), #[error("json error: {0}")] Json(String), #[error("missing column family: {0}")] MissingColumnFamily(&'static str), #[error("invalid run artifact: {0}")] InvalidArtifact(String), #[error("CIR decode error: {0}")] CirDecode(String), } pub type QueryDbResult = Result; impl From for QueryDbError { fn from(value: rocksdb::Error) -> Self { Self::RocksDb(value.to_string()) } } impl From for QueryDbError { fn from(value: std::io::Error) -> Self { Self::Io(value.to_string()) } } impl From for QueryDbError { fn from(value: serde_json::Error) -> Self { Self::Json(value.to_string()) } } #[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub struct QueryIndexSummary { pub runs_indexed: u64, pub runs_deleted: u64, pub retained_runs: u64, pub repos_indexed: u64, pub publication_points_indexed: u64, pub object_instances_indexed: u64, pub object_projections_indexed: u64, pub stats_indexed: u64, pub latest_ready_run: Option, pub errors: Vec, } #[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub struct QueryPage { pub data: Vec, pub next_cursor: Option, pub limit: usize, } #[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub struct RunCounts { pub publication_points: u64, pub objects: u64, pub fresh_objects: u64, pub cached_objects: u64, pub rejected_objects: u64, pub fresh_rejected_objects: u64, pub cached_rejected_objects: u64, pub trust_anchors: u64, pub vrps: u64, pub aspas: u64, pub warnings: u64, } #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub struct RunRecord { pub schema_version: u32, pub run_id: String, pub run_seq: Option, pub run_dir: String, pub validation_time: Option, pub sync_mode: Option, pub started_at: Option, pub finished_at: Option, pub wall_ms: Option, pub artifact_paths: BTreeMap, pub counts: RunCounts, pub index_status: String, pub index_error: Option, } #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub struct RepositoryRecord { pub schema_version: u32, pub run_id: String, pub repo_id: String, pub uri: String, pub host: String, pub transport: String, pub publication_points: u64, pub objects: u64, pub rejected_objects: u64, pub download_bytes: Option, pub sync_duration_ms_total: u64, pub phases: BTreeMap, pub terminal_states: BTreeMap, } #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub struct PublicationPointRecord { pub schema_version: u32, pub run_id: String, pub pp_id: String, pub repo_id: String, pub node_id: Option, pub parent_node_id: Option, pub rsync_base_uri: Option, pub manifest_rsync_uri: Option, pub publication_point_rsync_uri: Option, pub rrdp_notification_uri: Option, pub source: Option, pub repo_sync_source: Option, pub repo_sync_phase: Option, pub repo_sync_duration_ms: Option, pub repo_sync_error: Option, pub repo_terminal_state: Option, pub this_update: Option, pub next_update: Option, pub verified_at: Option, pub objects: u64, pub rejected_objects: u64, pub warnings: u64, } #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub struct ObjectInstanceRecord { pub schema_version: u32, pub run_id: String, pub object_instance_id: String, pub uri: String, pub uri_hash: String, pub sha256: String, pub object_type: String, pub result: String, pub detail_summary: Option, pub repo_id: String, pub pp_id: String, pub source_section: String, pub rejected: bool, pub reject_reason: Option, } #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub struct ObjectUriIndexRecord { pub run_id: String, pub uri: String, pub sha256: String, pub object_instance_id: String, pub repo_id: String, pub pp_id: String, } #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub struct StatsRecord { pub schema_version: u32, pub run_id: String, pub scope: String, pub name: String, pub value: Value, } #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub struct ExportJobRecord { pub schema_version: u32, pub job_id: String, pub run_id: String, pub scope: String, pub repo_id: Option, pub pp_id: Option, pub status: String, pub created_at: String, pub finished_at: Option, pub output_path: Option, pub object_count: u64, pub bytes_written: u64, pub error: Option, } #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub struct ChainEdgeRecord { pub relation: String, pub from_uri: String, pub to_uri: String, pub to_object_instance_id: Option, pub to_sha256: Option, pub status: String, pub evidence: Value, } #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub struct ValidationExplainRecord { pub schema_version: u32, pub explain_version: u32, pub run_id: String, pub object_instance_id: String, pub uri: String, pub sha256: String, pub object_type: String, pub final_status: String, pub audit_result: String, pub detail_summary: Option, pub authoritative: bool, pub explain_mode: String, pub generated_at: String, pub parsevalidate: Value, pub chainvalidate: Value, pub chain_edges: Vec, } pub struct QueryDb { db: DB, secondary: bool, } impl QueryDb { pub fn open(path: impl AsRef) -> QueryDbResult { let mut opts = Options::default(); opts.create_if_missing(true); opts.create_missing_column_families(true); opts.set_compression_type(rocksdb::DBCompressionType::Lz4); let descriptors = QUERY_DB_COLUMN_FAMILIES .iter() .map(|name| ColumnFamilyDescriptor::new(*name, cf_options())) .collect::>(); let db = DB::open_cf_descriptors(&opts, path, descriptors)?; let store = Self { db, secondary: false, }; store.put_json_cf(CF_META, KEY_SCHEMA_VERSION, &QUERY_DB_SCHEMA_VERSION)?; Ok(store) } pub fn open_secondary( primary_path: impl AsRef, secondary_path: impl AsRef, ) -> QueryDbResult { let mut opts = Options::default(); opts.create_if_missing(false); opts.create_missing_column_families(false); opts.set_compression_type(rocksdb::DBCompressionType::Lz4); let descriptors = QUERY_DB_COLUMN_FAMILIES .iter() .map(|name| ColumnFamilyDescriptor::new(*name, cf_options())) .collect::>(); let db = DB::open_cf_descriptors_as_secondary( &opts, primary_path.as_ref(), secondary_path.as_ref(), descriptors, )?; Ok(Self { db, secondary: true, }) } pub fn try_catch_up_with_primary(&self) -> QueryDbResult<()> { if self.secondary { self.db.try_catch_up_with_primary()?; } Ok(()) } pub fn is_secondary(&self) -> bool { self.secondary } pub fn latest_ready_run(&self) -> QueryDbResult> { let Some(bytes) = self.get_cf(CF_META, KEY_LATEST_READY_RUN)? else { return Ok(None); }; serde_json::from_slice(&bytes).map_err(QueryDbError::from) } pub fn get_run(&self, run_id: &str) -> QueryDbResult> { self.get_json_cf(CF_RUNS, run_key(run_id).as_bytes()) } pub fn resolve_run_id(&self, run_id: &str) -> QueryDbResult> { if run_id == "latest" || run_id == "latest_run" { self.latest_ready_run() } else { Ok(Some(run_id.to_string())) } } pub fn list_runs( &self, limit: usize, cursor: Option<&str>, ) -> QueryDbResult> { self.list_json_by_prefix(CF_RUNS, "run/", limit, cursor) } pub fn list_repos( &self, run_id: &str, limit: usize, cursor: Option<&str>, ) -> QueryDbResult> { self.list_json_by_prefix(CF_REPOS, &format!("repo/{run_id}/"), limit, cursor) } pub fn get_repo(&self, run_id: &str, repo_id: &str) -> QueryDbResult> { self.get_json_cf(CF_REPOS, repo_key(run_id, repo_id).as_bytes()) } pub fn list_publication_points( &self, run_id: &str, limit: usize, cursor: Option<&str>, ) -> QueryDbResult> { self.list_json_by_prefix( CF_PUBLICATION_POINTS, &format!("pp/{run_id}/"), limit, cursor, ) } pub fn list_publication_points_for_repo( &self, run_id: &str, repo_id: &str, limit: usize, cursor: Option<&str>, ) -> QueryDbResult> { self.list_json_by_prefix_filtered( CF_PUBLICATION_POINTS, &format!("pp/{run_id}/"), limit, cursor, |item: &PublicationPointRecord| item.repo_id == repo_id, ) } pub fn get_publication_point( &self, run_id: &str, pp_id: &str, ) -> QueryDbResult> { self.get_json_cf(CF_PUBLICATION_POINTS, pp_key(run_id, pp_id).as_bytes()) } pub fn list_objects( &self, run_id: &str, limit: usize, cursor: Option<&str>, ) -> QueryDbResult> { let Some(report_path) = self.report_path_for_run(run_id)? else { return Ok(QueryPage { data: Vec::new(), next_cursor: None, limit: limit.clamp(1, 1000), }); }; report_stream::list_report_objects(&report_path, run_id, ObjectScope::All, limit, cursor) } pub fn list_objects_for_pp( &self, run_id: &str, pp_id: &str, limit: usize, cursor: Option<&str>, ) -> QueryDbResult> { let Some(report_path) = self.report_path_for_run(run_id)? else { return Ok(QueryPage { data: Vec::new(), next_cursor: None, limit: limit.clamp(1, 1000), }); }; report_stream::list_report_objects( &report_path, run_id, ObjectScope::PublicationPoint(pp_id.to_string()), limit, cursor, ) } pub fn list_objects_for_repo( &self, run_id: &str, repo_id: &str, limit: usize, cursor: Option<&str>, ) -> QueryDbResult> { let Some(report_path) = self.report_path_for_run(run_id)? else { return Ok(QueryPage { data: Vec::new(), next_cursor: None, limit: limit.clamp(1, 1000), }); }; report_stream::list_report_objects( &report_path, run_id, ObjectScope::Repo(repo_id.to_string()), limit, cursor, ) } pub fn get_object_by_instance_id( &self, run_id: &str, object_instance_id: &str, ) -> QueryDbResult> { if let Some(cached) = self.get_cached_object_by_instance_id(run_id, object_instance_id)? { return Ok(Some(cached)); } let Some(report_path) = self.report_path_for_run(run_id)? else { return Ok(None); }; let found = report_stream::lookup_report_object( &report_path, run_id, ObjectScope::All, ObjectLookup::InstanceId(object_instance_id.to_string()), )?; if let Some(found) = found { self.put_lazy_object(&found.object)?; return Ok(Some(found.object)); } Ok(None) } pub fn get_object_by_uri( &self, run_id: &str, uri: &str, ) -> QueryDbResult> { if let Some(cached) = self.get_json_cf(CF_OBJECTS_BY_URI, object_uri_key(run_id, uri).as_bytes())? { return Ok(Some(cached)); } let Some(report_path) = self.report_path_for_run(run_id)? else { return Ok(None); }; let found = report_stream::lookup_report_object( &report_path, run_id, ObjectScope::All, ObjectLookup::Uri(uri.to_string()), )?; if let Some(found) = found { let record = object_uri_index_from_object(&found.object); self.put_lazy_object(&found.object)?; return Ok(Some(record)); } Ok(None) } pub fn get_object_by_sha256( &self, run_id: &str, sha256: &str, ) -> QueryDbResult> { let Some(report_path) = self.report_path_for_run(run_id)? else { return Ok(None); }; let found = report_stream::lookup_report_object( &report_path, run_id, ObjectScope::All, ObjectLookup::Sha256(sha256.to_string()), )?; if let Some(found) = found { self.put_lazy_object(&found.object)?; return Ok(Some(found.object)); } Ok(None) } fn get_cached_object_by_instance_id( &self, run_id: &str, object_instance_id: &str, ) -> QueryDbResult> { let mut cursor = None; loop { let page: QueryPage = self.list_json_by_prefix( CF_OBJECT_INSTANCES, &format!("objinst/{run_id}/"), 1000, cursor.as_deref(), )?; if let Some(found) = page .data .into_iter() .find(|item| item.object_instance_id == object_instance_id) { return Ok(Some(found)); } let Some(next_cursor) = page.next_cursor else { return Ok(None); }; cursor = Some(next_cursor); } } pub fn put_lazy_object(&self, object: &ObjectInstanceRecord) -> QueryDbResult<()> { if self.secondary { return Ok(()); } let mut batch = WriteBatch::default(); put_json_batch( &mut batch, self, CF_OBJECT_INSTANCES, object_instance_key( &object.run_id, &object.uri_hash, &object.sha256, &object.object_instance_id, ) .as_bytes(), object, )?; let uri_index_record = object_uri_index_from_object(object); put_json_batch( &mut batch, self, CF_OBJECTS_BY_URI, object_uri_key(&object.run_id, &object.uri).as_bytes(), &uri_index_record, )?; self.write_batch(batch) } pub fn put_object_projection( &self, projection: &crate::object_projection::ObjectProjectionRecord, ) -> QueryDbResult<()> { if self.secondary { return Ok(()); } self.put_json_cf( CF_OBJECTS_BY_HASH, object_hash_key(&projection.sha256).as_bytes(), projection, ) } fn report_path_for_run(&self, run_id: &str) -> QueryDbResult> { Ok(self .get_run(run_id)? .map(|run| Path::new(&run.run_dir).join("report.json"))) } pub fn get_object_projection( &self, sha256: &str, ) -> QueryDbResult> { self.get_json_cf(CF_OBJECTS_BY_HASH, object_hash_key(sha256).as_bytes()) } pub fn get_stat( &self, run_id: &str, scope: &str, name: &str, ) -> QueryDbResult> { self.get_json_cf(CF_STATS, stats_key(run_id, scope, name).as_bytes()) } pub fn put_export_job(&self, job: &ExportJobRecord) -> QueryDbResult<()> { self.put_json_cf( CF_EXPORT_JOBS, export_job_key(&job.run_id, &job.job_id).as_bytes(), job, ) } pub fn get_export_job( &self, run_id: &str, job_id: &str, ) -> QueryDbResult> { self.get_json_cf(CF_EXPORT_JOBS, export_job_key(run_id, job_id).as_bytes()) } pub fn put_validation_explain(&self, explain: &ValidationExplainRecord) -> QueryDbResult<()> { if self.secondary { return Ok(()); } self.put_json_cf( CF_VALIDATION_EXPLAIN_CACHE, validation_explain_key( &explain.run_id, &explain.object_instance_id, explain.explain_version, ) .as_bytes(), explain, ) } pub fn get_validation_explain( &self, run_id: &str, object_instance_id: &str, explain_version: u32, ) -> QueryDbResult> { self.get_json_cf( CF_VALIDATION_EXPLAIN_CACHE, validation_explain_key(run_id, object_instance_id, explain_version).as_bytes(), ) } pub fn has_object_projection(&self, sha256: &str) -> QueryDbResult { Ok(self .get_cf(CF_OBJECTS_BY_HASH, object_hash_key(sha256).as_bytes())? .is_some()) } pub fn count_cf(&self, cf_name: &'static str) -> QueryDbResult { let cf = self.cf(cf_name)?; let mut count = 0u64; for item in self.db.iterator_cf(cf, IteratorMode::Start) { let _ = item?; count += 1; } Ok(count) } pub fn list_ready_runs_by_seq(&self) -> QueryDbResult> { let seq_cf = self.cf(CF_RUNS_BY_SEQ)?; let mut seen = BTreeSet::new(); let mut runs = Vec::new(); for item in self.db.iterator_cf(seq_cf, IteratorMode::Start) { let (_, value) = item?; let run_id: String = serde_json::from_slice(&value)?; if seen.insert(run_id.clone()) && let Some(run) = self.get_run(&run_id)? && run.index_status == "ready" { runs.push(run); } } let runs_cf = self.cf(CF_RUNS)?; for item in self.db.iterator_cf(runs_cf, IteratorMode::Start) { let (_, value) = item?; let run: RunRecord = serde_json::from_slice(&value)?; if run.index_status == "ready" && seen.insert(run.run_id.clone()) { runs.push(run); } } runs.sort_by(|left, right| match (left.run_seq, right.run_seq) { (Some(left_seq), Some(right_seq)) => left_seq.cmp(&right_seq), (Some(_), None) => std::cmp::Ordering::Greater, (None, Some(_)) => std::cmp::Ordering::Less, (None, None) => left.run_id.cmp(&right.run_id), }); Ok(runs) } pub fn enforce_run_retention(&self, retain_ready_runs: usize) -> QueryDbResult { if retain_ready_runs == 0 { return Ok(0); } let ready_runs = self.list_ready_runs_by_seq()?; let delete_count = ready_runs.len().saturating_sub(retain_ready_runs); let mut deleted = 0u64; for run in ready_runs.into_iter().take(delete_count) { self.delete_run_index(&run)?; deleted += 1; } Ok(deleted) } pub fn delete_run_index(&self, run: &RunRecord) -> QueryDbResult<()> { let mut batch = WriteBatch::default(); let runs_cf = self.cf(CF_RUNS)?; batch.delete_cf(runs_cf, run_key(&run.run_id).as_bytes()); if let Some(seq) = run.run_seq { let seq_cf = self.cf(CF_RUNS_BY_SEQ)?; batch.delete_cf(seq_cf, seq_key(seq).as_bytes()); } for (cf_name, prefix) in [ (CF_REPOS, format!("repo/{}/", run.run_id)), (CF_PUBLICATION_POINTS, format!("pp/{}/", run.run_id)), (CF_OBJECT_INSTANCES, format!("objinst/{}/", run.run_id)), (CF_OBJECTS_BY_URI, format!("objuri/{}/", run.run_id)), ( CF_VALIDATION_EXPLAIN_CACHE, format!("explain/{}/", run.run_id), ), (CF_EXPORT_JOBS, format!("export/{}/", run.run_id)), (CF_STATS, format!("stats/{}/", run.run_id)), (CF_REASON_INDEX, format!("reason/{}/", run.run_id)), ] { self.delete_prefix_range(&mut batch, cf_name, &prefix)?; } self.write_batch(batch) } fn cf(&self, name: &'static str) -> QueryDbResult<&rocksdb::ColumnFamily> { self.db .cf_handle(name) .ok_or(QueryDbError::MissingColumnFamily(name)) } fn put_json_cf( &self, cf_name: &'static str, key: &[u8], value: &T, ) -> QueryDbResult<()> { let cf = self.cf(cf_name)?; let bytes = serde_json::to_vec(value)?; self.db.put_cf(cf, key, bytes)?; Ok(()) } fn get_cf(&self, cf_name: &'static str, key: &[u8]) -> QueryDbResult>> { let cf = self.cf(cf_name)?; self.db.get_cf(cf, key).map_err(QueryDbError::from) } fn get_json_cf Deserialize<'de>>( &self, cf_name: &'static str, key: &[u8], ) -> QueryDbResult> { let Some(bytes) = self.get_cf(cf_name, key)? else { return Ok(None); }; serde_json::from_slice(&bytes) .map(Some) .map_err(QueryDbError::from) } fn list_json_by_prefix Deserialize<'de>>( &self, cf_name: &'static str, prefix: &str, raw_limit: usize, cursor: Option<&str>, ) -> QueryDbResult> { let limit = raw_limit.clamp(1, 1000); let cf = self.cf(cf_name)?; let start = cursor.unwrap_or(prefix); let mut data = Vec::new(); let mut next_cursor = None; let mode = IteratorMode::From(start.as_bytes(), rocksdb::Direction::Forward); for item in self.db.iterator_cf(cf, mode) { let (key, value) = item?; let key_str = String::from_utf8_lossy(&key); if !key_str.starts_with(prefix) { break; } if data.len() >= limit { next_cursor = Some(key_str.to_string()); break; } data.push(serde_json::from_slice(&value)?); } Ok(QueryPage { data, next_cursor, limit, }) } fn list_json_by_prefix_filtered( &self, cf_name: &'static str, prefix: &str, raw_limit: usize, cursor: Option<&str>, mut predicate: F, ) -> QueryDbResult> where T: for<'de> Deserialize<'de>, F: FnMut(&T) -> bool, { let limit = raw_limit.clamp(1, 1000); let cf = self.cf(cf_name)?; let start = cursor.unwrap_or(prefix); let mut data = Vec::new(); let mut next_cursor = None; let mode = IteratorMode::From(start.as_bytes(), rocksdb::Direction::Forward); for item in self.db.iterator_cf(cf, mode) { let (key, value) = item?; let key_str = String::from_utf8_lossy(&key); if !key_str.starts_with(prefix) { break; } let value: T = serde_json::from_slice(&value)?; if !predicate(&value) { continue; } if data.len() >= limit { next_cursor = Some(key_str.to_string()); break; } data.push(value); } Ok(QueryPage { data, next_cursor, limit, }) } fn write_batch(&self, batch: WriteBatch) -> QueryDbResult<()> { self.db.write(batch)?; Ok(()) } fn delete_prefix_range( &self, batch: &mut WriteBatch, cf_name: &'static str, prefix: &str, ) -> QueryDbResult<()> { let cf = self.cf(cf_name)?; if let Some(end) = prefix_range_end(prefix.as_bytes()) { batch.delete_range_cf(cf, prefix.as_bytes(), end.as_slice()); } Ok(()) } } fn cf_options() -> Options { let mut opts = Options::default(); opts.set_compression_type(rocksdb::DBCompressionType::Lz4); opts } #[derive(Clone, Debug, PartialEq, Eq)] pub struct ArtifactIndexerConfig { pub query_db_path: PathBuf, pub run_root: Option, pub run_dir: Option, pub repo_bytes_db_path: Option, pub projection_entry_limit: usize, pub min_run_seq: Option, pub retain_indexed_runs: Option, } pub fn index_artifacts(config: &ArtifactIndexerConfig) -> QueryDbResult { let db = QueryDb::open(&config.query_db_path)?; let repo_bytes = match config.repo_bytes_db_path.as_ref() { Some(path) => Some( ExternalRepoBytesDb::open(path) .map_err(|err| QueryDbError::RocksDb(err.to_string()))?, ), None => None, }; index_artifacts_with_open_db(&db, repo_bytes.as_ref(), config) } pub fn index_artifacts_with_open_db( db: &QueryDb, repo_bytes: Option<&ExternalRepoBytesDb>, config: &ArtifactIndexerConfig, ) -> QueryDbResult { let mut summary = QueryIndexSummary::default(); let run_dirs = collect_index_run_dirs(config)?; for run_dir in run_dirs { match index_run_dir(db, repo_bytes, config.projection_entry_limit, &run_dir) { Ok(run_summary) => { if run_summary.indexed { summary.runs_indexed += 1; } summary.repos_indexed += run_summary.repos_indexed; summary.publication_points_indexed += run_summary.publication_points_indexed; summary.object_instances_indexed += run_summary.object_instances_indexed; summary.object_projections_indexed += run_summary.object_projections_indexed; summary.stats_indexed += run_summary.stats_indexed; summary.latest_ready_run = run_summary.latest_ready_run; } Err(err) => summary.errors.push(format!("{}: {err}", run_dir.display())), } } if summary.runs_indexed > 0 && let Some(retain_indexed_runs) = config.retain_indexed_runs { summary.runs_deleted = db.enforce_run_retention(retain_indexed_runs)?; summary.retained_runs = db.list_ready_runs_by_seq()?.len() as u64; } Ok(summary) } fn collect_index_run_dirs(config: &ArtifactIndexerConfig) -> QueryDbResult> { if let Some(run_dir) = config.run_dir.as_ref() { return Ok(vec![run_dir.clone()]); } let root = config.run_root.as_ref().ok_or_else(|| { QueryDbError::InvalidArtifact("either run_root or run_dir is required".into()) })?; let runs_root = if root.join("runs").is_dir() { root.join("runs") } else { root.clone() }; let mut out = Vec::new(); for entry in fs::read_dir(&runs_root)? { let entry = entry?; let path = entry.path(); if !path.is_dir() { continue; } let Some(name) = path.file_name().and_then(|name| name.to_str()) else { continue; }; if name.starts_with("run_") && path.join("report.json").exists() { if let Some(min_run_seq) = config.min_run_seq { match run_index_from_path(&path) { Some(seq) if seq >= min_run_seq => {} _ => continue, } } if run_status_is_success_or_unknown(&path)? { out.push(path); } } } out.sort(); Ok(out) } fn run_status_is_success_or_unknown(run_dir: &Path) -> QueryDbResult { let summary_path = run_dir.join("run-summary.json"); if !summary_path.exists() { return Ok(true); } let summary = read_json_file(&summary_path)?; Ok(match json_str(&summary, &["status"]) { Some(status) => status == "success", None => true, }) } #[derive(Default)] struct SingleRunIndexSummary { indexed: bool, repos_indexed: u64, publication_points_indexed: u64, object_instances_indexed: u64, object_projections_indexed: u64, stats_indexed: u64, latest_ready_run: Option, } fn index_run_dir( db: &QueryDb, _repo_bytes: Option<&ExternalRepoBytesDb>, _projection_entry_limit: usize, run_dir: &Path, ) -> QueryDbResult { let report_path = run_dir.join("report.json"); if !report_path.exists() { return Err(QueryDbError::InvalidArtifact(format!( "missing report.json under {}", run_dir.display() ))); } let summary_file = read_json_file_optional(&run_dir.join("run-summary.json"))?; let meta = read_json_file_optional(&run_dir.join("run-meta.json"))?; let stage_timing = read_json_file_optional(&run_dir.join("stage-timing.json"))?; let run_id = run_id_for(run_dir, summary_file.as_ref(), meta.as_ref()); if ready_run_is_current(db, &run_id, run_dir)? { return Ok(SingleRunIndexSummary { latest_ready_run: db.latest_ready_run()?, ..SingleRunIndexSummary::default() }); } let report_summary = report_stream::summarize_report(&report_path, &run_id)?; let artifact_manifest = build_artifact_manifest(run_dir, report_summary.query_audit.as_ref())?; let mut run_record = build_run_record( run_dir, &run_id, &report_summary, summary_file.as_ref(), meta.as_ref(), stage_timing.as_ref(), &artifact_manifest, ); run_record.index_status = "building".to_string(); run_record.index_error = None; db.put_json_cf(CF_RUNS, run_key(&run_id).as_bytes(), &run_record)?; let indexed = match write_summary_index_records(db, &run_id, &report_summary, &artifact_manifest) { Ok(indexed) => indexed, Err(err) => { run_record.index_status = "failed".to_string(); run_record.index_error = Some(err.to_string()); db.put_json_cf(CF_RUNS, run_key(&run_id).as_bytes(), &run_record)?; return Err(err); } }; run_record.index_status = "ready".to_string(); let should_update_latest = should_update_latest_ready_run(db, &run_record)?; let mut final_batch = WriteBatch::default(); put_json_batch( &mut final_batch, db, CF_RUNS, run_key(&run_id).as_bytes(), &run_record, )?; if let Some(seq) = run_record.run_seq { put_json_batch( &mut final_batch, db, CF_RUNS_BY_SEQ, seq_key(seq).as_bytes(), &run_id, )?; } if should_update_latest { put_json_batch(&mut final_batch, db, CF_META, KEY_LATEST_READY_RUN, &run_id)?; } db.write_batch(final_batch)?; Ok(SingleRunIndexSummary { indexed: true, repos_indexed: indexed.repos_indexed, publication_points_indexed: indexed.publication_points_indexed, object_instances_indexed: indexed.object_instances_indexed, object_projections_indexed: indexed.object_projections_indexed, stats_indexed: indexed.stats_indexed, latest_ready_run: if should_update_latest { Some(run_id) } else { db.latest_ready_run()? }, }) } fn ready_run_is_current(db: &QueryDb, run_id: &str, run_dir: &Path) -> QueryDbResult { let Some(existing) = db.get_run(run_id)? else { return Ok(false); }; Ok(existing.schema_version == QUERY_DB_SCHEMA_VERSION && existing.index_status == "ready" && existing.run_dir == run_dir.display().to_string()) } fn should_update_latest_ready_run(db: &QueryDb, candidate: &RunRecord) -> QueryDbResult { let Some(current_run_id) = db.latest_ready_run()? else { return Ok(true); }; let Some(current) = db.get_run(¤t_run_id)? else { return Ok(true); }; Ok(match (candidate.run_seq, current.run_seq) { (Some(candidate_seq), Some(current_seq)) => candidate_seq >= current_seq, (Some(_), None) => true, (None, Some(_)) => false, (None, None) => candidate.run_id >= current.run_id, }) } #[derive(Default)] struct IndexWriteSummary { repos_indexed: u64, publication_points_indexed: u64, object_instances_indexed: u64, object_projections_indexed: u64, stats_indexed: u64, } fn write_summary_index_records( db: &QueryDb, run_id: &str, report_summary: &ReportSummary, artifact_manifest: &crate::query::artifact_manifest::ArtifactManifestSummary, ) -> QueryDbResult { const BATCH_LIMIT: usize = 5000; let mut batch = WriteBatch::default(); let mut pending = 0usize; let mut summary = IndexWriteSummary::default(); macro_rules! flush_if_needed { () => { if pending >= BATCH_LIMIT { let to_write = std::mem::take(&mut batch); db.write_batch(to_write)?; pending = 0; } }; } for pp_record in &report_summary.publication_points { put_json_batch( &mut batch, db, CF_PUBLICATION_POINTS, pp_key(run_id, &pp_record.pp_id).as_bytes(), pp_record, )?; pending += 1; summary.publication_points_indexed += 1; flush_if_needed!(); } for repo_record in &report_summary.repos { put_json_batch( &mut batch, db, CF_REPOS, repo_key(run_id, &repo_record.repo_id).as_bytes(), repo_record, )?; pending += 1; summary.repos_indexed += 1; flush_if_needed!(); } let artifacts_value = serde_json::to_value(artifact_manifest)?; let stats = report_stream::stats_records_from_summary(run_id, report_summary, artifacts_value); for record in &stats { put_json_batch( &mut batch, db, CF_STATS, stats_key(&record.run_id, &record.scope, &record.name).as_bytes(), record, )?; pending += 1; flush_if_needed!(); } if pending > 0 { db.write_batch(batch)?; } summary.stats_indexed = stats.len() as u64; Ok(summary) } fn object_uri_index_from_object(object: &ObjectInstanceRecord) -> ObjectUriIndexRecord { ObjectUriIndexRecord { run_id: object.run_id.clone(), uri: object.uri.clone(), sha256: object.sha256.clone(), object_instance_id: object.object_instance_id.clone(), repo_id: object.repo_id.clone(), pp_id: object.pp_id.clone(), } } fn put_json_batch( batch: &mut WriteBatch, db: &QueryDb, cf_name: &'static str, key: &[u8], value: &T, ) -> QueryDbResult<()> { let cf = db.cf(cf_name)?; let bytes = serde_json::to_vec(value)?; batch.put_cf(cf, key, bytes); Ok(()) } fn build_run_record( run_dir: &Path, run_id: &str, report_summary: &ReportSummary, summary: Option<&Value>, meta: Option<&Value>, stage_timing: Option<&Value>, artifact_manifest: &crate::query::artifact_manifest::ArtifactManifestSummary, ) -> RunRecord { let artifact_paths = artifact_manifest.artifact_paths(); let counts = RunCounts { publication_points: summary .and_then(|v| json_u64(v, &["reportCounts", "publicationPoints"])) .unwrap_or(report_summary.publication_points.len() as u64), objects: report_summary.objects_count, fresh_objects: 0, cached_objects: 0, rejected_objects: report_summary .publication_points .iter() .map(|pp| pp.rejected_objects) .sum(), fresh_rejected_objects: 0, cached_rejected_objects: 0, trust_anchors: 0, vrps: summary .and_then(|v| json_u64(v, &["reportCounts", "vrps"])) .unwrap_or(report_summary.vrps_count), aspas: summary .and_then(|v| json_u64(v, &["reportCounts", "aspas"])) .unwrap_or(report_summary.aspas_count), warnings: summary .and_then(|v| json_u64(v, &["reportCounts", "warnings"])) .unwrap_or(report_summary.warnings_count), }; RunRecord { schema_version: QUERY_DB_SCHEMA_VERSION, run_id: run_id.to_string(), run_seq: summary .and_then(|v| json_u64(v, &["runSeq"])) .or_else(|| meta.and_then(|v| json_u64(v, &["run_index"]))) .or_else(|| run_index_from_path(run_dir)), run_dir: run_dir.display().to_string(), validation_time: report_summary.validation_time.clone(), sync_mode: meta .and_then(|v| json_str(v, &["sync_mode"])) .map(str::to_string), started_at: summary .and_then(|v| json_str(v, &["startedAtRfc3339Utc"])) .or_else(|| meta.and_then(|v| json_str(v, &["started_at_rfc3339_utc"]))) .map(str::to_string), finished_at: summary .and_then(|v| json_str(v, &["finishedAtRfc3339Utc"])) .or_else(|| meta.and_then(|v| json_str(v, &["completed_at_rfc3339_utc"]))) .map(str::to_string), wall_ms: summary .and_then(|v| json_u64(v, &["wallMs"])) .or_else(|| stage_timing.and_then(|v| json_u64(v, &["total_ms"]))), artifact_paths, counts, index_status: "building".to_string(), index_error: None, } } fn read_json_file(path: &Path) -> QueryDbResult { let bytes = fs::read(path)?; serde_json::from_slice(&bytes).map_err(QueryDbError::from) } fn read_json_file_optional(path: &Path) -> QueryDbResult> { if path.exists() { read_json_file(path).map(Some) } else { Ok(None) } } fn run_id_for(run_dir: &Path, summary: Option<&Value>, meta: Option<&Value>) -> String { summary .and_then(|v| json_str(v, &["runId"])) .or_else(|| meta.and_then(|v| json_str(v, &["run_id"]))) .map(str::to_string) .unwrap_or_else(|| { run_dir .file_name() .and_then(|v| v.to_str()) .unwrap_or("run_unknown") .to_string() }) } fn json_str<'a>(value: &'a Value, path: &[&str]) -> Option<&'a str> { let mut current = value; for key in path { current = current.get(*key)?; } current.as_str() } fn json_u64(value: &Value, path: &[&str]) -> Option { let mut current = value; for key in path { current = current.get(*key)?; } current.as_u64() } fn run_index_from_path(path: &Path) -> Option { path.file_name() .and_then(|name| name.to_str()) .and_then(|name| name.strip_prefix("run_")) .and_then(|value| value.parse::().ok()) } fn stable_id(value: &str) -> String { let digest = Sha256::digest(value.as_bytes()); hex::encode(&digest[..12]) } fn run_key(run_id: &str) -> String { format!("run/{run_id}") } fn seq_key(seq: u64) -> String { format!("seq/{seq:020}") } fn repo_key(run_id: &str, repo_id: &str) -> String { format!("repo/{run_id}/{repo_id}") } fn pp_key(run_id: &str, pp_id: &str) -> String { format!("pp/{run_id}/{pp_id}") } fn object_instance_key( run_id: &str, uri_hash: &str, sha256: &str, object_instance_id: &str, ) -> String { format!("objinst/{run_id}/{uri_hash}/{sha256}/{object_instance_id}") } fn object_uri_key(run_id: &str, uri: &str) -> String { format!("objuri/{run_id}/{}", stable_id(uri)) } fn stats_key(run_id: &str, scope: &str, name: &str) -> String { format!("stats/{run_id}/{scope}/{name}") } fn export_job_key(run_id: &str, job_id: &str) -> String { format!("export/{run_id}/{job_id}") } fn validation_explain_key(run_id: &str, object_instance_id: &str, explain_version: u32) -> String { format!("explain/{run_id}/{object_instance_id}/{explain_version}") } fn object_hash_key(sha256: &str) -> String { format!("objhash/{sha256}") } fn prefix_range_end(prefix: &[u8]) -> Option> { let mut end = prefix.to_vec(); for index in (0..end.len()).rev() { if end[index] != u8::MAX { end[index] += 1; end.truncate(index + 1); return Some(end); } } None } #[cfg(test)] mod tests { use serde_json::json; use sha2::{Digest, Sha256}; use super::*; #[test] fn latest_ready_run_updates_after_index_success() { let temp = tempfile::tempdir().expect("tempdir"); let run_dir = temp.path().join("runs/run_0001"); fs::create_dir_all(&run_dir).expect("run dir"); write_sample_run(&run_dir, "run_0001", 1); let query_db_path = temp.path().join("query-db"); let summary = index_artifacts(&ArtifactIndexerConfig { query_db_path: query_db_path.clone(), run_root: Some(temp.path().to_path_buf()), run_dir: None, repo_bytes_db_path: None, projection_entry_limit: 50, min_run_seq: None, retain_indexed_runs: None, }) .expect("index"); assert_eq!(summary.runs_indexed, 1); assert_eq!(summary.latest_ready_run.as_deref(), Some("run_0001")); let db = QueryDb::open(&query_db_path).expect("open query db"); assert_eq!(db.latest_ready_run().unwrap().as_deref(), Some("run_0001")); let run = db.get_run("run_0001").unwrap().expect("run"); assert_eq!(run.index_status, "ready"); assert_eq!(run.counts.publication_points, 1); assert_eq!(run.counts.objects, 2); assert_eq!(db.count_cf(CF_REPOS).unwrap(), 1); assert_eq!(db.count_cf(CF_PUBLICATION_POINTS).unwrap(), 1); assert_eq!(db.count_cf(CF_OBJECT_INSTANCES).unwrap(), 0); } #[test] fn failed_run_does_not_replace_previous_latest() { let temp = tempfile::tempdir().expect("tempdir"); let run1 = temp.path().join("runs/run_0001"); let run2 = temp.path().join("runs/run_0002"); fs::create_dir_all(&run1).expect("run1"); fs::create_dir_all(&run2).expect("run2"); write_sample_run(&run1, "run_0001", 1); write_sample_run(&run2, "run_0002", 2); fs::write( run2.join("run-summary.json"), r#"{"status":"failed","runId":"run_0002","runSeq":2}"#, ) .expect("failed summary"); let query_db_path = temp.path().join("query-db"); let summary = index_artifacts(&ArtifactIndexerConfig { query_db_path: query_db_path.clone(), run_root: Some(temp.path().to_path_buf()), run_dir: None, repo_bytes_db_path: None, projection_entry_limit: 50, min_run_seq: None, retain_indexed_runs: None, }) .expect("index"); assert_eq!(summary.runs_indexed, 1); let db = QueryDb::open(&query_db_path).expect("open query db"); assert_eq!(db.latest_ready_run().unwrap().as_deref(), Some("run_0001")); assert!(db.get_run("run_0002").unwrap().is_none()); } #[test] fn min_run_seq_filters_old_history() { let temp = tempfile::tempdir().expect("tempdir"); let run1 = temp.path().join("runs/run_0001"); let run2 = temp.path().join("runs/run_0002"); fs::create_dir_all(&run1).expect("run1"); fs::create_dir_all(&run2).expect("run2"); write_sample_run(&run1, "run_0001", 1); write_sample_run(&run2, "run_0002", 2); let query_db_path = temp.path().join("query-db"); let summary = index_artifacts(&ArtifactIndexerConfig { query_db_path: query_db_path.clone(), run_root: Some(temp.path().to_path_buf()), run_dir: None, repo_bytes_db_path: None, projection_entry_limit: 50, min_run_seq: Some(2), retain_indexed_runs: None, }) .expect("index"); assert_eq!(summary.runs_indexed, 1); assert_eq!(summary.latest_ready_run.as_deref(), Some("run_0002")); let db = QueryDb::open(&query_db_path).expect("query db"); assert!(db.get_run("run_0001").unwrap().is_none()); assert!(db.get_run("run_0002").unwrap().is_some()); } #[test] fn retention_deletes_old_run_scoped_indexes() { let temp = tempfile::tempdir().expect("tempdir"); for seq in 1..=3 { let run_dir = temp.path().join(format!("runs/run_{seq:04}")); fs::create_dir_all(&run_dir).expect("run dir"); write_sample_run(&run_dir, &format!("run_{seq:04}"), seq); } let query_db_path = temp.path().join("query-db"); let summary = index_artifacts(&ArtifactIndexerConfig { query_db_path: query_db_path.clone(), run_root: Some(temp.path().to_path_buf()), run_dir: None, repo_bytes_db_path: None, projection_entry_limit: 50, min_run_seq: None, retain_indexed_runs: Some(2), }) .expect("index"); assert_eq!(summary.runs_indexed, 3); assert_eq!(summary.runs_deleted, 1); assert_eq!(summary.retained_runs, 2); assert_eq!(summary.latest_ready_run.as_deref(), Some("run_0003")); let db = QueryDb::open(&query_db_path).expect("query db"); assert!(db.get_run("run_0001").unwrap().is_none()); assert!(db.get_run("run_0002").unwrap().is_some()); assert!(db.get_run("run_0003").unwrap().is_some()); assert_eq!(db.latest_ready_run().unwrap().as_deref(), Some("run_0003")); assert_eq!(db.list_repos("run_0001", 10, None).unwrap().data.len(), 0); assert_eq!( db.list_publication_points("run_0001", 10, None) .unwrap() .data .len(), 0 ); assert_eq!(db.list_objects("run_0001", 10, None).unwrap().data.len(), 0); assert!( db.get_stat("run_0001", "overview", "counts") .unwrap() .is_none() ); assert_eq!(db.list_ready_runs_by_seq().unwrap().len(), 2); } #[test] fn repo_bytes_projection_is_not_written_during_summary_only_index() { let temp = tempfile::tempdir().expect("tempdir"); let run_dir = temp.path().join("runs/run_0001"); fs::create_dir_all(&run_dir).expect("run dir"); let roa_bytes = fs::read("tests/fixtures/repository/rpki.cernet.net/repo/cernet/0/AS4538.roa") .expect("fixture roa"); let roa_sha = hex::encode(Sha256::digest(&roa_bytes)); write_sample_run_with_object_hash(&run_dir, "run_0001", 1, &roa_sha); let repo_bytes_path = temp.path().join("repo-bytes.db"); let repo_bytes = ExternalRepoBytesDb::open(&repo_bytes_path).expect("repo bytes"); repo_bytes .put_blob_bytes_batch(&[(roa_sha.clone(), roa_bytes)]) .expect("put repo bytes"); drop(repo_bytes); let query_db_path = temp.path().join("query-db"); let summary = index_artifacts(&ArtifactIndexerConfig { query_db_path: query_db_path.clone(), run_root: Some(temp.path().to_path_buf()), run_dir: None, repo_bytes_db_path: Some(repo_bytes_path), projection_entry_limit: 5, min_run_seq: None, retain_indexed_runs: None, }) .expect("index"); assert_eq!(summary.object_projections_indexed, 0); let db = QueryDb::open(&query_db_path).expect("open query db"); assert_eq!(db.count_cf(CF_OBJECTS_BY_HASH).unwrap(), 0); } #[test] fn validation_events_sidecar_is_indexed() { let temp = tempfile::tempdir().expect("tempdir"); let run_dir = temp.path().join("runs/run_0001"); fs::create_dir_all(&run_dir).expect("run dir"); write_sample_run(&run_dir, "run_0001", 1); let events = [ json!({"schemaVersion":1,"seq":1,"eventType":"run_summary","validationTime":"2026-06-15T00:00:00Z","counts":{"objects":2,"warnings":0,"vrps":1,"aspas":0}}), json!({"schemaVersion":1,"seq":2,"eventType":"object","validationTime":"2026-06-15T00:00:00Z","objectUri":"rsync://repo.example/rpki/a.roa","sha256":"22","objectType":"roa","result":"error","reason":"bad roa"}), ]; let mut events_bytes = Vec::new(); for event in events { events_bytes.extend_from_slice(&serde_json::to_vec(&event).unwrap()); events_bytes.push(b'\n'); } let events_sha256 = hex::encode(Sha256::digest(&events_bytes)); fs::write(run_dir.join("validation-events.jsonl"), &events_bytes).expect("events"); let mut report = read_json_file(&run_dir.join("report.json")).expect("read report"); report["queryAudit"] = json!({ "schemaVersion": 1, "status": "complete", "eventsPath": "validation-events.jsonl", "eventsCount": 2, "eventsSha256": events_sha256, "writerVersion": 1 }); fs::write( run_dir.join("report.json"), serde_json::to_vec(&report).unwrap(), ) .expect("report"); let query_db_path = temp.path().join("query-db"); let summary = index_artifacts(&ArtifactIndexerConfig { query_db_path: query_db_path.clone(), run_root: Some(temp.path().to_path_buf()), run_dir: None, repo_bytes_db_path: None, projection_entry_limit: 50, min_run_seq: None, retain_indexed_runs: None, }) .expect("index"); assert_eq!(summary.stats_indexed, 11); let db = QueryDb::open(&query_db_path).expect("open query db"); let run = db.get_run("run_0001").unwrap().expect("run"); assert!(run.artifact_paths.contains_key("validationEvents")); let manifest = db .get_stat("run_0001", "validation_events", "manifest") .unwrap() .expect("event manifest"); assert_eq!(manifest.value["eventsCount"].as_u64(), Some(2)); let by_type = db .get_stat("run_0001", "validation_events", "by_type") .unwrap() .expect("event type stats"); assert_eq!(by_type.value.as_object().map(|items| items.len()), Some(0)); let reasons = db .get_stat("run_0001", "validation_events", "reasons") .unwrap() .expect("event reasons"); assert_eq!(reasons.value.as_object().map(|items| items.len()), Some(0)); } #[test] fn query_db_lists_records_indexes_and_cached_results() { let temp = tempfile::tempdir().expect("tempdir"); let run1 = temp.path().join("runs/run_0001"); let run2 = temp.path().join("runs/run_0002"); fs::create_dir_all(&run1).expect("run1"); fs::create_dir_all(&run2).expect("run2"); write_sample_run(&run1, "run_0001", 1); write_sample_run(&run2, "run_0002", 2); let query_db_path = temp.path().join("query-db"); let summary = index_artifacts(&ArtifactIndexerConfig { query_db_path: query_db_path.clone(), run_root: Some(temp.path().to_path_buf()), run_dir: None, repo_bytes_db_path: None, projection_entry_limit: 50, min_run_seq: None, retain_indexed_runs: None, }) .expect("index"); assert_eq!(summary.runs_indexed, 2); assert_eq!(summary.latest_ready_run.as_deref(), Some("run_0002")); let db = QueryDb::open(&query_db_path).expect("open query db"); assert_eq!( db.resolve_run_id("latest").unwrap().as_deref(), Some("run_0002") ); assert_eq!( db.resolve_run_id("run_0001").unwrap().as_deref(), Some("run_0001") ); let first_page = db.list_runs(1, None).expect("runs"); assert_eq!(first_page.data.len(), 1); assert_eq!(first_page.data[0].run_id, "run_0001"); let second_page = db .list_runs(1, first_page.next_cursor.as_deref()) .expect("second runs"); assert_eq!(second_page.data.len(), 1); assert_eq!(second_page.data[0].run_id, "run_0002"); let repos = db.list_repos("run_0002", 10, None).expect("repos"); assert_eq!(repos.data.len(), 1); let repo = repos.data[0].clone(); assert_eq!(repo.host, "repo.example"); assert_eq!(repo.transport, "rrdp"); assert_eq!(repo.publication_points, 1); assert_eq!( db.get_repo("run_0002", &repo.repo_id) .unwrap() .expect("repo") .uri, repo.uri ); let pps = db .list_publication_points_for_repo("run_0002", &repo.repo_id, 10, None) .expect("pps"); assert_eq!(pps.data.len(), 1); let pp = pps.data[0].clone(); assert_eq!(pp.repo_sync_phase.as_deref(), Some("rrdp_delta")); assert_eq!( db.get_publication_point("run_0002", &pp.pp_id) .unwrap() .expect("pp") .objects, 2 ); let objects = db.list_objects("run_0002", 10, None).expect("objects"); assert_eq!(objects.data.len(), 2); assert_eq!( db.list_objects_for_repo("run_0002", &repo.repo_id, 10, None) .expect("repo objects") .data .len(), 2 ); assert_eq!( db.list_objects_for_pp("run_0002", &pp.pp_id, 10, None) .expect("pp objects") .data .len(), 2 ); let roa = objects .data .iter() .find(|object| object.object_type == "roa") .expect("roa") .clone(); assert!(roa.rejected); assert_eq!( db.get_object_by_instance_id("run_0002", &roa.object_instance_id) .unwrap() .expect("object") .uri, roa.uri ); assert_eq!( db.get_object_by_uri("run_0002", &roa.uri) .unwrap() .expect("uri index") .object_instance_id, roa.object_instance_id ); let overview = db .get_stat("run_0002", "overview", "counts") .unwrap() .expect("overview"); assert_eq!(overview.value["objects"].as_u64(), Some(2)); let object_types = db .get_stat("run_0002", "objects", "by_type") .unwrap() .expect("types"); assert_eq!(object_types.value["manifest"].as_u64(), Some(1)); assert_eq!(object_types.value["roa"].as_u64(), Some(1)); let by_result = db .get_stat("run_0002", "validation", "by_result") .unwrap() .expect("result"); assert_eq!(by_result.value["ok"].as_u64(), Some(1)); assert_eq!(by_result.value["error"].as_u64(), Some(1)); let job = ExportJobRecord { schema_version: QUERY_DB_SCHEMA_VERSION, job_id: "job-1".to_string(), run_id: "run_0002".to_string(), scope: "object_set".to_string(), repo_id: None, pp_id: None, status: "complete".to_string(), created_at: "2026-06-15T00:00:00Z".to_string(), finished_at: Some("2026-06-15T00:00:01Z".to_string()), output_path: Some("/tmp/export.tar".to_string()), object_count: 2, bytes_written: 512, error: None, }; db.put_export_job(&job).expect("put job"); assert_eq!( db.get_export_job("run_0002", "job-1") .unwrap() .expect("job") .bytes_written, 512 ); let explain = ValidationExplainRecord { schema_version: QUERY_DB_SCHEMA_VERSION, explain_version: 1, run_id: "run_0002".to_string(), object_instance_id: roa.object_instance_id.clone(), uri: roa.uri.clone(), sha256: roa.sha256.clone(), object_type: roa.object_type.clone(), final_status: "invalid".to_string(), audit_result: "error".to_string(), detail_summary: Some("bad roa".to_string()), authoritative: false, explain_mode: "test".to_string(), generated_at: "2026-06-15T00:00:02Z".to_string(), parsevalidate: json!({"status":"invalid"}), chainvalidate: json!({"status":"invalid"}), chain_edges: vec![ChainEdgeRecord { relation: "test".to_string(), from_uri: roa.uri.clone(), to_uri: "rsync://repo.example/rpki/m.mft".to_string(), to_object_instance_id: None, to_sha256: None, status: "missing".to_string(), evidence: json!({}), }], }; db.put_validation_explain(&explain).expect("put explain"); assert_eq!( db.get_validation_explain("run_0002", &roa.object_instance_id, 1) .unwrap() .expect("explain") .final_status, "invalid" ); } #[test] fn repeated_indexing_does_not_move_latest_backwards() { let temp = tempfile::tempdir().expect("tempdir"); let run1 = temp.path().join("runs/run_0001"); let run2 = temp.path().join("runs/run_0002"); fs::create_dir_all(&run1).expect("run1"); fs::create_dir_all(&run2).expect("run2"); write_sample_run(&run1, "run_0001", 1); write_sample_run(&run2, "run_0002", 2); let query_db_path = temp.path().join("query-db"); let db = QueryDb::open(&query_db_path).expect("open query db"); let config = ArtifactIndexerConfig { query_db_path, run_root: Some(temp.path().to_path_buf()), run_dir: None, repo_bytes_db_path: None, projection_entry_limit: 50, min_run_seq: None, retain_indexed_runs: None, }; let first = index_artifacts_with_open_db(&db, None, &config).expect("first index"); assert_eq!(first.latest_ready_run.as_deref(), Some("run_0002")); assert_eq!(first.runs_indexed, 2); assert_eq!(db.latest_ready_run().unwrap().as_deref(), Some("run_0002")); let second = index_artifacts_with_open_db(&db, None, &config).expect("second index"); assert_eq!(second.runs_indexed, 0); assert_eq!(second.latest_ready_run.as_deref(), Some("run_0002")); assert_eq!(db.latest_ready_run().unwrap().as_deref(), Some("run_0002")); assert_eq!( db.get_run("run_0001") .unwrap() .expect("run_0001") .index_status, "ready" ); } #[test] fn validation_events_mismatch_is_reported_without_switching_latest() { let temp = tempfile::tempdir().expect("tempdir"); let run_dir = temp.path().join("runs/run_0001"); fs::create_dir_all(&run_dir).expect("run dir"); write_sample_run(&run_dir, "run_0001", 1); let events_bytes = br#"{"schemaVersion":1,"eventType":"run_summary"}"#; fs::write(run_dir.join("validation-events.jsonl"), events_bytes).expect("events"); let mut report = read_json_file(&run_dir.join("report.json")).expect("read report"); report["queryAudit"] = json!({ "schemaVersion": 1, "status": "complete", "eventsPath": "validation-events.jsonl", "eventsCount": 2, "eventsSha256": hex::encode(Sha256::digest(events_bytes)), "writerVersion": 1 }); fs::write( run_dir.join("report.json"), serde_json::to_vec(&report).unwrap(), ) .expect("report"); let query_db_path = temp.path().join("query-db"); let summary = index_artifacts(&ArtifactIndexerConfig { query_db_path: query_db_path.clone(), run_root: Some(temp.path().to_path_buf()), run_dir: None, repo_bytes_db_path: None, projection_entry_limit: 50, min_run_seq: None, retain_indexed_runs: None, }) .expect("index summary"); assert_eq!(summary.runs_indexed, 1); assert_eq!(summary.errors.len(), 0); let db = QueryDb::open(&query_db_path).expect("db"); assert_eq!(db.latest_ready_run().unwrap().as_deref(), Some("run_0001")); } fn write_sample_run_with_object_hash( run_dir: &Path, run_id: &str, run_seq: u64, roa_sha: &str, ) { write_sample_run(run_dir, run_id, run_seq); let mut report = read_json_file(&run_dir.join("report.json")).expect("read report"); report["publication_points"][0]["objects"][1]["sha256_hex"] = json!(roa_sha); fs::write( run_dir.join("report.json"), serde_json::to_vec(&report).unwrap(), ) .expect("report"); } fn write_sample_run(run_dir: &Path, run_id: &str, run_seq: u64) { let report = json!({ "format_version": 2, "meta": {"validation_time_rfc3339_utc": "2026-06-15T00:00:00Z"}, "tree": {"warnings": []}, "publication_points": [ { "node_id": 10, "rsync_base_uri": "rsync://repo.example/rpki/", "manifest_rsync_uri": "rsync://repo.example/rpki/m.mft", "publication_point_rsync_uri": "rsync://repo.example/rpki/", "rrdp_notification_uri": "https://repo.example/rrdp/notification.xml", "source": "rrdp", "repo_sync_source": "rrdp", "repo_sync_phase": "rrdp_delta", "repo_sync_duration_ms": 123, "repo_terminal_state": "fresh", "warnings": [], "objects": [ {"rsync_uri":"rsync://repo.example/rpki/m.mft","sha256_hex":"11","kind":"manifest","result":"ok"}, {"rsync_uri":"rsync://repo.example/rpki/a.roa","sha256_hex":"22","kind":"roa","result":"error","detail":"bad roa"} ] } ], "vrps": [{"asn": 64496, "prefix": "192.0.2.0/24", "max_length": 24}], "aspas": [], "downloads": [], "download_stats": {}, "repo_sync_stats": {} }); fs::write( run_dir.join("report.json"), serde_json::to_vec(&report).unwrap(), ) .expect("report"); let summary = json!({ "status": "success", "runId": run_id, "runSeq": run_seq, "startedAtRfc3339Utc": "2026-06-15T00:00:00Z", "finishedAtRfc3339Utc": "2026-06-15T00:01:00Z", "wallMs": 60000, "reportCounts": {"vrps": 1, "aspas": 0, "publicationPoints": 1, "warnings": 0} }); fs::write( run_dir.join("run-summary.json"), serde_json::to_vec(&summary).unwrap(), ) .expect("summary"); fs::write(run_dir.join("stage-timing.json"), b"{}").expect("stage"); } }