use std::collections::{BTreeMap, BTreeSet}; use std::path::Path; use crate::ccr::{decode_ccr_compare_views, decode_content_info}; use crate::cir::decode_cir; use super::io::{object_hash_key, parse_rfc3339, read_file, resolve_path}; use super::model::{SequenceItemRaw, SequenceMeta, SequenceSample, Side}; pub(super) fn load_sequence_meta(path: &Path, side: Side) -> Result, String> { let base_dir = path.parent().unwrap_or_else(|| Path::new(".")); let text = std::fs::read_to_string(path) .map_err(|e| format!("read sequence failed: {}: {e}", path.display()))?; let mut samples = Vec::new(); let mut seen_seq = BTreeSet::new(); for (line_index, line) in text.lines().enumerate() { let line = line.trim(); if line.is_empty() { continue; } let raw: SequenceItemRaw = serde_json::from_str(line).map_err(|e| { format!( "parse sequence JSONL failed: {}:{}: {e}", path.display(), line_index + 1 ) })?; validate_raw(path, line_index, &raw, side, &mut seen_seq)?; let cir_path = resolve_path(base_dir, &raw.cir_path); let ccr_path = resolve_path(base_dir, &raw.ccr_path); let cir = decode_cir(&read_file(&cir_path)?).map_err(|e| { format!( "decode CIR metadata failed for sample {} ({}): {e}", raw.run_id, cir_path.display() ) })?; let _sequence_validation_time = raw .validation_time .as_deref() .map(parse_rfc3339) .transpose()?; let validation_time = cir.validation_time; samples.push(SequenceMeta { cir_object_count: raw .cir_object_count .or(Some(cir.validated_object_count() as u64)), cir_reject_count: raw .cir_reject_count .or(Some(cir.rejected_object_count() as u64)), cir_trust_anchor_count: raw .cir_trust_anchor_count .or(Some(cir.trust_anchors.len() as u64)), raw, validation_time, ccr_path, cir_path, }); } samples.sort_by(|left, right| { left.validation_time .cmp(&right.validation_time) .then_with(|| left.raw.seq.cmp(&right.raw.seq)) .then_with(|| left.raw.run_id.cmp(&right.raw.run_id)) }); Ok(samples) } pub(super) fn load_sample_cir_from_meta(meta: &SequenceMeta) -> Result { load_sample_parts(meta, true, false) } pub(super) fn load_sample_ccr_from_meta(meta: &SequenceMeta) -> Result { load_sample_parts(meta, false, true) } fn load_sample_parts( meta: &SequenceMeta, include_cir: bool, include_ccr: bool, ) -> Result { let mut objects = BTreeMap::new(); let mut object_hashes = BTreeSet::new(); let mut rejects = BTreeSet::new(); if include_cir { let cir = decode_cir(&read_file(&meta.cir_path)?).map_err(|e| { format!( "decode CIR failed for sample {} ({}): {e}", meta.raw.run_id, meta.cir_path.display() ) })?; objects = cir .validated_objects() .map(|item| (item.rsync_uri.clone(), hex::encode(&item.sha256))) .collect::>(); object_hashes = objects .iter() .map(|(uri, hash)| object_hash_key(uri, hash)) .collect::>(); rejects = cir .rejected_objects() .map(|item| item.object_uri.clone()) .collect::>(); } let mut vrps = BTreeSet::new(); let mut vaps = BTreeSet::new(); if include_ccr { let ccr = decode_content_info(&read_file(&meta.ccr_path)?).map_err(|e| { format!( "decode CCR failed for sample {} ({}): {e}", meta.raw.run_id, meta.ccr_path.display() ) })?; let (decoded_vrps, decoded_vaps) = decode_ccr_compare_views(&ccr).map_err(|e| { format!( "decode CCR compare views failed for sample {} ({}): {e}", meta.raw.run_id, meta.ccr_path.display() ) })?; vrps = decoded_vrps .into_iter() .map(|row| format!("{}|{}|{}", row.asn, row.ip_prefix, row.max_length)) .collect::>(); vaps = decoded_vaps .into_iter() .map(|row| format!("{}|{}", row.customer_asn, row.providers)) .collect::>(); } Ok(SequenceSample { raw: meta.raw.clone(), validation_time: meta.validation_time, objects, object_hashes, rejects, vrps, vaps, }) } fn validate_raw( path: &Path, line_index: usize, raw: &SequenceItemRaw, side: Side, seen_seq: &mut BTreeSet, ) -> Result<(), String> { if raw.schema_version.unwrap_or(1) != 1 { return Err(format!( "unsupported sequence item schemaVersion in {}:{}", path.display(), line_index + 1 )); } if !seen_seq.insert(raw.seq) { return Err(format!("duplicate seq {} in {}", raw.seq, path.display())); } if let Some(status) = &raw.status && status != "success" { return Err(format!( "sequence sample {} has non-success status: {status}", raw.run_id )); } if raw .side .as_deref() .is_some_and(|item| item != side.as_str()) { return Err(format!( "sequence side field does not match expected side in {}:{}", path.display(), line_index + 1 )); } Ok(()) }