use std::collections::{BTreeMap, BTreeSet}; use super::model::{ChurnRecord, ChurnSummaryRecord, IntraRpChurn, SequenceSample, Side}; pub(super) fn build_intra_rp_churn( left: &[SequenceSample], right: &[SequenceSample], ) -> IntraRpChurn { let left_records = build_side_records(Side::Left, left); let right_records = build_side_records(Side::Right, right); let mut summary = summarize_records(&left_records); summary.extend(summarize_records(&right_records)); IntraRpChurn { left: left_records, right: right_records, summary, } } fn build_side_records(side: Side, samples: &[SequenceSample]) -> Vec { let mut records = Vec::new(); for pair in samples.windows(2) { let from = &pair[0]; let to = &pair[1]; records.push(record_churn( side, from, to, "object", &from.object_hashes, &to.object_hashes, )); let from_output = output_keys(from); let to_output = output_keys(to); records.push(record_churn( side, from, to, "output", &from_output, &to_output, )); records.push(record_churn( side, from, to, "vrp_output", &from.vrps, &to.vrps, )); records.push(record_churn( side, from, to, "vap_output", &from.vaps, &to.vaps, )); records.push(record_churn( side, from, to, "reject", &from.rejects, &to.rejects, )); } records } fn output_keys(sample: &SequenceSample) -> BTreeSet { sample .vrps .iter() .map(|key| format!("vrp|{key}")) .chain(sample.vaps.iter().map(|key| format!("vap|{key}"))) .collect() } fn record_churn( side: Side, from: &SequenceSample, to: &SequenceSample, set_type: &'static str, from_set: &BTreeSet, to_set: &BTreeSet, ) -> ChurnRecord { let added_count = to_set.difference(from_set).count(); let removed_count = from_set.difference(to_set).count(); let changed_abs = added_count + removed_count; let union_count = from_set.union(to_set).count(); let changed_ratio_from = ratio(changed_abs, from_set.len()); let changed_ratio_union = ratio(changed_abs, union_count); ChurnRecord { side, rp_id: from.raw.rp_id.clone(), from_seq: from.raw.seq, to_seq: to.raw.seq, from_run_id: from.raw.run_id.clone(), to_run_id: to.raw.run_id.clone(), set_type, from_count: from_set.len(), to_count: to_set.len(), added_count, removed_count, changed_abs, union_count, changed_ratio_from, changed_ratio_union, } } fn ratio(numerator: usize, denominator: usize) -> f64 { if denominator == 0 { 0.0 } else { numerator as f64 / denominator as f64 } } fn summarize_records(records: &[ChurnRecord]) -> Vec { let mut grouped: BTreeMap<(Side, &'static str), Vec<&ChurnRecord>> = BTreeMap::new(); for record in records { grouped .entry((record.side, record.set_type)) .or_default() .push(record); } grouped .into_iter() .map(|((side, set_type), group)| { let count = group.len() as f64; let max_changed_abs = group .iter() .map(|record| record.changed_abs) .max() .unwrap_or(0); let avg_changed_abs = if count == 0.0 { 0.0 } else { group .iter() .map(|record| record.changed_abs as f64) .sum::() / count }; let max_changed_ratio_from = group .iter() .map(|record| record.changed_ratio_from) .fold(0.0, f64::max); let avg_changed_ratio_from = if count == 0.0 { 0.0 } else { group .iter() .map(|record| record.changed_ratio_from) .sum::() / count }; let max_changed_ratio_union = group .iter() .map(|record| record.changed_ratio_union) .fold(0.0, f64::max); let avg_changed_ratio_union = if count == 0.0 { 0.0 } else { group .iter() .map(|record| record.changed_ratio_union) .sum::() / count }; ChurnSummaryRecord { side, set_type, max_changed_abs, avg_changed_abs, max_changed_ratio_from, avg_changed_ratio_from, max_changed_ratio_union, avg_changed_ratio_union, } }) .collect() }