186 lines
5.2 KiB
Rust

use std::collections::{BTreeMap, BTreeSet};
use super::model::{ChurnRecord, ChurnSummaryRecord, IntraRpChurn, SequenceSample, Side};
/// Builds the churn report for both sides of a comparison.
///
/// Each side's samples are turned into per-transition churn records via
/// `build_side_records`. The returned `summary` holds the summaries of the
/// left records followed by the summaries of the right records.
pub(super) fn build_intra_rp_churn(
    left: &[SequenceSample],
    right: &[SequenceSample],
) -> IntraRpChurn {
    let left_records = build_side_records(Side::Left, left);
    let right_records = build_side_records(Side::Right, right);

    // Summaries must be computed before the record vectors are moved into
    // the result; left-side entries come first, then right-side entries.
    let summary = summarize_records(&left_records)
        .into_iter()
        .chain(summarize_records(&right_records))
        .collect();

    IntraRpChurn {
        left: left_records,
        right: right_records,
        summary,
    }
}
/// Produces the churn records for one side.
///
/// Every pair of adjacent samples yields exactly five records, in this order:
/// `"object"`, `"output"`, `"vrp_output"`, `"vap_output"`, `"reject"`.
/// With fewer than two samples there are no adjacent pairs, so the result is
/// empty.
fn build_side_records(side: Side, samples: &[SequenceSample]) -> Vec<ChurnRecord> {
    samples
        .windows(2)
        .flat_map(|window| {
            let (prev, next) = (&window[0], &window[1]);

            // The combined "output" set is derived from the VRP and VAP sets
            // of each sample, so it has to be materialised first.
            let prev_output = output_keys(prev);
            let next_output = output_keys(next);

            [
                record_churn(
                    side,
                    prev,
                    next,
                    "object",
                    &prev.object_hashes,
                    &next.object_hashes,
                ),
                record_churn(side, prev, next, "output", &prev_output, &next_output),
                record_churn(side, prev, next, "vrp_output", &prev.vrps, &next.vrps),
                record_churn(side, prev, next, "vap_output", &prev.vaps, &next.vaps),
                record_churn(side, prev, next, "reject", &prev.rejects, &next.rejects),
            ]
        })
        .collect()
}
/// Returns the combined output key set of a sample.
///
/// Each entry of `sample.vrps` is prefixed with `vrp|` and each entry of
/// `sample.vaps` with `vap|`, so keys coming from the two collections stay
/// distinguishable inside the single resulting set.
fn output_keys(sample: &SequenceSample) -> BTreeSet<String> {
    let mut keys = BTreeSet::new();
    keys.extend(sample.vrps.iter().map(|vrp| format!("vrp|{vrp}")));
    keys.extend(sample.vaps.iter().map(|vap| format!("vap|{vap}")));
    keys
}
/// Builds one churn record describing how a key set changed between two
/// samples.
///
/// * `side` and `set_type` are copied into the record unchanged.
/// * `from` / `to` supply the identifying metadata: `rp_id` is taken from
///   `from`, sequence numbers and run ids from the respective sample.
/// * `from_set` / `to_set` are the key sets being compared.
///
/// The record carries the set sizes, the number of added and removed keys,
/// their sum (`changed_abs`), the size of the union, and `changed_abs`
/// divided by the size of `from_set` and by the size of the union. Both
/// ratios are computed by `ratio`, which yields `0.0` for a zero denominator.
fn record_churn(
    side: Side,
    from: &SequenceSample,
    to: &SequenceSample,
    set_type: &'static str,
    from_set: &BTreeSet<String>,
    to_set: &BTreeSet<String>,
) -> ChurnRecord {
    let from_count = from_set.len();
    let to_count = to_set.len();

    // Keys present only in the newer set / only in the older set.
    let added_count = to_set
        .iter()
        .filter(|key| !from_set.contains(*key))
        .count();
    let removed_count = from_set
        .iter()
        .filter(|key| !to_set.contains(*key))
        .count();
    let changed_abs = added_count + removed_count;

    // |from ∪ to| = |from| + |to \ from|.
    let union_count = from_count + added_count;

    ChurnRecord {
        side,
        rp_id: from.raw.rp_id.clone(),
        from_seq: from.raw.seq,
        to_seq: to.raw.seq,
        from_run_id: from.raw.run_id.clone(),
        to_run_id: to.raw.run_id.clone(),
        set_type,
        from_count,
        to_count,
        added_count,
        removed_count,
        changed_abs,
        union_count,
        changed_ratio_from: ratio(changed_abs, from_count),
        changed_ratio_union: ratio(changed_abs, union_count),
    }
}
/// Divides `numerator` by `denominator` as floating-point values.
///
/// Returns `0.0` when `denominator` is zero instead of producing an
/// infinity or NaN.
fn ratio(numerator: usize, denominator: usize) -> f64 {
    match denominator {
        0 => 0.0,
        nonzero => numerator as f64 / nonzero as f64,
    }
}
/// Aggregates churn records into one summary per `(side, set_type)` group.
///
/// For each group the summary reports the maximum and the arithmetic mean of
/// `changed_abs`, `changed_ratio_from` and `changed_ratio_union`. Summaries
/// are returned in the ordering of the `(side, set_type)` keys of the
/// `BTreeMap` used for grouping. An empty input produces an empty vector.
fn summarize_records(records: &[ChurnRecord]) -> Vec<ChurnSummaryRecord> {
    /// Arithmetic mean of `values`, given how many there are; `0.0` for none.
    fn mean(values: impl Iterator<Item = f64>, len: usize) -> f64 {
        if len == 0 {
            return 0.0;
        }
        values.sum::<f64>() / len as f64
    }

    /// Largest of `values`, never less than the `0.0` starting point.
    fn peak(values: impl Iterator<Item = f64>) -> f64 {
        values.fold(0.0, f64::max)
    }

    // Bucket the records by (side, set_type), keeping input order per bucket.
    let mut buckets: BTreeMap<(Side, &'static str), Vec<&ChurnRecord>> = BTreeMap::new();
    for record in records {
        buckets
            .entry((record.side, record.set_type))
            .or_default()
            .push(record);
    }

    let mut summaries = Vec::with_capacity(buckets.len());
    for ((side, set_type), members) in buckets {
        let len = members.len();
        summaries.push(ChurnSummaryRecord {
            side,
            set_type,
            max_changed_abs: members
                .iter()
                .map(|member| member.changed_abs)
                .max()
                .unwrap_or(0),
            avg_changed_abs: mean(members.iter().map(|member| member.changed_abs as f64), len),
            max_changed_ratio_from: peak(members.iter().map(|member| member.changed_ratio_from)),
            avg_changed_ratio_from: mean(
                members.iter().map(|member| member.changed_ratio_from),
                len,
            ),
            max_changed_ratio_union: peak(
                members.iter().map(|member| member.changed_ratio_union),
            ),
            avg_changed_ratio_union: mean(
                members.iter().map(|member| member.changed_ratio_union),
                len,
            ),
        });
    }
    summaries
}