186 lines
5.2 KiB
Rust
186 lines
5.2 KiB
Rust
use std::collections::{BTreeMap, BTreeSet};
|
|
|
|
use super::model::{ChurnRecord, ChurnSummaryRecord, IntraRpChurn, SequenceSample, Side};
|
|
|
|
pub(super) fn build_intra_rp_churn(
|
|
left: &[SequenceSample],
|
|
right: &[SequenceSample],
|
|
) -> IntraRpChurn {
|
|
let left_records = build_side_records(Side::Left, left);
|
|
let right_records = build_side_records(Side::Right, right);
|
|
let mut summary = summarize_records(&left_records);
|
|
summary.extend(summarize_records(&right_records));
|
|
IntraRpChurn {
|
|
left: left_records,
|
|
right: right_records,
|
|
summary,
|
|
}
|
|
}
|
|
|
|
fn build_side_records(side: Side, samples: &[SequenceSample]) -> Vec<ChurnRecord> {
|
|
let mut records = Vec::new();
|
|
for pair in samples.windows(2) {
|
|
let from = &pair[0];
|
|
let to = &pair[1];
|
|
records.push(record_churn(
|
|
side,
|
|
from,
|
|
to,
|
|
"object",
|
|
&from.object_hashes,
|
|
&to.object_hashes,
|
|
));
|
|
let from_output = output_keys(from);
|
|
let to_output = output_keys(to);
|
|
records.push(record_churn(
|
|
side,
|
|
from,
|
|
to,
|
|
"output",
|
|
&from_output,
|
|
&to_output,
|
|
));
|
|
records.push(record_churn(
|
|
side,
|
|
from,
|
|
to,
|
|
"vrp_output",
|
|
&from.vrps,
|
|
&to.vrps,
|
|
));
|
|
records.push(record_churn(
|
|
side,
|
|
from,
|
|
to,
|
|
"vap_output",
|
|
&from.vaps,
|
|
&to.vaps,
|
|
));
|
|
records.push(record_churn(
|
|
side,
|
|
from,
|
|
to,
|
|
"reject",
|
|
&from.rejects,
|
|
&to.rejects,
|
|
));
|
|
}
|
|
records
|
|
}
|
|
|
|
fn output_keys(sample: &SequenceSample) -> BTreeSet<String> {
|
|
sample
|
|
.vrps
|
|
.iter()
|
|
.map(|key| format!("vrp|{key}"))
|
|
.chain(sample.vaps.iter().map(|key| format!("vap|{key}")))
|
|
.collect()
|
|
}
|
|
|
|
fn record_churn(
|
|
side: Side,
|
|
from: &SequenceSample,
|
|
to: &SequenceSample,
|
|
set_type: &'static str,
|
|
from_set: &BTreeSet<String>,
|
|
to_set: &BTreeSet<String>,
|
|
) -> ChurnRecord {
|
|
let added_count = to_set.difference(from_set).count();
|
|
let removed_count = from_set.difference(to_set).count();
|
|
let changed_abs = added_count + removed_count;
|
|
let union_count = from_set.union(to_set).count();
|
|
let changed_ratio_from = ratio(changed_abs, from_set.len());
|
|
let changed_ratio_union = ratio(changed_abs, union_count);
|
|
ChurnRecord {
|
|
side,
|
|
rp_id: from.raw.rp_id.clone(),
|
|
from_seq: from.raw.seq,
|
|
to_seq: to.raw.seq,
|
|
from_run_id: from.raw.run_id.clone(),
|
|
to_run_id: to.raw.run_id.clone(),
|
|
set_type,
|
|
from_count: from_set.len(),
|
|
to_count: to_set.len(),
|
|
added_count,
|
|
removed_count,
|
|
changed_abs,
|
|
union_count,
|
|
changed_ratio_from,
|
|
changed_ratio_union,
|
|
}
|
|
}
|
|
|
|
/// Returns `numerator / denominator` as an `f64`.
///
/// A zero denominator yields `0.0` rather than `NaN` or infinity.
fn ratio(numerator: usize, denominator: usize) -> f64 {
    match denominator {
        0 => 0.0,
        nonzero => numerator as f64 / nonzero as f64,
    }
}
|
|
|
|
fn summarize_records(records: &[ChurnRecord]) -> Vec<ChurnSummaryRecord> {
|
|
let mut grouped: BTreeMap<(Side, &'static str), Vec<&ChurnRecord>> = BTreeMap::new();
|
|
for record in records {
|
|
grouped
|
|
.entry((record.side, record.set_type))
|
|
.or_default()
|
|
.push(record);
|
|
}
|
|
grouped
|
|
.into_iter()
|
|
.map(|((side, set_type), group)| {
|
|
let count = group.len() as f64;
|
|
let max_changed_abs = group
|
|
.iter()
|
|
.map(|record| record.changed_abs)
|
|
.max()
|
|
.unwrap_or(0);
|
|
let avg_changed_abs = if count == 0.0 {
|
|
0.0
|
|
} else {
|
|
group
|
|
.iter()
|
|
.map(|record| record.changed_abs as f64)
|
|
.sum::<f64>()
|
|
/ count
|
|
};
|
|
let max_changed_ratio_from = group
|
|
.iter()
|
|
.map(|record| record.changed_ratio_from)
|
|
.fold(0.0, f64::max);
|
|
let avg_changed_ratio_from = if count == 0.0 {
|
|
0.0
|
|
} else {
|
|
group
|
|
.iter()
|
|
.map(|record| record.changed_ratio_from)
|
|
.sum::<f64>()
|
|
/ count
|
|
};
|
|
let max_changed_ratio_union = group
|
|
.iter()
|
|
.map(|record| record.changed_ratio_union)
|
|
.fold(0.0, f64::max);
|
|
let avg_changed_ratio_union = if count == 0.0 {
|
|
0.0
|
|
} else {
|
|
group
|
|
.iter()
|
|
.map(|record| record.changed_ratio_union)
|
|
.sum::<f64>()
|
|
/ count
|
|
};
|
|
ChurnSummaryRecord {
|
|
side,
|
|
set_type,
|
|
max_changed_abs,
|
|
avg_changed_abs,
|
|
max_changed_ratio_from,
|
|
avg_changed_ratio_from,
|
|
max_changed_ratio_union,
|
|
avg_changed_ratio_union,
|
|
}
|
|
})
|
|
.collect()
|
|
}
|