365 lines
10 KiB
Rust

use std::collections::BTreeSet;
use super::args::Args;
use super::io::object_hash_key;
use super::model::{
AnalysisResult, DiffEvent, EventOccurrence, SampleRecord, SequenceSample, Side,
};
/// Compares one extracted key set between the two sequences in both directions.
///
/// The left sequence is analysed against the right one first, then the right
/// against the left; each pass is delegated to `analyze_direction`, which
/// records its findings in `result`.
///
/// * `event_type` – label stored on every record produced.
/// * `extract` – selects the key set of interest from a sample.
/// * `resolved_class` / `persistent_class` – classifications used for keys that
///   do / do not show up in the peer sequence's alignment window.
/// * `args` – forwarded unchanged to `analyze_direction`.
pub(super) fn analyze_set<F>(
    result: &mut AnalysisResult,
    event_type: &'static str,
    left: &[SequenceSample],
    right: &[SequenceSample],
    extract: F,
    resolved_class: &'static str,
    persistent_class: &'static str,
    args: &Args,
) where
    F: for<'a> Fn(&'a SequenceSample) -> &'a BTreeSet<String>,
{
    // (side being examined, its samples, the samples it is compared against)
    let passes = [(Side::Left, left, right), (Side::Right, right, left)];
    for (source_side, source, peer) in passes {
        analyze_direction(
            result,
            event_type,
            source_side,
            source,
            peer,
            &extract,
            resolved_class,
            persistent_class,
            args,
        );
    }
}
/// Classifies every key of every `source` sample against the `peer` sequence.
///
/// For each key produced by `extract`:
/// * if the peer sample with the same `seq` also contains the key, nothing is
///   recorded;
/// * otherwise, if `find_future_match` locates the key in the peer sequence, a
///   record is added under `resolved_class` carrying the matched seq / run id;
/// * otherwise a record is added under `persistent_class` with no match info.
///
/// Records are added through `AnalysisResult::add` with `args.sample_limit`.
fn analyze_direction<F>(
    result: &mut AnalysisResult,
    event_type: &'static str,
    source_side: Side,
    source: &[SequenceSample],
    peer: &[SequenceSample],
    extract: &F,
    resolved_class: &'static str,
    persistent_class: &'static str,
    args: &Args,
) where
    F: for<'a> Fn(&'a SequenceSample) -> &'a BTreeSet<String>,
{
    for sample in source {
        // The same-seq peer sample does not depend on the key.
        let aligned_peer = peer_sample_at_seq(peer, sample.raw.seq);

        for key in extract(sample) {
            let seen_at_same_seq =
                aligned_peer.is_some_and(|peer_sample| extract(peer_sample).contains(key));
            if seen_at_same_seq {
                continue;
            }

            // Only these four fields differ between the resolved and the
            // persistent outcome; everything else on the record is shared.
            let (classification, matched_seq, matched_run_id, note) =
                match find_future_match(peer, sample, key, extract, args) {
                    Some(hit) => (
                        resolved_class,
                        Some(hit.seq),
                        Some(hit.run_id),
                        format!(
                            "matched in {} sequence within alignment window",
                            hit.side.as_str()
                        ),
                    ),
                    None => (
                        persistent_class,
                        None,
                        None,
                        "no matching event in peer sequence alignment window".to_string(),
                    ),
                };

            let record = SampleRecord {
                classification,
                event_type,
                key: key.clone(),
                source_side,
                source_seq: sample.raw.seq,
                source_run_id: sample.raw.run_id.clone(),
                matched_seq,
                matched_run_id,
                note,
            };
            result.add(classification, record, args.sample_limit);
        }
    }
}
/// Records objects whose content hash differs at the same seq but "rolls over"
/// to the source's hash in the peer sequence's alignment window.
///
/// Both directions are examined (left against right, then right against left).
/// For every `(uri, hash)` entry of a source sample a
/// `CONTENT_ROLLOVER_RESOLVED` record is added when all of the following hold:
/// * the peer sequence has a sample with the same `seq`,
/// * that peer sample contains `uri` with a *different* hash, and
/// * `find_future_hash_match` finds a peer sample holding `uri` with the
///   source's hash.
///
/// Any other situation (no same-seq peer sample, URI absent from it, identical
/// hash, or no match in the window) records nothing.
pub(super) fn analyze_hash_rollover(
    result: &mut AnalysisResult,
    left: &[SequenceSample],
    right: &[SequenceSample],
    args: &Args,
) {
    for (source_side, source, peer) in [(Side::Left, left, right), (Side::Right, right, left)] {
        for sample in source {
            // Resolve the same-seq peer sample once per source sample instead of
            // rescanning the peer slice for every object. Without it no object
            // of this sample can qualify.
            let Some(peer_sample) = peer_sample_at_seq(peer, sample.raw.seq) else {
                continue;
            };

            for (uri, hash) in &sample.objects {
                // The URI must exist on the peer side at this seq ...
                let Some(peer_hash) = peer_sample.objects.get(uri) else {
                    continue;
                };
                // ... with different content; identical hashes are not a rollover.
                if peer_hash == hash {
                    continue;
                }
                // Search the alignment window exactly once and reuse the hit,
                // rather than searching again and unwrapping with `expect`.
                let Some(matched) = find_future_hash_match(peer, sample, uri, hash, args) else {
                    continue;
                };

                result.add(
                    "CONTENT_ROLLOVER_RESOLVED",
                    SampleRecord {
                        classification: "CONTENT_ROLLOVER_RESOLVED",
                        event_type: "object_content_rollover",
                        key: object_hash_key(uri, hash),
                        source_side,
                        source_seq: sample.raw.seq,
                        source_run_id: sample.raw.run_id.clone(),
                        matched_seq: Some(matched.seq),
                        matched_run_id: Some(matched.run_id),
                        note: "same URI hash appeared in peer sequence later".to_string(),
                    },
                    args.sample_limit,
                );
            }
        }
    }
}
pub(super) fn collect_persistent_events(
left: &[SequenceSample],
right: &[SequenceSample],
args: &Args,
) -> Vec<DiffEvent> {
let mut events = Vec::new();
collect_persistent_set(
&mut events,
"object_uri",
"PERSISTENT_OBJECT_SET_DIVERGENCE",
left,
right,
|sample| &sample.object_uris,
args,
);
collect_persistent_set(
&mut events,
"object_hash",
"PERSISTENT_CONTENT_DIVERGENCE",
left,
right,
|sample| &sample.object_hashes,
args,
);
collect_persistent_set(
&mut events,
"reject_uri",
"PERSISTENT_REJECT_DIVERGENCE",
left,
right,
|sample| &sample.rejects,
args,
);
collect_persistent_set(
&mut events,
"trust_anchor",
"PERSISTENT_TA_DIFFERENCE",
left,
right,
|sample| &sample.trust_anchors,
args,
);
collect_persistent_set(
&mut events,
"vrp_output",
"PERSISTENT_OUTPUT_DIVERGENCE",
left,
right,
|sample| &sample.vrps,
args,
);
collect_persistent_set(
&mut events,
"vap_output",
"PERSISTENT_OUTPUT_DIVERGENCE",
left,
right,
|sample| &sample.vaps,
args,
);
events
}
/// Collects persistent divergence events for one key set, in both directions.
///
/// The left sequence is checked against the right one first, then the right
/// against the left; each pass appends to `events` through
/// `collect_persistent_direction`.
fn collect_persistent_set<F>(
    events: &mut Vec<DiffEvent>,
    event_type: &'static str,
    raw_class: &'static str,
    left: &[SequenceSample],
    right: &[SequenceSample],
    extract: F,
    args: &Args,
) where
    F: for<'a> Fn(&'a SequenceSample) -> &'a BTreeSet<String>,
{
    // (side being examined, its samples, the samples it is compared against)
    let passes = [(Side::Left, left, right), (Side::Right, right, left)];
    for (source_side, source, peer) in passes {
        collect_persistent_direction(
            events,
            event_type,
            raw_class,
            source_side,
            source,
            peer,
            &extract,
            args,
        );
    }
}
/// Appends a `DiffEvent` for every key of `source` that the `peer` sequence
/// never matches.
///
/// A key is skipped when the peer sample with the same `seq` contains it, or
/// when `find_future_match` finds it in the peer sequence. Every remaining key
/// yields one event tagged with `event_type`, `raw_class` and `source_side`,
/// in sample order and then key-set order.
fn collect_persistent_direction<F>(
    events: &mut Vec<DiffEvent>,
    event_type: &'static str,
    raw_class: &'static str,
    source_side: Side,
    source: &[SequenceSample],
    peer: &[SequenceSample],
    extract: &F,
    args: &Args,
) where
    F: for<'a> Fn(&'a SequenceSample) -> &'a BTreeSet<String>,
{
    for sample in source {
        // The same-seq peer sample does not depend on the key.
        let aligned_peer = peer_sample_at_seq(peer, sample.raw.seq);

        let unmatched_keys = extract(sample).iter().filter(|key| {
            let seen_at_same_seq =
                aligned_peer.is_some_and(|peer_sample| extract(peer_sample).contains(*key));
            // The window search only runs when the same-seq check failed.
            !seen_at_same_seq
                && find_future_match(peer, sample, key.as_str(), extract, args).is_none()
        });

        events.extend(unmatched_keys.map(|key| DiffEvent {
            event_type,
            raw_class,
            key: key.clone(),
            source_side,
            source_seq: sample.raw.seq,
            source_run_id: sample.raw.run_id.clone(),
        }));
    }
}
impl AnalysisResult {
    /// Counts one occurrence of `class` and keeps `record` as an example.
    ///
    /// The per-class `total` is always incremented; the record itself is only
    /// stored while fewer than `sample_limit` samples are held for that class.
    fn add(&mut self, class: &'static str, record: SampleRecord, sample_limit: usize) {
        let bucket = self.stats.entry(class).or_default();
        bucket.total += 1;

        let has_room = bucket.samples.len() < sample_limit;
        if has_room {
            bucket.samples.push(record);
        }
    }
}
/// Returns the first sample in `peer` whose `raw.seq` equals `seq`, if any.
pub(super) fn peer_sample_at_seq(peer: &[SequenceSample], seq: u32) -> Option<&SequenceSample> {
    for candidate in peer {
        if candidate.raw.seq == seq {
            return Some(candidate);
        }
    }
    None
}
/// Finds the first peer sample inside the alignment window of `source` whose
/// extracted key set contains `key`.
///
/// Returns an `EventOccurrence` describing that sample, or `None` when no peer
/// sample qualifies. The occurrence's side is `Side::Left` when the sample's
/// `raw.side` is exactly `"left"`, and `Side::Right` in every other case
/// (including a missing side).
fn find_future_match<F>(
    peer: &[SequenceSample],
    source: &SequenceSample,
    key: &str,
    extract: &F,
    args: &Args,
) -> Option<EventOccurrence>
where
    F: for<'a> Fn(&'a SequenceSample) -> &'a BTreeSet<String>,
{
    let hit = peer.iter().find(|candidate| {
        is_in_alignment_window(source, candidate, args) && extract(candidate).contains(key)
    })?;

    let side = match hit.raw.side.as_deref() {
        Some("left") => Side::Left,
        _ => Side::Right,
    };
    Some(occurrence(hit, side))
}
/// Finds the first peer sample inside the alignment window of `source` whose
/// `objects` map holds `uri` with exactly `hash`.
///
/// Returns an `EventOccurrence` describing that sample, or `None` when no peer
/// sample qualifies. The occurrence's side is `Side::Left` when the sample's
/// `raw.side` is exactly `"left"`, and `Side::Right` in every other case
/// (including a missing side).
fn find_future_hash_match(
    peer: &[SequenceSample],
    source: &SequenceSample,
    uri: &str,
    hash: &str,
    args: &Args,
) -> Option<EventOccurrence> {
    let hit = peer.iter().find(|candidate| {
        is_in_alignment_window(source, candidate, args)
            && candidate
                .objects
                .get(uri)
                .is_some_and(|peer_hash| peer_hash == hash)
    })?;

    let side = match hit.raw.side.as_deref() {
        Some("left") => Side::Left,
        _ => Side::Right,
    };
    Some(occurrence(hit, side))
}
/// Decides whether `candidate` lies in the alignment window of `source`.
///
/// A candidate with a smaller `seq` than the source is never in the window.
/// Otherwise it qualifies when *either* the seq distance is at most
/// `args.align_window_runs` *or* the absolute difference of the two
/// `validation_time`s, in whole seconds, is at most `args.align_window_secs`.
fn is_in_alignment_window(
    source: &SequenceSample,
    candidate: &SequenceSample,
    args: &Args,
) -> bool {
    let (source_seq, candidate_seq) = (source.raw.seq, candidate.raw.seq);
    if candidate_seq < source_seq {
        return false;
    }

    let within_runs = candidate_seq.saturating_sub(source_seq) <= args.align_window_runs;

    let elapsed_secs = (candidate.validation_time - source.validation_time)
        .whole_seconds()
        .abs();
    let within_secs = elapsed_secs <= args.align_window_secs;

    within_runs || within_secs
}
/// Builds an `EventOccurrence` for `sample`, copying its `seq` and cloning its
/// `run_id`, and tagging it with the given `side`.
fn occurrence(sample: &SequenceSample, side: Side) -> EventOccurrence {
    let raw = &sample.raw;
    let seq = raw.seq;
    let run_id = raw.run_id.clone();
    EventOccurrence { side, seq, run_id }
}