365 lines
10 KiB
Rust
365 lines
10 KiB
Rust
use std::collections::BTreeSet;
|
|
|
|
use super::args::Args;
|
|
use super::io::object_hash_key;
|
|
use super::model::{
|
|
AnalysisResult, DiffEvent, EventOccurrence, SampleRecord, SequenceSample, Side,
|
|
};
|
|
|
|
pub(super) fn analyze_set<F>(
|
|
result: &mut AnalysisResult,
|
|
event_type: &'static str,
|
|
left: &[SequenceSample],
|
|
right: &[SequenceSample],
|
|
extract: F,
|
|
resolved_class: &'static str,
|
|
persistent_class: &'static str,
|
|
args: &Args,
|
|
) where
|
|
F: for<'a> Fn(&'a SequenceSample) -> &'a BTreeSet<String>,
|
|
{
|
|
analyze_direction(
|
|
result,
|
|
event_type,
|
|
Side::Left,
|
|
left,
|
|
right,
|
|
&extract,
|
|
resolved_class,
|
|
persistent_class,
|
|
args,
|
|
);
|
|
analyze_direction(
|
|
result,
|
|
event_type,
|
|
Side::Right,
|
|
right,
|
|
left,
|
|
&extract,
|
|
resolved_class,
|
|
persistent_class,
|
|
args,
|
|
);
|
|
}
|
|
|
|
fn analyze_direction<F>(
|
|
result: &mut AnalysisResult,
|
|
event_type: &'static str,
|
|
source_side: Side,
|
|
source: &[SequenceSample],
|
|
peer: &[SequenceSample],
|
|
extract: &F,
|
|
resolved_class: &'static str,
|
|
persistent_class: &'static str,
|
|
args: &Args,
|
|
) where
|
|
F: for<'a> Fn(&'a SequenceSample) -> &'a BTreeSet<String>,
|
|
{
|
|
for sample in source {
|
|
let source_set = extract(sample);
|
|
for key in source_set {
|
|
if peer_sample_at_seq(peer, sample.raw.seq)
|
|
.is_some_and(|peer_sample| extract(peer_sample).contains(key))
|
|
{
|
|
continue;
|
|
}
|
|
if let Some(matched) = find_future_match(peer, sample, key, extract, args) {
|
|
result.add(
|
|
resolved_class,
|
|
SampleRecord {
|
|
classification: resolved_class,
|
|
event_type,
|
|
key: key.clone(),
|
|
source_side,
|
|
source_seq: sample.raw.seq,
|
|
source_run_id: sample.raw.run_id.clone(),
|
|
matched_seq: Some(matched.seq),
|
|
matched_run_id: Some(matched.run_id),
|
|
note: format!(
|
|
"matched in {} sequence within alignment window",
|
|
matched.side.as_str()
|
|
),
|
|
},
|
|
args.sample_limit,
|
|
);
|
|
} else {
|
|
result.add(
|
|
persistent_class,
|
|
SampleRecord {
|
|
classification: persistent_class,
|
|
event_type,
|
|
key: key.clone(),
|
|
source_side,
|
|
source_seq: sample.raw.seq,
|
|
source_run_id: sample.raw.run_id.clone(),
|
|
matched_seq: None,
|
|
matched_run_id: None,
|
|
note: "no matching event in peer sequence alignment window".to_string(),
|
|
},
|
|
args.sample_limit,
|
|
);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
pub(super) fn analyze_hash_rollover(
|
|
result: &mut AnalysisResult,
|
|
left: &[SequenceSample],
|
|
right: &[SequenceSample],
|
|
args: &Args,
|
|
) {
|
|
for (source_side, source, peer) in [(Side::Left, left, right), (Side::Right, right, left)] {
|
|
for sample in source {
|
|
for (uri, hash) in &sample.objects {
|
|
if peer_sample_at_seq(peer, sample.raw.seq)
|
|
.and_then(|peer_sample| peer_sample.objects.get(uri))
|
|
.is_some_and(|peer_hash| peer_hash == hash)
|
|
{
|
|
continue;
|
|
}
|
|
if let Some(peer_sample) = peer_sample_at_seq(peer, sample.raw.seq)
|
|
&& peer_sample.objects.contains_key(uri)
|
|
&& find_future_hash_match(peer, sample, uri, hash, args).is_some()
|
|
{
|
|
let matched =
|
|
find_future_hash_match(peer, sample, uri, hash, args).expect("match");
|
|
result.add(
|
|
"CONTENT_ROLLOVER_RESOLVED",
|
|
SampleRecord {
|
|
classification: "CONTENT_ROLLOVER_RESOLVED",
|
|
event_type: "object_content_rollover",
|
|
key: object_hash_key(uri, hash),
|
|
source_side,
|
|
source_seq: sample.raw.seq,
|
|
source_run_id: sample.raw.run_id.clone(),
|
|
matched_seq: Some(matched.seq),
|
|
matched_run_id: Some(matched.run_id),
|
|
note: "same URI hash appeared in peer sequence later".to_string(),
|
|
},
|
|
args.sample_limit,
|
|
);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
pub(super) fn collect_persistent_events(
|
|
left: &[SequenceSample],
|
|
right: &[SequenceSample],
|
|
args: &Args,
|
|
) -> Vec<DiffEvent> {
|
|
let mut events = Vec::new();
|
|
collect_persistent_set(
|
|
&mut events,
|
|
"object_uri",
|
|
"PERSISTENT_OBJECT_SET_DIVERGENCE",
|
|
left,
|
|
right,
|
|
|sample| &sample.object_uris,
|
|
args,
|
|
);
|
|
collect_persistent_set(
|
|
&mut events,
|
|
"object_hash",
|
|
"PERSISTENT_CONTENT_DIVERGENCE",
|
|
left,
|
|
right,
|
|
|sample| &sample.object_hashes,
|
|
args,
|
|
);
|
|
collect_persistent_set(
|
|
&mut events,
|
|
"reject_uri",
|
|
"PERSISTENT_REJECT_DIVERGENCE",
|
|
left,
|
|
right,
|
|
|sample| &sample.rejects,
|
|
args,
|
|
);
|
|
collect_persistent_set(
|
|
&mut events,
|
|
"trust_anchor",
|
|
"PERSISTENT_TA_DIFFERENCE",
|
|
left,
|
|
right,
|
|
|sample| &sample.trust_anchors,
|
|
args,
|
|
);
|
|
collect_persistent_set(
|
|
&mut events,
|
|
"vrp_output",
|
|
"PERSISTENT_OUTPUT_DIVERGENCE",
|
|
left,
|
|
right,
|
|
|sample| &sample.vrps,
|
|
args,
|
|
);
|
|
collect_persistent_set(
|
|
&mut events,
|
|
"vap_output",
|
|
"PERSISTENT_OUTPUT_DIVERGENCE",
|
|
left,
|
|
right,
|
|
|sample| &sample.vaps,
|
|
args,
|
|
);
|
|
events
|
|
}
|
|
|
|
fn collect_persistent_set<F>(
|
|
events: &mut Vec<DiffEvent>,
|
|
event_type: &'static str,
|
|
raw_class: &'static str,
|
|
left: &[SequenceSample],
|
|
right: &[SequenceSample],
|
|
extract: F,
|
|
args: &Args,
|
|
) where
|
|
F: for<'a> Fn(&'a SequenceSample) -> &'a BTreeSet<String>,
|
|
{
|
|
collect_persistent_direction(
|
|
events,
|
|
event_type,
|
|
raw_class,
|
|
Side::Left,
|
|
left,
|
|
right,
|
|
&extract,
|
|
args,
|
|
);
|
|
collect_persistent_direction(
|
|
events,
|
|
event_type,
|
|
raw_class,
|
|
Side::Right,
|
|
right,
|
|
left,
|
|
&extract,
|
|
args,
|
|
);
|
|
}
|
|
|
|
fn collect_persistent_direction<F>(
|
|
events: &mut Vec<DiffEvent>,
|
|
event_type: &'static str,
|
|
raw_class: &'static str,
|
|
source_side: Side,
|
|
source: &[SequenceSample],
|
|
peer: &[SequenceSample],
|
|
extract: &F,
|
|
args: &Args,
|
|
) where
|
|
F: for<'a> Fn(&'a SequenceSample) -> &'a BTreeSet<String>,
|
|
{
|
|
for sample in source {
|
|
for key in extract(sample) {
|
|
if peer_sample_at_seq(peer, sample.raw.seq)
|
|
.is_some_and(|peer_sample| extract(peer_sample).contains(key))
|
|
{
|
|
continue;
|
|
}
|
|
if find_future_match(peer, sample, key, extract, args).is_some() {
|
|
continue;
|
|
}
|
|
events.push(DiffEvent {
|
|
event_type,
|
|
raw_class,
|
|
key: key.clone(),
|
|
source_side,
|
|
source_seq: sample.raw.seq,
|
|
source_run_id: sample.raw.run_id.clone(),
|
|
});
|
|
}
|
|
}
|
|
}
|
|
|
|
impl AnalysisResult {
|
|
fn add(&mut self, class: &'static str, record: SampleRecord, sample_limit: usize) {
|
|
let stats = self.stats.entry(class).or_default();
|
|
stats.total += 1;
|
|
if stats.samples.len() < sample_limit {
|
|
stats.samples.push(record);
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Returns the first sample in `peer` whose `raw.seq` equals `seq`, if any.
///
/// The slice is scanned front to back.
pub(super) fn peer_sample_at_seq(peer: &[SequenceSample], seq: u32) -> Option<&SequenceSample> {
    let has_seq = |sample: &&SequenceSample| sample.raw.seq == seq;
    peer.iter().find(has_seq)
}
|
|
|
|
fn find_future_match<F>(
|
|
peer: &[SequenceSample],
|
|
source: &SequenceSample,
|
|
key: &str,
|
|
extract: &F,
|
|
args: &Args,
|
|
) -> Option<EventOccurrence>
|
|
where
|
|
F: for<'a> Fn(&'a SequenceSample) -> &'a BTreeSet<String>,
|
|
{
|
|
peer.iter()
|
|
.filter(|candidate| is_in_alignment_window(source, candidate, args))
|
|
.find(|candidate| extract(candidate).contains(key))
|
|
.map(|candidate| {
|
|
occurrence(
|
|
candidate,
|
|
if candidate.raw.side.as_deref() == Some("left") {
|
|
Side::Left
|
|
} else {
|
|
Side::Right
|
|
},
|
|
)
|
|
})
|
|
}
|
|
|
|
fn find_future_hash_match(
|
|
peer: &[SequenceSample],
|
|
source: &SequenceSample,
|
|
uri: &str,
|
|
hash: &str,
|
|
args: &Args,
|
|
) -> Option<EventOccurrence> {
|
|
peer.iter()
|
|
.filter(|candidate| is_in_alignment_window(source, candidate, args))
|
|
.find(|candidate| {
|
|
candidate
|
|
.objects
|
|
.get(uri)
|
|
.is_some_and(|peer_hash| peer_hash == hash)
|
|
})
|
|
.map(|candidate| {
|
|
occurrence(
|
|
candidate,
|
|
if candidate.raw.side.as_deref() == Some("left") {
|
|
Side::Left
|
|
} else {
|
|
Side::Right
|
|
},
|
|
)
|
|
})
|
|
}
|
|
|
|
fn is_in_alignment_window(
|
|
source: &SequenceSample,
|
|
candidate: &SequenceSample,
|
|
args: &Args,
|
|
) -> bool {
|
|
if candidate.raw.seq < source.raw.seq {
|
|
return false;
|
|
}
|
|
let run_delta = candidate.raw.seq.saturating_sub(source.raw.seq);
|
|
let time_delta = candidate.validation_time - source.validation_time;
|
|
let secs = time_delta.whole_seconds().abs();
|
|
run_delta <= args.align_window_runs || secs <= args.align_window_secs
|
|
}
|
|
|
|
fn occurrence(sample: &SequenceSample, side: Side) -> EventOccurrence {
|
|
EventOccurrence {
|
|
side,
|
|
seq: sample.raw.seq,
|
|
run_id: sample.raw.run_id.clone(),
|
|
}
|
|
}
|