20260601 精简sequence triage并增加churn统计
This commit is contained in:
parent
bf8c924326
commit
ae00e676d7
@ -1,18 +1,16 @@
|
|||||||
mod adjusted;
|
|
||||||
mod analysis;
|
|
||||||
mod args;
|
mod args;
|
||||||
|
mod churn;
|
||||||
mod io;
|
mod io;
|
||||||
mod loader;
|
mod loader;
|
||||||
mod model;
|
mod model;
|
||||||
mod output;
|
mod output;
|
||||||
mod sandwich;
|
mod sandwich;
|
||||||
|
|
||||||
use adjusted::build_adjusted_analysis;
|
|
||||||
use analysis::{analyze_hash_rollover, analyze_set, collect_persistent_events};
|
|
||||||
use args::{Args, parse_args};
|
use args::{Args, parse_args};
|
||||||
|
use churn::build_intra_rp_churn;
|
||||||
use loader::load_sequence;
|
use loader::load_sequence;
|
||||||
use model::{AnalysisResult, Side};
|
use model::Side;
|
||||||
use output::{build_output, write_json, write_markdown, write_samples_jsonl};
|
use output::{build_output, write_json, write_markdown};
|
||||||
use sandwich::build_sandwich_analysis;
|
use sandwich::build_sandwich_analysis;
|
||||||
|
|
||||||
pub fn main_entry() -> Result<(), String> {
|
pub fn main_entry() -> Result<(), String> {
|
||||||
@ -33,76 +31,11 @@ fn run(args: Args) -> Result<(), String> {
|
|||||||
return Err("left and right sequences must both contain at least one sample".into());
|
return Err("left and right sequences must both contain at least one sample".into());
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut result = AnalysisResult::default();
|
|
||||||
analyze_set(
|
|
||||||
&mut result,
|
|
||||||
"object_uri",
|
|
||||||
&left,
|
|
||||||
&right,
|
|
||||||
|sample| &sample.object_uris,
|
|
||||||
"TEMPORAL_LAG_RESOLVED",
|
|
||||||
"PERSISTENT_OBJECT_SET_DIVERGENCE",
|
|
||||||
&args,
|
|
||||||
);
|
|
||||||
analyze_set(
|
|
||||||
&mut result,
|
|
||||||
"object_hash",
|
|
||||||
&left,
|
|
||||||
&right,
|
|
||||||
|sample| &sample.object_hashes,
|
|
||||||
"TEMPORAL_LAG_RESOLVED",
|
|
||||||
"PERSISTENT_CONTENT_DIVERGENCE",
|
|
||||||
&args,
|
|
||||||
);
|
|
||||||
analyze_hash_rollover(&mut result, &left, &right, &args);
|
|
||||||
analyze_set(
|
|
||||||
&mut result,
|
|
||||||
"reject_uri",
|
|
||||||
&left,
|
|
||||||
&right,
|
|
||||||
|sample| &sample.rejects,
|
|
||||||
"TEMPORAL_LAG_RESOLVED",
|
|
||||||
"PERSISTENT_REJECT_DIVERGENCE",
|
|
||||||
&args,
|
|
||||||
);
|
|
||||||
analyze_set(
|
|
||||||
&mut result,
|
|
||||||
"trust_anchor",
|
|
||||||
&left,
|
|
||||||
&right,
|
|
||||||
|sample| &sample.trust_anchors,
|
|
||||||
"TEMPORAL_LAG_RESOLVED",
|
|
||||||
"PERSISTENT_TA_DIFFERENCE",
|
|
||||||
&args,
|
|
||||||
);
|
|
||||||
analyze_set(
|
|
||||||
&mut result,
|
|
||||||
"vrp_output",
|
|
||||||
&left,
|
|
||||||
&right,
|
|
||||||
|sample| &sample.vrps,
|
|
||||||
"TEMPORAL_LAG_RESOLVED",
|
|
||||||
"PERSISTENT_OUTPUT_DIVERGENCE",
|
|
||||||
&args,
|
|
||||||
);
|
|
||||||
analyze_set(
|
|
||||||
&mut result,
|
|
||||||
"vap_output",
|
|
||||||
&left,
|
|
||||||
&right,
|
|
||||||
|sample| &sample.vaps,
|
|
||||||
"TEMPORAL_LAG_RESOLVED",
|
|
||||||
"PERSISTENT_OUTPUT_DIVERGENCE",
|
|
||||||
&args,
|
|
||||||
);
|
|
||||||
|
|
||||||
let persistent_events = collect_persistent_events(&left, &right, &args);
|
|
||||||
let adjusted = build_adjusted_analysis(&args, &left, &right, &persistent_events);
|
|
||||||
let sandwich = build_sandwich_analysis(&args, &left, &right);
|
let sandwich = build_sandwich_analysis(&args, &left, &right);
|
||||||
let output = build_output(&args, &left, &right, &result, &adjusted, &sandwich);
|
let churn = build_intra_rp_churn(&left, &right);
|
||||||
|
let output = build_output(&args, &left, &right, &sandwich, &churn);
|
||||||
write_json(&args.out_dir.join("sequence-triage.json"), &output)?;
|
write_json(&args.out_dir.join("sequence-triage.json"), &output)?;
|
||||||
write_markdown(&args.out_dir.join("sequence-triage.md"), &output)?;
|
write_markdown(&args.out_dir.join("sequence-triage.md"), &output)?;
|
||||||
write_samples_jsonl(&args.out_dir.join("sequence-diff-samples.jsonl"), &result)?;
|
|
||||||
println!("{}", args.out_dir.display());
|
println!("{}", args.out_dir.display());
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
@ -147,7 +80,7 @@ mod tests {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn run_classifies_temporal_and_persistent_differences() {
|
fn run_outputs_sandwich_only_schema_and_churn() {
|
||||||
let temp = tempfile::tempdir().expect("tempdir");
|
let temp = tempfile::tempdir().expect("tempdir");
|
||||||
let root = temp.path();
|
let root = temp.path();
|
||||||
write_sample(
|
write_sample(
|
||||||
@ -156,7 +89,7 @@ mod tests {
|
|||||||
"left",
|
"left",
|
||||||
1,
|
1,
|
||||||
&[object("rsync://example.net/a.roa", 0x11)],
|
&[object("rsync://example.net/a.roa", 0x11)],
|
||||||
&[],
|
&["rsync://example.net/reject-old.roa"],
|
||||||
64496,
|
64496,
|
||||||
);
|
);
|
||||||
write_sample(
|
write_sample(
|
||||||
@ -213,12 +146,29 @@ mod tests {
|
|||||||
&std::fs::read_to_string(root.join("out/sequence-triage.json")).unwrap(),
|
&std::fs::read_to_string(root.join("out/sequence-triage.json")).unwrap(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
assert!(class_count(&output, "TEMPORAL_LAG_RESOLVED") > 0);
|
assert_eq!(output["schemaVersion"].as_u64(), Some(3));
|
||||||
assert!(class_count(&output, "PERSISTENT_OBJECT_SET_DIVERGENCE") > 0);
|
assert!(output.get("classificationCounts").is_none());
|
||||||
|
assert!(output.get("adjusted").is_none());
|
||||||
|
assert_eq!(
|
||||||
|
churn_record(&output, "left", "object", "left-1", "left-2")["changedAbs"].as_u64(),
|
||||||
|
Some(1)
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
churn_record(&output, "left", "output", "left-1", "left-2")["changedAbs"].as_u64(),
|
||||||
|
Some(2)
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
churn_record(&output, "right", "object", "right-1", "right-2")["changedAbs"].as_u64(),
|
||||||
|
Some(1)
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
churn_record(&output, "left", "reject", "left-1", "left-2")["changedAbs"].as_u64(),
|
||||||
|
Some(1)
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn run_classifies_reject_and_output_divergence() {
|
fn run_single_sample_keeps_legacy_sections_absent() {
|
||||||
let temp = tempfile::tempdir().expect("tempdir");
|
let temp = tempfile::tempdir().expect("tempdir");
|
||||||
let root = temp.path();
|
let root = temp.path();
|
||||||
let objects = [object("rsync://example.net/a.roa", 0x11)];
|
let objects = [object("rsync://example.net/a.roa", 0x11)];
|
||||||
@ -258,12 +208,19 @@ mod tests {
|
|||||||
&std::fs::read_to_string(root.join("out/sequence-triage.json")).unwrap(),
|
&std::fs::read_to_string(root.join("out/sequence-triage.json")).unwrap(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
assert!(class_count(&output, "PERSISTENT_REJECT_DIVERGENCE") > 0);
|
assert_eq!(output["schemaVersion"].as_u64(), Some(3));
|
||||||
assert!(class_count(&output, "PERSISTENT_OUTPUT_DIVERGENCE") > 0);
|
assert!(output.get("classificationCounts").is_none());
|
||||||
|
assert!(output.get("adjusted").is_none());
|
||||||
|
assert_eq!(
|
||||||
|
output["sandwich"]["totals"]["occurrences"].as_u64(),
|
||||||
|
Some(0)
|
||||||
|
);
|
||||||
|
assert_eq!(output["intraRpChurn"]["left"].as_array().unwrap().len(), 0);
|
||||||
|
assert_eq!(output["intraRpChurn"]["right"].as_array().unwrap().len(), 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn run_adjusted_classifies_leading_content_rollover() {
|
fn run_content_rollover_fixture_keeps_legacy_sections_absent() {
|
||||||
let temp = tempfile::tempdir().expect("tempdir");
|
let temp = tempfile::tempdir().expect("tempdir");
|
||||||
let root = temp.path();
|
let root = temp.path();
|
||||||
let uri = "rsync://example.net/a.roa";
|
let uri = "rsync://example.net/a.roa";
|
||||||
@ -303,21 +260,13 @@ mod tests {
|
|||||||
&std::fs::read_to_string(root.join("out/sequence-triage.json")).unwrap(),
|
&std::fs::read_to_string(root.join("out/sequence-triage.json")).unwrap(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
assert!(adjusted_class_occurrences(&output, "EDGE_LEADING_CONTENT_ROLLOVER") > 0);
|
assert_eq!(output["schemaVersion"].as_u64(), Some(3));
|
||||||
assert_eq!(
|
assert!(output.get("classificationCounts").is_none());
|
||||||
adjusted_class_occurrences(&output, "STABLE_CONTENT_DIVERGENCE"),
|
assert!(output.get("adjusted").is_none());
|
||||||
0
|
|
||||||
);
|
|
||||||
assert_eq!(
|
|
||||||
output["adjusted"]["adjustedStablePersistent"]["occurrences"]
|
|
||||||
.as_u64()
|
|
||||||
.unwrap(),
|
|
||||||
0
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn run_adjusted_classifies_stable_middle_content_divergence() {
|
fn run_middle_content_fixture_keeps_legacy_sections_absent() {
|
||||||
let temp = tempfile::tempdir().expect("tempdir");
|
let temp = tempfile::tempdir().expect("tempdir");
|
||||||
let root = temp.path();
|
let root = temp.path();
|
||||||
let uri = "rsync://example.net/a.roa";
|
let uri = "rsync://example.net/a.roa";
|
||||||
@ -375,17 +324,13 @@ mod tests {
|
|||||||
&std::fs::read_to_string(root.join("out/sequence-triage.json")).unwrap(),
|
&std::fs::read_to_string(root.join("out/sequence-triage.json")).unwrap(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
assert!(adjusted_class_occurrences(&output, "STABLE_CONTENT_DIVERGENCE") > 0);
|
assert_eq!(output["schemaVersion"].as_u64(), Some(3));
|
||||||
assert_eq!(
|
assert!(output.get("classificationCounts").is_none());
|
||||||
output["adjusted"]["adjustedStablePersistent"]["occurrences"]
|
assert!(output.get("adjusted").is_none());
|
||||||
.as_u64()
|
|
||||||
.unwrap(),
|
|
||||||
2
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn run_adjusted_filters_trailing_output() {
|
fn run_trailing_output_fixture_keeps_legacy_sections_absent() {
|
||||||
let temp = tempfile::tempdir().expect("tempdir");
|
let temp = tempfile::tempdir().expect("tempdir");
|
||||||
let root = temp.path();
|
let root = temp.path();
|
||||||
let objects = [object("rsync://example.net/a.roa", 0x11)];
|
let objects = [object("rsync://example.net/a.roa", 0x11)];
|
||||||
@ -425,15 +370,13 @@ mod tests {
|
|||||||
&std::fs::read_to_string(root.join("out/sequence-triage.json")).unwrap(),
|
&std::fs::read_to_string(root.join("out/sequence-triage.json")).unwrap(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
assert!(adjusted_class_occurrences(&output, "EDGE_TRAILING_UNRESOLVED") > 0);
|
assert_eq!(output["schemaVersion"].as_u64(), Some(3));
|
||||||
assert_eq!(
|
assert!(output.get("classificationCounts").is_none());
|
||||||
adjusted_class_occurrences(&output, "STABLE_OUTPUT_DIVERGENCE"),
|
assert!(output.get("adjusted").is_none());
|
||||||
0
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn run_groups_stable_object_events_by_physical_object() {
|
fn run_object_group_fixture_keeps_legacy_sections_absent() {
|
||||||
let temp = tempfile::tempdir().expect("tempdir");
|
let temp = tempfile::tempdir().expect("tempdir");
|
||||||
let root = temp.path();
|
let root = temp.path();
|
||||||
let missing = "rsync://example.net/repo/pp/a.roa";
|
let missing = "rsync://example.net/repo/pp/a.roa";
|
||||||
@ -512,21 +455,9 @@ mod tests {
|
|||||||
&std::fs::read_to_string(root.join("out/sequence-triage.json")).unwrap(),
|
&std::fs::read_to_string(root.join("out/sequence-triage.json")).unwrap(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
let groups = output["adjusted"]["stableObjectGroups"].as_array().unwrap();
|
assert_eq!(output["schemaVersion"].as_u64(), Some(3));
|
||||||
assert_eq!(groups.len(), 1);
|
assert!(output.get("classificationCounts").is_none());
|
||||||
assert_eq!(groups[0]["eventCount"].as_u64(), Some(2));
|
assert!(output.get("adjusted").is_none());
|
||||||
assert_eq!(groups[0]["physicalObjectCount"].as_u64(), Some(1));
|
|
||||||
assert_eq!(
|
|
||||||
groups[0]["publicationPoint"].as_str(),
|
|
||||||
Some("rsync://example.net/repo/pp/")
|
|
||||||
);
|
|
||||||
assert_eq!(
|
|
||||||
groups[0]["physicalObjects"][0]["eventTypes"]
|
|
||||||
.as_array()
|
|
||||||
.unwrap()
|
|
||||||
.len(),
|
|
||||||
2
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@ -600,16 +531,10 @@ mod tests {
|
|||||||
sandwich_class_occurrences(&output, "PEER_MISSING_STABLE_OUTPUT"),
|
sandwich_class_occurrences(&output, "PEER_MISSING_STABLE_OUTPUT"),
|
||||||
2
|
2
|
||||||
);
|
);
|
||||||
}
|
assert_eq!(output["sandwich"]["heatmap"].as_array().unwrap().len(), 1);
|
||||||
|
assert_eq!(output["sandwich"]["heatmap"][0]["total"].as_u64(), Some(5));
|
||||||
fn class_count(output: &Value, class: &str) -> u64 {
|
assert!(output.get("classificationCounts").is_none());
|
||||||
output["classificationCounts"]
|
assert!(output.get("adjusted").is_none());
|
||||||
.as_array()
|
|
||||||
.unwrap()
|
|
||||||
.iter()
|
|
||||||
.find(|item| item["classification"].as_str() == Some(class))
|
|
||||||
.and_then(|item| item["count"].as_u64())
|
|
||||||
.unwrap_or(0)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn sandwich_class_occurrences(output: &Value, class: &str) -> u64 {
|
fn sandwich_class_occurrences(output: &Value, class: &str) -> u64 {
|
||||||
@ -622,14 +547,23 @@ mod tests {
|
|||||||
.unwrap_or(0)
|
.unwrap_or(0)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn adjusted_class_occurrences(output: &Value, class: &str) -> u64 {
|
fn churn_record<'a>(
|
||||||
output["adjusted"]["classificationCounts"]
|
output: &'a Value,
|
||||||
|
side: &str,
|
||||||
|
set_type: &str,
|
||||||
|
from_run_id: &str,
|
||||||
|
to_run_id: &str,
|
||||||
|
) -> &'a Value {
|
||||||
|
output["intraRpChurn"][side]
|
||||||
.as_array()
|
.as_array()
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.iter()
|
.iter()
|
||||||
.find(|item| item["classification"].as_str() == Some(class))
|
.find(|item| {
|
||||||
.and_then(|item| item["occurrences"].as_u64())
|
item["setType"].as_str() == Some(set_type)
|
||||||
.unwrap_or(0)
|
&& item["fromRunId"].as_str() == Some(from_run_id)
|
||||||
|
&& item["toRunId"].as_str() == Some(to_run_id)
|
||||||
|
})
|
||||||
|
.unwrap()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn object(uri: &str, byte: u8) -> CirObject {
|
fn object(uri: &str, byte: u8) -> CirObject {
|
||||||
|
|||||||
@ -1,503 +0,0 @@
|
|||||||
use std::collections::{BTreeMap, BTreeSet};
|
|
||||||
|
|
||||||
use serde_json::{Value, json};
|
|
||||||
|
|
||||||
use super::analysis::peer_sample_at_seq;
|
|
||||||
use super::args::Args;
|
|
||||||
use super::io::{format_time, path_string};
|
|
||||||
use super::model::{
|
|
||||||
AdjustedAnalysis, AdjustedRecord, DiffEvent, EdgePosition, SequenceSample, Side,
|
|
||||||
};
|
|
||||||
|
|
||||||
pub(super) fn build_adjusted_analysis(
|
|
||||||
args: &Args,
|
|
||||||
left: &[SequenceSample],
|
|
||||||
right: &[SequenceSample],
|
|
||||||
events: &[DiffEvent],
|
|
||||||
) -> AdjustedAnalysis {
|
|
||||||
let mut analysis = AdjustedAnalysis {
|
|
||||||
raw_persistent_occurrences: events.len(),
|
|
||||||
raw_persistent_unique_keys: unique_event_count(events),
|
|
||||||
..Default::default()
|
|
||||||
};
|
|
||||||
let mut edge_filtered_unique = BTreeSet::new();
|
|
||||||
|
|
||||||
for event in events {
|
|
||||||
let source_samples = samples_for_side(event.source_side, left, right);
|
|
||||||
let peer_samples = samples_for_side(opposite_side(event.source_side), left, right);
|
|
||||||
let edge = edge_position(source_samples, event.source_seq, args);
|
|
||||||
if edge == EdgePosition::Stable {
|
|
||||||
analysis.edge_filtered_occurrences += 1;
|
|
||||||
edge_filtered_unique.insert(event_identity(event));
|
|
||||||
}
|
|
||||||
let (classification, note) =
|
|
||||||
adjusted_classification(event, edge, source_samples, peer_samples);
|
|
||||||
analysis.add(
|
|
||||||
classification,
|
|
||||||
AdjustedRecord {
|
|
||||||
classification,
|
|
||||||
event_type: event.event_type,
|
|
||||||
key: event.key.clone(),
|
|
||||||
source_side: event.source_side,
|
|
||||||
source_seq: event.source_seq,
|
|
||||||
source_run_id: event.source_run_id.clone(),
|
|
||||||
note,
|
|
||||||
},
|
|
||||||
args.sample_limit,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
analysis.edge_filtered_unique_keys = edge_filtered_unique.len();
|
|
||||||
let mut stable_unique = BTreeSet::new();
|
|
||||||
for (class, stats) in &analysis.stats {
|
|
||||||
if class.starts_with("STABLE_") {
|
|
||||||
analysis.adjusted_stable_occurrences += stats.total;
|
|
||||||
stable_unique.extend(stats.unique_keys.iter().cloned());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
analysis.adjusted_stable_unique_keys = stable_unique.len();
|
|
||||||
let timeline_limit = if args.timeline_sample_limit == 0 {
|
|
||||||
args.sample_limit
|
|
||||||
} else {
|
|
||||||
args.timeline_sample_limit
|
|
||||||
};
|
|
||||||
analysis.uri_timeline_samples =
|
|
||||||
build_uri_timeline_samples(left, right, &analysis, timeline_limit);
|
|
||||||
analysis.stable_object_groups =
|
|
||||||
build_stable_object_groups(&analysis, left, right, timeline_limit);
|
|
||||||
analysis
|
|
||||||
}
|
|
||||||
|
|
||||||
fn adjusted_classification(
|
|
||||||
event: &DiffEvent,
|
|
||||||
edge: EdgePosition,
|
|
||||||
source: &[SequenceSample],
|
|
||||||
peer: &[SequenceSample],
|
|
||||||
) -> (&'static str, String) {
|
|
||||||
if event.event_type == "trust_anchor" {
|
|
||||||
if peer_has_same_ta_identity(peer, &event.key) {
|
|
||||||
return (
|
|
||||||
"TA_PROJECTION_FORMAT_DIFFERENCE",
|
|
||||||
"same TAL hash and TA certificate hash exist on peer side; full TA projection string differs".to_string(),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
if edge == EdgePosition::Stable {
|
|
||||||
return (
|
|
||||||
"STABLE_TA_DIVERGENCE",
|
|
||||||
"trust-anchor identity is not aligned in a non-boundary sample".to_string(),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
return edge_unresolved_class(edge);
|
|
||||||
}
|
|
||||||
|
|
||||||
if event.event_type == "object_hash" {
|
|
||||||
if let Some((uri, hash)) = split_object_hash_key(&event.key) {
|
|
||||||
let peer_has_uri = peer_has_uri_any(peer, uri);
|
|
||||||
let peer_has_different_hash = peer_has_uri_different_hash_any(peer, uri, hash);
|
|
||||||
let peer_hash_at_source_seq = peer_sample_at_seq(peer, event.source_seq)
|
|
||||||
.and_then(|peer_sample| peer_sample.objects.get(uri));
|
|
||||||
let source_later_matches_peer_hash = peer_hash_at_source_seq.is_some_and(|peer_hash| {
|
|
||||||
source_future_has_hash(source, event.source_seq, uri, peer_hash)
|
|
||||||
});
|
|
||||||
if edge == EdgePosition::Leading && peer_has_different_hash {
|
|
||||||
return (
|
|
||||||
"EDGE_LEADING_CONTENT_ROLLOVER",
|
|
||||||
"source leading-edge hash is absent, while peer already has the same URI with another hash".to_string(),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
if edge == EdgePosition::Trailing && peer_has_different_hash {
|
|
||||||
return (
|
|
||||||
"EDGE_TRAILING_CONTENT_ROLLOVER",
|
|
||||||
"source trailing-edge hash is absent, while peer has the same URI with another hash and no later source observation exists".to_string(),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
if edge == EdgePosition::Stable && source_later_matches_peer_hash {
|
|
||||||
return (
|
|
||||||
"MID_SEQUENCE_CONTENT_ROLLOVER_RESOLVED",
|
|
||||||
"peer already has a newer same-URI hash at this seq and source catches up later in the observed sequence".to_string(),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
if edge == EdgePosition::Stable && peer_has_different_hash {
|
|
||||||
return (
|
|
||||||
"STABLE_CONTENT_DIVERGENCE",
|
|
||||||
"same URI exists on peer side with a different hash in a non-boundary sample"
|
|
||||||
.to_string(),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
if edge == EdgePosition::Stable && !peer_has_uri {
|
|
||||||
return (
|
|
||||||
"STABLE_OBJECT_SET_DIVERGENCE",
|
|
||||||
"content key is derived from a non-boundary URI that is absent on peer side"
|
|
||||||
.to_string(),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if edge == EdgePosition::Stable {
|
|
||||||
return (
|
|
||||||
"STABLE_CONTENT_DIVERGENCE",
|
|
||||||
"object hash key remains unaligned in a non-boundary sample".to_string(),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
return edge_unresolved_class(edge);
|
|
||||||
}
|
|
||||||
|
|
||||||
if edge != EdgePosition::Stable {
|
|
||||||
return edge_unresolved_class(edge);
|
|
||||||
}
|
|
||||||
|
|
||||||
let stable_class = match event.raw_class {
|
|
||||||
"PERSISTENT_OBJECT_SET_DIVERGENCE" => "STABLE_OBJECT_SET_DIVERGENCE",
|
|
||||||
"PERSISTENT_REJECT_DIVERGENCE" => "STABLE_REJECT_DIVERGENCE",
|
|
||||||
"PERSISTENT_OUTPUT_DIVERGENCE" => "STABLE_OUTPUT_DIVERGENCE",
|
|
||||||
"PERSISTENT_CONTENT_DIVERGENCE" => "STABLE_CONTENT_DIVERGENCE",
|
|
||||||
"PERSISTENT_TA_DIFFERENCE" => "STABLE_TA_DIVERGENCE",
|
|
||||||
_ => "STABLE_UNCLASSIFIED_DIVERGENCE",
|
|
||||||
};
|
|
||||||
(
|
|
||||||
stable_class,
|
|
||||||
"persistent event appears in a non-boundary sample after sequence edge filtering"
|
|
||||||
.to_string(),
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn edge_unresolved_class(edge: EdgePosition) -> (&'static str, String) {
|
|
||||||
match edge {
|
|
||||||
EdgePosition::Leading => (
|
|
||||||
"EDGE_LEADING_UNRESOLVED",
|
|
||||||
"event appears only from the warmup edge and may predate the observed sequence"
|
|
||||||
.to_string(),
|
|
||||||
),
|
|
||||||
EdgePosition::Trailing => (
|
|
||||||
"EDGE_TRAILING_UNRESOLVED",
|
|
||||||
"event appears at the cooldown edge and lacks later peer observations".to_string(),
|
|
||||||
),
|
|
||||||
EdgePosition::Stable => (
|
|
||||||
"STABLE_UNCLASSIFIED_DIVERGENCE",
|
|
||||||
"event is not on an edge but no specific stable category matched".to_string(),
|
|
||||||
),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl AdjustedAnalysis {
|
|
||||||
fn add(&mut self, class: &'static str, record: AdjustedRecord, sample_limit: usize) {
|
|
||||||
let stats = self.stats.entry(class).or_default();
|
|
||||||
stats.total += 1;
|
|
||||||
stats.unique_keys.insert(format!(
|
|
||||||
"{}|{}|{}",
|
|
||||||
record.event_type,
|
|
||||||
record.source_side.as_str(),
|
|
||||||
record.key
|
|
||||||
));
|
|
||||||
if stats.samples.len() < sample_limit {
|
|
||||||
stats.samples.push(record);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn unique_event_count(events: &[DiffEvent]) -> usize {
|
|
||||||
events
|
|
||||||
.iter()
|
|
||||||
.map(event_identity)
|
|
||||||
.collect::<BTreeSet<_>>()
|
|
||||||
.len()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn event_identity(event: &DiffEvent) -> String {
|
|
||||||
format!(
|
|
||||||
"{}|{}|{}",
|
|
||||||
event.event_type,
|
|
||||||
event.source_side.as_str(),
|
|
||||||
event.key
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn samples_for_side<'a>(
|
|
||||||
side: Side,
|
|
||||||
left: &'a [SequenceSample],
|
|
||||||
right: &'a [SequenceSample],
|
|
||||||
) -> &'a [SequenceSample] {
|
|
||||||
match side {
|
|
||||||
Side::Left => left,
|
|
||||||
Side::Right => right,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn opposite_side(side: Side) -> Side {
|
|
||||||
match side {
|
|
||||||
Side::Left => Side::Right,
|
|
||||||
Side::Right => Side::Left,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn edge_position(samples: &[SequenceSample], seq: u32, args: &Args) -> EdgePosition {
|
|
||||||
let Some(index) = samples.iter().position(|sample| sample.raw.seq == seq) else {
|
|
||||||
return EdgePosition::Stable;
|
|
||||||
};
|
|
||||||
if index < args.warmup_samples {
|
|
||||||
return EdgePosition::Leading;
|
|
||||||
}
|
|
||||||
if samples.len().saturating_sub(index) <= args.cooldown_samples {
|
|
||||||
return EdgePosition::Trailing;
|
|
||||||
}
|
|
||||||
EdgePosition::Stable
|
|
||||||
}
|
|
||||||
|
|
||||||
fn peer_has_uri_any(peer: &[SequenceSample], uri: &str) -> bool {
|
|
||||||
peer.iter().any(|sample| sample.objects.contains_key(uri))
|
|
||||||
}
|
|
||||||
|
|
||||||
fn peer_has_uri_different_hash_any(peer: &[SequenceSample], uri: &str, hash: &str) -> bool {
|
|
||||||
peer.iter()
|
|
||||||
.filter_map(|sample| sample.objects.get(uri))
|
|
||||||
.any(|peer_hash| peer_hash != hash)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn source_future_has_hash(
|
|
||||||
source: &[SequenceSample],
|
|
||||||
seq: u32,
|
|
||||||
uri: &str,
|
|
||||||
expected_hash: &str,
|
|
||||||
) -> bool {
|
|
||||||
source
|
|
||||||
.iter()
|
|
||||||
.filter(|sample| sample.raw.seq > seq)
|
|
||||||
.any(|sample| {
|
|
||||||
sample
|
|
||||||
.objects
|
|
||||||
.get(uri)
|
|
||||||
.is_some_and(|hash| hash == expected_hash)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
fn peer_has_same_ta_identity(peer: &[SequenceSample], key: &str) -> bool {
|
|
||||||
let Some(identity) = trust_anchor_identity(key) else {
|
|
||||||
return false;
|
|
||||||
};
|
|
||||||
peer.iter().any(|sample| {
|
|
||||||
sample
|
|
||||||
.trust_anchors
|
|
||||||
.iter()
|
|
||||||
.filter_map(|peer_key| trust_anchor_identity(peer_key))
|
|
||||||
.any(|peer_identity| peer_identity == identity)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
fn trust_anchor_identity(key: &str) -> Option<String> {
|
|
||||||
let parts = key.split('|').collect::<Vec<_>>();
|
|
||||||
if parts.len() != 4 {
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
Some(format!("{}|{}", parts[2], parts[3]))
|
|
||||||
}
|
|
||||||
|
|
||||||
fn split_object_hash_key(key: &str) -> Option<(&str, &str)> {
|
|
||||||
key.rsplit_once('|')
|
|
||||||
}
|
|
||||||
|
|
||||||
fn build_uri_timeline_samples(
|
|
||||||
left: &[SequenceSample],
|
|
||||||
right: &[SequenceSample],
|
|
||||||
adjusted: &AdjustedAnalysis,
|
|
||||||
limit: usize,
|
|
||||||
) -> Vec<Value> {
|
|
||||||
let mut uris = BTreeSet::new();
|
|
||||||
for stats in adjusted.stats.values() {
|
|
||||||
for sample in &stats.samples {
|
|
||||||
if sample.event_type == "object_hash"
|
|
||||||
&& let Some((uri, _)) = split_object_hash_key(&sample.key)
|
|
||||||
{
|
|
||||||
uris.insert(uri.to_string());
|
|
||||||
}
|
|
||||||
if sample.event_type == "object_uri" {
|
|
||||||
uris.insert(sample.key.clone());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
uris.into_iter()
|
|
||||||
.take(limit)
|
|
||||||
.map(|uri| {
|
|
||||||
json!({
|
|
||||||
"uri": uri,
|
|
||||||
"left": timeline_for_uri(left, &uri),
|
|
||||||
"right": timeline_for_uri(right, &uri),
|
|
||||||
})
|
|
||||||
})
|
|
||||||
.collect()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn timeline_for_uri(samples: &[SequenceSample], uri: &str) -> Vec<Value> {
|
|
||||||
samples
|
|
||||||
.iter()
|
|
||||||
.filter_map(|sample| {
|
|
||||||
sample.objects.get(uri).map(|hash| {
|
|
||||||
json!({
|
|
||||||
"seq": sample.raw.seq,
|
|
||||||
"runId": sample.raw.run_id,
|
|
||||||
"validationTime": format_time(sample.validation_time),
|
|
||||||
"hash": hash,
|
|
||||||
})
|
|
||||||
})
|
|
||||||
})
|
|
||||||
.collect()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn build_stable_object_groups(
|
|
||||||
adjusted: &AdjustedAnalysis,
|
|
||||||
left: &[SequenceSample],
|
|
||||||
right: &[SequenceSample],
|
|
||||||
limit: usize,
|
|
||||||
) -> Vec<Value> {
|
|
||||||
let mut groups: BTreeMap<String, StableObjectGroup> = BTreeMap::new();
|
|
||||||
let Some(stats) = adjusted.stats.get("STABLE_OBJECT_SET_DIVERGENCE") else {
|
|
||||||
return Vec::new();
|
|
||||||
};
|
|
||||||
for sample in &stats.samples {
|
|
||||||
let physical_uri = physical_object_uri(sample);
|
|
||||||
let group_key = format!(
|
|
||||||
"{}|{}|{}|{}",
|
|
||||||
sample.source_side.as_str(),
|
|
||||||
sample.source_seq,
|
|
||||||
sample.source_run_id,
|
|
||||||
publication_point_prefix(&physical_uri)
|
|
||||||
);
|
|
||||||
let group = groups
|
|
||||||
.entry(group_key)
|
|
||||||
.or_insert_with(|| StableObjectGroup::new(sample, &physical_uri));
|
|
||||||
if group.source_cir_path.is_none()
|
|
||||||
&& let Some(source_sample) = sample_by_side_seq_run(
|
|
||||||
left,
|
|
||||||
right,
|
|
||||||
sample.source_side,
|
|
||||||
sample.source_seq,
|
|
||||||
&sample.source_run_id,
|
|
||||||
)
|
|
||||||
{
|
|
||||||
group.source_cir_path = Some(path_string(&source_sample.cir_path));
|
|
||||||
}
|
|
||||||
group.add(sample, physical_uri);
|
|
||||||
}
|
|
||||||
groups
|
|
||||||
.into_values()
|
|
||||||
.take(limit)
|
|
||||||
.map(StableObjectGroup::to_json)
|
|
||||||
.collect()
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Clone, Debug)]
|
|
||||||
struct StableObjectGroup {
|
|
||||||
source_side: Side,
|
|
||||||
source_seq: u32,
|
|
||||||
source_run_id: String,
|
|
||||||
source_cir_path: Option<String>,
|
|
||||||
publication_point: String,
|
|
||||||
event_count: usize,
|
|
||||||
event_types: BTreeMap<&'static str, usize>,
|
|
||||||
physical_objects: BTreeMap<String, StablePhysicalObject>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl StableObjectGroup {
|
|
||||||
fn new(sample: &AdjustedRecord, physical_uri: &str) -> Self {
|
|
||||||
Self {
|
|
||||||
source_side: sample.source_side,
|
|
||||||
source_seq: sample.source_seq,
|
|
||||||
source_run_id: sample.source_run_id.clone(),
|
|
||||||
source_cir_path: None,
|
|
||||||
publication_point: publication_point_prefix(physical_uri),
|
|
||||||
event_count: 0,
|
|
||||||
event_types: BTreeMap::new(),
|
|
||||||
physical_objects: BTreeMap::new(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn add(&mut self, sample: &AdjustedRecord, physical_uri: String) {
|
|
||||||
self.event_count += 1;
|
|
||||||
*self.event_types.entry(sample.event_type).or_default() += 1;
|
|
||||||
let object = self
|
|
||||||
.physical_objects
|
|
||||||
.entry(physical_uri.clone())
|
|
||||||
.or_insert_with(|| StablePhysicalObject {
|
|
||||||
extension: object_extension(&physical_uri).to_string(),
|
|
||||||
uri: physical_uri,
|
|
||||||
event_types: BTreeSet::new(),
|
|
||||||
hashes: BTreeSet::new(),
|
|
||||||
});
|
|
||||||
object.event_types.insert(sample.event_type);
|
|
||||||
if let Some(hash) = event_hash(sample) {
|
|
||||||
object.hashes.insert(hash.to_string());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn to_json(self) -> Value {
|
|
||||||
json!({
|
|
||||||
"sourceSide": self.source_side.as_str(),
|
|
||||||
"sourceSeq": self.source_seq,
|
|
||||||
"sourceRunId": self.source_run_id,
|
|
||||||
"sourceCirPath": self.source_cir_path,
|
|
||||||
"publicationPoint": self.publication_point,
|
|
||||||
"eventCount": self.event_count,
|
|
||||||
"eventTypes": self.event_types,
|
|
||||||
"physicalObjectCount": self.physical_objects.len(),
|
|
||||||
"physicalObjects": self.physical_objects.into_values().map(StablePhysicalObject::to_json).collect::<Vec<_>>(),
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Clone, Debug)]
|
|
||||||
struct StablePhysicalObject {
|
|
||||||
uri: String,
|
|
||||||
extension: String,
|
|
||||||
event_types: BTreeSet<&'static str>,
|
|
||||||
hashes: BTreeSet<String>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl StablePhysicalObject {
|
|
||||||
fn to_json(self) -> Value {
|
|
||||||
json!({
|
|
||||||
"uri": self.uri,
|
|
||||||
"extension": self.extension,
|
|
||||||
"eventTypes": self.event_types,
|
|
||||||
"hashes": self.hashes,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn physical_object_uri(sample: &AdjustedRecord) -> String {
|
|
||||||
if sample.event_type == "object_hash"
|
|
||||||
&& let Some((uri, _)) = split_object_hash_key(&sample.key)
|
|
||||||
{
|
|
||||||
return uri.to_string();
|
|
||||||
}
|
|
||||||
sample.key.clone()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn event_hash(sample: &AdjustedRecord) -> Option<&str> {
|
|
||||||
if sample.event_type == "object_hash" {
|
|
||||||
split_object_hash_key(&sample.key).map(|(_, hash)| hash)
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn publication_point_prefix(uri: &str) -> String {
|
|
||||||
uri.rsplit_once('/')
|
|
||||||
.map(|(prefix, _)| format!("{prefix}/"))
|
|
||||||
.unwrap_or_else(|| uri.to_string())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn object_extension(uri: &str) -> &str {
|
|
||||||
uri.rsplit_once('.')
|
|
||||||
.map(|(_, extension)| extension)
|
|
||||||
.unwrap_or("")
|
|
||||||
}
|
|
||||||
|
|
||||||
fn sample_by_side_seq_run<'a>(
|
|
||||||
left: &'a [SequenceSample],
|
|
||||||
right: &'a [SequenceSample],
|
|
||||||
side: Side,
|
|
||||||
seq: u32,
|
|
||||||
run_id: &str,
|
|
||||||
) -> Option<&'a SequenceSample> {
|
|
||||||
samples_for_side(side, left, right)
|
|
||||||
.iter()
|
|
||||||
.find(|sample| sample.raw.seq == seq && sample.raw.run_id == run_id)
|
|
||||||
}
|
|
||||||
@ -1,364 +0,0 @@
|
|||||||
use std::collections::BTreeSet;
|
|
||||||
|
|
||||||
use super::args::Args;
|
|
||||||
use super::io::object_hash_key;
|
|
||||||
use super::model::{
|
|
||||||
AnalysisResult, DiffEvent, EventOccurrence, SampleRecord, SequenceSample, Side,
|
|
||||||
};
|
|
||||||
|
|
||||||
pub(super) fn analyze_set<F>(
|
|
||||||
result: &mut AnalysisResult,
|
|
||||||
event_type: &'static str,
|
|
||||||
left: &[SequenceSample],
|
|
||||||
right: &[SequenceSample],
|
|
||||||
extract: F,
|
|
||||||
resolved_class: &'static str,
|
|
||||||
persistent_class: &'static str,
|
|
||||||
args: &Args,
|
|
||||||
) where
|
|
||||||
F: for<'a> Fn(&'a SequenceSample) -> &'a BTreeSet<String>,
|
|
||||||
{
|
|
||||||
analyze_direction(
|
|
||||||
result,
|
|
||||||
event_type,
|
|
||||||
Side::Left,
|
|
||||||
left,
|
|
||||||
right,
|
|
||||||
&extract,
|
|
||||||
resolved_class,
|
|
||||||
persistent_class,
|
|
||||||
args,
|
|
||||||
);
|
|
||||||
analyze_direction(
|
|
||||||
result,
|
|
||||||
event_type,
|
|
||||||
Side::Right,
|
|
||||||
right,
|
|
||||||
left,
|
|
||||||
&extract,
|
|
||||||
resolved_class,
|
|
||||||
persistent_class,
|
|
||||||
args,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
fn analyze_direction<F>(
|
|
||||||
result: &mut AnalysisResult,
|
|
||||||
event_type: &'static str,
|
|
||||||
source_side: Side,
|
|
||||||
source: &[SequenceSample],
|
|
||||||
peer: &[SequenceSample],
|
|
||||||
extract: &F,
|
|
||||||
resolved_class: &'static str,
|
|
||||||
persistent_class: &'static str,
|
|
||||||
args: &Args,
|
|
||||||
) where
|
|
||||||
F: for<'a> Fn(&'a SequenceSample) -> &'a BTreeSet<String>,
|
|
||||||
{
|
|
||||||
for sample in source {
|
|
||||||
let source_set = extract(sample);
|
|
||||||
for key in source_set {
|
|
||||||
if peer_sample_at_seq(peer, sample.raw.seq)
|
|
||||||
.is_some_and(|peer_sample| extract(peer_sample).contains(key))
|
|
||||||
{
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if let Some(matched) = find_future_match(peer, sample, key, extract, args) {
|
|
||||||
result.add(
|
|
||||||
resolved_class,
|
|
||||||
SampleRecord {
|
|
||||||
classification: resolved_class,
|
|
||||||
event_type,
|
|
||||||
key: key.clone(),
|
|
||||||
source_side,
|
|
||||||
source_seq: sample.raw.seq,
|
|
||||||
source_run_id: sample.raw.run_id.clone(),
|
|
||||||
matched_seq: Some(matched.seq),
|
|
||||||
matched_run_id: Some(matched.run_id),
|
|
||||||
note: format!(
|
|
||||||
"matched in {} sequence within alignment window",
|
|
||||||
matched.side.as_str()
|
|
||||||
),
|
|
||||||
},
|
|
||||||
args.sample_limit,
|
|
||||||
);
|
|
||||||
} else {
|
|
||||||
result.add(
|
|
||||||
persistent_class,
|
|
||||||
SampleRecord {
|
|
||||||
classification: persistent_class,
|
|
||||||
event_type,
|
|
||||||
key: key.clone(),
|
|
||||||
source_side,
|
|
||||||
source_seq: sample.raw.seq,
|
|
||||||
source_run_id: sample.raw.run_id.clone(),
|
|
||||||
matched_seq: None,
|
|
||||||
matched_run_id: None,
|
|
||||||
note: "no matching event in peer sequence alignment window".to_string(),
|
|
||||||
},
|
|
||||||
args.sample_limit,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(super) fn analyze_hash_rollover(
|
|
||||||
result: &mut AnalysisResult,
|
|
||||||
left: &[SequenceSample],
|
|
||||||
right: &[SequenceSample],
|
|
||||||
args: &Args,
|
|
||||||
) {
|
|
||||||
for (source_side, source, peer) in [(Side::Left, left, right), (Side::Right, right, left)] {
|
|
||||||
for sample in source {
|
|
||||||
for (uri, hash) in &sample.objects {
|
|
||||||
if peer_sample_at_seq(peer, sample.raw.seq)
|
|
||||||
.and_then(|peer_sample| peer_sample.objects.get(uri))
|
|
||||||
.is_some_and(|peer_hash| peer_hash == hash)
|
|
||||||
{
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if let Some(peer_sample) = peer_sample_at_seq(peer, sample.raw.seq)
|
|
||||||
&& peer_sample.objects.contains_key(uri)
|
|
||||||
&& find_future_hash_match(peer, sample, uri, hash, args).is_some()
|
|
||||||
{
|
|
||||||
let matched =
|
|
||||||
find_future_hash_match(peer, sample, uri, hash, args).expect("match");
|
|
||||||
result.add(
|
|
||||||
"CONTENT_ROLLOVER_RESOLVED",
|
|
||||||
SampleRecord {
|
|
||||||
classification: "CONTENT_ROLLOVER_RESOLVED",
|
|
||||||
event_type: "object_content_rollover",
|
|
||||||
key: object_hash_key(uri, hash),
|
|
||||||
source_side,
|
|
||||||
source_seq: sample.raw.seq,
|
|
||||||
source_run_id: sample.raw.run_id.clone(),
|
|
||||||
matched_seq: Some(matched.seq),
|
|
||||||
matched_run_id: Some(matched.run_id),
|
|
||||||
note: "same URI hash appeared in peer sequence later".to_string(),
|
|
||||||
},
|
|
||||||
args.sample_limit,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(super) fn collect_persistent_events(
|
|
||||||
left: &[SequenceSample],
|
|
||||||
right: &[SequenceSample],
|
|
||||||
args: &Args,
|
|
||||||
) -> Vec<DiffEvent> {
|
|
||||||
let mut events = Vec::new();
|
|
||||||
collect_persistent_set(
|
|
||||||
&mut events,
|
|
||||||
"object_uri",
|
|
||||||
"PERSISTENT_OBJECT_SET_DIVERGENCE",
|
|
||||||
left,
|
|
||||||
right,
|
|
||||||
|sample| &sample.object_uris,
|
|
||||||
args,
|
|
||||||
);
|
|
||||||
collect_persistent_set(
|
|
||||||
&mut events,
|
|
||||||
"object_hash",
|
|
||||||
"PERSISTENT_CONTENT_DIVERGENCE",
|
|
||||||
left,
|
|
||||||
right,
|
|
||||||
|sample| &sample.object_hashes,
|
|
||||||
args,
|
|
||||||
);
|
|
||||||
collect_persistent_set(
|
|
||||||
&mut events,
|
|
||||||
"reject_uri",
|
|
||||||
"PERSISTENT_REJECT_DIVERGENCE",
|
|
||||||
left,
|
|
||||||
right,
|
|
||||||
|sample| &sample.rejects,
|
|
||||||
args,
|
|
||||||
);
|
|
||||||
collect_persistent_set(
|
|
||||||
&mut events,
|
|
||||||
"trust_anchor",
|
|
||||||
"PERSISTENT_TA_DIFFERENCE",
|
|
||||||
left,
|
|
||||||
right,
|
|
||||||
|sample| &sample.trust_anchors,
|
|
||||||
args,
|
|
||||||
);
|
|
||||||
collect_persistent_set(
|
|
||||||
&mut events,
|
|
||||||
"vrp_output",
|
|
||||||
"PERSISTENT_OUTPUT_DIVERGENCE",
|
|
||||||
left,
|
|
||||||
right,
|
|
||||||
|sample| &sample.vrps,
|
|
||||||
args,
|
|
||||||
);
|
|
||||||
collect_persistent_set(
|
|
||||||
&mut events,
|
|
||||||
"vap_output",
|
|
||||||
"PERSISTENT_OUTPUT_DIVERGENCE",
|
|
||||||
left,
|
|
||||||
right,
|
|
||||||
|sample| &sample.vaps,
|
|
||||||
args,
|
|
||||||
);
|
|
||||||
events
|
|
||||||
}
|
|
||||||
|
|
||||||
fn collect_persistent_set<F>(
|
|
||||||
events: &mut Vec<DiffEvent>,
|
|
||||||
event_type: &'static str,
|
|
||||||
raw_class: &'static str,
|
|
||||||
left: &[SequenceSample],
|
|
||||||
right: &[SequenceSample],
|
|
||||||
extract: F,
|
|
||||||
args: &Args,
|
|
||||||
) where
|
|
||||||
F: for<'a> Fn(&'a SequenceSample) -> &'a BTreeSet<String>,
|
|
||||||
{
|
|
||||||
collect_persistent_direction(
|
|
||||||
events,
|
|
||||||
event_type,
|
|
||||||
raw_class,
|
|
||||||
Side::Left,
|
|
||||||
left,
|
|
||||||
right,
|
|
||||||
&extract,
|
|
||||||
args,
|
|
||||||
);
|
|
||||||
collect_persistent_direction(
|
|
||||||
events,
|
|
||||||
event_type,
|
|
||||||
raw_class,
|
|
||||||
Side::Right,
|
|
||||||
right,
|
|
||||||
left,
|
|
||||||
&extract,
|
|
||||||
args,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
fn collect_persistent_direction<F>(
|
|
||||||
events: &mut Vec<DiffEvent>,
|
|
||||||
event_type: &'static str,
|
|
||||||
raw_class: &'static str,
|
|
||||||
source_side: Side,
|
|
||||||
source: &[SequenceSample],
|
|
||||||
peer: &[SequenceSample],
|
|
||||||
extract: &F,
|
|
||||||
args: &Args,
|
|
||||||
) where
|
|
||||||
F: for<'a> Fn(&'a SequenceSample) -> &'a BTreeSet<String>,
|
|
||||||
{
|
|
||||||
for sample in source {
|
|
||||||
for key in extract(sample) {
|
|
||||||
if peer_sample_at_seq(peer, sample.raw.seq)
|
|
||||||
.is_some_and(|peer_sample| extract(peer_sample).contains(key))
|
|
||||||
{
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if find_future_match(peer, sample, key, extract, args).is_some() {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
events.push(DiffEvent {
|
|
||||||
event_type,
|
|
||||||
raw_class,
|
|
||||||
key: key.clone(),
|
|
||||||
source_side,
|
|
||||||
source_seq: sample.raw.seq,
|
|
||||||
source_run_id: sample.raw.run_id.clone(),
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl AnalysisResult {
|
|
||||||
fn add(&mut self, class: &'static str, record: SampleRecord, sample_limit: usize) {
|
|
||||||
let stats = self.stats.entry(class).or_default();
|
|
||||||
stats.total += 1;
|
|
||||||
if stats.samples.len() < sample_limit {
|
|
||||||
stats.samples.push(record);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(super) fn peer_sample_at_seq(peer: &[SequenceSample], seq: u32) -> Option<&SequenceSample> {
|
|
||||||
peer.iter().find(|sample| sample.raw.seq == seq)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn find_future_match<F>(
|
|
||||||
peer: &[SequenceSample],
|
|
||||||
source: &SequenceSample,
|
|
||||||
key: &str,
|
|
||||||
extract: &F,
|
|
||||||
args: &Args,
|
|
||||||
) -> Option<EventOccurrence>
|
|
||||||
where
|
|
||||||
F: for<'a> Fn(&'a SequenceSample) -> &'a BTreeSet<String>,
|
|
||||||
{
|
|
||||||
peer.iter()
|
|
||||||
.filter(|candidate| is_in_alignment_window(source, candidate, args))
|
|
||||||
.find(|candidate| extract(candidate).contains(key))
|
|
||||||
.map(|candidate| {
|
|
||||||
occurrence(
|
|
||||||
candidate,
|
|
||||||
if candidate.raw.side.as_deref() == Some("left") {
|
|
||||||
Side::Left
|
|
||||||
} else {
|
|
||||||
Side::Right
|
|
||||||
},
|
|
||||||
)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
fn find_future_hash_match(
|
|
||||||
peer: &[SequenceSample],
|
|
||||||
source: &SequenceSample,
|
|
||||||
uri: &str,
|
|
||||||
hash: &str,
|
|
||||||
args: &Args,
|
|
||||||
) -> Option<EventOccurrence> {
|
|
||||||
peer.iter()
|
|
||||||
.filter(|candidate| is_in_alignment_window(source, candidate, args))
|
|
||||||
.find(|candidate| {
|
|
||||||
candidate
|
|
||||||
.objects
|
|
||||||
.get(uri)
|
|
||||||
.is_some_and(|peer_hash| peer_hash == hash)
|
|
||||||
})
|
|
||||||
.map(|candidate| {
|
|
||||||
occurrence(
|
|
||||||
candidate,
|
|
||||||
if candidate.raw.side.as_deref() == Some("left") {
|
|
||||||
Side::Left
|
|
||||||
} else {
|
|
||||||
Side::Right
|
|
||||||
},
|
|
||||||
)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
fn is_in_alignment_window(
|
|
||||||
source: &SequenceSample,
|
|
||||||
candidate: &SequenceSample,
|
|
||||||
args: &Args,
|
|
||||||
) -> bool {
|
|
||||||
if candidate.raw.seq < source.raw.seq {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
let run_delta = candidate.raw.seq.saturating_sub(source.raw.seq);
|
|
||||||
let time_delta = candidate.validation_time - source.validation_time;
|
|
||||||
let secs = time_delta.whole_seconds().abs();
|
|
||||||
run_delta <= args.align_window_runs || secs <= args.align_window_secs
|
|
||||||
}
|
|
||||||
|
|
||||||
fn occurrence(sample: &SequenceSample, side: Side) -> EventOccurrence {
|
|
||||||
EventOccurrence {
|
|
||||||
side,
|
|
||||||
seq: sample.raw.seq,
|
|
||||||
run_id: sample.raw.run_id.clone(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
185
src/tools/sequence_triage_ccr_cir/churn.rs
Normal file
185
src/tools/sequence_triage_ccr_cir/churn.rs
Normal file
@ -0,0 +1,185 @@
|
|||||||
|
use std::collections::{BTreeMap, BTreeSet};
|
||||||
|
|
||||||
|
use super::model::{ChurnRecord, ChurnSummaryRecord, IntraRpChurn, SequenceSample, Side};
|
||||||
|
|
||||||
|
pub(super) fn build_intra_rp_churn(
|
||||||
|
left: &[SequenceSample],
|
||||||
|
right: &[SequenceSample],
|
||||||
|
) -> IntraRpChurn {
|
||||||
|
let left_records = build_side_records(Side::Left, left);
|
||||||
|
let right_records = build_side_records(Side::Right, right);
|
||||||
|
let mut summary = summarize_records(&left_records);
|
||||||
|
summary.extend(summarize_records(&right_records));
|
||||||
|
IntraRpChurn {
|
||||||
|
left: left_records,
|
||||||
|
right: right_records,
|
||||||
|
summary,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn build_side_records(side: Side, samples: &[SequenceSample]) -> Vec<ChurnRecord> {
|
||||||
|
let mut records = Vec::new();
|
||||||
|
for pair in samples.windows(2) {
|
||||||
|
let from = &pair[0];
|
||||||
|
let to = &pair[1];
|
||||||
|
records.push(record_churn(
|
||||||
|
side,
|
||||||
|
from,
|
||||||
|
to,
|
||||||
|
"object",
|
||||||
|
&from.object_hashes,
|
||||||
|
&to.object_hashes,
|
||||||
|
));
|
||||||
|
let from_output = output_keys(from);
|
||||||
|
let to_output = output_keys(to);
|
||||||
|
records.push(record_churn(
|
||||||
|
side,
|
||||||
|
from,
|
||||||
|
to,
|
||||||
|
"output",
|
||||||
|
&from_output,
|
||||||
|
&to_output,
|
||||||
|
));
|
||||||
|
records.push(record_churn(
|
||||||
|
side,
|
||||||
|
from,
|
||||||
|
to,
|
||||||
|
"vrp_output",
|
||||||
|
&from.vrps,
|
||||||
|
&to.vrps,
|
||||||
|
));
|
||||||
|
records.push(record_churn(
|
||||||
|
side,
|
||||||
|
from,
|
||||||
|
to,
|
||||||
|
"vap_output",
|
||||||
|
&from.vaps,
|
||||||
|
&to.vaps,
|
||||||
|
));
|
||||||
|
records.push(record_churn(
|
||||||
|
side,
|
||||||
|
from,
|
||||||
|
to,
|
||||||
|
"reject",
|
||||||
|
&from.rejects,
|
||||||
|
&to.rejects,
|
||||||
|
));
|
||||||
|
}
|
||||||
|
records
|
||||||
|
}
|
||||||
|
|
||||||
|
fn output_keys(sample: &SequenceSample) -> BTreeSet<String> {
|
||||||
|
sample
|
||||||
|
.vrps
|
||||||
|
.iter()
|
||||||
|
.map(|key| format!("vrp|{key}"))
|
||||||
|
.chain(sample.vaps.iter().map(|key| format!("vap|{key}")))
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn record_churn(
|
||||||
|
side: Side,
|
||||||
|
from: &SequenceSample,
|
||||||
|
to: &SequenceSample,
|
||||||
|
set_type: &'static str,
|
||||||
|
from_set: &BTreeSet<String>,
|
||||||
|
to_set: &BTreeSet<String>,
|
||||||
|
) -> ChurnRecord {
|
||||||
|
let added_count = to_set.difference(from_set).count();
|
||||||
|
let removed_count = from_set.difference(to_set).count();
|
||||||
|
let changed_abs = added_count + removed_count;
|
||||||
|
let union_count = from_set.union(to_set).count();
|
||||||
|
let changed_ratio_from = ratio(changed_abs, from_set.len());
|
||||||
|
let changed_ratio_union = ratio(changed_abs, union_count);
|
||||||
|
ChurnRecord {
|
||||||
|
side,
|
||||||
|
rp_id: from.raw.rp_id.clone(),
|
||||||
|
from_seq: from.raw.seq,
|
||||||
|
to_seq: to.raw.seq,
|
||||||
|
from_run_id: from.raw.run_id.clone(),
|
||||||
|
to_run_id: to.raw.run_id.clone(),
|
||||||
|
set_type,
|
||||||
|
from_count: from_set.len(),
|
||||||
|
to_count: to_set.len(),
|
||||||
|
added_count,
|
||||||
|
removed_count,
|
||||||
|
changed_abs,
|
||||||
|
union_count,
|
||||||
|
changed_ratio_from,
|
||||||
|
changed_ratio_union,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn ratio(numerator: usize, denominator: usize) -> f64 {
|
||||||
|
if denominator == 0 {
|
||||||
|
0.0
|
||||||
|
} else {
|
||||||
|
numerator as f64 / denominator as f64
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn summarize_records(records: &[ChurnRecord]) -> Vec<ChurnSummaryRecord> {
|
||||||
|
let mut grouped: BTreeMap<(Side, &'static str), Vec<&ChurnRecord>> = BTreeMap::new();
|
||||||
|
for record in records {
|
||||||
|
grouped
|
||||||
|
.entry((record.side, record.set_type))
|
||||||
|
.or_default()
|
||||||
|
.push(record);
|
||||||
|
}
|
||||||
|
grouped
|
||||||
|
.into_iter()
|
||||||
|
.map(|((side, set_type), group)| {
|
||||||
|
let count = group.len() as f64;
|
||||||
|
let max_changed_abs = group
|
||||||
|
.iter()
|
||||||
|
.map(|record| record.changed_abs)
|
||||||
|
.max()
|
||||||
|
.unwrap_or(0);
|
||||||
|
let avg_changed_abs = if count == 0.0 {
|
||||||
|
0.0
|
||||||
|
} else {
|
||||||
|
group
|
||||||
|
.iter()
|
||||||
|
.map(|record| record.changed_abs as f64)
|
||||||
|
.sum::<f64>()
|
||||||
|
/ count
|
||||||
|
};
|
||||||
|
let max_changed_ratio_from = group
|
||||||
|
.iter()
|
||||||
|
.map(|record| record.changed_ratio_from)
|
||||||
|
.fold(0.0, f64::max);
|
||||||
|
let avg_changed_ratio_from = if count == 0.0 {
|
||||||
|
0.0
|
||||||
|
} else {
|
||||||
|
group
|
||||||
|
.iter()
|
||||||
|
.map(|record| record.changed_ratio_from)
|
||||||
|
.sum::<f64>()
|
||||||
|
/ count
|
||||||
|
};
|
||||||
|
let max_changed_ratio_union = group
|
||||||
|
.iter()
|
||||||
|
.map(|record| record.changed_ratio_union)
|
||||||
|
.fold(0.0, f64::max);
|
||||||
|
let avg_changed_ratio_union = if count == 0.0 {
|
||||||
|
0.0
|
||||||
|
} else {
|
||||||
|
group
|
||||||
|
.iter()
|
||||||
|
.map(|record| record.changed_ratio_union)
|
||||||
|
.sum::<f64>()
|
||||||
|
/ count
|
||||||
|
};
|
||||||
|
ChurnSummaryRecord {
|
||||||
|
side,
|
||||||
|
set_type,
|
||||||
|
max_changed_abs,
|
||||||
|
avg_changed_abs,
|
||||||
|
max_changed_ratio_from,
|
||||||
|
avg_changed_ratio_from,
|
||||||
|
max_changed_ratio_union,
|
||||||
|
avg_changed_ratio_union,
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
@ -2,7 +2,6 @@ use std::collections::{BTreeMap, BTreeSet};
|
|||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
|
|
||||||
use serde::Deserialize;
|
use serde::Deserialize;
|
||||||
use serde_json::Value;
|
|
||||||
use time::OffsetDateTime;
|
use time::OffsetDateTime;
|
||||||
|
|
||||||
#[derive(Clone, Debug, Deserialize)]
|
#[derive(Clone, Debug, Deserialize)]
|
||||||
@ -43,8 +42,9 @@ pub(super) struct SequenceSample {
|
|||||||
pub(super) vaps: BTreeSet<String>,
|
pub(super) vaps: BTreeSet<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord)]
|
||||||
pub(super) enum Side {
|
pub(super) enum Side {
|
||||||
|
#[default]
|
||||||
Left,
|
Left,
|
||||||
Right,
|
Right,
|
||||||
}
|
}
|
||||||
@ -58,85 +58,6 @@ impl Side {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug)]
|
|
||||||
pub(super) struct EventOccurrence {
|
|
||||||
pub(super) side: Side,
|
|
||||||
pub(super) seq: u32,
|
|
||||||
pub(super) run_id: String,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Clone, Debug)]
|
|
||||||
pub(super) struct SampleRecord {
|
|
||||||
pub(super) classification: &'static str,
|
|
||||||
pub(super) event_type: &'static str,
|
|
||||||
pub(super) key: String,
|
|
||||||
pub(super) source_side: Side,
|
|
||||||
pub(super) source_seq: u32,
|
|
||||||
pub(super) source_run_id: String,
|
|
||||||
pub(super) matched_seq: Option<u32>,
|
|
||||||
pub(super) matched_run_id: Option<String>,
|
|
||||||
pub(super) note: String,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Clone, Debug, Default)]
|
|
||||||
pub(super) struct ClassStats {
|
|
||||||
pub(super) total: usize,
|
|
||||||
pub(super) samples: Vec<SampleRecord>,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Clone, Debug, Default)]
|
|
||||||
pub(super) struct AnalysisResult {
|
|
||||||
pub(super) stats: BTreeMap<&'static str, ClassStats>,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Clone, Debug)]
|
|
||||||
pub(super) struct DiffEvent {
|
|
||||||
pub(super) event_type: &'static str,
|
|
||||||
pub(super) raw_class: &'static str,
|
|
||||||
pub(super) key: String,
|
|
||||||
pub(super) source_side: Side,
|
|
||||||
pub(super) source_seq: u32,
|
|
||||||
pub(super) source_run_id: String,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
|
||||||
pub(super) enum EdgePosition {
|
|
||||||
Leading,
|
|
||||||
Stable,
|
|
||||||
Trailing,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Clone, Debug)]
|
|
||||||
pub(super) struct AdjustedRecord {
|
|
||||||
pub(super) classification: &'static str,
|
|
||||||
pub(super) event_type: &'static str,
|
|
||||||
pub(super) key: String,
|
|
||||||
pub(super) source_side: Side,
|
|
||||||
pub(super) source_seq: u32,
|
|
||||||
pub(super) source_run_id: String,
|
|
||||||
pub(super) note: String,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Clone, Debug, Default)]
|
|
||||||
pub(super) struct AdjustedClassStats {
|
|
||||||
pub(super) total: usize,
|
|
||||||
pub(super) unique_keys: BTreeSet<String>,
|
|
||||||
pub(super) samples: Vec<AdjustedRecord>,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Clone, Debug, Default)]
|
|
||||||
pub(super) struct AdjustedAnalysis {
|
|
||||||
pub(super) raw_persistent_occurrences: usize,
|
|
||||||
pub(super) raw_persistent_unique_keys: usize,
|
|
||||||
pub(super) edge_filtered_occurrences: usize,
|
|
||||||
pub(super) edge_filtered_unique_keys: usize,
|
|
||||||
pub(super) adjusted_stable_occurrences: usize,
|
|
||||||
pub(super) adjusted_stable_unique_keys: usize,
|
|
||||||
pub(super) stats: BTreeMap<&'static str, AdjustedClassStats>,
|
|
||||||
pub(super) uri_timeline_samples: Vec<Value>,
|
|
||||||
pub(super) stable_object_groups: Vec<Value>,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
pub(super) struct SandwichRecord {
|
pub(super) struct SandwichRecord {
|
||||||
pub(super) classification: &'static str,
|
pub(super) classification: &'static str,
|
||||||
@ -164,10 +85,66 @@ pub(super) struct SandwichClassStats {
|
|||||||
pub(super) samples: Vec<SandwichRecord>,
|
pub(super) samples: Vec<SandwichRecord>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug, Default)]
|
||||||
|
pub(super) struct SandwichHeatmapRow {
|
||||||
|
pub(super) window: String,
|
||||||
|
pub(super) source_side: Side,
|
||||||
|
pub(super) source_start_seq: u32,
|
||||||
|
pub(super) source_start_run_id: String,
|
||||||
|
pub(super) source_end_seq: u32,
|
||||||
|
pub(super) source_end_run_id: String,
|
||||||
|
pub(super) peer_seq: u32,
|
||||||
|
pub(super) peer_run_id: String,
|
||||||
|
pub(super) source_start_time: String,
|
||||||
|
pub(super) peer_time: String,
|
||||||
|
pub(super) source_end_time: String,
|
||||||
|
pub(super) total: usize,
|
||||||
|
pub(super) class_counts: BTreeMap<&'static str, usize>,
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug, Default)]
|
#[derive(Clone, Debug, Default)]
|
||||||
pub(super) struct SandwichAnalysis {
|
pub(super) struct SandwichAnalysis {
|
||||||
pub(super) total_occurrences: usize,
|
pub(super) total_occurrences: usize,
|
||||||
pub(super) unique_keys: BTreeSet<String>,
|
pub(super) unique_keys: BTreeSet<String>,
|
||||||
pub(super) by_set_type: BTreeMap<&'static str, usize>,
|
pub(super) by_set_type: BTreeMap<&'static str, usize>,
|
||||||
pub(super) stats: BTreeMap<&'static str, SandwichClassStats>,
|
pub(super) stats: BTreeMap<&'static str, SandwichClassStats>,
|
||||||
|
pub(super) heatmap: BTreeMap<String, SandwichHeatmapRow>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
pub(super) struct ChurnRecord {
|
||||||
|
pub(super) side: Side,
|
||||||
|
pub(super) rp_id: String,
|
||||||
|
pub(super) from_seq: u32,
|
||||||
|
pub(super) to_seq: u32,
|
||||||
|
pub(super) from_run_id: String,
|
||||||
|
pub(super) to_run_id: String,
|
||||||
|
pub(super) set_type: &'static str,
|
||||||
|
pub(super) from_count: usize,
|
||||||
|
pub(super) to_count: usize,
|
||||||
|
pub(super) added_count: usize,
|
||||||
|
pub(super) removed_count: usize,
|
||||||
|
pub(super) changed_abs: usize,
|
||||||
|
pub(super) union_count: usize,
|
||||||
|
pub(super) changed_ratio_from: f64,
|
||||||
|
pub(super) changed_ratio_union: f64,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
pub(super) struct ChurnSummaryRecord {
|
||||||
|
pub(super) side: Side,
|
||||||
|
pub(super) set_type: &'static str,
|
||||||
|
pub(super) max_changed_abs: usize,
|
||||||
|
pub(super) avg_changed_abs: f64,
|
||||||
|
pub(super) max_changed_ratio_from: f64,
|
||||||
|
pub(super) avg_changed_ratio_from: f64,
|
||||||
|
pub(super) max_changed_ratio_union: f64,
|
||||||
|
pub(super) avg_changed_ratio_union: f64,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug, Default)]
|
||||||
|
pub(super) struct IntraRpChurn {
|
||||||
|
pub(super) left: Vec<ChurnRecord>,
|
||||||
|
pub(super) right: Vec<ChurnRecord>,
|
||||||
|
pub(super) summary: Vec<ChurnSummaryRecord>,
|
||||||
}
|
}
|
||||||
|
|||||||
@ -6,7 +6,7 @@ use serde_json::{Value, json};
|
|||||||
use super::args::Args;
|
use super::args::Args;
|
||||||
use super::io::{format_time, path_string};
|
use super::io::{format_time, path_string};
|
||||||
use super::model::{
|
use super::model::{
|
||||||
AdjustedAnalysis, AdjustedRecord, AnalysisResult, SampleRecord, SandwichAnalysis,
|
ChurnRecord, ChurnSummaryRecord, IntraRpChurn, SandwichAnalysis, SandwichHeatmapRow,
|
||||||
SandwichRecord, SequenceSample,
|
SandwichRecord, SequenceSample,
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -14,49 +14,13 @@ pub(super) fn build_output(
|
|||||||
args: &Args,
|
args: &Args,
|
||||||
left: &[SequenceSample],
|
left: &[SequenceSample],
|
||||||
right: &[SequenceSample],
|
right: &[SequenceSample],
|
||||||
result: &AnalysisResult,
|
|
||||||
adjusted: &AdjustedAnalysis,
|
|
||||||
sandwich: &SandwichAnalysis,
|
sandwich: &SandwichAnalysis,
|
||||||
|
churn: &IntraRpChurn,
|
||||||
) -> Value {
|
) -> Value {
|
||||||
let classifications = result
|
|
||||||
.stats
|
|
||||||
.iter()
|
|
||||||
.map(|(class, stats)| {
|
|
||||||
json!({
|
json!({
|
||||||
"classification": class,
|
"schemaVersion": 3,
|
||||||
"count": stats.total,
|
|
||||||
"samples": stats.samples.iter().map(sample_to_json).collect::<Vec<_>>(),
|
|
||||||
})
|
|
||||||
})
|
|
||||||
.collect::<Vec<_>>();
|
|
||||||
let adjusted_classifications = adjusted
|
|
||||||
.stats
|
|
||||||
.iter()
|
|
||||||
.map(|(class, stats)| {
|
|
||||||
json!({
|
|
||||||
"classification": class,
|
|
||||||
"occurrences": stats.total,
|
|
||||||
"uniqueKeys": stats.unique_keys.len(),
|
|
||||||
"samples": stats.samples.iter().map(adjusted_sample_to_json).collect::<Vec<_>>(),
|
|
||||||
})
|
|
||||||
})
|
|
||||||
.collect::<Vec<_>>();
|
|
||||||
let sandwich_classifications = sandwich
|
|
||||||
.stats
|
|
||||||
.iter()
|
|
||||||
.map(|(class, stats)| {
|
|
||||||
json!({
|
|
||||||
"classification": class,
|
|
||||||
"occurrences": stats.total,
|
|
||||||
"uniqueKeys": stats.unique_keys.len(),
|
|
||||||
"samples": stats.samples.iter().map(sandwich_sample_to_json).collect::<Vec<_>>(),
|
|
||||||
})
|
|
||||||
})
|
|
||||||
.collect::<Vec<_>>();
|
|
||||||
json!({
|
|
||||||
"schemaVersion": 1,
|
|
||||||
"generatedBy": "sequence_triage_ccr_cir",
|
"generatedBy": "sequence_triage_ccr_cir",
|
||||||
"inputContract": "left-right-sequence-jsonl-with-ccr-cir-artifacts",
|
"inputContract": "sequence-jsonl-cir-ccr-sandwich-only",
|
||||||
"parameters": {
|
"parameters": {
|
||||||
"leftSequence": path_string(&args.left_sequence),
|
"leftSequence": path_string(&args.left_sequence),
|
||||||
"rightSequence": path_string(&args.right_sequence),
|
"rightSequence": path_string(&args.right_sequence),
|
||||||
@ -69,37 +33,6 @@ pub(super) fn build_output(
|
|||||||
},
|
},
|
||||||
"left": sequence_summary(left),
|
"left": sequence_summary(left),
|
||||||
"right": sequence_summary(right),
|
"right": sequence_summary(right),
|
||||||
"classificationCounts": classifications,
|
|
||||||
"totals": {
|
|
||||||
"resolvedTemporalLike": count_class(result, "TEMPORAL_LAG_RESOLVED") + count_class(result, "CONTENT_ROLLOVER_RESOLVED"),
|
|
||||||
"persistent": count_prefix(result, "PERSISTENT_"),
|
|
||||||
"unclassified": count_class(result, "UNCLASSIFIED_INSUFFICIENT_WINDOW"),
|
|
||||||
},
|
|
||||||
"adjusted": {
|
|
||||||
"warmupSamples": args.warmup_samples,
|
|
||||||
"cooldownSamples": args.cooldown_samples,
|
|
||||||
"rawPersistent": {
|
|
||||||
"occurrences": adjusted.raw_persistent_occurrences,
|
|
||||||
"uniqueKeys": adjusted.raw_persistent_unique_keys,
|
|
||||||
},
|
|
||||||
"edgeFilteredPersistent": {
|
|
||||||
"occurrences": adjusted.edge_filtered_occurrences,
|
|
||||||
"uniqueKeys": adjusted.edge_filtered_unique_keys,
|
|
||||||
},
|
|
||||||
"adjustedStablePersistent": {
|
|
||||||
"occurrences": adjusted.adjusted_stable_occurrences,
|
|
||||||
"uniqueKeys": adjusted.adjusted_stable_unique_keys,
|
|
||||||
},
|
|
||||||
"classificationCounts": adjusted_classifications,
|
|
||||||
"uriTimelineSamples": adjusted.uri_timeline_samples,
|
|
||||||
"stableObjectGroups": adjusted.stable_object_groups,
|
|
||||||
"interpretation": {
|
|
||||||
"rawPersistentMeaning": "raw persistent keeps the original #043 single-event matching semantics.",
|
|
||||||
"edgeFilteredMeaning": "edge-filtered persistent keeps only non-warmup and non-cooldown source occurrences.",
|
|
||||||
"adjustedStableMeaning": "adjusted stable persistent keeps non-edge findings after URI-level rollover and TA projection filtering.",
|
|
||||||
"stableObjectGroupsMeaning": "STABLE_OBJECT_SET_DIVERGENCE events are additionally collapsed by source CIR, publication point, and physical object URI so object_uri/object_hash duplicate event views do not inflate physical-object counts.",
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"sandwich": {
|
"sandwich": {
|
||||||
"strictTimeWindow": true,
|
"strictTimeWindow": true,
|
||||||
"method": "For each side, use two adjacent source samples as a stable interval. If source_start.time < peer.time < source_end.time and the source value is identical at both interval endpoints, the peer sample is expected to contain the same value.",
|
"method": "For each side, use two adjacent source samples as a stable interval. If source_start.time < peer.time < source_end.time and the source value is identical at both interval endpoints, the peer sample is expected to contain the same value.",
|
||||||
@ -108,7 +41,8 @@ pub(super) fn build_output(
|
|||||||
"uniqueKeys": sandwich.unique_keys.len(),
|
"uniqueKeys": sandwich.unique_keys.len(),
|
||||||
},
|
},
|
||||||
"bySetType": sandwich.by_set_type,
|
"bySetType": sandwich.by_set_type,
|
||||||
"classificationCounts": sandwich_classifications,
|
"classificationCounts": sandwich_classifications_to_json(sandwich),
|
||||||
|
"heatmap": sandwich_heatmap_to_json(sandwich),
|
||||||
"interpretation": {
|
"interpretation": {
|
||||||
"missingStableObject": "The source side proves a URI/hash is stable across an interval that contains the peer sample, but the peer sample has no such URI.",
|
"missingStableObject": "The source side proves a URI/hash is stable across an interval that contains the peer sample, but the peer sample has no such URI.",
|
||||||
"hashMismatchStableObject": "The source side proves a URI/hash is stable across an interval that contains the peer sample, but the peer sample has the same URI with a different hash.",
|
"hashMismatchStableObject": "The source side proves a URI/hash is stable across an interval that contains the peer sample, but the peer sample has the same URI with a different hash.",
|
||||||
@ -116,13 +50,19 @@ pub(super) fn build_output(
|
|||||||
"missingStableOutput": "The source side consistently outputs a VRP/VAP key across an interval that contains the peer sample, but the peer sample does not output it."
|
"missingStableOutput": "The source side consistently outputs a VRP/VAP key across an interval that contains the peer sample, but the peer sample does not output it."
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"intraRpChurn": {
|
||||||
|
"method": "For each side, compare adjacent samples independently. Object keys use object_uri|sha256, output keys use typed VRP/VAP canonical keys, and reject keys use rejected object URI without reason.",
|
||||||
|
"left": churn_records_to_json(&churn.left),
|
||||||
|
"right": churn_records_to_json(&churn.right),
|
||||||
|
"summary": churn_summary_to_json(&churn.summary),
|
||||||
|
},
|
||||||
"interpretation": {
|
"interpretation": {
|
||||||
"temporalResolvedMeaning": "Event appeared only on one side at one sample but aligned in the peer sequence later.",
|
"sandwichOnlyMeaning": "This schema intentionally exposes only sandwich anomalies for cross-RP triage. Older raw persistent, adjusted, edge, and rollover classifications are not emitted.",
|
||||||
"persistentMeaning": "Event did not align within the configured run/time window; inspect as a candidate RP behavior difference or persistent sync/input difference.",
|
"intraRpChurnMeaning": "Adjacent-run churn measures how much each RP's own observed object/output/reject sets changed during the sequence. It is a baseline for judging whether cross-RP anomalies exceed natural sequence movement.",
|
||||||
"limits": [
|
"limits": [
|
||||||
"Sequence triage only reads sequence JSONL plus referenced CCR/CIR files.",
|
"Sequence triage only reads sequence JSONL plus referenced CCR/CIR files.",
|
||||||
"It does not read report.json, logs, repo-bytes DB, cache, mirror, or raw objects for root cause proof.",
|
"It does not read report.json, logs, repo-bytes DB, cache, mirror, or raw objects for root cause proof.",
|
||||||
"Resolved temporal findings reduce false positives but do not prove the exact external repository update time."
|
"Sandwich anomalies indicate stable-interval contradictions, not final root cause. Manual evidence is still required for network, repository, or implementation attribution."
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
@ -153,35 +93,51 @@ fn sequence_summary(samples: &[SequenceSample]) -> Value {
|
|||||||
"vrps": sample.raw.vrps.or(Some(sample.vrps.len() as u64)),
|
"vrps": sample.raw.vrps.or(Some(sample.vrps.len() as u64)),
|
||||||
"vaps": sample.raw.vaps.or(Some(sample.vaps.len() as u64)),
|
"vaps": sample.raw.vaps.or(Some(sample.vaps.len() as u64)),
|
||||||
"objectCount": sample.object_uris.len(),
|
"objectCount": sample.object_uris.len(),
|
||||||
|
"objectHashCount": sample.object_hashes.len(),
|
||||||
"rejectCount": sample.rejects.len(),
|
"rejectCount": sample.rejects.len(),
|
||||||
"trustAnchorCount": sample.trust_anchors.len(),
|
"trustAnchorCount": sample.trust_anchors.len(),
|
||||||
})).collect::<Vec<_>>(),
|
})).collect::<Vec<_>>(),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
fn sample_to_json(sample: &SampleRecord) -> Value {
|
fn sandwich_classifications_to_json(sandwich: &SandwichAnalysis) -> Vec<Value> {
|
||||||
|
sandwich
|
||||||
|
.stats
|
||||||
|
.iter()
|
||||||
|
.map(|(class, stats)| {
|
||||||
json!({
|
json!({
|
||||||
"classification": sample.classification,
|
"classification": class,
|
||||||
"eventType": sample.event_type,
|
"occurrences": stats.total,
|
||||||
"key": sample.key,
|
"uniqueKeys": stats.unique_keys.len(),
|
||||||
"sourceSide": sample.source_side.as_str(),
|
"samples": stats.samples.iter().map(sandwich_sample_to_json).collect::<Vec<_>>(),
|
||||||
"sourceSeq": sample.source_seq,
|
|
||||||
"sourceRunId": sample.source_run_id,
|
|
||||||
"matchedSeq": sample.matched_seq,
|
|
||||||
"matchedRunId": sample.matched_run_id,
|
|
||||||
"note": sample.note,
|
|
||||||
})
|
})
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn adjusted_sample_to_json(sample: &AdjustedRecord) -> Value {
|
fn sandwich_heatmap_to_json(sandwich: &SandwichAnalysis) -> Vec<Value> {
|
||||||
|
sandwich
|
||||||
|
.heatmap
|
||||||
|
.values()
|
||||||
|
.map(sandwich_heatmap_row_to_json)
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn sandwich_heatmap_row_to_json(row: &SandwichHeatmapRow) -> Value {
|
||||||
json!({
|
json!({
|
||||||
"classification": sample.classification,
|
"window": row.window,
|
||||||
"eventType": sample.event_type,
|
"sourceSide": row.source_side.as_str(),
|
||||||
"key": sample.key,
|
"sourceStartSeq": row.source_start_seq,
|
||||||
"sourceSide": sample.source_side.as_str(),
|
"sourceStartRunId": row.source_start_run_id,
|
||||||
"sourceSeq": sample.source_seq,
|
"peerSeq": row.peer_seq,
|
||||||
"sourceRunId": sample.source_run_id,
|
"peerRunId": row.peer_run_id,
|
||||||
"note": sample.note,
|
"sourceEndSeq": row.source_end_seq,
|
||||||
|
"sourceEndRunId": row.source_end_run_id,
|
||||||
|
"sourceStartTime": row.source_start_time,
|
||||||
|
"peerTime": row.peer_time,
|
||||||
|
"sourceEndTime": row.source_end_time,
|
||||||
|
"total": row.total,
|
||||||
|
"counts": row.class_counts,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -206,21 +162,47 @@ fn sandwich_sample_to_json(sample: &SandwichRecord) -> Value {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
fn count_class(result: &AnalysisResult, class: &'static str) -> usize {
|
fn churn_records_to_json(records: &[ChurnRecord]) -> Vec<Value> {
|
||||||
result
|
records
|
||||||
.stats
|
.iter()
|
||||||
.get(class)
|
.map(|record| {
|
||||||
.map(|stats| stats.total)
|
json!({
|
||||||
.unwrap_or(0)
|
"side": record.side.as_str(),
|
||||||
|
"rpId": record.rp_id,
|
||||||
|
"fromSeq": record.from_seq,
|
||||||
|
"toSeq": record.to_seq,
|
||||||
|
"fromRunId": record.from_run_id,
|
||||||
|
"toRunId": record.to_run_id,
|
||||||
|
"setType": record.set_type,
|
||||||
|
"fromCount": record.from_count,
|
||||||
|
"toCount": record.to_count,
|
||||||
|
"addedCount": record.added_count,
|
||||||
|
"removedCount": record.removed_count,
|
||||||
|
"changedAbs": record.changed_abs,
|
||||||
|
"unionCount": record.union_count,
|
||||||
|
"changedRatioFrom": record.changed_ratio_from,
|
||||||
|
"changedRatioUnion": record.changed_ratio_union,
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn count_prefix(result: &AnalysisResult, prefix: &str) -> usize {
|
fn churn_summary_to_json(summary: &[ChurnSummaryRecord]) -> Vec<Value> {
|
||||||
result
|
summary
|
||||||
.stats
|
|
||||||
.iter()
|
.iter()
|
||||||
.filter(|(class, _)| class.starts_with(prefix))
|
.map(|record| {
|
||||||
.map(|(_, stats)| stats.total)
|
json!({
|
||||||
.sum()
|
"side": record.side.as_str(),
|
||||||
|
"setType": record.set_type,
|
||||||
|
"maxChangedAbs": record.max_changed_abs,
|
||||||
|
"avgChangedAbs": record.avg_changed_abs,
|
||||||
|
"maxChangedRatioFrom": record.max_changed_ratio_from,
|
||||||
|
"avgChangedRatioFrom": record.avg_changed_ratio_from,
|
||||||
|
"maxChangedRatioUnion": record.max_changed_ratio_union,
|
||||||
|
"avgChangedRatioUnion": record.avg_changed_ratio_union,
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(super) fn write_json(path: &Path, value: &Value) -> Result<(), String> {
|
pub(super) fn write_json(path: &Path, value: &Value) -> Result<(), String> {
|
||||||
@ -235,142 +217,22 @@ pub(super) fn write_markdown(path: &Path, output: &Value) -> Result<(), String>
|
|||||||
let mut lines = vec![
|
let mut lines = vec![
|
||||||
"# CCR/CIR Sequence Triage Summary".to_string(),
|
"# CCR/CIR Sequence Triage Summary".to_string(),
|
||||||
"".to_string(),
|
"".to_string(),
|
||||||
format!(
|
format!("- `schemaVersion`: `{}`", output["schemaVersion"].as_u64().unwrap_or(0)),
|
||||||
"- `generatedBy`: `{}`",
|
format!("- `generatedBy`: `{}`", output["generatedBy"].as_str().unwrap_or("")),
|
||||||
output["generatedBy"].as_str().unwrap_or("")
|
format!("- `inputContract`: `{}`", output["inputContract"].as_str().unwrap_or("")),
|
||||||
),
|
format!("- `leftSamples`: `{}`", output["left"]["sampleCount"].as_u64().unwrap_or(0)),
|
||||||
format!(
|
format!("- `rightSamples`: `{}`", output["right"]["sampleCount"].as_u64().unwrap_or(0)),
|
||||||
"- `leftSamples`: `{}`",
|
format!("- `alignWindowSecs`: `{}`", output["parameters"]["alignWindowSecs"].as_i64().unwrap_or(0)),
|
||||||
output["left"]["sampleCount"].as_u64().unwrap_or(0)
|
|
||||||
),
|
|
||||||
format!(
|
|
||||||
"- `rightSamples`: `{}`",
|
|
||||||
output["right"]["sampleCount"].as_u64().unwrap_or(0)
|
|
||||||
),
|
|
||||||
format!(
|
|
||||||
"- `alignWindowRuns`: `{}`",
|
|
||||||
output["parameters"]["alignWindowRuns"]
|
|
||||||
.as_u64()
|
|
||||||
.unwrap_or(0)
|
|
||||||
),
|
|
||||||
format!(
|
|
||||||
"- `alignWindowSecs`: `{}`",
|
|
||||||
output["parameters"]["alignWindowSecs"]
|
|
||||||
.as_i64()
|
|
||||||
.unwrap_or(0)
|
|
||||||
),
|
|
||||||
format!(
|
|
||||||
"- `warmupSamples`: `{}`",
|
|
||||||
output["adjusted"]["warmupSamples"].as_u64().unwrap_or(0)
|
|
||||||
),
|
|
||||||
format!(
|
|
||||||
"- `cooldownSamples`: `{}`",
|
|
||||||
output["adjusted"]["cooldownSamples"].as_u64().unwrap_or(0)
|
|
||||||
),
|
|
||||||
"".to_string(),
|
|
||||||
"## Classification Counts".to_string(),
|
|
||||||
"".to_string(),
|
|
||||||
"| Classification | Count |".to_string(),
|
|
||||||
"|---|---:|".to_string(),
|
|
||||||
];
|
|
||||||
if let Some(classes) = output["classificationCounts"].as_array() {
|
|
||||||
for item in classes {
|
|
||||||
lines.push(format!(
|
|
||||||
"| `{}` | {} |",
|
|
||||||
item["classification"].as_str().unwrap_or(""),
|
|
||||||
item["count"].as_u64().unwrap_or(0)
|
|
||||||
));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
lines.extend([
|
|
||||||
"".to_string(),
|
|
||||||
"## Adjusted Boundary / Rollover Classification".to_string(),
|
|
||||||
"".to_string(),
|
|
||||||
format!(
|
|
||||||
"- `rawPersistent`: `{}` occurrences / `{}` unique keys",
|
|
||||||
output["adjusted"]["rawPersistent"]["occurrences"]
|
|
||||||
.as_u64()
|
|
||||||
.unwrap_or(0),
|
|
||||||
output["adjusted"]["rawPersistent"]["uniqueKeys"]
|
|
||||||
.as_u64()
|
|
||||||
.unwrap_or(0)
|
|
||||||
),
|
|
||||||
format!(
|
|
||||||
"- `edgeFilteredPersistent`: `{}` occurrences / `{}` unique keys",
|
|
||||||
output["adjusted"]["edgeFilteredPersistent"]["occurrences"]
|
|
||||||
.as_u64()
|
|
||||||
.unwrap_or(0),
|
|
||||||
output["adjusted"]["edgeFilteredPersistent"]["uniqueKeys"]
|
|
||||||
.as_u64()
|
|
||||||
.unwrap_or(0)
|
|
||||||
),
|
|
||||||
format!(
|
|
||||||
"- `adjustedStablePersistent`: `{}` occurrences / `{}` unique keys",
|
|
||||||
output["adjusted"]["adjustedStablePersistent"]["occurrences"]
|
|
||||||
.as_u64()
|
|
||||||
.unwrap_or(0),
|
|
||||||
output["adjusted"]["adjustedStablePersistent"]["uniqueKeys"]
|
|
||||||
.as_u64()
|
|
||||||
.unwrap_or(0)
|
|
||||||
),
|
|
||||||
"".to_string(),
|
|
||||||
"| Adjusted Classification | Occurrences | Unique Keys |".to_string(),
|
|
||||||
"|---|---:|---:|".to_string(),
|
|
||||||
]);
|
|
||||||
if let Some(classes) = output["adjusted"]["classificationCounts"].as_array() {
|
|
||||||
for item in classes {
|
|
||||||
lines.push(format!(
|
|
||||||
"| `{}` | {} | {} |",
|
|
||||||
item["classification"].as_str().unwrap_or(""),
|
|
||||||
item["occurrences"].as_u64().unwrap_or(0),
|
|
||||||
item["uniqueKeys"].as_u64().unwrap_or(0)
|
|
||||||
));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if let Some(groups) = output["adjusted"]["stableObjectGroups"].as_array()
|
|
||||||
&& !groups.is_empty()
|
|
||||||
{
|
|
||||||
lines.extend([
|
|
||||||
"".to_string(),
|
|
||||||
"## Stable Object Groups".to_string(),
|
|
||||||
"".to_string(),
|
|
||||||
"| Source | CIR | Publication Point | Events | Physical Objects |".to_string(),
|
|
||||||
"|---|---|---|---:|---:|".to_string(),
|
|
||||||
]);
|
|
||||||
for group in groups {
|
|
||||||
lines.push(format!(
|
|
||||||
"| `{}/seq{}/{}` | `{}` | `{}` | {} | {} |",
|
|
||||||
group["sourceSide"].as_str().unwrap_or(""),
|
|
||||||
group["sourceSeq"].as_u64().unwrap_or(0),
|
|
||||||
group["sourceRunId"].as_str().unwrap_or(""),
|
|
||||||
group["sourceCirPath"].as_str().unwrap_or(""),
|
|
||||||
group["publicationPoint"].as_str().unwrap_or(""),
|
|
||||||
group["eventCount"].as_u64().unwrap_or(0),
|
|
||||||
group["physicalObjectCount"].as_u64().unwrap_or(0),
|
|
||||||
));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
lines.extend([
|
|
||||||
"".to_string(),
|
"".to_string(),
|
||||||
"## Sandwich Anomaly Check".to_string(),
|
"## Sandwich Anomaly Check".to_string(),
|
||||||
"".to_string(),
|
"".to_string(),
|
||||||
format!(
|
format!("- `occurrences`: `{}`", output["sandwich"]["totals"]["occurrences"].as_u64().unwrap_or(0)),
|
||||||
"- `occurrences`: `{}`",
|
format!("- `uniqueKeys`: `{}`", output["sandwich"]["totals"]["uniqueKeys"].as_u64().unwrap_or(0)),
|
||||||
output["sandwich"]["totals"]["occurrences"]
|
|
||||||
.as_u64()
|
|
||||||
.unwrap_or(0)
|
|
||||||
),
|
|
||||||
format!(
|
|
||||||
"- `uniqueKeys`: `{}`",
|
|
||||||
output["sandwich"]["totals"]["uniqueKeys"]
|
|
||||||
.as_u64()
|
|
||||||
.unwrap_or(0)
|
|
||||||
),
|
|
||||||
"- Rule: source side has two adjacent stable samples, and peer sample timestamp is strictly between them.".to_string(),
|
"- Rule: source side has two adjacent stable samples, and peer sample timestamp is strictly between them.".to_string(),
|
||||||
"".to_string(),
|
"".to_string(),
|
||||||
"| Sandwich Classification | Occurrences | Unique Keys |".to_string(),
|
"| Sandwich Classification | Occurrences | Unique Keys |".to_string(),
|
||||||
"|---|---:|---:|".to_string(),
|
"|---|---:|---:|".to_string(),
|
||||||
]);
|
];
|
||||||
if let Some(classes) = output["sandwich"]["classificationCounts"].as_array() {
|
if let Some(classes) = output["sandwich"]["classificationCounts"].as_array() {
|
||||||
for item in classes {
|
for item in classes {
|
||||||
lines.push(format!(
|
lines.push(format!(
|
||||||
@ -381,28 +243,86 @@ pub(super) fn write_markdown(path: &Path, output: &Value) -> Result<(), String>
|
|||||||
));
|
));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
lines.extend([
|
||||||
|
"".to_string(),
|
||||||
|
"## Sandwich Heatmap".to_string(),
|
||||||
|
"".to_string(),
|
||||||
|
"| Window | Missing Object | Hash Mismatch Object | Missing Output | Missing Reject | Total |".to_string(),
|
||||||
|
"|---|---:|---:|---:|---:|---:|".to_string(),
|
||||||
|
]);
|
||||||
|
if let Some(rows) = output["sandwich"]["heatmap"].as_array() {
|
||||||
|
for row in rows {
|
||||||
|
let counts = &row["counts"];
|
||||||
|
lines.push(format!(
|
||||||
|
"| `{}` | {} | {} | {} | {} | {} |",
|
||||||
|
row["window"].as_str().unwrap_or(""),
|
||||||
|
counts["PEER_MISSING_STABLE_OBJECT"].as_u64().unwrap_or(0),
|
||||||
|
counts["PEER_HASH_MISMATCH_STABLE_OBJECT"]
|
||||||
|
.as_u64()
|
||||||
|
.unwrap_or(0),
|
||||||
|
counts["PEER_MISSING_STABLE_OUTPUT"].as_u64().unwrap_or(0),
|
||||||
|
counts["PEER_MISSING_STABLE_REJECT"].as_u64().unwrap_or(0),
|
||||||
|
row["total"].as_u64().unwrap_or(0)
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
lines.extend([
|
||||||
|
"".to_string(),
|
||||||
|
"## Intra-RP Adjacent Churn Summary".to_string(),
|
||||||
|
"".to_string(),
|
||||||
|
"| Side | Set Type | Max Changed | Avg Changed | Max Ratio From | Max Ratio Union |"
|
||||||
|
.to_string(),
|
||||||
|
"|---|---|---:|---:|---:|---:|".to_string(),
|
||||||
|
]);
|
||||||
|
if let Some(summary) = output["intraRpChurn"]["summary"].as_array() {
|
||||||
|
for item in summary {
|
||||||
|
lines.push(format!(
|
||||||
|
"| `{}` | `{}` | {} | {:.2} | {:.6} | {:.6} |",
|
||||||
|
item["side"].as_str().unwrap_or(""),
|
||||||
|
item["setType"].as_str().unwrap_or(""),
|
||||||
|
item["maxChangedAbs"].as_u64().unwrap_or(0),
|
||||||
|
item["avgChangedAbs"].as_f64().unwrap_or(0.0),
|
||||||
|
item["maxChangedRatioFrom"].as_f64().unwrap_or(0.0),
|
||||||
|
item["maxChangedRatioUnion"].as_f64().unwrap_or(0.0)
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
lines.extend([
|
||||||
|
"".to_string(),
|
||||||
|
"## Intra-RP Adjacent Churn Details".to_string(),
|
||||||
|
"".to_string(),
|
||||||
|
"| Side | From -> To | Set Type | From | To | Added | Removed | Changed | Ratio From | Ratio Union |".to_string(),
|
||||||
|
"|---|---|---|---:|---:|---:|---:|---:|---:|---:|".to_string(),
|
||||||
|
]);
|
||||||
|
for side_key in ["left", "right"] {
|
||||||
|
if let Some(records) = output["intraRpChurn"][side_key].as_array() {
|
||||||
|
for item in records {
|
||||||
|
lines.push(format!(
|
||||||
|
"| `{}` | `{}` -> `{}` | `{}` | {} | {} | {} | {} | {} | {:.6} | {:.6} |",
|
||||||
|
item["side"].as_str().unwrap_or(""),
|
||||||
|
item["fromRunId"].as_str().unwrap_or(""),
|
||||||
|
item["toRunId"].as_str().unwrap_or(""),
|
||||||
|
item["setType"].as_str().unwrap_or(""),
|
||||||
|
item["fromCount"].as_u64().unwrap_or(0),
|
||||||
|
item["toCount"].as_u64().unwrap_or(0),
|
||||||
|
item["addedCount"].as_u64().unwrap_or(0),
|
||||||
|
item["removedCount"].as_u64().unwrap_or(0),
|
||||||
|
item["changedAbs"].as_u64().unwrap_or(0),
|
||||||
|
item["changedRatioFrom"].as_f64().unwrap_or(0.0),
|
||||||
|
item["changedRatioUnion"].as_f64().unwrap_or(0.0)
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
lines.extend([
|
lines.extend([
|
||||||
"".to_string(),
|
"".to_string(),
|
||||||
"## Interpretation".to_string(),
|
"## Interpretation".to_string(),
|
||||||
"".to_string(),
|
"".to_string(),
|
||||||
"- `TEMPORAL_LAG_RESOLVED` / `CONTENT_ROLLOVER_RESOLVED` 表示差异在后续采样中对齐,优先视为采样时刻或仓库滚动窗口差异。".to_string(),
|
"- `PEER_MISSING_STABLE_OBJECT` / `PEER_HASH_MISMATCH_STABLE_OBJECT` 表示输入对象在一个 RP 的稳定夹层内,与另一个 RP 的中间采样点不一致。".to_string(),
|
||||||
"- `PERSISTENT_*` 表示配置窗口内仍未对齐,才是后续人工排查的高价值候选。".to_string(),
|
"- `PEER_MISSING_STABLE_OUTPUT` 表示输出 VRP/VAP 在稳定夹层内缺失,是更接近最终产物差异的信号。".to_string(),
|
||||||
"- `EDGE_*` 表示差异落在序列首尾,缺少前置或后续观察,不能直接当作实现差异。".to_string(),
|
"- `PEER_MISSING_STABLE_REJECT` 表示拒绝列表在稳定夹层内缺失,用于发现验证失败捕获差异。".to_string(),
|
||||||
"- `STABLE_*` 表示经过首尾边界过滤和 URI-level rollover 过滤后仍存在的稳定候选。".to_string(),
|
"- `intraRpChurn` 是同一个 RP 自身相邻 run 的自然变化基线,不代表跨 RP 异常。".to_string(),
|
||||||
]);
|
]);
|
||||||
std::fs::write(path, lines.join("\n") + "\n")
|
std::fs::write(path, lines.join("\n") + "\n")
|
||||||
.map_err(|e| format!("write markdown failed: {}: {e}", path.display()))
|
.map_err(|e| format!("write markdown failed: {}: {e}", path.display()))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(super) fn write_samples_jsonl(path: &Path, result: &AnalysisResult) -> Result<(), String> {
|
|
||||||
let mut body = String::new();
|
|
||||||
for stats in result.stats.values() {
|
|
||||||
for sample in &stats.samples {
|
|
||||||
body.push_str(
|
|
||||||
&serde_json::to_string(&sample_to_json(sample)).map_err(|e| e.to_string())?,
|
|
||||||
);
|
|
||||||
body.push('\n');
|
|
||||||
}
|
|
||||||
}
|
|
||||||
std::fs::write(path, body).map_err(|e| format!("write samples failed: {}: {e}", path.display()))
|
|
||||||
}
|
|
||||||
|
|||||||
@ -2,7 +2,7 @@ use std::collections::BTreeSet;
|
|||||||
|
|
||||||
use super::args::Args;
|
use super::args::Args;
|
||||||
use super::io::format_time;
|
use super::io::format_time;
|
||||||
use super::model::{SandwichAnalysis, SandwichRecord, SequenceSample, Side};
|
use super::model::{SandwichAnalysis, SandwichHeatmapRow, SandwichRecord, SequenceSample, Side};
|
||||||
|
|
||||||
pub(super) fn build_sandwich_analysis(
|
pub(super) fn build_sandwich_analysis(
|
||||||
args: &Args,
|
args: &Args,
|
||||||
@ -241,6 +241,7 @@ impl SandwichAnalysis {
|
|||||||
self.total_occurrences += 1;
|
self.total_occurrences += 1;
|
||||||
self.unique_keys.insert(sandwich_unique_key(&record));
|
self.unique_keys.insert(sandwich_unique_key(&record));
|
||||||
*self.by_set_type.entry(record.set_type).or_default() += 1;
|
*self.by_set_type.entry(record.set_type).or_default() += 1;
|
||||||
|
self.add_heatmap_record(class, &record);
|
||||||
let stats = self.stats.entry(class).or_default();
|
let stats = self.stats.entry(class).or_default();
|
||||||
stats.total += 1;
|
stats.total += 1;
|
||||||
stats.unique_keys.insert(sandwich_unique_key(&record));
|
stats.unique_keys.insert(sandwich_unique_key(&record));
|
||||||
@ -248,6 +249,37 @@ impl SandwichAnalysis {
|
|||||||
stats.samples.push(record);
|
stats.samples.push(record);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn add_heatmap_record(&mut self, class: &'static str, record: &SandwichRecord) {
|
||||||
|
let key = format!(
|
||||||
|
"{}|{}|{}",
|
||||||
|
record.source_start_run_id, record.peer_run_id, record.source_end_run_id
|
||||||
|
);
|
||||||
|
let window = format!(
|
||||||
|
"{} - {} - {}",
|
||||||
|
record.source_start_run_id, record.peer_run_id, record.source_end_run_id
|
||||||
|
);
|
||||||
|
let row = self
|
||||||
|
.heatmap
|
||||||
|
.entry(key)
|
||||||
|
.or_insert_with(|| SandwichHeatmapRow {
|
||||||
|
window,
|
||||||
|
source_side: record.source_side,
|
||||||
|
source_start_seq: record.source_start_seq,
|
||||||
|
source_start_run_id: record.source_start_run_id.clone(),
|
||||||
|
source_end_seq: record.source_end_seq,
|
||||||
|
source_end_run_id: record.source_end_run_id.clone(),
|
||||||
|
peer_seq: record.peer_seq,
|
||||||
|
peer_run_id: record.peer_run_id.clone(),
|
||||||
|
source_start_time: record.source_start_time.clone(),
|
||||||
|
peer_time: record.peer_time.clone(),
|
||||||
|
source_end_time: record.source_end_time.clone(),
|
||||||
|
total: 0,
|
||||||
|
class_counts: Default::default(),
|
||||||
|
});
|
||||||
|
row.total += 1;
|
||||||
|
*row.class_counts.entry(class).or_default() += 1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn sandwich_unique_key(record: &SandwichRecord) -> String {
|
fn sandwich_unique_key(record: &SandwichRecord) -> String {
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user