diff --git a/src/tools/sequence_triage_ccr_cir.rs b/src/tools/sequence_triage_ccr_cir.rs index 8d1dc95..009abe7 100644 --- a/src/tools/sequence_triage_ccr_cir.rs +++ b/src/tools/sequence_triage_ccr_cir.rs @@ -533,6 +533,16 @@ mod tests { ); assert_eq!(output["sandwich"]["heatmap"].as_array().unwrap().len(), 1); assert_eq!(output["sandwich"]["heatmap"][0]["total"].as_u64(), Some(5)); + assert_eq!( + uri_prefix_group_occurrences(&output, "object", "rsync://example.net/pp/"), + 2 + ); + assert_eq!( + uri_prefix_group_occurrences(&output, "reject_uri", "rsync://example.net/pp/"), + 1 + ); + assert_eq!(asn_group_occurrences(&output, "vrp_output", "AS64496"), 1); + assert_eq!(asn_group_occurrences(&output, "vap_output", "AS64496"), 1); assert!(output.get("classificationCounts").is_none()); assert!(output.get("adjusted").is_none()); } @@ -547,6 +557,31 @@ mod tests { .unwrap_or(0) } + fn uri_prefix_group_occurrences(output: &Value, set_type: &str, uri_prefix: &str) -> u64 { + output["sandwich"]["uriPrefixGroups"] + .as_array() + .unwrap() + .iter() + .find(|item| { + item["setType"].as_str() == Some(set_type) + && item["uriPrefix"].as_str() == Some(uri_prefix) + }) + .and_then(|item| item["occurrences"].as_u64()) + .unwrap_or(0) + } + + fn asn_group_occurrences(output: &Value, set_type: &str, asn: &str) -> u64 { + output["sandwich"]["asnGroups"] + .as_array() + .unwrap() + .iter() + .find(|item| { + item["setType"].as_str() == Some(set_type) && item["asn"].as_str() == Some(asn) + }) + .and_then(|item| item["occurrences"].as_u64()) + .unwrap_or(0) + } + fn churn_record<'a>( output: &'a Value, side: &str, diff --git a/src/tools/sequence_triage_ccr_cir/model.rs b/src/tools/sequence_triage_ccr_cir/model.rs index 2280e1c..9b21989 100644 --- a/src/tools/sequence_triage_ccr_cir/model.rs +++ b/src/tools/sequence_triage_ccr_cir/model.rs @@ -102,6 +102,16 @@ pub(super) struct SandwichHeatmapRow { pub(super) class_counts: BTreeMap<&'static str, usize>, } +#[derive(Clone, Debug, Default)] +pub(super) struct SandwichGroupStats { + pub(super) group_value: String, + pub(super) set_type: &'static str, + pub(super) total: usize, + pub(super) unique_keys: BTreeSet, + pub(super) class_counts: BTreeMap<&'static str, usize>, + pub(super) sample_keys: Vec, +} + #[derive(Clone, Debug, Default)] pub(super) struct SandwichAnalysis { pub(super) total_occurrences: usize, @@ -109,6 +119,8 @@ pub(super) struct SandwichAnalysis { pub(super) by_set_type: BTreeMap<&'static str, usize>, pub(super) stats: BTreeMap<&'static str, SandwichClassStats>, pub(super) heatmap: BTreeMap, + pub(super) uri_prefix_groups: BTreeMap, + pub(super) asn_groups: BTreeMap, } #[derive(Clone, Debug)] diff --git a/src/tools/sequence_triage_ccr_cir/output.rs b/src/tools/sequence_triage_ccr_cir/output.rs index 25a2721..a0ce02c 100644 --- a/src/tools/sequence_triage_ccr_cir/output.rs +++ b/src/tools/sequence_triage_ccr_cir/output.rs @@ -6,8 +6,8 @@ use serde_json::{Value, json}; use super::args::Args; use super::io::{format_time, path_string}; use super::model::{ - ChurnRecord, ChurnSummaryRecord, IntraRpChurn, SandwichAnalysis, SandwichHeatmapRow, - SandwichRecord, SequenceSample, + ChurnRecord, ChurnSummaryRecord, IntraRpChurn, SandwichAnalysis, SandwichGroupStats, + SandwichHeatmapRow, SandwichRecord, SequenceSample, }; pub(super) fn build_output( @@ -43,6 +43,8 @@ pub(super) fn build_output( "bySetType": sandwich.by_set_type, "classificationCounts": sandwich_classifications_to_json(sandwich), "heatmap": sandwich_heatmap_to_json(sandwich), + "uriPrefixGroups": sandwich_uri_prefix_groups_to_json(sandwich), + "asnGroups": sandwich_asn_groups_to_json(sandwich), "interpretation": { "missingStableObject": "The source side proves a URI/hash is stable across an interval that contains the peer sample, but the peer sample has no such URI.", "hashMismatchStableObject": "The source side proves a URI/hash is stable across an interval that contains the peer sample, but the peer sample has the same URI with a different hash.", @@ -123,6 +125,61 @@ fn sandwich_heatmap_to_json(sandwich: &SandwichAnalysis) -> Vec { .collect() } +fn sandwich_uri_prefix_groups_to_json(sandwich: &SandwichAnalysis) -> Vec { + let mut groups = sandwich + .uri_prefix_groups + .values() + .collect::>(); + groups.sort_by(|left, right| { + right + .total + .cmp(&left.total) + .then_with(|| left.set_type.cmp(right.set_type)) + .then_with(|| left.group_value.cmp(&right.group_value)) + }); + groups + .into_iter() + .map(sandwich_uri_prefix_group_to_json) + .collect() +} + +fn sandwich_asn_groups_to_json(sandwich: &SandwichAnalysis) -> Vec { + let mut groups = sandwich + .asn_groups + .values() + .collect::>(); + groups.sort_by(|left, right| { + right + .total + .cmp(&left.total) + .then_with(|| left.set_type.cmp(right.set_type)) + .then_with(|| left.group_value.cmp(&right.group_value)) + }); + groups.into_iter().map(sandwich_asn_group_to_json).collect() +} + +fn sandwich_uri_prefix_group_to_json(group: &SandwichGroupStats) -> Value { + json!({ + "uriPrefix": group.group_value, + "setType": group.set_type, + "occurrences": group.total, + "uniqueKeys": group.unique_keys.len(), + "classificationCounts": group.class_counts, + "sampleKeys": group.sample_keys, + }) +} + +fn sandwich_asn_group_to_json(group: &SandwichGroupStats) -> Value { + json!({ + "asn": group.group_value, + "setType": group.set_type, + "occurrences": group.total, + "uniqueKeys": group.unique_keys.len(), + "classificationCounts": group.class_counts, + "sampleKeys": group.sample_keys, + }) +} + fn sandwich_heatmap_row_to_json(row: &SandwichHeatmapRow) -> Value { json!({ "window": row.window, @@ -266,6 +323,46 @@ pub(super) fn write_markdown(path: &Path, output: &Value) -> Result<(), String> )); } } + lines.extend([ + "".to_string(), + "## Object / Reject URI Prefix Groups".to_string(), + "".to_string(), + "| Set Type | URI Prefix | Occurrences | Unique Keys | Classes | Sample Keys |".to_string(), + "|---|---|---:|---:|---|---|".to_string(), + ]); + if let Some(groups) = output["sandwich"]["uriPrefixGroups"].as_array() { + for group in groups { + lines.push(format!( + "| `{}` | `{}` | {} | {} | {} | {} |", + md_escape(group["setType"].as_str().unwrap_or("")), + md_escape(group["uriPrefix"].as_str().unwrap_or("")), + group["occurrences"].as_u64().unwrap_or(0), + group["uniqueKeys"].as_u64().unwrap_or(0), + md_escape(&compact_counts(&group["classificationCounts"])), + md_escape(&compact_sample_keys(group)) + )); + } + } + lines.extend([ + "".to_string(), + "## Output ASN Groups".to_string(), + "".to_string(), + "| Set Type | ASN | Occurrences | Unique Keys | Classes | Sample Keys |".to_string(), + "|---|---|---:|---:|---|---|".to_string(), + ]); + if let Some(groups) = output["sandwich"]["asnGroups"].as_array() { + for group in groups { + lines.push(format!( + "| `{}` | `{}` | {} | {} | {} | {} |", + md_escape(group["setType"].as_str().unwrap_or("")), + md_escape(group["asn"].as_str().unwrap_or("")), + group["occurrences"].as_u64().unwrap_or(0), + group["uniqueKeys"].as_u64().unwrap_or(0), + md_escape(&compact_counts(&group["classificationCounts"])), + md_escape(&compact_sample_keys(group)) + )); + } + } lines.extend([ "".to_string(), "## Intra-RP Adjacent Churn Summary".to_string(), @@ -326,3 +423,35 @@ pub(super) fn write_markdown(path: &Path, output: &Value) -> Result<(), String> std::fs::write(path, lines.join("\n") + "\n") .map_err(|e| format!("write markdown failed: {}: {e}", path.display())) } + +fn compact_counts(value: &Value) -> String { + value + .as_object() + .map(|counts| { + let mut items = counts + .iter() + .map(|(key, count)| format!("{}={}", key, count.as_u64().unwrap_or(0))) + .collect::>(); + items.sort(); + items.join(", ") + }) + .unwrap_or_default() +} + +fn compact_sample_keys(group: &Value) -> String { + group["sampleKeys"] + .as_array() + .map(|items| { + items + .iter() + .take(5) + .filter_map(|item| item.as_str()) + .collect::>() + .join("; ") + }) + .unwrap_or_default() +} + +fn md_escape(value: &str) -> String { + value.replace('|', "\\|").replace('\n', " ") +} diff --git a/src/tools/sequence_triage_ccr_cir/sandwich.rs b/src/tools/sequence_triage_ccr_cir/sandwich.rs index 1c27e2e..f7c796f 100644 --- a/src/tools/sequence_triage_ccr_cir/sandwich.rs +++ b/src/tools/sequence_triage_ccr_cir/sandwich.rs @@ -2,7 +2,9 @@ use std::collections::BTreeSet; use super::args::Args; use super::io::format_time; -use super::model::{SandwichAnalysis, SandwichHeatmapRow, SandwichRecord, SequenceSample, Side}; +use super::model::{ + SandwichAnalysis, SandwichGroupStats, SandwichHeatmapRow, SandwichRecord, SequenceSample, Side, +}; pub(super) fn build_sandwich_analysis( args: &Args, @@ -242,6 +244,7 @@ impl SandwichAnalysis { self.unique_keys.insert(sandwich_unique_key(&record)); *self.by_set_type.entry(record.set_type).or_default() += 1; self.add_heatmap_record(class, &record); + self.add_group_record(class, &record, sample_limit.min(20)); let stats = self.stats.entry(class).or_default(); stats.total += 1; stats.unique_keys.insert(sandwich_unique_key(&record)); @@ -280,6 +283,43 @@ impl SandwichAnalysis { row.total += 1; *row.class_counts.entry(class).or_default() += 1; } + + fn add_group_record( + &mut self, + class: &'static str, + record: &SandwichRecord, + group_sample_limit: usize, + ) { + match record.set_type { + "object" | "reject_uri" => { + let prefix = uri_directory_prefix(&record.key); + let group_key = format!("{}|{}", record.set_type, prefix); + let group = + self.uri_prefix_groups + .entry(group_key) + .or_insert_with(|| SandwichGroupStats { + group_value: prefix, + set_type: record.set_type, + ..Default::default() + }); + add_to_group(group, class, record, group_sample_limit); + } + "vrp_output" | "vap_output" => { + let asn = output_key_asn(&record.key); + let group_key = format!("{}|{}", record.set_type, asn); + let group = + self.asn_groups + .entry(group_key) + .or_insert_with(|| SandwichGroupStats { + group_value: asn, + set_type: record.set_type, + ..Default::default() + }); + add_to_group(group, class, record, group_sample_limit); + } + _ => {} + } + } } fn sandwich_unique_key(record: &SandwichRecord) -> String { @@ -291,3 +331,33 @@ fn sandwich_unique_key(record: &SandwichRecord) -> String { record.key ) } + +fn add_to_group( + group: &mut SandwichGroupStats, + class: &'static str, + record: &SandwichRecord, + group_sample_limit: usize, +) { + group.total += 1; + group.unique_keys.insert(sandwich_unique_key(record)); + *group.class_counts.entry(class).or_default() += 1; + if group.sample_keys.len() < group_sample_limit + && !group.sample_keys.iter().any(|item| item == &record.key) + { + group.sample_keys.push(record.key.clone()); + } +} + +fn uri_directory_prefix(uri: &str) -> String { + let scheme_end = uri.find("://").map(|index| index + 3).unwrap_or(0); + if let Some(slash_index) = uri.rfind('/') + && slash_index >= scheme_end + { + return uri[..=slash_index].to_string(); + } + uri.to_string() +} + +fn output_key_asn(key: &str) -> String { + key.split('|').next().unwrap_or(key).to_string() +}