20260601 sequence triage增加聚合视图

This commit is contained in:
yuyr 2026-06-01 20:55:39 +08:00
parent ae00e676d7
commit 938ef53173
4 changed files with 249 additions and 3 deletions

View File

@ -533,6 +533,16 @@ mod tests {
); );
assert_eq!(output["sandwich"]["heatmap"].as_array().unwrap().len(), 1); assert_eq!(output["sandwich"]["heatmap"].as_array().unwrap().len(), 1);
assert_eq!(output["sandwich"]["heatmap"][0]["total"].as_u64(), Some(5)); assert_eq!(output["sandwich"]["heatmap"][0]["total"].as_u64(), Some(5));
assert_eq!(
uri_prefix_group_occurrences(&output, "object", "rsync://example.net/pp/"),
2
);
assert_eq!(
uri_prefix_group_occurrences(&output, "reject_uri", "rsync://example.net/pp/"),
1
);
assert_eq!(asn_group_occurrences(&output, "vrp_output", "AS64496"), 1);
assert_eq!(asn_group_occurrences(&output, "vap_output", "AS64496"), 1);
assert!(output.get("classificationCounts").is_none()); assert!(output.get("classificationCounts").is_none());
assert!(output.get("adjusted").is_none()); assert!(output.get("adjusted").is_none());
} }
@ -547,6 +557,31 @@ mod tests {
.unwrap_or(0) .unwrap_or(0)
} }
/// Looks up `sandwich.uriPrefixGroups` in `output` and returns the
/// `occurrences` value of the first entry whose `setType` and `uriPrefix`
/// both match. Returns 0 when no entry matches or the value is not a `u64`.
///
/// Panics if `sandwich.uriPrefixGroups` is not a JSON array.
fn uri_prefix_group_occurrences(output: &Value, set_type: &str, uri_prefix: &str) -> u64 {
    let groups = output["sandwich"]["uriPrefixGroups"].as_array().unwrap();
    for entry in groups {
        let same_set_type = entry["setType"].as_str() == Some(set_type);
        if same_set_type && entry["uriPrefix"].as_str() == Some(uri_prefix) {
            // Only the first matching entry is consulted.
            return entry["occurrences"].as_u64().unwrap_or(0);
        }
    }
    0
}
/// Looks up `sandwich.asnGroups` in `output` and returns the `occurrences`
/// value of the first entry whose `setType` and `asn` both match. Returns 0
/// when no entry matches or the value is not a `u64`.
///
/// Panics if `sandwich.asnGroups` is not a JSON array.
fn asn_group_occurrences(output: &Value, set_type: &str, asn: &str) -> u64 {
    let groups = output["sandwich"]["asnGroups"].as_array().unwrap();
    for entry in groups {
        let same_set_type = entry["setType"].as_str() == Some(set_type);
        if same_set_type && entry["asn"].as_str() == Some(asn) {
            // Only the first matching entry is consulted.
            return entry["occurrences"].as_u64().unwrap_or(0);
        }
    }
    0
}
fn churn_record<'a>( fn churn_record<'a>(
output: &'a Value, output: &'a Value,
side: &str, side: &str,

View File

@ -102,6 +102,16 @@ pub(super) struct SandwichHeatmapRow {
pub(super) class_counts: BTreeMap<&'static str, usize>, pub(super) class_counts: BTreeMap<&'static str, usize>,
} }
/// Aggregated sandwich statistics for one `(set_type, group_value)` bucket.
///
/// Used for both the URI-prefix grouping and the ASN grouping held by
/// `SandwichAnalysis`.
#[derive(Clone, Debug, Default)]
pub(super) struct SandwichGroupStats {
/// The value records are grouped by: a URI directory prefix for URI-prefix
/// groups, or the leading `|`-separated component of the record key for ASN
/// groups.
pub(super) group_value: String,
/// Set type of the records in this group, copied from the first record.
pub(super) set_type: &'static str,
/// Number of records added to this group.
pub(super) total: usize,
/// Distinct `sandwich_unique_key` values of the records in this group.
pub(super) unique_keys: BTreeSet<String>,
/// Number of records per classification label.
pub(super) class_counts: BTreeMap<&'static str, usize>,
/// Distinct record keys kept as examples, in first-seen order and capped by
/// the sample limit passed when records are added.
pub(super) sample_keys: Vec<String>,
}
#[derive(Clone, Debug, Default)] #[derive(Clone, Debug, Default)]
pub(super) struct SandwichAnalysis { pub(super) struct SandwichAnalysis {
pub(super) total_occurrences: usize, pub(super) total_occurrences: usize,
@ -109,6 +119,8 @@ pub(super) struct SandwichAnalysis {
pub(super) by_set_type: BTreeMap<&'static str, usize>, pub(super) by_set_type: BTreeMap<&'static str, usize>,
pub(super) stats: BTreeMap<&'static str, SandwichClassStats>, pub(super) stats: BTreeMap<&'static str, SandwichClassStats>,
pub(super) heatmap: BTreeMap<String, SandwichHeatmapRow>, pub(super) heatmap: BTreeMap<String, SandwichHeatmapRow>,
pub(super) uri_prefix_groups: BTreeMap<String, SandwichGroupStats>,
pub(super) asn_groups: BTreeMap<String, SandwichGroupStats>,
} }
#[derive(Clone, Debug)] #[derive(Clone, Debug)]

View File

@ -6,8 +6,8 @@ use serde_json::{Value, json};
use super::args::Args; use super::args::Args;
use super::io::{format_time, path_string}; use super::io::{format_time, path_string};
use super::model::{ use super::model::{
ChurnRecord, ChurnSummaryRecord, IntraRpChurn, SandwichAnalysis, SandwichHeatmapRow, ChurnRecord, ChurnSummaryRecord, IntraRpChurn, SandwichAnalysis, SandwichGroupStats,
SandwichRecord, SequenceSample, SandwichHeatmapRow, SandwichRecord, SequenceSample,
}; };
pub(super) fn build_output( pub(super) fn build_output(
@ -43,6 +43,8 @@ pub(super) fn build_output(
"bySetType": sandwich.by_set_type, "bySetType": sandwich.by_set_type,
"classificationCounts": sandwich_classifications_to_json(sandwich), "classificationCounts": sandwich_classifications_to_json(sandwich),
"heatmap": sandwich_heatmap_to_json(sandwich), "heatmap": sandwich_heatmap_to_json(sandwich),
"uriPrefixGroups": sandwich_uri_prefix_groups_to_json(sandwich),
"asnGroups": sandwich_asn_groups_to_json(sandwich),
"interpretation": { "interpretation": {
"missingStableObject": "The source side proves a URI/hash is stable across an interval that contains the peer sample, but the peer sample has no such URI.", "missingStableObject": "The source side proves a URI/hash is stable across an interval that contains the peer sample, but the peer sample has no such URI.",
"hashMismatchStableObject": "The source side proves a URI/hash is stable across an interval that contains the peer sample, but the peer sample has the same URI with a different hash.", "hashMismatchStableObject": "The source side proves a URI/hash is stable across an interval that contains the peer sample, but the peer sample has the same URI with a different hash.",
@ -123,6 +125,61 @@ fn sandwich_heatmap_to_json(sandwich: &SandwichAnalysis) -> Vec<Value> {
.collect() .collect()
} }
fn sandwich_uri_prefix_groups_to_json(sandwich: &SandwichAnalysis) -> Vec<Value> {
let mut groups = sandwich
.uri_prefix_groups
.values()
.collect::<Vec<&SandwichGroupStats>>();
groups.sort_by(|left, right| {
right
.total
.cmp(&left.total)
.then_with(|| left.set_type.cmp(right.set_type))
.then_with(|| left.group_value.cmp(&right.group_value))
});
groups
.into_iter()
.map(sandwich_uri_prefix_group_to_json)
.collect()
}
fn sandwich_asn_groups_to_json(sandwich: &SandwichAnalysis) -> Vec<Value> {
let mut groups = sandwich
.asn_groups
.values()
.collect::<Vec<&SandwichGroupStats>>();
groups.sort_by(|left, right| {
right
.total
.cmp(&left.total)
.then_with(|| left.set_type.cmp(right.set_type))
.then_with(|| left.group_value.cmp(&right.group_value))
});
groups.into_iter().map(sandwich_asn_group_to_json).collect()
}
fn sandwich_uri_prefix_group_to_json(group: &SandwichGroupStats) -> Value {
json!({
"uriPrefix": group.group_value,
"setType": group.set_type,
"occurrences": group.total,
"uniqueKeys": group.unique_keys.len(),
"classificationCounts": group.class_counts,
"sampleKeys": group.sample_keys,
})
}
fn sandwich_asn_group_to_json(group: &SandwichGroupStats) -> Value {
json!({
"asn": group.group_value,
"setType": group.set_type,
"occurrences": group.total,
"uniqueKeys": group.unique_keys.len(),
"classificationCounts": group.class_counts,
"sampleKeys": group.sample_keys,
})
}
fn sandwich_heatmap_row_to_json(row: &SandwichHeatmapRow) -> Value { fn sandwich_heatmap_row_to_json(row: &SandwichHeatmapRow) -> Value {
json!({ json!({
"window": row.window, "window": row.window,
@ -266,6 +323,46 @@ pub(super) fn write_markdown(path: &Path, output: &Value) -> Result<(), String>
)); ));
} }
} }
lines.extend([
"".to_string(),
"## Object / Reject URI Prefix Groups".to_string(),
"".to_string(),
"| Set Type | URI Prefix | Occurrences | Unique Keys | Classes | Sample Keys |".to_string(),
"|---|---|---:|---:|---|---|".to_string(),
]);
if let Some(groups) = output["sandwich"]["uriPrefixGroups"].as_array() {
for group in groups {
lines.push(format!(
"| `{}` | `{}` | {} | {} | {} | {} |",
md_escape(group["setType"].as_str().unwrap_or("")),
md_escape(group["uriPrefix"].as_str().unwrap_or("")),
group["occurrences"].as_u64().unwrap_or(0),
group["uniqueKeys"].as_u64().unwrap_or(0),
md_escape(&compact_counts(&group["classificationCounts"])),
md_escape(&compact_sample_keys(group))
));
}
}
lines.extend([
"".to_string(),
"## Output ASN Groups".to_string(),
"".to_string(),
"| Set Type | ASN | Occurrences | Unique Keys | Classes | Sample Keys |".to_string(),
"|---|---|---:|---:|---|---|".to_string(),
]);
if let Some(groups) = output["sandwich"]["asnGroups"].as_array() {
for group in groups {
lines.push(format!(
"| `{}` | `{}` | {} | {} | {} | {} |",
md_escape(group["setType"].as_str().unwrap_or("")),
md_escape(group["asn"].as_str().unwrap_or("")),
group["occurrences"].as_u64().unwrap_or(0),
group["uniqueKeys"].as_u64().unwrap_or(0),
md_escape(&compact_counts(&group["classificationCounts"])),
md_escape(&compact_sample_keys(group))
));
}
}
lines.extend([ lines.extend([
"".to_string(), "".to_string(),
"## Intra-RP Adjacent Churn Summary".to_string(), "## Intra-RP Adjacent Churn Summary".to_string(),
@ -326,3 +423,35 @@ pub(super) fn write_markdown(path: &Path, output: &Value) -> Result<(), String>
std::fs::write(path, lines.join("\n") + "\n") std::fs::write(path, lines.join("\n") + "\n")
.map_err(|e| format!("write markdown failed: {}: {e}", path.display())) .map_err(|e| format!("write markdown failed: {}: {e}", path.display()))
} }
/// Formats a JSON object of counters as `key=count` pairs joined by `", "`.
///
/// The formatted pairs are sorted as strings. A count that is not a `u64`
/// is shown as 0, and a non-object `value` yields an empty string.
fn compact_counts(value: &Value) -> String {
    let Some(counts) = value.as_object() else {
        return String::new();
    };

    let mut rendered = Vec::with_capacity(counts.len());
    for (label, count) in counts {
        rendered.push(format!("{}={}", label, count.as_u64().unwrap_or(0)));
    }
    rendered.sort();
    rendered.join(", ")
}
/// Joins the string entries among the first five `sampleKeys` of `group`
/// with `"; "`. Returns an empty string when `sampleKeys` is not an array.
fn compact_sample_keys(group: &Value) -> String {
    let Some(samples) = group["sampleKeys"].as_array() else {
        return String::new();
    };

    let mut shown: Vec<&str> = Vec::new();
    // The cap of five applies before non-string entries are dropped.
    for sample in samples.iter().take(5) {
        if let Some(text) = sample.as_str() {
            shown.push(text);
        }
    }
    shown.join("; ")
}
/// Makes `value` safe for a Markdown table cell: every `|` becomes `\|` and
/// every newline becomes a single space.
fn md_escape(value: &str) -> String {
    let mut escaped = String::with_capacity(value.len());
    for ch in value.chars() {
        match ch {
            '|' => escaped.push_str("\\|"),
            '\n' => escaped.push(' '),
            other => escaped.push(other),
        }
    }
    escaped
}

View File

@ -2,7 +2,9 @@ use std::collections::BTreeSet;
use super::args::Args; use super::args::Args;
use super::io::format_time; use super::io::format_time;
use super::model::{SandwichAnalysis, SandwichHeatmapRow, SandwichRecord, SequenceSample, Side}; use super::model::{
SandwichAnalysis, SandwichGroupStats, SandwichHeatmapRow, SandwichRecord, SequenceSample, Side,
};
pub(super) fn build_sandwich_analysis( pub(super) fn build_sandwich_analysis(
args: &Args, args: &Args,
@ -242,6 +244,7 @@ impl SandwichAnalysis {
self.unique_keys.insert(sandwich_unique_key(&record)); self.unique_keys.insert(sandwich_unique_key(&record));
*self.by_set_type.entry(record.set_type).or_default() += 1; *self.by_set_type.entry(record.set_type).or_default() += 1;
self.add_heatmap_record(class, &record); self.add_heatmap_record(class, &record);
self.add_group_record(class, &record, sample_limit.min(20));
let stats = self.stats.entry(class).or_default(); let stats = self.stats.entry(class).or_default();
stats.total += 1; stats.total += 1;
stats.unique_keys.insert(sandwich_unique_key(&record)); stats.unique_keys.insert(sandwich_unique_key(&record));
@ -280,6 +283,43 @@ impl SandwichAnalysis {
row.total += 1; row.total += 1;
*row.class_counts.entry(class).or_default() += 1; *row.class_counts.entry(class).or_default() += 1;
} }
/// Adds `record` to the aggregate group chosen by its set type.
///
/// `object` and `reject_uri` records are grouped by the directory prefix of
/// their key; `vrp_output` and `vap_output` records are grouped by the
/// leading `|`-separated component of their key. Records with any other set
/// type are ignored. Groups are keyed by `"<set_type>|<group value>"`.
fn add_group_record(
    &mut self,
    class: &'static str,
    record: &SandwichRecord,
    group_sample_limit: usize,
) {
    // Choose the destination map and the grouping value in one step.
    let (groups, group_value) = match record.set_type {
        "object" | "reject_uri" => (
            &mut self.uri_prefix_groups,
            uri_directory_prefix(&record.key),
        ),
        "vrp_output" | "vap_output" => (&mut self.asn_groups, output_key_asn(&record.key)),
        _ => return,
    };

    let group_key = format!("{}|{}", record.set_type, group_value);
    let group = groups
        .entry(group_key)
        .or_insert_with(|| SandwichGroupStats {
            group_value,
            set_type: record.set_type,
            ..Default::default()
        });
    add_to_group(group, class, record, group_sample_limit);
}
} }
fn sandwich_unique_key(record: &SandwichRecord) -> String { fn sandwich_unique_key(record: &SandwichRecord) -> String {
@ -291,3 +331,33 @@ fn sandwich_unique_key(record: &SandwichRecord) -> String {
record.key record.key
) )
} }
/// Folds one record into `group`: bumps the total and the per-class count,
/// records its unique key, and keeps the record key as a sample if the
/// group holds fewer than `group_sample_limit` samples and does not already
/// contain that key.
fn add_to_group(
    group: &mut SandwichGroupStats,
    class: &'static str,
    record: &SandwichRecord,
    group_sample_limit: usize,
) {
    group.total += 1;
    group.unique_keys.insert(sandwich_unique_key(record));
    *group.class_counts.entry(class).or_default() += 1;

    if group.sample_keys.len() >= group_sample_limit {
        return;
    }
    if group.sample_keys.contains(&record.key) {
        return;
    }
    group.sample_keys.push(record.key.clone());
}
/// Returns `uri` truncated just after its last `/`.
///
/// A slash that is part of the `://` scheme separator does not count, so a
/// URI with no slash after the scheme, or no slash at all, is returned
/// unchanged.
fn uri_directory_prefix(uri: &str) -> String {
    // Byte offset just past "://", or 0 when there is no scheme separator.
    let after_scheme = match uri.find("://") {
        Some(separator) => separator + 3,
        None => 0,
    };

    match uri.rfind('/') {
        Some(last_slash) if last_slash >= after_scheme => uri[..=last_slash].to_string(),
        _ => uri.to_string(),
    }
}
/// Returns the part of `key` before its first `|`, or the whole key when it
/// contains no `|`.
fn output_key_asn(key: &str) -> String {
    match key.split_once('|') {
        Some((head, _rest)) => head.to_string(),
        None => key.to_string(),
    }
}