20260531 拆分sequence triage工具并修复远端产物拉取
This commit is contained in:
parent
a29fe266a4
commit
00d7109503
@ -56,17 +56,16 @@ def rsync_from_remote(target: str, source: str | Path, destination: Path) -> Non
|
||||
|
||||
def rsync_run_artifacts_from_remote(target: str, source: str | Path, destination: Path) -> None:
|
||||
destination.mkdir(parents=True, exist_ok=True)
|
||||
include_args = [
|
||||
"--include", "result.ccr",
|
||||
"--include", "result.cir",
|
||||
"--include", "process-time.txt",
|
||||
"--include", "remote-run-meta.json",
|
||||
"--include", "exit-code.txt",
|
||||
"--include", "started-at.txt",
|
||||
"--include", "finished-at.txt",
|
||||
"--exclude", "*",
|
||||
]
|
||||
run_local(["rsync", "-a", *include_args, f"{target}:{source}/", f"{destination}/"])
|
||||
for name in [
|
||||
"result.ccr",
|
||||
"result.cir",
|
||||
"process-time.txt",
|
||||
"remote-run-meta.json",
|
||||
"exit-code.txt",
|
||||
"started-at.txt",
|
||||
"finished-at.txt",
|
||||
]:
|
||||
run_local(["rsync", "-a", f"{target}:{source}/{name}", f"{destination}/"])
|
||||
|
||||
|
||||
def load_json(path: Path) -> Any:
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
503
src/tools/sequence_triage_ccr_cir/adjusted.rs
Normal file
503
src/tools/sequence_triage_ccr_cir/adjusted.rs
Normal file
@ -0,0 +1,503 @@
|
||||
use std::collections::{BTreeMap, BTreeSet};
|
||||
|
||||
use serde_json::{Value, json};
|
||||
|
||||
use super::analysis::peer_sample_at_seq;
|
||||
use super::args::Args;
|
||||
use super::io::{format_time, path_string};
|
||||
use super::model::{
|
||||
AdjustedAnalysis, AdjustedRecord, DiffEvent, EdgePosition, SequenceSample, Side,
|
||||
};
|
||||
|
||||
pub(super) fn build_adjusted_analysis(
|
||||
args: &Args,
|
||||
left: &[SequenceSample],
|
||||
right: &[SequenceSample],
|
||||
events: &[DiffEvent],
|
||||
) -> AdjustedAnalysis {
|
||||
let mut analysis = AdjustedAnalysis {
|
||||
raw_persistent_occurrences: events.len(),
|
||||
raw_persistent_unique_keys: unique_event_count(events),
|
||||
..Default::default()
|
||||
};
|
||||
let mut edge_filtered_unique = BTreeSet::new();
|
||||
|
||||
for event in events {
|
||||
let source_samples = samples_for_side(event.source_side, left, right);
|
||||
let peer_samples = samples_for_side(opposite_side(event.source_side), left, right);
|
||||
let edge = edge_position(source_samples, event.source_seq, args);
|
||||
if edge == EdgePosition::Stable {
|
||||
analysis.edge_filtered_occurrences += 1;
|
||||
edge_filtered_unique.insert(event_identity(event));
|
||||
}
|
||||
let (classification, note) =
|
||||
adjusted_classification(event, edge, source_samples, peer_samples);
|
||||
analysis.add(
|
||||
classification,
|
||||
AdjustedRecord {
|
||||
classification,
|
||||
event_type: event.event_type,
|
||||
key: event.key.clone(),
|
||||
source_side: event.source_side,
|
||||
source_seq: event.source_seq,
|
||||
source_run_id: event.source_run_id.clone(),
|
||||
note,
|
||||
},
|
||||
args.sample_limit,
|
||||
);
|
||||
}
|
||||
|
||||
analysis.edge_filtered_unique_keys = edge_filtered_unique.len();
|
||||
let mut stable_unique = BTreeSet::new();
|
||||
for (class, stats) in &analysis.stats {
|
||||
if class.starts_with("STABLE_") {
|
||||
analysis.adjusted_stable_occurrences += stats.total;
|
||||
stable_unique.extend(stats.unique_keys.iter().cloned());
|
||||
}
|
||||
}
|
||||
analysis.adjusted_stable_unique_keys = stable_unique.len();
|
||||
let timeline_limit = if args.timeline_sample_limit == 0 {
|
||||
args.sample_limit
|
||||
} else {
|
||||
args.timeline_sample_limit
|
||||
};
|
||||
analysis.uri_timeline_samples =
|
||||
build_uri_timeline_samples(left, right, &analysis, timeline_limit);
|
||||
analysis.stable_object_groups =
|
||||
build_stable_object_groups(&analysis, left, right, timeline_limit);
|
||||
analysis
|
||||
}
|
||||
|
||||
fn adjusted_classification(
|
||||
event: &DiffEvent,
|
||||
edge: EdgePosition,
|
||||
source: &[SequenceSample],
|
||||
peer: &[SequenceSample],
|
||||
) -> (&'static str, String) {
|
||||
if event.event_type == "trust_anchor" {
|
||||
if peer_has_same_ta_identity(peer, &event.key) {
|
||||
return (
|
||||
"TA_PROJECTION_FORMAT_DIFFERENCE",
|
||||
"same TAL hash and TA certificate hash exist on peer side; full TA projection string differs".to_string(),
|
||||
);
|
||||
}
|
||||
if edge == EdgePosition::Stable {
|
||||
return (
|
||||
"STABLE_TA_DIVERGENCE",
|
||||
"trust-anchor identity is not aligned in a non-boundary sample".to_string(),
|
||||
);
|
||||
}
|
||||
return edge_unresolved_class(edge);
|
||||
}
|
||||
|
||||
if event.event_type == "object_hash" {
|
||||
if let Some((uri, hash)) = split_object_hash_key(&event.key) {
|
||||
let peer_has_uri = peer_has_uri_any(peer, uri);
|
||||
let peer_has_different_hash = peer_has_uri_different_hash_any(peer, uri, hash);
|
||||
let peer_hash_at_source_seq = peer_sample_at_seq(peer, event.source_seq)
|
||||
.and_then(|peer_sample| peer_sample.objects.get(uri));
|
||||
let source_later_matches_peer_hash = peer_hash_at_source_seq.is_some_and(|peer_hash| {
|
||||
source_future_has_hash(source, event.source_seq, uri, peer_hash)
|
||||
});
|
||||
if edge == EdgePosition::Leading && peer_has_different_hash {
|
||||
return (
|
||||
"EDGE_LEADING_CONTENT_ROLLOVER",
|
||||
"source leading-edge hash is absent, while peer already has the same URI with another hash".to_string(),
|
||||
);
|
||||
}
|
||||
if edge == EdgePosition::Trailing && peer_has_different_hash {
|
||||
return (
|
||||
"EDGE_TRAILING_CONTENT_ROLLOVER",
|
||||
"source trailing-edge hash is absent, while peer has the same URI with another hash and no later source observation exists".to_string(),
|
||||
);
|
||||
}
|
||||
if edge == EdgePosition::Stable && source_later_matches_peer_hash {
|
||||
return (
|
||||
"MID_SEQUENCE_CONTENT_ROLLOVER_RESOLVED",
|
||||
"peer already has a newer same-URI hash at this seq and source catches up later in the observed sequence".to_string(),
|
||||
);
|
||||
}
|
||||
if edge == EdgePosition::Stable && peer_has_different_hash {
|
||||
return (
|
||||
"STABLE_CONTENT_DIVERGENCE",
|
||||
"same URI exists on peer side with a different hash in a non-boundary sample"
|
||||
.to_string(),
|
||||
);
|
||||
}
|
||||
if edge == EdgePosition::Stable && !peer_has_uri {
|
||||
return (
|
||||
"STABLE_OBJECT_SET_DIVERGENCE",
|
||||
"content key is derived from a non-boundary URI that is absent on peer side"
|
||||
.to_string(),
|
||||
);
|
||||
}
|
||||
}
|
||||
if edge == EdgePosition::Stable {
|
||||
return (
|
||||
"STABLE_CONTENT_DIVERGENCE",
|
||||
"object hash key remains unaligned in a non-boundary sample".to_string(),
|
||||
);
|
||||
}
|
||||
return edge_unresolved_class(edge);
|
||||
}
|
||||
|
||||
if edge != EdgePosition::Stable {
|
||||
return edge_unresolved_class(edge);
|
||||
}
|
||||
|
||||
let stable_class = match event.raw_class {
|
||||
"PERSISTENT_OBJECT_SET_DIVERGENCE" => "STABLE_OBJECT_SET_DIVERGENCE",
|
||||
"PERSISTENT_REJECT_DIVERGENCE" => "STABLE_REJECT_DIVERGENCE",
|
||||
"PERSISTENT_OUTPUT_DIVERGENCE" => "STABLE_OUTPUT_DIVERGENCE",
|
||||
"PERSISTENT_CONTENT_DIVERGENCE" => "STABLE_CONTENT_DIVERGENCE",
|
||||
"PERSISTENT_TA_DIFFERENCE" => "STABLE_TA_DIVERGENCE",
|
||||
_ => "STABLE_UNCLASSIFIED_DIVERGENCE",
|
||||
};
|
||||
(
|
||||
stable_class,
|
||||
"persistent event appears in a non-boundary sample after sequence edge filtering"
|
||||
.to_string(),
|
||||
)
|
||||
}
|
||||
|
||||
fn edge_unresolved_class(edge: EdgePosition) -> (&'static str, String) {
|
||||
match edge {
|
||||
EdgePosition::Leading => (
|
||||
"EDGE_LEADING_UNRESOLVED",
|
||||
"event appears only from the warmup edge and may predate the observed sequence"
|
||||
.to_string(),
|
||||
),
|
||||
EdgePosition::Trailing => (
|
||||
"EDGE_TRAILING_UNRESOLVED",
|
||||
"event appears at the cooldown edge and lacks later peer observations".to_string(),
|
||||
),
|
||||
EdgePosition::Stable => (
|
||||
"STABLE_UNCLASSIFIED_DIVERGENCE",
|
||||
"event is not on an edge but no specific stable category matched".to_string(),
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
impl AdjustedAnalysis {
|
||||
fn add(&mut self, class: &'static str, record: AdjustedRecord, sample_limit: usize) {
|
||||
let stats = self.stats.entry(class).or_default();
|
||||
stats.total += 1;
|
||||
stats.unique_keys.insert(format!(
|
||||
"{}|{}|{}",
|
||||
record.event_type,
|
||||
record.source_side.as_str(),
|
||||
record.key
|
||||
));
|
||||
if stats.samples.len() < sample_limit {
|
||||
stats.samples.push(record);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn unique_event_count(events: &[DiffEvent]) -> usize {
|
||||
events
|
||||
.iter()
|
||||
.map(event_identity)
|
||||
.collect::<BTreeSet<_>>()
|
||||
.len()
|
||||
}
|
||||
|
||||
fn event_identity(event: &DiffEvent) -> String {
|
||||
format!(
|
||||
"{}|{}|{}",
|
||||
event.event_type,
|
||||
event.source_side.as_str(),
|
||||
event.key
|
||||
)
|
||||
}
|
||||
|
||||
fn samples_for_side<'a>(
|
||||
side: Side,
|
||||
left: &'a [SequenceSample],
|
||||
right: &'a [SequenceSample],
|
||||
) -> &'a [SequenceSample] {
|
||||
match side {
|
||||
Side::Left => left,
|
||||
Side::Right => right,
|
||||
}
|
||||
}
|
||||
|
||||
fn opposite_side(side: Side) -> Side {
|
||||
match side {
|
||||
Side::Left => Side::Right,
|
||||
Side::Right => Side::Left,
|
||||
}
|
||||
}
|
||||
|
||||
fn edge_position(samples: &[SequenceSample], seq: u32, args: &Args) -> EdgePosition {
|
||||
let Some(index) = samples.iter().position(|sample| sample.raw.seq == seq) else {
|
||||
return EdgePosition::Stable;
|
||||
};
|
||||
if index < args.warmup_samples {
|
||||
return EdgePosition::Leading;
|
||||
}
|
||||
if samples.len().saturating_sub(index) <= args.cooldown_samples {
|
||||
return EdgePosition::Trailing;
|
||||
}
|
||||
EdgePosition::Stable
|
||||
}
|
||||
|
||||
fn peer_has_uri_any(peer: &[SequenceSample], uri: &str) -> bool {
|
||||
peer.iter().any(|sample| sample.objects.contains_key(uri))
|
||||
}
|
||||
|
||||
fn peer_has_uri_different_hash_any(peer: &[SequenceSample], uri: &str, hash: &str) -> bool {
|
||||
peer.iter()
|
||||
.filter_map(|sample| sample.objects.get(uri))
|
||||
.any(|peer_hash| peer_hash != hash)
|
||||
}
|
||||
|
||||
fn source_future_has_hash(
|
||||
source: &[SequenceSample],
|
||||
seq: u32,
|
||||
uri: &str,
|
||||
expected_hash: &str,
|
||||
) -> bool {
|
||||
source
|
||||
.iter()
|
||||
.filter(|sample| sample.raw.seq > seq)
|
||||
.any(|sample| {
|
||||
sample
|
||||
.objects
|
||||
.get(uri)
|
||||
.is_some_and(|hash| hash == expected_hash)
|
||||
})
|
||||
}
|
||||
|
||||
fn peer_has_same_ta_identity(peer: &[SequenceSample], key: &str) -> bool {
|
||||
let Some(identity) = trust_anchor_identity(key) else {
|
||||
return false;
|
||||
};
|
||||
peer.iter().any(|sample| {
|
||||
sample
|
||||
.trust_anchors
|
||||
.iter()
|
||||
.filter_map(|peer_key| trust_anchor_identity(peer_key))
|
||||
.any(|peer_identity| peer_identity == identity)
|
||||
})
|
||||
}
|
||||
|
||||
fn trust_anchor_identity(key: &str) -> Option<String> {
|
||||
let parts = key.split('|').collect::<Vec<_>>();
|
||||
if parts.len() != 4 {
|
||||
return None;
|
||||
}
|
||||
Some(format!("{}|{}", parts[2], parts[3]))
|
||||
}
|
||||
|
||||
fn split_object_hash_key(key: &str) -> Option<(&str, &str)> {
|
||||
key.rsplit_once('|')
|
||||
}
|
||||
|
||||
fn build_uri_timeline_samples(
|
||||
left: &[SequenceSample],
|
||||
right: &[SequenceSample],
|
||||
adjusted: &AdjustedAnalysis,
|
||||
limit: usize,
|
||||
) -> Vec<Value> {
|
||||
let mut uris = BTreeSet::new();
|
||||
for stats in adjusted.stats.values() {
|
||||
for sample in &stats.samples {
|
||||
if sample.event_type == "object_hash"
|
||||
&& let Some((uri, _)) = split_object_hash_key(&sample.key)
|
||||
{
|
||||
uris.insert(uri.to_string());
|
||||
}
|
||||
if sample.event_type == "object_uri" {
|
||||
uris.insert(sample.key.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
uris.into_iter()
|
||||
.take(limit)
|
||||
.map(|uri| {
|
||||
json!({
|
||||
"uri": uri,
|
||||
"left": timeline_for_uri(left, &uri),
|
||||
"right": timeline_for_uri(right, &uri),
|
||||
})
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn timeline_for_uri(samples: &[SequenceSample], uri: &str) -> Vec<Value> {
|
||||
samples
|
||||
.iter()
|
||||
.filter_map(|sample| {
|
||||
sample.objects.get(uri).map(|hash| {
|
||||
json!({
|
||||
"seq": sample.raw.seq,
|
||||
"runId": sample.raw.run_id,
|
||||
"validationTime": format_time(sample.validation_time),
|
||||
"hash": hash,
|
||||
})
|
||||
})
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn build_stable_object_groups(
|
||||
adjusted: &AdjustedAnalysis,
|
||||
left: &[SequenceSample],
|
||||
right: &[SequenceSample],
|
||||
limit: usize,
|
||||
) -> Vec<Value> {
|
||||
let mut groups: BTreeMap<String, StableObjectGroup> = BTreeMap::new();
|
||||
let Some(stats) = adjusted.stats.get("STABLE_OBJECT_SET_DIVERGENCE") else {
|
||||
return Vec::new();
|
||||
};
|
||||
for sample in &stats.samples {
|
||||
let physical_uri = physical_object_uri(sample);
|
||||
let group_key = format!(
|
||||
"{}|{}|{}|{}",
|
||||
sample.source_side.as_str(),
|
||||
sample.source_seq,
|
||||
sample.source_run_id,
|
||||
publication_point_prefix(&physical_uri)
|
||||
);
|
||||
let group = groups
|
||||
.entry(group_key)
|
||||
.or_insert_with(|| StableObjectGroup::new(sample, &physical_uri));
|
||||
if group.source_cir_path.is_none()
|
||||
&& let Some(source_sample) = sample_by_side_seq_run(
|
||||
left,
|
||||
right,
|
||||
sample.source_side,
|
||||
sample.source_seq,
|
||||
&sample.source_run_id,
|
||||
)
|
||||
{
|
||||
group.source_cir_path = Some(path_string(&source_sample.cir_path));
|
||||
}
|
||||
group.add(sample, physical_uri);
|
||||
}
|
||||
groups
|
||||
.into_values()
|
||||
.take(limit)
|
||||
.map(StableObjectGroup::to_json)
|
||||
.collect()
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
struct StableObjectGroup {
|
||||
source_side: Side,
|
||||
source_seq: u32,
|
||||
source_run_id: String,
|
||||
source_cir_path: Option<String>,
|
||||
publication_point: String,
|
||||
event_count: usize,
|
||||
event_types: BTreeMap<&'static str, usize>,
|
||||
physical_objects: BTreeMap<String, StablePhysicalObject>,
|
||||
}
|
||||
|
||||
impl StableObjectGroup {
|
||||
fn new(sample: &AdjustedRecord, physical_uri: &str) -> Self {
|
||||
Self {
|
||||
source_side: sample.source_side,
|
||||
source_seq: sample.source_seq,
|
||||
source_run_id: sample.source_run_id.clone(),
|
||||
source_cir_path: None,
|
||||
publication_point: publication_point_prefix(physical_uri),
|
||||
event_count: 0,
|
||||
event_types: BTreeMap::new(),
|
||||
physical_objects: BTreeMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
fn add(&mut self, sample: &AdjustedRecord, physical_uri: String) {
|
||||
self.event_count += 1;
|
||||
*self.event_types.entry(sample.event_type).or_default() += 1;
|
||||
let object = self
|
||||
.physical_objects
|
||||
.entry(physical_uri.clone())
|
||||
.or_insert_with(|| StablePhysicalObject {
|
||||
extension: object_extension(&physical_uri).to_string(),
|
||||
uri: physical_uri,
|
||||
event_types: BTreeSet::new(),
|
||||
hashes: BTreeSet::new(),
|
||||
});
|
||||
object.event_types.insert(sample.event_type);
|
||||
if let Some(hash) = event_hash(sample) {
|
||||
object.hashes.insert(hash.to_string());
|
||||
}
|
||||
}
|
||||
|
||||
fn to_json(self) -> Value {
|
||||
json!({
|
||||
"sourceSide": self.source_side.as_str(),
|
||||
"sourceSeq": self.source_seq,
|
||||
"sourceRunId": self.source_run_id,
|
||||
"sourceCirPath": self.source_cir_path,
|
||||
"publicationPoint": self.publication_point,
|
||||
"eventCount": self.event_count,
|
||||
"eventTypes": self.event_types,
|
||||
"physicalObjectCount": self.physical_objects.len(),
|
||||
"physicalObjects": self.physical_objects.into_values().map(StablePhysicalObject::to_json).collect::<Vec<_>>(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
struct StablePhysicalObject {
|
||||
uri: String,
|
||||
extension: String,
|
||||
event_types: BTreeSet<&'static str>,
|
||||
hashes: BTreeSet<String>,
|
||||
}
|
||||
|
||||
impl StablePhysicalObject {
|
||||
fn to_json(self) -> Value {
|
||||
json!({
|
||||
"uri": self.uri,
|
||||
"extension": self.extension,
|
||||
"eventTypes": self.event_types,
|
||||
"hashes": self.hashes,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
fn physical_object_uri(sample: &AdjustedRecord) -> String {
|
||||
if sample.event_type == "object_hash"
|
||||
&& let Some((uri, _)) = split_object_hash_key(&sample.key)
|
||||
{
|
||||
return uri.to_string();
|
||||
}
|
||||
sample.key.clone()
|
||||
}
|
||||
|
||||
fn event_hash(sample: &AdjustedRecord) -> Option<&str> {
|
||||
if sample.event_type == "object_hash" {
|
||||
split_object_hash_key(&sample.key).map(|(_, hash)| hash)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn publication_point_prefix(uri: &str) -> String {
|
||||
uri.rsplit_once('/')
|
||||
.map(|(prefix, _)| format!("{prefix}/"))
|
||||
.unwrap_or_else(|| uri.to_string())
|
||||
}
|
||||
|
||||
fn object_extension(uri: &str) -> &str {
|
||||
uri.rsplit_once('.')
|
||||
.map(|(_, extension)| extension)
|
||||
.unwrap_or("")
|
||||
}
|
||||
|
||||
fn sample_by_side_seq_run<'a>(
|
||||
left: &'a [SequenceSample],
|
||||
right: &'a [SequenceSample],
|
||||
side: Side,
|
||||
seq: u32,
|
||||
run_id: &str,
|
||||
) -> Option<&'a SequenceSample> {
|
||||
samples_for_side(side, left, right)
|
||||
.iter()
|
||||
.find(|sample| sample.raw.seq == seq && sample.raw.run_id == run_id)
|
||||
}
|
||||
364
src/tools/sequence_triage_ccr_cir/analysis.rs
Normal file
364
src/tools/sequence_triage_ccr_cir/analysis.rs
Normal file
@ -0,0 +1,364 @@
|
||||
use std::collections::BTreeSet;
|
||||
|
||||
use super::args::Args;
|
||||
use super::io::object_hash_key;
|
||||
use super::model::{
|
||||
AnalysisResult, DiffEvent, EventOccurrence, SampleRecord, SequenceSample, Side,
|
||||
};
|
||||
|
||||
pub(super) fn analyze_set<F>(
|
||||
result: &mut AnalysisResult,
|
||||
event_type: &'static str,
|
||||
left: &[SequenceSample],
|
||||
right: &[SequenceSample],
|
||||
extract: F,
|
||||
resolved_class: &'static str,
|
||||
persistent_class: &'static str,
|
||||
args: &Args,
|
||||
) where
|
||||
F: for<'a> Fn(&'a SequenceSample) -> &'a BTreeSet<String>,
|
||||
{
|
||||
analyze_direction(
|
||||
result,
|
||||
event_type,
|
||||
Side::Left,
|
||||
left,
|
||||
right,
|
||||
&extract,
|
||||
resolved_class,
|
||||
persistent_class,
|
||||
args,
|
||||
);
|
||||
analyze_direction(
|
||||
result,
|
||||
event_type,
|
||||
Side::Right,
|
||||
right,
|
||||
left,
|
||||
&extract,
|
||||
resolved_class,
|
||||
persistent_class,
|
||||
args,
|
||||
);
|
||||
}
|
||||
|
||||
fn analyze_direction<F>(
|
||||
result: &mut AnalysisResult,
|
||||
event_type: &'static str,
|
||||
source_side: Side,
|
||||
source: &[SequenceSample],
|
||||
peer: &[SequenceSample],
|
||||
extract: &F,
|
||||
resolved_class: &'static str,
|
||||
persistent_class: &'static str,
|
||||
args: &Args,
|
||||
) where
|
||||
F: for<'a> Fn(&'a SequenceSample) -> &'a BTreeSet<String>,
|
||||
{
|
||||
for sample in source {
|
||||
let source_set = extract(sample);
|
||||
for key in source_set {
|
||||
if peer_sample_at_seq(peer, sample.raw.seq)
|
||||
.is_some_and(|peer_sample| extract(peer_sample).contains(key))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
if let Some(matched) = find_future_match(peer, sample, key, extract, args) {
|
||||
result.add(
|
||||
resolved_class,
|
||||
SampleRecord {
|
||||
classification: resolved_class,
|
||||
event_type,
|
||||
key: key.clone(),
|
||||
source_side,
|
||||
source_seq: sample.raw.seq,
|
||||
source_run_id: sample.raw.run_id.clone(),
|
||||
matched_seq: Some(matched.seq),
|
||||
matched_run_id: Some(matched.run_id),
|
||||
note: format!(
|
||||
"matched in {} sequence within alignment window",
|
||||
matched.side.as_str()
|
||||
),
|
||||
},
|
||||
args.sample_limit,
|
||||
);
|
||||
} else {
|
||||
result.add(
|
||||
persistent_class,
|
||||
SampleRecord {
|
||||
classification: persistent_class,
|
||||
event_type,
|
||||
key: key.clone(),
|
||||
source_side,
|
||||
source_seq: sample.raw.seq,
|
||||
source_run_id: sample.raw.run_id.clone(),
|
||||
matched_seq: None,
|
||||
matched_run_id: None,
|
||||
note: "no matching event in peer sequence alignment window".to_string(),
|
||||
},
|
||||
args.sample_limit,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) fn analyze_hash_rollover(
|
||||
result: &mut AnalysisResult,
|
||||
left: &[SequenceSample],
|
||||
right: &[SequenceSample],
|
||||
args: &Args,
|
||||
) {
|
||||
for (source_side, source, peer) in [(Side::Left, left, right), (Side::Right, right, left)] {
|
||||
for sample in source {
|
||||
for (uri, hash) in &sample.objects {
|
||||
if peer_sample_at_seq(peer, sample.raw.seq)
|
||||
.and_then(|peer_sample| peer_sample.objects.get(uri))
|
||||
.is_some_and(|peer_hash| peer_hash == hash)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
if let Some(peer_sample) = peer_sample_at_seq(peer, sample.raw.seq)
|
||||
&& peer_sample.objects.contains_key(uri)
|
||||
&& find_future_hash_match(peer, sample, uri, hash, args).is_some()
|
||||
{
|
||||
let matched =
|
||||
find_future_hash_match(peer, sample, uri, hash, args).expect("match");
|
||||
result.add(
|
||||
"CONTENT_ROLLOVER_RESOLVED",
|
||||
SampleRecord {
|
||||
classification: "CONTENT_ROLLOVER_RESOLVED",
|
||||
event_type: "object_content_rollover",
|
||||
key: object_hash_key(uri, hash),
|
||||
source_side,
|
||||
source_seq: sample.raw.seq,
|
||||
source_run_id: sample.raw.run_id.clone(),
|
||||
matched_seq: Some(matched.seq),
|
||||
matched_run_id: Some(matched.run_id),
|
||||
note: "same URI hash appeared in peer sequence later".to_string(),
|
||||
},
|
||||
args.sample_limit,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) fn collect_persistent_events(
|
||||
left: &[SequenceSample],
|
||||
right: &[SequenceSample],
|
||||
args: &Args,
|
||||
) -> Vec<DiffEvent> {
|
||||
let mut events = Vec::new();
|
||||
collect_persistent_set(
|
||||
&mut events,
|
||||
"object_uri",
|
||||
"PERSISTENT_OBJECT_SET_DIVERGENCE",
|
||||
left,
|
||||
right,
|
||||
|sample| &sample.object_uris,
|
||||
args,
|
||||
);
|
||||
collect_persistent_set(
|
||||
&mut events,
|
||||
"object_hash",
|
||||
"PERSISTENT_CONTENT_DIVERGENCE",
|
||||
left,
|
||||
right,
|
||||
|sample| &sample.object_hashes,
|
||||
args,
|
||||
);
|
||||
collect_persistent_set(
|
||||
&mut events,
|
||||
"reject_uri",
|
||||
"PERSISTENT_REJECT_DIVERGENCE",
|
||||
left,
|
||||
right,
|
||||
|sample| &sample.rejects,
|
||||
args,
|
||||
);
|
||||
collect_persistent_set(
|
||||
&mut events,
|
||||
"trust_anchor",
|
||||
"PERSISTENT_TA_DIFFERENCE",
|
||||
left,
|
||||
right,
|
||||
|sample| &sample.trust_anchors,
|
||||
args,
|
||||
);
|
||||
collect_persistent_set(
|
||||
&mut events,
|
||||
"vrp_output",
|
||||
"PERSISTENT_OUTPUT_DIVERGENCE",
|
||||
left,
|
||||
right,
|
||||
|sample| &sample.vrps,
|
||||
args,
|
||||
);
|
||||
collect_persistent_set(
|
||||
&mut events,
|
||||
"vap_output",
|
||||
"PERSISTENT_OUTPUT_DIVERGENCE",
|
||||
left,
|
||||
right,
|
||||
|sample| &sample.vaps,
|
||||
args,
|
||||
);
|
||||
events
|
||||
}
|
||||
|
||||
fn collect_persistent_set<F>(
|
||||
events: &mut Vec<DiffEvent>,
|
||||
event_type: &'static str,
|
||||
raw_class: &'static str,
|
||||
left: &[SequenceSample],
|
||||
right: &[SequenceSample],
|
||||
extract: F,
|
||||
args: &Args,
|
||||
) where
|
||||
F: for<'a> Fn(&'a SequenceSample) -> &'a BTreeSet<String>,
|
||||
{
|
||||
collect_persistent_direction(
|
||||
events,
|
||||
event_type,
|
||||
raw_class,
|
||||
Side::Left,
|
||||
left,
|
||||
right,
|
||||
&extract,
|
||||
args,
|
||||
);
|
||||
collect_persistent_direction(
|
||||
events,
|
||||
event_type,
|
||||
raw_class,
|
||||
Side::Right,
|
||||
right,
|
||||
left,
|
||||
&extract,
|
||||
args,
|
||||
);
|
||||
}
|
||||
|
||||
fn collect_persistent_direction<F>(
|
||||
events: &mut Vec<DiffEvent>,
|
||||
event_type: &'static str,
|
||||
raw_class: &'static str,
|
||||
source_side: Side,
|
||||
source: &[SequenceSample],
|
||||
peer: &[SequenceSample],
|
||||
extract: &F,
|
||||
args: &Args,
|
||||
) where
|
||||
F: for<'a> Fn(&'a SequenceSample) -> &'a BTreeSet<String>,
|
||||
{
|
||||
for sample in source {
|
||||
for key in extract(sample) {
|
||||
if peer_sample_at_seq(peer, sample.raw.seq)
|
||||
.is_some_and(|peer_sample| extract(peer_sample).contains(key))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
if find_future_match(peer, sample, key, extract, args).is_some() {
|
||||
continue;
|
||||
}
|
||||
events.push(DiffEvent {
|
||||
event_type,
|
||||
raw_class,
|
||||
key: key.clone(),
|
||||
source_side,
|
||||
source_seq: sample.raw.seq,
|
||||
source_run_id: sample.raw.run_id.clone(),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl AnalysisResult {
|
||||
fn add(&mut self, class: &'static str, record: SampleRecord, sample_limit: usize) {
|
||||
let stats = self.stats.entry(class).or_default();
|
||||
stats.total += 1;
|
||||
if stats.samples.len() < sample_limit {
|
||||
stats.samples.push(record);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) fn peer_sample_at_seq(peer: &[SequenceSample], seq: u32) -> Option<&SequenceSample> {
|
||||
peer.iter().find(|sample| sample.raw.seq == seq)
|
||||
}
|
||||
|
||||
fn find_future_match<F>(
|
||||
peer: &[SequenceSample],
|
||||
source: &SequenceSample,
|
||||
key: &str,
|
||||
extract: &F,
|
||||
args: &Args,
|
||||
) -> Option<EventOccurrence>
|
||||
where
|
||||
F: for<'a> Fn(&'a SequenceSample) -> &'a BTreeSet<String>,
|
||||
{
|
||||
peer.iter()
|
||||
.filter(|candidate| is_in_alignment_window(source, candidate, args))
|
||||
.find(|candidate| extract(candidate).contains(key))
|
||||
.map(|candidate| {
|
||||
occurrence(
|
||||
candidate,
|
||||
if candidate.raw.side.as_deref() == Some("left") {
|
||||
Side::Left
|
||||
} else {
|
||||
Side::Right
|
||||
},
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
fn find_future_hash_match(
|
||||
peer: &[SequenceSample],
|
||||
source: &SequenceSample,
|
||||
uri: &str,
|
||||
hash: &str,
|
||||
args: &Args,
|
||||
) -> Option<EventOccurrence> {
|
||||
peer.iter()
|
||||
.filter(|candidate| is_in_alignment_window(source, candidate, args))
|
||||
.find(|candidate| {
|
||||
candidate
|
||||
.objects
|
||||
.get(uri)
|
||||
.is_some_and(|peer_hash| peer_hash == hash)
|
||||
})
|
||||
.map(|candidate| {
|
||||
occurrence(
|
||||
candidate,
|
||||
if candidate.raw.side.as_deref() == Some("left") {
|
||||
Side::Left
|
||||
} else {
|
||||
Side::Right
|
||||
},
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
fn is_in_alignment_window(
|
||||
source: &SequenceSample,
|
||||
candidate: &SequenceSample,
|
||||
args: &Args,
|
||||
) -> bool {
|
||||
if candidate.raw.seq < source.raw.seq {
|
||||
return false;
|
||||
}
|
||||
let run_delta = candidate.raw.seq.saturating_sub(source.raw.seq);
|
||||
let time_delta = candidate.validation_time - source.validation_time;
|
||||
let secs = time_delta.whole_seconds().abs();
|
||||
run_delta <= args.align_window_runs || secs <= args.align_window_secs
|
||||
}
|
||||
|
||||
fn occurrence(sample: &SequenceSample, side: Side) -> EventOccurrence {
|
||||
EventOccurrence {
|
||||
side,
|
||||
seq: sample.raw.seq,
|
||||
run_id: sample.raw.run_id.clone(),
|
||||
}
|
||||
}
|
||||
119
src/tools/sequence_triage_ccr_cir/args.rs
Normal file
119
src/tools/sequence_triage_ccr_cir/args.rs
Normal file
@ -0,0 +1,119 @@
|
||||
use std::path::PathBuf;
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub(super) struct Args {
|
||||
pub(super) left_sequence: PathBuf,
|
||||
pub(super) right_sequence: PathBuf,
|
||||
pub(super) out_dir: PathBuf,
|
||||
pub(super) align_window_runs: u32,
|
||||
pub(super) align_window_secs: i64,
|
||||
pub(super) sample_limit: usize,
|
||||
pub(super) warmup_samples: usize,
|
||||
pub(super) cooldown_samples: usize,
|
||||
pub(super) timeline_sample_limit: usize,
|
||||
}
|
||||
|
||||
pub(super) fn usage() -> &'static str {
|
||||
"Usage: sequence_triage_ccr_cir --left-sequence <path> --right-sequence <path> --out-dir <path> [--align-window-runs <n>] [--align-window-secs <n>] [--sample-limit <n>] [--warmup-samples <n>] [--cooldown-samples <n>] [--timeline-sample-limit <n>]"
|
||||
}
|
||||
|
||||
pub(super) fn parse_args(argv: &[String]) -> Result<Args, String> {
|
||||
let mut left_sequence = None;
|
||||
let mut right_sequence = None;
|
||||
let mut out_dir = None;
|
||||
let mut align_window_runs = 2u32;
|
||||
let mut align_window_secs = 1800i64;
|
||||
let mut sample_limit = 200usize;
|
||||
let mut warmup_samples = 1usize;
|
||||
let mut cooldown_samples = 1usize;
|
||||
let mut timeline_sample_limit = 0usize;
|
||||
let mut index = 1usize;
|
||||
while index < argv.len() {
|
||||
match argv[index].as_str() {
|
||||
"--left-sequence" => {
|
||||
index += 1;
|
||||
left_sequence = Some(PathBuf::from(
|
||||
argv.get(index).ok_or("--left-sequence requires a value")?,
|
||||
));
|
||||
}
|
||||
"--right-sequence" => {
|
||||
index += 1;
|
||||
right_sequence = Some(PathBuf::from(
|
||||
argv.get(index).ok_or("--right-sequence requires a value")?,
|
||||
));
|
||||
}
|
||||
"--out-dir" => {
|
||||
index += 1;
|
||||
out_dir = Some(PathBuf::from(
|
||||
argv.get(index).ok_or("--out-dir requires a value")?,
|
||||
));
|
||||
}
|
||||
"--align-window-runs" => {
|
||||
index += 1;
|
||||
let value = argv
|
||||
.get(index)
|
||||
.ok_or("--align-window-runs requires a value")?;
|
||||
align_window_runs = value
|
||||
.parse::<u32>()
|
||||
.map_err(|_| format!("invalid --align-window-runs: {value}"))?;
|
||||
}
|
||||
"--align-window-secs" => {
|
||||
index += 1;
|
||||
let value = argv
|
||||
.get(index)
|
||||
.ok_or("--align-window-secs requires a value")?;
|
||||
align_window_secs = value
|
||||
.parse::<i64>()
|
||||
.map_err(|_| format!("invalid --align-window-secs: {value}"))?;
|
||||
}
|
||||
"--sample-limit" => {
|
||||
index += 1;
|
||||
let value = argv.get(index).ok_or("--sample-limit requires a value")?;
|
||||
sample_limit = value
|
||||
.parse::<usize>()
|
||||
.map_err(|_| format!("invalid --sample-limit: {value}"))?;
|
||||
}
|
||||
"--warmup-samples" => {
|
||||
index += 1;
|
||||
let value = argv.get(index).ok_or("--warmup-samples requires a value")?;
|
||||
warmup_samples = value
|
||||
.parse::<usize>()
|
||||
.map_err(|_| format!("invalid --warmup-samples: {value}"))?;
|
||||
}
|
||||
"--cooldown-samples" => {
|
||||
index += 1;
|
||||
let value = argv
|
||||
.get(index)
|
||||
.ok_or("--cooldown-samples requires a value")?;
|
||||
cooldown_samples = value
|
||||
.parse::<usize>()
|
||||
.map_err(|_| format!("invalid --cooldown-samples: {value}"))?;
|
||||
}
|
||||
"--timeline-sample-limit" => {
|
||||
index += 1;
|
||||
let value = argv
|
||||
.get(index)
|
||||
.ok_or("--timeline-sample-limit requires a value")?;
|
||||
timeline_sample_limit = value
|
||||
.parse::<usize>()
|
||||
.map_err(|_| format!("invalid --timeline-sample-limit: {value}"))?;
|
||||
}
|
||||
"-h" | "--help" => return Err(usage().to_string()),
|
||||
other => return Err(format!("unknown argument: {other}\n{}", usage())),
|
||||
}
|
||||
index += 1;
|
||||
}
|
||||
Ok(Args {
|
||||
left_sequence: left_sequence
|
||||
.ok_or_else(|| format!("--left-sequence is required\n{}", usage()))?,
|
||||
right_sequence: right_sequence
|
||||
.ok_or_else(|| format!("--right-sequence is required\n{}", usage()))?,
|
||||
out_dir: out_dir.ok_or_else(|| format!("--out-dir is required\n{}", usage()))?,
|
||||
align_window_runs,
|
||||
align_window_secs,
|
||||
sample_limit,
|
||||
warmup_samples,
|
||||
cooldown_samples,
|
||||
timeline_sample_limit,
|
||||
})
|
||||
}
|
||||
33
src/tools/sequence_triage_ccr_cir/io.rs
Normal file
33
src/tools/sequence_triage_ccr_cir/io.rs
Normal file
@ -0,0 +1,33 @@
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use time::OffsetDateTime;
|
||||
use time::format_description::well_known::Rfc3339;
|
||||
|
||||
pub(super) fn read_file(path: &Path) -> Result<Vec<u8>, String> {
|
||||
std::fs::read(path).map_err(|e| format!("read file failed: {}: {e}", path.display()))
|
||||
}
|
||||
|
||||
pub(super) fn resolve_path(base_dir: &Path, path: &Path) -> PathBuf {
|
||||
if path.is_absolute() {
|
||||
path.to_path_buf()
|
||||
} else {
|
||||
base_dir.join(path)
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) fn parse_rfc3339(value: &str) -> Result<OffsetDateTime, String> {
|
||||
OffsetDateTime::parse(value, &Rfc3339)
|
||||
.map_err(|e| format!("parse RFC3339 failed: {value}: {e}"))
|
||||
}
|
||||
|
||||
pub(super) fn format_time(value: OffsetDateTime) -> String {
|
||||
value.format(&Rfc3339).unwrap_or_else(|_| value.to_string())
|
||||
}
|
||||
|
||||
pub(super) fn path_string(path: &Path) -> String {
|
||||
path.to_string_lossy().into_owned()
|
||||
}
|
||||
|
||||
pub(super) fn object_hash_key(uri: &str, hash: &str) -> String {
|
||||
format!("{uri}|{hash}")
|
||||
}
|
||||
144
src/tools/sequence_triage_ccr_cir/loader.rs
Normal file
144
src/tools/sequence_triage_ccr_cir/loader.rs
Normal file
@ -0,0 +1,144 @@
|
||||
use std::collections::{BTreeMap, BTreeSet};
|
||||
use std::path::Path;
|
||||
|
||||
use crate::ccr::{decode_ccr_compare_views, decode_content_info};
|
||||
use crate::cir::decode_cir;
|
||||
|
||||
use super::io::{object_hash_key, parse_rfc3339, read_file, resolve_path};
|
||||
use super::model::{SequenceItemRaw, SequenceSample, Side};
|
||||
|
||||
pub(super) fn load_sequence(path: &Path, side: Side) -> Result<Vec<SequenceSample>, String> {
|
||||
let base_dir = path.parent().unwrap_or_else(|| Path::new("."));
|
||||
let text = std::fs::read_to_string(path)
|
||||
.map_err(|e| format!("read sequence failed: {}: {e}", path.display()))?;
|
||||
let mut samples = Vec::new();
|
||||
let mut seen_seq = BTreeSet::new();
|
||||
for (line_index, line) in text.lines().enumerate() {
|
||||
let line = line.trim();
|
||||
if line.is_empty() {
|
||||
continue;
|
||||
}
|
||||
let raw: SequenceItemRaw = serde_json::from_str(line).map_err(|e| {
|
||||
format!(
|
||||
"parse sequence JSONL failed: {}:{}: {e}",
|
||||
path.display(),
|
||||
line_index + 1
|
||||
)
|
||||
})?;
|
||||
if raw.schema_version.unwrap_or(1) != 1 {
|
||||
return Err(format!(
|
||||
"unsupported sequence item schemaVersion in {}:{}",
|
||||
path.display(),
|
||||
line_index + 1
|
||||
));
|
||||
}
|
||||
if !seen_seq.insert(raw.seq) {
|
||||
return Err(format!("duplicate seq {} in {}", raw.seq, path.display()));
|
||||
}
|
||||
if let Some(status) = &raw.status
|
||||
&& status != "success"
|
||||
{
|
||||
return Err(format!(
|
||||
"sequence sample {} has non-success status: {status}",
|
||||
raw.run_id
|
||||
));
|
||||
}
|
||||
let cir_path = resolve_path(base_dir, &raw.cir_path);
|
||||
let ccr_path = resolve_path(base_dir, &raw.ccr_path);
|
||||
let cir = decode_cir(&read_file(&cir_path)?).map_err(|e| {
|
||||
format!(
|
||||
"decode CIR failed for sample {} ({}): {e}",
|
||||
raw.run_id,
|
||||
cir_path.display()
|
||||
)
|
||||
})?;
|
||||
let ccr = decode_content_info(&read_file(&ccr_path)?).map_err(|e| {
|
||||
format!(
|
||||
"decode CCR failed for sample {} ({}): {e}",
|
||||
raw.run_id,
|
||||
ccr_path.display()
|
||||
)
|
||||
})?;
|
||||
let validation_time = raw
|
||||
.validation_time
|
||||
.as_deref()
|
||||
.map(parse_rfc3339)
|
||||
.transpose()?
|
||||
.unwrap_or(cir.validation_time);
|
||||
let objects = cir
|
||||
.objects
|
||||
.iter()
|
||||
.map(|item| (item.rsync_uri.clone(), hex::encode(&item.sha256)))
|
||||
.collect::<BTreeMap<_, _>>();
|
||||
let object_uris = objects.keys().cloned().collect::<BTreeSet<_>>();
|
||||
let object_hashes = objects
|
||||
.iter()
|
||||
.map(|(uri, hash)| object_hash_key(uri, hash))
|
||||
.collect::<BTreeSet<_>>();
|
||||
let rejects = cir
|
||||
.rejected_objects
|
||||
.iter()
|
||||
.map(|item| item.object_uri.clone())
|
||||
.collect::<BTreeSet<_>>();
|
||||
let trust_anchors = cir
|
||||
.trust_anchors
|
||||
.iter()
|
||||
.map(|item| {
|
||||
format!(
|
||||
"{}|{}|{}|{}",
|
||||
item.ta_rsync_uri,
|
||||
item.tal_uri,
|
||||
hex::encode(crate::cir::sha256(&item.tal_bytes)),
|
||||
hex::encode(&item.ta_certificate_sha256)
|
||||
)
|
||||
})
|
||||
.collect::<BTreeSet<_>>();
|
||||
let (vrps, vaps) = decode_ccr_compare_views(&ccr).map_err(|e| {
|
||||
format!(
|
||||
"decode CCR compare views failed for sample {} ({}): {e}",
|
||||
raw.run_id,
|
||||
ccr_path.display()
|
||||
)
|
||||
})?;
|
||||
let vrps = vrps
|
||||
.into_iter()
|
||||
.map(|row| format!("{}|{}|{}", row.asn, row.ip_prefix, row.max_length))
|
||||
.collect::<BTreeSet<_>>();
|
||||
let vaps = vaps
|
||||
.into_iter()
|
||||
.map(|row| format!("{}|{}", row.customer_asn, row.providers))
|
||||
.collect::<BTreeSet<_>>();
|
||||
samples.push(SequenceSample {
|
||||
raw,
|
||||
validation_time,
|
||||
ccr_path,
|
||||
cir_path,
|
||||
objects,
|
||||
object_uris,
|
||||
object_hashes,
|
||||
rejects,
|
||||
trust_anchors,
|
||||
vrps,
|
||||
vaps,
|
||||
});
|
||||
}
|
||||
samples.sort_by_key(|sample| sample.raw.seq);
|
||||
for pair in samples.windows(2) {
|
||||
if pair[0].raw.seq >= pair[1].raw.seq {
|
||||
return Err("sequence must be sorted by increasing seq".into());
|
||||
}
|
||||
}
|
||||
if samples.iter().any(|sample| {
|
||||
sample
|
||||
.raw
|
||||
.side
|
||||
.as_deref()
|
||||
.is_some_and(|item| item != side.as_str())
|
||||
}) {
|
||||
return Err(format!(
|
||||
"sequence side field does not match expected side: {}",
|
||||
side.as_str()
|
||||
));
|
||||
}
|
||||
Ok(samples)
|
||||
}
|
||||
173
src/tools/sequence_triage_ccr_cir/model.rs
Normal file
173
src/tools/sequence_triage_ccr_cir/model.rs
Normal file
@ -0,0 +1,173 @@
|
||||
use std::collections::{BTreeMap, BTreeSet};
|
||||
use std::path::PathBuf;
|
||||
|
||||
use serde::Deserialize;
|
||||
use serde_json::Value;
|
||||
use time::OffsetDateTime;
|
||||
|
||||
#[derive(Clone, Debug, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub(super) struct SequenceItemRaw {
|
||||
pub(super) schema_version: Option<u32>,
|
||||
pub(super) rp_id: String,
|
||||
pub(super) side: Option<String>,
|
||||
pub(super) seq: u32,
|
||||
pub(super) run_id: String,
|
||||
pub(super) sync_mode: Option<String>,
|
||||
pub(super) status: Option<String>,
|
||||
pub(super) start_time: Option<String>,
|
||||
pub(super) finish_time: Option<String>,
|
||||
pub(super) validation_time: Option<String>,
|
||||
pub(super) ccr_path: PathBuf,
|
||||
pub(super) cir_path: PathBuf,
|
||||
pub(super) ccr_sha256: Option<String>,
|
||||
pub(super) cir_sha256: Option<String>,
|
||||
pub(super) wall_ms: Option<u64>,
|
||||
pub(super) max_rss_kb: Option<u64>,
|
||||
pub(super) vrps: Option<u64>,
|
||||
pub(super) vaps: Option<u64>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub(super) struct SequenceSample {
|
||||
pub(super) raw: SequenceItemRaw,
|
||||
pub(super) validation_time: OffsetDateTime,
|
||||
pub(super) ccr_path: PathBuf,
|
||||
pub(super) cir_path: PathBuf,
|
||||
pub(super) objects: BTreeMap<String, String>,
|
||||
pub(super) object_uris: BTreeSet<String>,
|
||||
pub(super) object_hashes: BTreeSet<String>,
|
||||
pub(super) rejects: BTreeSet<String>,
|
||||
pub(super) trust_anchors: BTreeSet<String>,
|
||||
pub(super) vrps: BTreeSet<String>,
|
||||
pub(super) vaps: BTreeSet<String>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub(super) enum Side {
|
||||
Left,
|
||||
Right,
|
||||
}
|
||||
|
||||
impl Side {
|
||||
pub(super) fn as_str(self) -> &'static str {
|
||||
match self {
|
||||
Side::Left => "left",
|
||||
Side::Right => "right",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub(super) struct EventOccurrence {
|
||||
pub(super) side: Side,
|
||||
pub(super) seq: u32,
|
||||
pub(super) run_id: String,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub(super) struct SampleRecord {
|
||||
pub(super) classification: &'static str,
|
||||
pub(super) event_type: &'static str,
|
||||
pub(super) key: String,
|
||||
pub(super) source_side: Side,
|
||||
pub(super) source_seq: u32,
|
||||
pub(super) source_run_id: String,
|
||||
pub(super) matched_seq: Option<u32>,
|
||||
pub(super) matched_run_id: Option<String>,
|
||||
pub(super) note: String,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub(super) struct ClassStats {
|
||||
pub(super) total: usize,
|
||||
pub(super) samples: Vec<SampleRecord>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub(super) struct AnalysisResult {
|
||||
pub(super) stats: BTreeMap<&'static str, ClassStats>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub(super) struct DiffEvent {
|
||||
pub(super) event_type: &'static str,
|
||||
pub(super) raw_class: &'static str,
|
||||
pub(super) key: String,
|
||||
pub(super) source_side: Side,
|
||||
pub(super) source_seq: u32,
|
||||
pub(super) source_run_id: String,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub(super) enum EdgePosition {
|
||||
Leading,
|
||||
Stable,
|
||||
Trailing,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub(super) struct AdjustedRecord {
|
||||
pub(super) classification: &'static str,
|
||||
pub(super) event_type: &'static str,
|
||||
pub(super) key: String,
|
||||
pub(super) source_side: Side,
|
||||
pub(super) source_seq: u32,
|
||||
pub(super) source_run_id: String,
|
||||
pub(super) note: String,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub(super) struct AdjustedClassStats {
|
||||
pub(super) total: usize,
|
||||
pub(super) unique_keys: BTreeSet<String>,
|
||||
pub(super) samples: Vec<AdjustedRecord>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub(super) struct AdjustedAnalysis {
|
||||
pub(super) raw_persistent_occurrences: usize,
|
||||
pub(super) raw_persistent_unique_keys: usize,
|
||||
pub(super) edge_filtered_occurrences: usize,
|
||||
pub(super) edge_filtered_unique_keys: usize,
|
||||
pub(super) adjusted_stable_occurrences: usize,
|
||||
pub(super) adjusted_stable_unique_keys: usize,
|
||||
pub(super) stats: BTreeMap<&'static str, AdjustedClassStats>,
|
||||
pub(super) uri_timeline_samples: Vec<Value>,
|
||||
pub(super) stable_object_groups: Vec<Value>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub(super) struct SandwichRecord {
|
||||
pub(super) classification: &'static str,
|
||||
pub(super) set_type: &'static str,
|
||||
pub(super) key: String,
|
||||
pub(super) source_side: Side,
|
||||
pub(super) source_start_seq: u32,
|
||||
pub(super) source_start_run_id: String,
|
||||
pub(super) source_end_seq: u32,
|
||||
pub(super) source_end_run_id: String,
|
||||
pub(super) peer_seq: u32,
|
||||
pub(super) peer_run_id: String,
|
||||
pub(super) source_value: Option<String>,
|
||||
pub(super) peer_value: Option<String>,
|
||||
pub(super) source_start_time: String,
|
||||
pub(super) peer_time: String,
|
||||
pub(super) source_end_time: String,
|
||||
pub(super) note: String,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub(super) struct SandwichClassStats {
|
||||
pub(super) total: usize,
|
||||
pub(super) unique_keys: BTreeSet<String>,
|
||||
pub(super) samples: Vec<SandwichRecord>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub(super) struct SandwichAnalysis {
|
||||
pub(super) total_occurrences: usize,
|
||||
pub(super) unique_keys: BTreeSet<String>,
|
||||
pub(super) by_set_type: BTreeMap<&'static str, usize>,
|
||||
pub(super) stats: BTreeMap<&'static str, SandwichClassStats>,
|
||||
}
|
||||
408
src/tools/sequence_triage_ccr_cir/output.rs
Normal file
408
src/tools/sequence_triage_ccr_cir/output.rs
Normal file
@ -0,0 +1,408 @@
|
||||
use std::collections::BTreeSet;
|
||||
use std::path::Path;
|
||||
|
||||
use serde_json::{Value, json};
|
||||
|
||||
use super::args::Args;
|
||||
use super::io::{format_time, path_string};
|
||||
use super::model::{
|
||||
AdjustedAnalysis, AdjustedRecord, AnalysisResult, SampleRecord, SandwichAnalysis,
|
||||
SandwichRecord, SequenceSample,
|
||||
};
|
||||
|
||||
pub(super) fn build_output(
|
||||
args: &Args,
|
||||
left: &[SequenceSample],
|
||||
right: &[SequenceSample],
|
||||
result: &AnalysisResult,
|
||||
adjusted: &AdjustedAnalysis,
|
||||
sandwich: &SandwichAnalysis,
|
||||
) -> Value {
|
||||
let classifications = result
|
||||
.stats
|
||||
.iter()
|
||||
.map(|(class, stats)| {
|
||||
json!({
|
||||
"classification": class,
|
||||
"count": stats.total,
|
||||
"samples": stats.samples.iter().map(sample_to_json).collect::<Vec<_>>(),
|
||||
})
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
let adjusted_classifications = adjusted
|
||||
.stats
|
||||
.iter()
|
||||
.map(|(class, stats)| {
|
||||
json!({
|
||||
"classification": class,
|
||||
"occurrences": stats.total,
|
||||
"uniqueKeys": stats.unique_keys.len(),
|
||||
"samples": stats.samples.iter().map(adjusted_sample_to_json).collect::<Vec<_>>(),
|
||||
})
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
let sandwich_classifications = sandwich
|
||||
.stats
|
||||
.iter()
|
||||
.map(|(class, stats)| {
|
||||
json!({
|
||||
"classification": class,
|
||||
"occurrences": stats.total,
|
||||
"uniqueKeys": stats.unique_keys.len(),
|
||||
"samples": stats.samples.iter().map(sandwich_sample_to_json).collect::<Vec<_>>(),
|
||||
})
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
json!({
|
||||
"schemaVersion": 1,
|
||||
"generatedBy": "sequence_triage_ccr_cir",
|
||||
"inputContract": "left-right-sequence-jsonl-with-ccr-cir-artifacts",
|
||||
"parameters": {
|
||||
"leftSequence": path_string(&args.left_sequence),
|
||||
"rightSequence": path_string(&args.right_sequence),
|
||||
"alignWindowRuns": args.align_window_runs,
|
||||
"alignWindowSecs": args.align_window_secs,
|
||||
"sampleLimit": args.sample_limit,
|
||||
"warmupSamples": args.warmup_samples,
|
||||
"cooldownSamples": args.cooldown_samples,
|
||||
"timelineSampleLimit": if args.timeline_sample_limit == 0 { args.sample_limit } else { args.timeline_sample_limit },
|
||||
},
|
||||
"left": sequence_summary(left),
|
||||
"right": sequence_summary(right),
|
||||
"classificationCounts": classifications,
|
||||
"totals": {
|
||||
"resolvedTemporalLike": count_class(result, "TEMPORAL_LAG_RESOLVED") + count_class(result, "CONTENT_ROLLOVER_RESOLVED"),
|
||||
"persistent": count_prefix(result, "PERSISTENT_"),
|
||||
"unclassified": count_class(result, "UNCLASSIFIED_INSUFFICIENT_WINDOW"),
|
||||
},
|
||||
"adjusted": {
|
||||
"warmupSamples": args.warmup_samples,
|
||||
"cooldownSamples": args.cooldown_samples,
|
||||
"rawPersistent": {
|
||||
"occurrences": adjusted.raw_persistent_occurrences,
|
||||
"uniqueKeys": adjusted.raw_persistent_unique_keys,
|
||||
},
|
||||
"edgeFilteredPersistent": {
|
||||
"occurrences": adjusted.edge_filtered_occurrences,
|
||||
"uniqueKeys": adjusted.edge_filtered_unique_keys,
|
||||
},
|
||||
"adjustedStablePersistent": {
|
||||
"occurrences": adjusted.adjusted_stable_occurrences,
|
||||
"uniqueKeys": adjusted.adjusted_stable_unique_keys,
|
||||
},
|
||||
"classificationCounts": adjusted_classifications,
|
||||
"uriTimelineSamples": adjusted.uri_timeline_samples,
|
||||
"stableObjectGroups": adjusted.stable_object_groups,
|
||||
"interpretation": {
|
||||
"rawPersistentMeaning": "raw persistent keeps the original #043 single-event matching semantics.",
|
||||
"edgeFilteredMeaning": "edge-filtered persistent keeps only non-warmup and non-cooldown source occurrences.",
|
||||
"adjustedStableMeaning": "adjusted stable persistent keeps non-edge findings after URI-level rollover and TA projection filtering.",
|
||||
"stableObjectGroupsMeaning": "STABLE_OBJECT_SET_DIVERGENCE events are additionally collapsed by source CIR, publication point, and physical object URI so object_uri/object_hash duplicate event views do not inflate physical-object counts.",
|
||||
}
|
||||
},
|
||||
"sandwich": {
|
||||
"strictTimeWindow": true,
|
||||
"method": "For each side, use two adjacent source samples as a stable interval. If source_start.time < peer.time < source_end.time and the source value is identical at both interval endpoints, the peer sample is expected to contain the same value.",
|
||||
"totals": {
|
||||
"occurrences": sandwich.total_occurrences,
|
||||
"uniqueKeys": sandwich.unique_keys.len(),
|
||||
},
|
||||
"bySetType": sandwich.by_set_type,
|
||||
"classificationCounts": sandwich_classifications,
|
||||
"interpretation": {
|
||||
"missingStableObject": "The source side proves a URI/hash is stable across an interval that contains the peer sample, but the peer sample has no such URI.",
|
||||
"hashMismatchStableObject": "The source side proves a URI/hash is stable across an interval that contains the peer sample, but the peer sample has the same URI with a different hash.",
|
||||
"missingStableReject": "The source side consistently rejects the URI across an interval that contains the peer sample, but the peer sample does not reject it.",
|
||||
"missingStableOutput": "The source side consistently outputs a VRP/VAP key across an interval that contains the peer sample, but the peer sample does not output it."
|
||||
}
|
||||
},
|
||||
"interpretation": {
|
||||
"temporalResolvedMeaning": "Event appeared only on one side at one sample but aligned in the peer sequence later.",
|
||||
"persistentMeaning": "Event did not align within the configured run/time window; inspect as a candidate RP behavior difference or persistent sync/input difference.",
|
||||
"limits": [
|
||||
"Sequence triage only reads sequence JSONL plus referenced CCR/CIR files.",
|
||||
"It does not read report.json, logs, repo-bytes DB, cache, mirror, or raw objects for root cause proof.",
|
||||
"Resolved temporal findings reduce false positives but do not prove the exact external repository update time."
|
||||
]
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
fn sequence_summary(samples: &[SequenceSample]) -> Value {
|
||||
json!({
|
||||
"sampleCount": samples.len(),
|
||||
"rpIds": samples.iter().map(|sample| sample.raw.rp_id.clone()).collect::<BTreeSet<_>>(),
|
||||
"firstSeq": samples.first().map(|sample| sample.raw.seq),
|
||||
"lastSeq": samples.last().map(|sample| sample.raw.seq),
|
||||
"firstValidationTime": samples.first().map(|sample| format_time(sample.validation_time)),
|
||||
"lastValidationTime": samples.last().map(|sample| format_time(sample.validation_time)),
|
||||
"samples": samples.iter().map(|sample| json!({
|
||||
"seq": sample.raw.seq,
|
||||
"runId": sample.raw.run_id,
|
||||
"syncMode": sample.raw.sync_mode,
|
||||
"startTime": sample.raw.start_time,
|
||||
"finishTime": sample.raw.finish_time,
|
||||
"validationTime": format_time(sample.validation_time),
|
||||
"status": sample.raw.status,
|
||||
"ccrPath": path_string(&sample.ccr_path),
|
||||
"cirPath": path_string(&sample.cir_path),
|
||||
"ccrSha256": sample.raw.ccr_sha256,
|
||||
"cirSha256": sample.raw.cir_sha256,
|
||||
"wallMs": sample.raw.wall_ms,
|
||||
"maxRssKb": sample.raw.max_rss_kb,
|
||||
"vrps": sample.raw.vrps.or(Some(sample.vrps.len() as u64)),
|
||||
"vaps": sample.raw.vaps.or(Some(sample.vaps.len() as u64)),
|
||||
"objectCount": sample.object_uris.len(),
|
||||
"rejectCount": sample.rejects.len(),
|
||||
"trustAnchorCount": sample.trust_anchors.len(),
|
||||
})).collect::<Vec<_>>(),
|
||||
})
|
||||
}
|
||||
|
||||
fn sample_to_json(sample: &SampleRecord) -> Value {
|
||||
json!({
|
||||
"classification": sample.classification,
|
||||
"eventType": sample.event_type,
|
||||
"key": sample.key,
|
||||
"sourceSide": sample.source_side.as_str(),
|
||||
"sourceSeq": sample.source_seq,
|
||||
"sourceRunId": sample.source_run_id,
|
||||
"matchedSeq": sample.matched_seq,
|
||||
"matchedRunId": sample.matched_run_id,
|
||||
"note": sample.note,
|
||||
})
|
||||
}
|
||||
|
||||
fn adjusted_sample_to_json(sample: &AdjustedRecord) -> Value {
|
||||
json!({
|
||||
"classification": sample.classification,
|
||||
"eventType": sample.event_type,
|
||||
"key": sample.key,
|
||||
"sourceSide": sample.source_side.as_str(),
|
||||
"sourceSeq": sample.source_seq,
|
||||
"sourceRunId": sample.source_run_id,
|
||||
"note": sample.note,
|
||||
})
|
||||
}
|
||||
|
||||
fn sandwich_sample_to_json(sample: &SandwichRecord) -> Value {
|
||||
json!({
|
||||
"classification": sample.classification,
|
||||
"setType": sample.set_type,
|
||||
"key": sample.key,
|
||||
"sourceSide": sample.source_side.as_str(),
|
||||
"sourceStartSeq": sample.source_start_seq,
|
||||
"sourceStartRunId": sample.source_start_run_id,
|
||||
"sourceEndSeq": sample.source_end_seq,
|
||||
"sourceEndRunId": sample.source_end_run_id,
|
||||
"peerSeq": sample.peer_seq,
|
||||
"peerRunId": sample.peer_run_id,
|
||||
"sourceValue": sample.source_value,
|
||||
"peerValue": sample.peer_value,
|
||||
"sourceStartTime": sample.source_start_time,
|
||||
"peerTime": sample.peer_time,
|
||||
"sourceEndTime": sample.source_end_time,
|
||||
"note": sample.note,
|
||||
})
|
||||
}
|
||||
|
||||
fn count_class(result: &AnalysisResult, class: &'static str) -> usize {
|
||||
result
|
||||
.stats
|
||||
.get(class)
|
||||
.map(|stats| stats.total)
|
||||
.unwrap_or(0)
|
||||
}
|
||||
|
||||
fn count_prefix(result: &AnalysisResult, prefix: &str) -> usize {
|
||||
result
|
||||
.stats
|
||||
.iter()
|
||||
.filter(|(class, _)| class.starts_with(prefix))
|
||||
.map(|(_, stats)| stats.total)
|
||||
.sum()
|
||||
}
|
||||
|
||||
pub(super) fn write_json(path: &Path, value: &Value) -> Result<(), String> {
|
||||
std::fs::write(
|
||||
path,
|
||||
serde_json::to_string_pretty(value).map_err(|e| e.to_string())? + "\n",
|
||||
)
|
||||
.map_err(|e| format!("write JSON failed: {}: {e}", path.display()))
|
||||
}
|
||||
|
||||
pub(super) fn write_markdown(path: &Path, output: &Value) -> Result<(), String> {
|
||||
let mut lines = vec![
|
||||
"# CCR/CIR Sequence Triage Summary".to_string(),
|
||||
"".to_string(),
|
||||
format!(
|
||||
"- `generatedBy`: `{}`",
|
||||
output["generatedBy"].as_str().unwrap_or("")
|
||||
),
|
||||
format!(
|
||||
"- `leftSamples`: `{}`",
|
||||
output["left"]["sampleCount"].as_u64().unwrap_or(0)
|
||||
),
|
||||
format!(
|
||||
"- `rightSamples`: `{}`",
|
||||
output["right"]["sampleCount"].as_u64().unwrap_or(0)
|
||||
),
|
||||
format!(
|
||||
"- `alignWindowRuns`: `{}`",
|
||||
output["parameters"]["alignWindowRuns"]
|
||||
.as_u64()
|
||||
.unwrap_or(0)
|
||||
),
|
||||
format!(
|
||||
"- `alignWindowSecs`: `{}`",
|
||||
output["parameters"]["alignWindowSecs"]
|
||||
.as_i64()
|
||||
.unwrap_or(0)
|
||||
),
|
||||
format!(
|
||||
"- `warmupSamples`: `{}`",
|
||||
output["adjusted"]["warmupSamples"].as_u64().unwrap_or(0)
|
||||
),
|
||||
format!(
|
||||
"- `cooldownSamples`: `{}`",
|
||||
output["adjusted"]["cooldownSamples"].as_u64().unwrap_or(0)
|
||||
),
|
||||
"".to_string(),
|
||||
"## Classification Counts".to_string(),
|
||||
"".to_string(),
|
||||
"| Classification | Count |".to_string(),
|
||||
"|---|---:|".to_string(),
|
||||
];
|
||||
if let Some(classes) = output["classificationCounts"].as_array() {
|
||||
for item in classes {
|
||||
lines.push(format!(
|
||||
"| `{}` | {} |",
|
||||
item["classification"].as_str().unwrap_or(""),
|
||||
item["count"].as_u64().unwrap_or(0)
|
||||
));
|
||||
}
|
||||
}
|
||||
lines.extend([
|
||||
"".to_string(),
|
||||
"## Adjusted Boundary / Rollover Classification".to_string(),
|
||||
"".to_string(),
|
||||
format!(
|
||||
"- `rawPersistent`: `{}` occurrences / `{}` unique keys",
|
||||
output["adjusted"]["rawPersistent"]["occurrences"]
|
||||
.as_u64()
|
||||
.unwrap_or(0),
|
||||
output["adjusted"]["rawPersistent"]["uniqueKeys"]
|
||||
.as_u64()
|
||||
.unwrap_or(0)
|
||||
),
|
||||
format!(
|
||||
"- `edgeFilteredPersistent`: `{}` occurrences / `{}` unique keys",
|
||||
output["adjusted"]["edgeFilteredPersistent"]["occurrences"]
|
||||
.as_u64()
|
||||
.unwrap_or(0),
|
||||
output["adjusted"]["edgeFilteredPersistent"]["uniqueKeys"]
|
||||
.as_u64()
|
||||
.unwrap_or(0)
|
||||
),
|
||||
format!(
|
||||
"- `adjustedStablePersistent`: `{}` occurrences / `{}` unique keys",
|
||||
output["adjusted"]["adjustedStablePersistent"]["occurrences"]
|
||||
.as_u64()
|
||||
.unwrap_or(0),
|
||||
output["adjusted"]["adjustedStablePersistent"]["uniqueKeys"]
|
||||
.as_u64()
|
||||
.unwrap_or(0)
|
||||
),
|
||||
"".to_string(),
|
||||
"| Adjusted Classification | Occurrences | Unique Keys |".to_string(),
|
||||
"|---|---:|---:|".to_string(),
|
||||
]);
|
||||
if let Some(classes) = output["adjusted"]["classificationCounts"].as_array() {
|
||||
for item in classes {
|
||||
lines.push(format!(
|
||||
"| `{}` | {} | {} |",
|
||||
item["classification"].as_str().unwrap_or(""),
|
||||
item["occurrences"].as_u64().unwrap_or(0),
|
||||
item["uniqueKeys"].as_u64().unwrap_or(0)
|
||||
));
|
||||
}
|
||||
}
|
||||
if let Some(groups) = output["adjusted"]["stableObjectGroups"].as_array()
|
||||
&& !groups.is_empty()
|
||||
{
|
||||
lines.extend([
|
||||
"".to_string(),
|
||||
"## Stable Object Groups".to_string(),
|
||||
"".to_string(),
|
||||
"| Source | CIR | Publication Point | Events | Physical Objects |".to_string(),
|
||||
"|---|---|---|---:|---:|".to_string(),
|
||||
]);
|
||||
for group in groups {
|
||||
lines.push(format!(
|
||||
"| `{}/seq{}/{}` | `{}` | `{}` | {} | {} |",
|
||||
group["sourceSide"].as_str().unwrap_or(""),
|
||||
group["sourceSeq"].as_u64().unwrap_or(0),
|
||||
group["sourceRunId"].as_str().unwrap_or(""),
|
||||
group["sourceCirPath"].as_str().unwrap_or(""),
|
||||
group["publicationPoint"].as_str().unwrap_or(""),
|
||||
group["eventCount"].as_u64().unwrap_or(0),
|
||||
group["physicalObjectCount"].as_u64().unwrap_or(0),
|
||||
));
|
||||
}
|
||||
}
|
||||
lines.extend([
|
||||
"".to_string(),
|
||||
"## Sandwich Anomaly Check".to_string(),
|
||||
"".to_string(),
|
||||
format!(
|
||||
"- `occurrences`: `{}`",
|
||||
output["sandwich"]["totals"]["occurrences"]
|
||||
.as_u64()
|
||||
.unwrap_or(0)
|
||||
),
|
||||
format!(
|
||||
"- `uniqueKeys`: `{}`",
|
||||
output["sandwich"]["totals"]["uniqueKeys"]
|
||||
.as_u64()
|
||||
.unwrap_or(0)
|
||||
),
|
||||
"- Rule: source side has two adjacent stable samples, and peer sample timestamp is strictly between them.".to_string(),
|
||||
"".to_string(),
|
||||
"| Sandwich Classification | Occurrences | Unique Keys |".to_string(),
|
||||
"|---|---:|---:|".to_string(),
|
||||
]);
|
||||
if let Some(classes) = output["sandwich"]["classificationCounts"].as_array() {
|
||||
for item in classes {
|
||||
lines.push(format!(
|
||||
"| `{}` | {} | {} |",
|
||||
item["classification"].as_str().unwrap_or(""),
|
||||
item["occurrences"].as_u64().unwrap_or(0),
|
||||
item["uniqueKeys"].as_u64().unwrap_or(0)
|
||||
));
|
||||
}
|
||||
}
|
||||
lines.extend([
|
||||
"".to_string(),
|
||||
"## Interpretation".to_string(),
|
||||
"".to_string(),
|
||||
"- `TEMPORAL_LAG_RESOLVED` / `CONTENT_ROLLOVER_RESOLVED` 表示差异在后续采样中对齐,优先视为采样时刻或仓库滚动窗口差异。".to_string(),
|
||||
"- `PERSISTENT_*` 表示配置窗口内仍未对齐,才是后续人工排查的高价值候选。".to_string(),
|
||||
"- `EDGE_*` 表示差异落在序列首尾,缺少前置或后续观察,不能直接当作实现差异。".to_string(),
|
||||
"- `STABLE_*` 表示经过首尾边界过滤和 URI-level rollover 过滤后仍存在的稳定候选。".to_string(),
|
||||
]);
|
||||
std::fs::write(path, lines.join("\n") + "\n")
|
||||
.map_err(|e| format!("write markdown failed: {}: {e}", path.display()))
|
||||
}
|
||||
|
||||
pub(super) fn write_samples_jsonl(path: &Path, result: &AnalysisResult) -> Result<(), String> {
|
||||
let mut body = String::new();
|
||||
for stats in result.stats.values() {
|
||||
for sample in &stats.samples {
|
||||
body.push_str(
|
||||
&serde_json::to_string(&sample_to_json(sample)).map_err(|e| e.to_string())?,
|
||||
);
|
||||
body.push('\n');
|
||||
}
|
||||
}
|
||||
std::fs::write(path, body).map_err(|e| format!("write samples failed: {}: {e}", path.display()))
|
||||
}
|
||||
261
src/tools/sequence_triage_ccr_cir/sandwich.rs
Normal file
261
src/tools/sequence_triage_ccr_cir/sandwich.rs
Normal file
@ -0,0 +1,261 @@
|
||||
use std::collections::BTreeSet;
|
||||
|
||||
use super::args::Args;
|
||||
use super::io::format_time;
|
||||
use super::model::{SandwichAnalysis, SandwichRecord, SequenceSample, Side};
|
||||
|
||||
pub(super) fn build_sandwich_analysis(
|
||||
args: &Args,
|
||||
left: &[SequenceSample],
|
||||
right: &[SequenceSample],
|
||||
) -> SandwichAnalysis {
|
||||
let mut analysis = SandwichAnalysis::default();
|
||||
analyze_sandwich_objects(&mut analysis, Side::Left, left, right, args);
|
||||
analyze_sandwich_objects(&mut analysis, Side::Right, right, left, args);
|
||||
analyze_sandwich_sets(
|
||||
&mut analysis,
|
||||
"reject_uri",
|
||||
"PEER_MISSING_STABLE_REJECT",
|
||||
Side::Left,
|
||||
left,
|
||||
right,
|
||||
|sample| &sample.rejects,
|
||||
args,
|
||||
);
|
||||
analyze_sandwich_sets(
|
||||
&mut analysis,
|
||||
"reject_uri",
|
||||
"PEER_MISSING_STABLE_REJECT",
|
||||
Side::Right,
|
||||
right,
|
||||
left,
|
||||
|sample| &sample.rejects,
|
||||
args,
|
||||
);
|
||||
analyze_sandwich_sets(
|
||||
&mut analysis,
|
||||
"vrp_output",
|
||||
"PEER_MISSING_STABLE_OUTPUT",
|
||||
Side::Left,
|
||||
left,
|
||||
right,
|
||||
|sample| &sample.vrps,
|
||||
args,
|
||||
);
|
||||
analyze_sandwich_sets(
|
||||
&mut analysis,
|
||||
"vrp_output",
|
||||
"PEER_MISSING_STABLE_OUTPUT",
|
||||
Side::Right,
|
||||
right,
|
||||
left,
|
||||
|sample| &sample.vrps,
|
||||
args,
|
||||
);
|
||||
analyze_sandwich_sets(
|
||||
&mut analysis,
|
||||
"vap_output",
|
||||
"PEER_MISSING_STABLE_OUTPUT",
|
||||
Side::Left,
|
||||
left,
|
||||
right,
|
||||
|sample| &sample.vaps,
|
||||
args,
|
||||
);
|
||||
analyze_sandwich_sets(
|
||||
&mut analysis,
|
||||
"vap_output",
|
||||
"PEER_MISSING_STABLE_OUTPUT",
|
||||
Side::Right,
|
||||
right,
|
||||
left,
|
||||
|sample| &sample.vaps,
|
||||
args,
|
||||
);
|
||||
analysis
|
||||
}
|
||||
|
||||
fn analyze_sandwich_objects(
|
||||
analysis: &mut SandwichAnalysis,
|
||||
source_side: Side,
|
||||
source: &[SequenceSample],
|
||||
peer: &[SequenceSample],
|
||||
args: &Args,
|
||||
) {
|
||||
for pair in source.windows(2) {
|
||||
let source_start = &pair[0];
|
||||
let source_end = &pair[1];
|
||||
if source_start.validation_time >= source_end.validation_time {
|
||||
continue;
|
||||
}
|
||||
let peers = peer_samples_between(peer, source_start, source_end);
|
||||
if peers.is_empty() {
|
||||
continue;
|
||||
}
|
||||
for (uri, source_hash) in &source_start.objects {
|
||||
if source_end.objects.get(uri) != Some(source_hash) {
|
||||
continue;
|
||||
}
|
||||
for peer_sample in &peers {
|
||||
match peer_sample.objects.get(uri) {
|
||||
Some(peer_hash) if peer_hash == source_hash => {}
|
||||
Some(peer_hash) => analysis.add(
|
||||
"PEER_HASH_MISMATCH_STABLE_OBJECT",
|
||||
sandwich_record(
|
||||
"PEER_HASH_MISMATCH_STABLE_OBJECT",
|
||||
"object",
|
||||
uri.clone(),
|
||||
source_side,
|
||||
source_start,
|
||||
source_end,
|
||||
peer_sample,
|
||||
Some(source_hash.clone()),
|
||||
Some(peer_hash.clone()),
|
||||
"source interval has stable object hash; peer sample has same URI with another hash",
|
||||
),
|
||||
args.sample_limit,
|
||||
),
|
||||
None => analysis.add(
|
||||
"PEER_MISSING_STABLE_OBJECT",
|
||||
sandwich_record(
|
||||
"PEER_MISSING_STABLE_OBJECT",
|
||||
"object",
|
||||
uri.clone(),
|
||||
source_side,
|
||||
source_start,
|
||||
source_end,
|
||||
peer_sample,
|
||||
Some(source_hash.clone()),
|
||||
None,
|
||||
"source interval has stable object hash; peer sample misses the URI",
|
||||
),
|
||||
args.sample_limit,
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn analyze_sandwich_sets<F>(
|
||||
analysis: &mut SandwichAnalysis,
|
||||
set_type: &'static str,
|
||||
classification: &'static str,
|
||||
source_side: Side,
|
||||
source: &[SequenceSample],
|
||||
peer: &[SequenceSample],
|
||||
extract: F,
|
||||
args: &Args,
|
||||
) where
|
||||
F: for<'a> Fn(&'a SequenceSample) -> &'a BTreeSet<String>,
|
||||
{
|
||||
for pair in source.windows(2) {
|
||||
let source_start = &pair[0];
|
||||
let source_end = &pair[1];
|
||||
if source_start.validation_time >= source_end.validation_time {
|
||||
continue;
|
||||
}
|
||||
let peers = peer_samples_between(peer, source_start, source_end);
|
||||
if peers.is_empty() {
|
||||
continue;
|
||||
}
|
||||
let start_set = extract(source_start);
|
||||
let end_set = extract(source_end);
|
||||
for key in start_set {
|
||||
if !end_set.contains(key) {
|
||||
continue;
|
||||
}
|
||||
for peer_sample in &peers {
|
||||
if extract(peer_sample).contains(key) {
|
||||
continue;
|
||||
}
|
||||
analysis.add(
|
||||
classification,
|
||||
sandwich_record(
|
||||
classification,
|
||||
set_type,
|
||||
key.clone(),
|
||||
source_side,
|
||||
source_start,
|
||||
source_end,
|
||||
peer_sample,
|
||||
Some(key.clone()),
|
||||
None,
|
||||
"source interval has a stable key; peer sample misses the key",
|
||||
),
|
||||
args.sample_limit,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn peer_samples_between<'a>(
|
||||
peer: &'a [SequenceSample],
|
||||
source_start: &SequenceSample,
|
||||
source_end: &SequenceSample,
|
||||
) -> Vec<&'a SequenceSample> {
|
||||
peer.iter()
|
||||
.filter(|sample| {
|
||||
source_start.validation_time < sample.validation_time
|
||||
&& sample.validation_time < source_end.validation_time
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn sandwich_record(
|
||||
classification: &'static str,
|
||||
set_type: &'static str,
|
||||
key: String,
|
||||
source_side: Side,
|
||||
source_start: &SequenceSample,
|
||||
source_end: &SequenceSample,
|
||||
peer_sample: &SequenceSample,
|
||||
source_value: Option<String>,
|
||||
peer_value: Option<String>,
|
||||
note: &str,
|
||||
) -> SandwichRecord {
|
||||
SandwichRecord {
|
||||
classification,
|
||||
set_type,
|
||||
key,
|
||||
source_side,
|
||||
source_start_seq: source_start.raw.seq,
|
||||
source_start_run_id: source_start.raw.run_id.clone(),
|
||||
source_end_seq: source_end.raw.seq,
|
||||
source_end_run_id: source_end.raw.run_id.clone(),
|
||||
peer_seq: peer_sample.raw.seq,
|
||||
peer_run_id: peer_sample.raw.run_id.clone(),
|
||||
source_value,
|
||||
peer_value,
|
||||
source_start_time: format_time(source_start.validation_time),
|
||||
peer_time: format_time(peer_sample.validation_time),
|
||||
source_end_time: format_time(source_end.validation_time),
|
||||
note: note.to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
impl SandwichAnalysis {
|
||||
fn add(&mut self, class: &'static str, record: SandwichRecord, sample_limit: usize) {
|
||||
self.total_occurrences += 1;
|
||||
self.unique_keys.insert(sandwich_unique_key(&record));
|
||||
*self.by_set_type.entry(record.set_type).or_default() += 1;
|
||||
let stats = self.stats.entry(class).or_default();
|
||||
stats.total += 1;
|
||||
stats.unique_keys.insert(sandwich_unique_key(&record));
|
||||
if stats.samples.len() < sample_limit {
|
||||
stats.samples.push(record);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn sandwich_unique_key(record: &SandwichRecord) -> String {
|
||||
format!(
|
||||
"{}|{}|{}|{}",
|
||||
record.classification,
|
||||
record.set_type,
|
||||
record.source_side.as_str(),
|
||||
record.key
|
||||
)
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user