20260531 拆分sequence triage工具并修复远端产物拉取

This commit is contained in:
yuyr 2026-06-01 11:01:42 +08:00
parent a29fe266a4
commit 00d7109503
10 changed files with 2034 additions and 1972 deletions

View File

@ -56,17 +56,16 @@ def rsync_from_remote(target: str, source: str | Path, destination: Path) -> Non
def rsync_run_artifacts_from_remote(target: str, source: str | Path, destination: Path) -> None: def rsync_run_artifacts_from_remote(target: str, source: str | Path, destination: Path) -> None:
destination.mkdir(parents=True, exist_ok=True) destination.mkdir(parents=True, exist_ok=True)
include_args = [ for name in [
"--include", "result.ccr", "result.ccr",
"--include", "result.cir", "result.cir",
"--include", "process-time.txt", "process-time.txt",
"--include", "remote-run-meta.json", "remote-run-meta.json",
"--include", "exit-code.txt", "exit-code.txt",
"--include", "started-at.txt", "started-at.txt",
"--include", "finished-at.txt", "finished-at.txt",
"--exclude", "*", ]:
] run_local(["rsync", "-a", f"{target}:{source}/{name}", f"{destination}/"])
run_local(["rsync", "-a", *include_args, f"{target}:{source}/", f"{destination}/"])
def load_json(path: Path) -> Any: def load_json(path: Path) -> Any:

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,503 @@
use std::collections::{BTreeMap, BTreeSet};
use serde_json::{Value, json};
use super::analysis::peer_sample_at_seq;
use super::args::Args;
use super::io::{format_time, path_string};
use super::model::{
AdjustedAnalysis, AdjustedRecord, DiffEvent, EdgePosition, SequenceSample, Side,
};
/// Re-classifies raw persistent diff events with respect to the sequence edges.
///
/// Each event is located inside its source sequence (leading edge, trailing
/// edge or stable interior), assigned an adjusted classification and recorded
/// in the returned [`AdjustedAnalysis`]. Events in the stable interior are
/// additionally counted in the `edge_filtered_*` totals. Once all events are
/// recorded, the `STABLE_*` class totals, the URI timeline samples and the
/// stable object groups are derived from the collected statistics.
///
/// A `timeline_sample_limit` of `0` means "reuse `sample_limit`".
pub(super) fn build_adjusted_analysis(
    args: &Args,
    left: &[SequenceSample],
    right: &[SequenceSample],
    events: &[DiffEvent],
) -> AdjustedAnalysis {
    let mut adjusted = AdjustedAnalysis {
        raw_persistent_occurrences: events.len(),
        raw_persistent_unique_keys: unique_event_count(events),
        ..Default::default()
    };

    let mut interior_identities = BTreeSet::new();
    for event in events {
        let source = samples_for_side(event.source_side, left, right);
        let peer = samples_for_side(opposite_side(event.source_side), left, right);
        let position = edge_position(source, event.source_seq, args);
        if matches!(position, EdgePosition::Stable) {
            adjusted.edge_filtered_occurrences += 1;
            interior_identities.insert(event_identity(event));
        }

        let (classification, note) = adjusted_classification(event, position, source, peer);
        let record = AdjustedRecord {
            classification,
            event_type: event.event_type,
            key: event.key.clone(),
            source_side: event.source_side,
            source_seq: event.source_seq,
            source_run_id: event.source_run_id.clone(),
            note,
        };
        adjusted.add(classification, record, args.sample_limit);
    }
    adjusted.edge_filtered_unique_keys = interior_identities.len();

    // Sum up every class whose name marks it as a stable (non-edge) divergence.
    let mut stable_total = 0usize;
    let mut stable_keys = BTreeSet::new();
    for (class, stats) in &adjusted.stats {
        if !class.starts_with("STABLE_") {
            continue;
        }
        stable_total += stats.total;
        stable_keys.extend(stats.unique_keys.iter().cloned());
    }
    adjusted.adjusted_stable_occurrences += stable_total;
    adjusted.adjusted_stable_unique_keys = stable_keys.len();

    let timeline_limit = match args.timeline_sample_limit {
        0 => args.sample_limit,
        explicit => explicit,
    };
    let timelines = build_uri_timeline_samples(left, right, &adjusted, timeline_limit);
    adjusted.uri_timeline_samples = timelines;
    let groups = build_stable_object_groups(&adjusted, left, right, timeline_limit);
    adjusted.stable_object_groups = groups;
    adjusted
}
/// Chooses the adjusted class and an explanatory note for one persistent event.
///
/// `edge` is the event's position inside its source sequence; `source` and
/// `peer` are the full sample lists of the event's own side and the opposite
/// side. The rules are evaluated top to bottom and the first match wins, so
/// their order is significant.
///
/// Returns `(classification, note)`.
fn adjusted_classification(
event: &DiffEvent,
edge: EdgePosition,
source: &[SequenceSample],
peer: &[SequenceSample],
) -> (&'static str, String) {
// Trust-anchor events: a matching TAL hash + TA certificate hash anywhere on
// the peer side means only the projected string differs.
if event.event_type == "trust_anchor" {
if peer_has_same_ta_identity(peer, &event.key) {
return (
"TA_PROJECTION_FORMAT_DIFFERENCE",
"same TAL hash and TA certificate hash exist on peer side; full TA projection string differs".to_string(),
);
}
if edge == EdgePosition::Stable {
return (
"STABLE_TA_DIVERGENCE",
"trust-anchor identity is not aligned in a non-boundary sample".to_string(),
);
}
return edge_unresolved_class(edge);
}
// Object-hash events: keys have the form `<uri>|<hash>`.
if event.event_type == "object_hash" {
if let Some((uri, hash)) = split_object_hash_key(&event.key) {
// Facts about the peer side, gathered over *all* peer samples.
let peer_has_uri = peer_has_uri_any(peer, uri);
let peer_has_different_hash = peer_has_uri_different_hash_any(peer, uri, hash);
// Hash the peer holds for this URI at the very same seq, if any.
let peer_hash_at_source_seq = peer_sample_at_seq(peer, event.source_seq)
.and_then(|peer_sample| peer_sample.objects.get(uri));
// True when a later source sample carries the hash the peer has at this seq.
let source_later_matches_peer_hash = peer_hash_at_source_seq.is_some_and(|peer_hash| {
source_future_has_hash(source, event.source_seq, uri, peer_hash)
});
if edge == EdgePosition::Leading && peer_has_different_hash {
return (
"EDGE_LEADING_CONTENT_ROLLOVER",
"source leading-edge hash is absent, while peer already has the same URI with another hash".to_string(),
);
}
if edge == EdgePosition::Trailing && peer_has_different_hash {
return (
"EDGE_TRAILING_CONTENT_ROLLOVER",
"source trailing-edge hash is absent, while peer has the same URI with another hash and no later source observation exists".to_string(),
);
}
// Checked before the generic stable divergence so a source that catches
// up later is reported as resolved.
if edge == EdgePosition::Stable && source_later_matches_peer_hash {
return (
"MID_SEQUENCE_CONTENT_ROLLOVER_RESOLVED",
"peer already has a newer same-URI hash at this seq and source catches up later in the observed sequence".to_string(),
);
}
if edge == EdgePosition::Stable && peer_has_different_hash {
return (
"STABLE_CONTENT_DIVERGENCE",
"same URI exists on peer side with a different hash in a non-boundary sample"
.to_string(),
);
}
if edge == EdgePosition::Stable && !peer_has_uri {
return (
"STABLE_OBJECT_SET_DIVERGENCE",
"content key is derived from a non-boundary URI that is absent on peer side"
.to_string(),
);
}
}
// Fallback for object-hash events that matched none of the rules above
// (including keys that could not be split).
if edge == EdgePosition::Stable {
return (
"STABLE_CONTENT_DIVERGENCE",
"object hash key remains unaligned in a non-boundary sample".to_string(),
);
}
return edge_unresolved_class(edge);
}
// Every other event type: edge events stay unresolved, stable ones are
// mapped from their raw persistent class.
if edge != EdgePosition::Stable {
return edge_unresolved_class(edge);
}
let stable_class = match event.raw_class {
"PERSISTENT_OBJECT_SET_DIVERGENCE" => "STABLE_OBJECT_SET_DIVERGENCE",
"PERSISTENT_REJECT_DIVERGENCE" => "STABLE_REJECT_DIVERGENCE",
"PERSISTENT_OUTPUT_DIVERGENCE" => "STABLE_OUTPUT_DIVERGENCE",
"PERSISTENT_CONTENT_DIVERGENCE" => "STABLE_CONTENT_DIVERGENCE",
"PERSISTENT_TA_DIFFERENCE" => "STABLE_TA_DIVERGENCE",
_ => "STABLE_UNCLASSIFIED_DIVERGENCE",
};
(
stable_class,
"persistent event appears in a non-boundary sample after sequence edge filtering"
.to_string(),
)
}
/// Returns the generic `(classification, note)` pair for an event that no
/// more specific rule claimed, chosen purely by its edge position.
fn edge_unresolved_class(edge: EdgePosition) -> (&'static str, String) {
    let (class, note): (&'static str, &str) = match edge {
        EdgePosition::Leading => (
            "EDGE_LEADING_UNRESOLVED",
            "event appears only from the warmup edge and may predate the observed sequence",
        ),
        EdgePosition::Trailing => (
            "EDGE_TRAILING_UNRESOLVED",
            "event appears at the cooldown edge and lacks later peer observations",
        ),
        EdgePosition::Stable => (
            "STABLE_UNCLASSIFIED_DIVERGENCE",
            "event is not on an edge but no specific stable category matched",
        ),
    };
    (class, note.to_string())
}
impl AdjustedAnalysis {
    /// Records one adjusted event under `class`.
    ///
    /// The class total and its set of unique `event_type|side|key` identities
    /// are always updated; the record itself is retained only while fewer than
    /// `sample_limit` samples are stored for that class.
    fn add(&mut self, class: &'static str, record: AdjustedRecord, sample_limit: usize) {
        let identity = format!(
            "{}|{}|{}",
            record.event_type,
            record.source_side.as_str(),
            record.key
        );
        let bucket = self.stats.entry(class).or_default();
        bucket.total += 1;
        bucket.unique_keys.insert(identity);
        if bucket.samples.len() >= sample_limit {
            return;
        }
        bucket.samples.push(record);
    }
}
fn unique_event_count(events: &[DiffEvent]) -> usize {
events
.iter()
.map(event_identity)
.collect::<BTreeSet<_>>()
.len()
}
/// Builds the `event_type|side|key` string used to de-duplicate events.
fn event_identity(event: &DiffEvent) -> String {
    let side = event.source_side.as_str();
    [event.event_type, side, event.key.as_str()].join("|")
}
/// Picks the sample list that belongs to `side`.
fn samples_for_side<'a>(
    side: Side,
    left: &'a [SequenceSample],
    right: &'a [SequenceSample],
) -> &'a [SequenceSample] {
    if side == Side::Left { left } else { right }
}
/// Returns the other side of the comparison.
fn opposite_side(side: Side) -> Side {
    if side == Side::Left {
        Side::Right
    } else {
        Side::Left
    }
}
/// Classifies where the sample with sequence number `seq` sits in `samples`.
///
/// The first `args.warmup_samples` entries are `Leading`, the last
/// `args.cooldown_samples` entries are `Trailing` (the leading check wins when
/// both apply), and everything else is `Stable`. A `seq` that is not present
/// at all is also reported as `Stable`.
fn edge_position(samples: &[SequenceSample], seq: u32, args: &Args) -> EdgePosition {
    match samples.iter().position(|sample| sample.raw.seq == seq) {
        None => EdgePosition::Stable,
        Some(index) if index < args.warmup_samples => EdgePosition::Leading,
        Some(index) if samples.len().saturating_sub(index) <= args.cooldown_samples => {
            EdgePosition::Trailing
        }
        Some(_) => EdgePosition::Stable,
    }
}
/// Reports whether any peer sample contains an object with the given URI.
fn peer_has_uri_any(peer: &[SequenceSample], uri: &str) -> bool {
    for sample in peer {
        if sample.objects.contains_key(uri) {
            return true;
        }
    }
    false
}
/// Reports whether any peer sample holds `uri` with a hash other than `hash`.
fn peer_has_uri_different_hash_any(peer: &[SequenceSample], uri: &str, hash: &str) -> bool {
    peer.iter().any(|sample| {
        sample
            .objects
            .get(uri)
            .is_some_and(|peer_hash| peer_hash != hash)
    })
}
/// Reports whether a source sample with a sequence number greater than `seq`
/// holds `uri` with exactly `expected_hash`.
fn source_future_has_hash(
    source: &[SequenceSample],
    seq: u32,
    uri: &str,
    expected_hash: &str,
) -> bool {
    source.iter().any(|sample| {
        sample.raw.seq > seq
            && sample.objects.get(uri).map(String::as_str) == Some(expected_hash)
    })
}
/// Reports whether any trust-anchor key on the peer side shares the identity
/// (as extracted by `trust_anchor_identity`) of `key`.
///
/// Returns `false` when no identity can be extracted from `key`.
fn peer_has_same_ta_identity(peer: &[SequenceSample], key: &str) -> bool {
    match trust_anchor_identity(key) {
        None => false,
        Some(wanted) => peer
            .iter()
            .flat_map(|sample| sample.trust_anchors.iter())
            .any(|peer_key| {
                trust_anchor_identity(peer_key).is_some_and(|found| found == wanted)
            }),
    }
}
/// Extracts the `<third field>|<fourth field>` identity from a trust-anchor key.
///
/// A key consists of at least four `|`-separated fields. The last two fields
/// are taken from the right-hand end of the key, so a `|` occurring inside the
/// leading fields does not prevent the identity from being recognised (the
/// same right-to-left approach `split_object_hash_key` uses).
///
/// Returns `None` when the key has fewer than four fields.
fn trust_anchor_identity(key: &str) -> Option<String> {
    let mut fields = key.rsplitn(3, '|');
    let last = fields.next()?;
    let second_last = fields.next()?;
    let leading = fields.next()?;
    // The remainder must still hold two fields, i.e. at least one separator.
    if !leading.contains('|') {
        return None;
    }
    Some(format!("{second_last}|{last}"))
}
/// Splits a `<uri>|<hash>` key at its last `|` into `(uri, hash)`.
///
/// Returns `None` when the key contains no `|`.
fn split_object_hash_key(key: &str) -> Option<(&str, &str)> {
    let separator = key.rfind('|')?;
    // `|` is a single byte, so skipping one byte lands on a char boundary.
    Some((&key[..separator], &key[separator + 1..]))
}
fn build_uri_timeline_samples(
left: &[SequenceSample],
right: &[SequenceSample],
adjusted: &AdjustedAnalysis,
limit: usize,
) -> Vec<Value> {
let mut uris = BTreeSet::new();
for stats in adjusted.stats.values() {
for sample in &stats.samples {
if sample.event_type == "object_hash"
&& let Some((uri, _)) = split_object_hash_key(&sample.key)
{
uris.insert(uri.to_string());
}
if sample.event_type == "object_uri" {
uris.insert(sample.key.clone());
}
}
}
uris.into_iter()
.take(limit)
.map(|uri| {
json!({
"uri": uri,
"left": timeline_for_uri(left, &uri),
"right": timeline_for_uri(right, &uri),
})
})
.collect()
}
/// Lists, in sample order, every observation of `uri` in `samples` as a JSON
/// object carrying the sample's seq, run id, validation time and the hash.
fn timeline_for_uri(samples: &[SequenceSample], uri: &str) -> Vec<Value> {
    let mut entries = Vec::new();
    for sample in samples {
        let Some(hash) = sample.objects.get(uri) else {
            continue;
        };
        entries.push(json!({
            "seq": sample.raw.seq,
            "runId": sample.raw.run_id,
            "validationTime": format_time(sample.validation_time),
            "hash": hash,
        }));
    }
    entries
}
/// Groups the retained `STABLE_OBJECT_SET_DIVERGENCE` samples by source
/// side, seq, run id and publication point, and renders up to `limit` groups
/// as JSON.
///
/// Each group also records the CIR path of its source sample when that sample
/// can be found in the corresponding sequence.
fn build_stable_object_groups(
    adjusted: &AdjustedAnalysis,
    left: &[SequenceSample],
    right: &[SequenceSample],
    limit: usize,
) -> Vec<Value> {
    let records = match adjusted.stats.get("STABLE_OBJECT_SET_DIVERGENCE") {
        Some(stats) => &stats.samples,
        None => return Vec::new(),
    };

    let mut groups: BTreeMap<String, StableObjectGroup> = BTreeMap::new();
    for record in records {
        let physical_uri = physical_object_uri(record);
        let publication_point = publication_point_prefix(&physical_uri);
        let group_key = format!(
            "{}|{}|{}|{}",
            record.source_side.as_str(),
            record.source_seq,
            record.source_run_id,
            publication_point
        );
        let group = groups
            .entry(group_key)
            .or_insert_with(|| StableObjectGroup::new(record, &physical_uri));
        if group.source_cir_path.is_none() {
            // Stays `None` when the source sample cannot be located.
            group.source_cir_path = sample_by_side_seq_run(
                left,
                right,
                record.source_side,
                record.source_seq,
                &record.source_run_id,
            )
            .map(|source_sample| path_string(&source_sample.cir_path));
        }
        group.add(record, physical_uri);
    }

    groups
        .into_values()
        .take(limit)
        .map(StableObjectGroup::to_json)
        .collect()
}
#[derive(Clone, Debug)]
struct StableObjectGroup {
source_side: Side,
source_seq: u32,
source_run_id: String,
source_cir_path: Option<String>,
publication_point: String,
event_count: usize,
event_types: BTreeMap<&'static str, usize>,
physical_objects: BTreeMap<String, StablePhysicalObject>,
}
impl StableObjectGroup {
fn new(sample: &AdjustedRecord, physical_uri: &str) -> Self {
Self {
source_side: sample.source_side,
source_seq: sample.source_seq,
source_run_id: sample.source_run_id.clone(),
source_cir_path: None,
publication_point: publication_point_prefix(physical_uri),
event_count: 0,
event_types: BTreeMap::new(),
physical_objects: BTreeMap::new(),
}
}
fn add(&mut self, sample: &AdjustedRecord, physical_uri: String) {
self.event_count += 1;
*self.event_types.entry(sample.event_type).or_default() += 1;
let object = self
.physical_objects
.entry(physical_uri.clone())
.or_insert_with(|| StablePhysicalObject {
extension: object_extension(&physical_uri).to_string(),
uri: physical_uri,
event_types: BTreeSet::new(),
hashes: BTreeSet::new(),
});
object.event_types.insert(sample.event_type);
if let Some(hash) = event_hash(sample) {
object.hashes.insert(hash.to_string());
}
}
fn to_json(self) -> Value {
json!({
"sourceSide": self.source_side.as_str(),
"sourceSeq": self.source_seq,
"sourceRunId": self.source_run_id,
"sourceCirPath": self.source_cir_path,
"publicationPoint": self.publication_point,
"eventCount": self.event_count,
"eventTypes": self.event_types,
"physicalObjectCount": self.physical_objects.len(),
"physicalObjects": self.physical_objects.into_values().map(StablePhysicalObject::to_json).collect::<Vec<_>>(),
})
}
}
/// One physical object inside a `StableObjectGroup`, together with the event
/// types and hashes observed for it.
#[derive(Clone, Debug)]
struct StablePhysicalObject {
    uri: String,
    extension: String,
    event_types: BTreeSet<&'static str>,
    hashes: BTreeSet<String>,
}

impl StablePhysicalObject {
    /// Consumes the object and renders it as a JSON object.
    fn to_json(self) -> Value {
        let Self {
            uri,
            extension,
            event_types,
            hashes,
        } = self;
        json!({
            "uri": uri,
            "extension": extension,
            "eventTypes": event_types,
            "hashes": hashes,
        })
    }
}
/// Returns the URI a record refers to: the URI part of an `object_hash` key,
/// or the whole key for every other record (and for unsplittable hash keys).
fn physical_object_uri(sample: &AdjustedRecord) -> String {
    let split = if sample.event_type == "object_hash" {
        split_object_hash_key(&sample.key)
    } else {
        None
    };
    match split {
        Some((uri, _)) => uri.to_string(),
        None => sample.key.clone(),
    }
}
/// Returns the hash part of an `object_hash` record's key, or `None` for any
/// other event type or a key without a `|` separator.
fn event_hash(sample: &AdjustedRecord) -> Option<&str> {
    if sample.event_type != "object_hash" {
        return None;
    }
    let (_, hash) = split_object_hash_key(&sample.key)?;
    Some(hash)
}
/// Returns everything up to and including the last `/` of `uri`, or the whole
/// `uri` when it contains no `/`.
fn publication_point_prefix(uri: &str) -> String {
    match uri.rfind('/') {
        Some(slash) => uri[..=slash].to_string(),
        None => uri.to_string(),
    }
}
/// Returns the file extension of the last path segment of `uri`, without the
/// dot, or `""` when that segment has no `.`.
///
/// Only the text after the final `/` is inspected, so dots in a host name or a
/// directory name are never mistaken for an extension separator.
fn object_extension(uri: &str) -> &str {
    let file_name = uri.rsplit_once('/').map_or(uri, |(_, name)| name);
    file_name
        .rsplit_once('.')
        .map_or("", |(_, extension)| extension)
}
fn sample_by_side_seq_run<'a>(
left: &'a [SequenceSample],
right: &'a [SequenceSample],
side: Side,
seq: u32,
run_id: &str,
) -> Option<&'a SequenceSample> {
samples_for_side(side, left, right)
.iter()
.find(|sample| sample.raw.seq == seq && sample.raw.run_id == run_id)
}

View File

@ -0,0 +1,364 @@
use std::collections::BTreeSet;
use super::args::Args;
use super::io::object_hash_key;
use super::model::{
AnalysisResult, DiffEvent, EventOccurrence, SampleRecord, SequenceSample, Side,
};
/// Compares one extracted key set between the two sequences in both
/// directions: first with the left sequence as source, then the right.
///
/// `extract` selects the key set of a sample; keys that find a match in the
/// peer's alignment window are recorded under `resolved_class`, the others
/// under `persistent_class`.
pub(super) fn analyze_set<F>(
    result: &mut AnalysisResult,
    event_type: &'static str,
    left: &[SequenceSample],
    right: &[SequenceSample],
    extract: F,
    resolved_class: &'static str,
    persistent_class: &'static str,
    args: &Args,
) where
    F: for<'a> Fn(&'a SequenceSample) -> &'a BTreeSet<String>,
{
    let directions = [(Side::Left, left, right), (Side::Right, right, left)];
    for (source_side, source, peer) in directions {
        analyze_direction(
            result,
            event_type,
            source_side,
            source,
            peer,
            &extract,
            resolved_class,
            persistent_class,
            args,
        );
    }
}
fn analyze_direction<F>(
result: &mut AnalysisResult,
event_type: &'static str,
source_side: Side,
source: &[SequenceSample],
peer: &[SequenceSample],
extract: &F,
resolved_class: &'static str,
persistent_class: &'static str,
args: &Args,
) where
F: for<'a> Fn(&'a SequenceSample) -> &'a BTreeSet<String>,
{
for sample in source {
let source_set = extract(sample);
for key in source_set {
if peer_sample_at_seq(peer, sample.raw.seq)
.is_some_and(|peer_sample| extract(peer_sample).contains(key))
{
continue;
}
if let Some(matched) = find_future_match(peer, sample, key, extract, args) {
result.add(
resolved_class,
SampleRecord {
classification: resolved_class,
event_type,
key: key.clone(),
source_side,
source_seq: sample.raw.seq,
source_run_id: sample.raw.run_id.clone(),
matched_seq: Some(matched.seq),
matched_run_id: Some(matched.run_id),
note: format!(
"matched in {} sequence within alignment window",
matched.side.as_str()
),
},
args.sample_limit,
);
} else {
result.add(
persistent_class,
SampleRecord {
classification: persistent_class,
event_type,
key: key.clone(),
source_side,
source_seq: sample.raw.seq,
source_run_id: sample.raw.run_id.clone(),
matched_seq: None,
matched_run_id: None,
note: "no matching event in peer sequence alignment window".to_string(),
},
args.sample_limit,
);
}
}
}
}
/// Records `CONTENT_ROLLOVER_RESOLVED` events in both directions.
///
/// An event is recorded for a source object when the peer sample with the same
/// seq holds the same URI with a *different* hash, and a peer sample inside
/// the alignment window holds the URI with the source's hash.
pub(super) fn analyze_hash_rollover(
    result: &mut AnalysisResult,
    left: &[SequenceSample],
    right: &[SequenceSample],
    args: &Args,
) {
    for (source_side, source, peer) in [(Side::Left, left, right), (Side::Right, right, left)] {
        for sample in source {
            // Without a same-seq peer sample no object of this sample can qualify.
            let Some(peer_sample) = peer_sample_at_seq(peer, sample.raw.seq) else {
                continue;
            };
            for (uri, hash) in &sample.objects {
                let Some(peer_hash) = peer_sample.objects.get(uri) else {
                    continue;
                };
                if peer_hash == hash {
                    continue;
                }
                // Search the alignment window once and reuse the result.
                let Some(matched) = find_future_hash_match(peer, sample, uri, hash, args) else {
                    continue;
                };
                result.add(
                    "CONTENT_ROLLOVER_RESOLVED",
                    SampleRecord {
                        classification: "CONTENT_ROLLOVER_RESOLVED",
                        event_type: "object_content_rollover",
                        key: object_hash_key(uri, hash),
                        source_side,
                        source_seq: sample.raw.seq,
                        source_run_id: sample.raw.run_id.clone(),
                        matched_seq: Some(matched.seq),
                        matched_run_id: Some(matched.run_id),
                        note: "same URI hash appeared in peer sequence later".to_string(),
                    },
                    args.sample_limit,
                );
            }
        }
    }
}
pub(super) fn collect_persistent_events(
left: &[SequenceSample],
right: &[SequenceSample],
args: &Args,
) -> Vec<DiffEvent> {
let mut events = Vec::new();
collect_persistent_set(
&mut events,
"object_uri",
"PERSISTENT_OBJECT_SET_DIVERGENCE",
left,
right,
|sample| &sample.object_uris,
args,
);
collect_persistent_set(
&mut events,
"object_hash",
"PERSISTENT_CONTENT_DIVERGENCE",
left,
right,
|sample| &sample.object_hashes,
args,
);
collect_persistent_set(
&mut events,
"reject_uri",
"PERSISTENT_REJECT_DIVERGENCE",
left,
right,
|sample| &sample.rejects,
args,
);
collect_persistent_set(
&mut events,
"trust_anchor",
"PERSISTENT_TA_DIFFERENCE",
left,
right,
|sample| &sample.trust_anchors,
args,
);
collect_persistent_set(
&mut events,
"vrp_output",
"PERSISTENT_OUTPUT_DIVERGENCE",
left,
right,
|sample| &sample.vrps,
args,
);
collect_persistent_set(
&mut events,
"vap_output",
"PERSISTENT_OUTPUT_DIVERGENCE",
left,
right,
|sample| &sample.vaps,
args,
);
events
}
/// Collects the persistent events of one key set in both directions: first
/// with the left sequence as source, then the right.
fn collect_persistent_set<F>(
    events: &mut Vec<DiffEvent>,
    event_type: &'static str,
    raw_class: &'static str,
    left: &[SequenceSample],
    right: &[SequenceSample],
    extract: F,
    args: &Args,
) where
    F: for<'a> Fn(&'a SequenceSample) -> &'a BTreeSet<String>,
{
    let directions = [(Side::Left, left, right), (Side::Right, right, left)];
    for (source_side, source, peer) in directions {
        collect_persistent_direction(
            events,
            event_type,
            raw_class,
            source_side,
            source,
            peer,
            &extract,
            args,
        );
    }
}
/// Appends a `DiffEvent` for every source key that the peer neither contains
/// at the same seq nor anywhere inside the alignment window.
fn collect_persistent_direction<F>(
    events: &mut Vec<DiffEvent>,
    event_type: &'static str,
    raw_class: &'static str,
    source_side: Side,
    source: &[SequenceSample],
    peer: &[SequenceSample],
    extract: &F,
    args: &Args,
) where
    F: for<'a> Fn(&'a SequenceSample) -> &'a BTreeSet<String>,
{
    for sample in source {
        let peer_at_seq = peer_sample_at_seq(peer, sample.raw.seq);
        let unmatched = extract(sample).iter().filter(|key| {
            let present_at_seq =
                peer_at_seq.is_some_and(|peer_sample| extract(peer_sample).contains(*key));
            !present_at_seq
                && find_future_match(peer, sample, key.as_str(), extract, args).is_none()
        });
        events.extend(unmatched.map(|key| DiffEvent {
            event_type,
            raw_class,
            key: key.clone(),
            source_side,
            source_seq: sample.raw.seq,
            source_run_id: sample.raw.run_id.clone(),
        }));
    }
}
impl AnalysisResult {
    /// Counts one record under `class` and keeps it as a sample while fewer
    /// than `sample_limit` samples are stored for that class.
    fn add(&mut self, class: &'static str, record: SampleRecord, sample_limit: usize) {
        let bucket = self.stats.entry(class).or_default();
        bucket.total += 1;
        if bucket.samples.len() >= sample_limit {
            return;
        }
        bucket.samples.push(record);
    }
}
/// Returns the first sample in `peer` whose sequence number equals `seq`.
pub(super) fn peer_sample_at_seq(peer: &[SequenceSample], seq: u32) -> Option<&SequenceSample> {
    for sample in peer {
        if sample.raw.seq == seq {
            return Some(sample);
        }
    }
    None
}
/// Finds the first peer sample inside the alignment window of `source` whose
/// extracted key set contains `key`.
///
/// The reported side is `Left` only when the matched sample's `side` field is
/// exactly `"left"`; any other value, including an absent field, yields
/// `Right`.
fn find_future_match<F>(
    peer: &[SequenceSample],
    source: &SequenceSample,
    key: &str,
    extract: &F,
    args: &Args,
) -> Option<EventOccurrence>
where
    F: for<'a> Fn(&'a SequenceSample) -> &'a BTreeSet<String>,
{
    let hit = peer.iter().find(|candidate| {
        is_in_alignment_window(source, candidate, args) && extract(candidate).contains(key)
    })?;
    let side = match hit.raw.side.as_deref() {
        Some("left") => Side::Left,
        _ => Side::Right,
    };
    Some(occurrence(hit, side))
}
/// Finds the first peer sample inside the alignment window of `source` that
/// holds `uri` with exactly `hash`.
///
/// The reported side is `Left` only when the matched sample's `side` field is
/// exactly `"left"`; any other value, including an absent field, yields
/// `Right`.
fn find_future_hash_match(
    peer: &[SequenceSample],
    source: &SequenceSample,
    uri: &str,
    hash: &str,
    args: &Args,
) -> Option<EventOccurrence> {
    let hit = peer.iter().find(|candidate| {
        is_in_alignment_window(source, candidate, args)
            && candidate.objects.get(uri).map(String::as_str) == Some(hash)
    })?;
    let side = match hit.raw.side.as_deref() {
        Some("left") => Side::Left,
        _ => Side::Right,
    };
    Some(occurrence(hit, side))
}
/// Decides whether `candidate` may be aligned with `source`.
///
/// A candidate with a smaller seq never qualifies. Otherwise it qualifies when
/// it is at most `align_window_runs` runs ahead, or when the absolute
/// difference of the two validation times is at most `align_window_secs`
/// whole seconds.
fn is_in_alignment_window(
    source: &SequenceSample,
    candidate: &SequenceSample,
    args: &Args,
) -> bool {
    let Some(run_delta) = candidate.raw.seq.checked_sub(source.raw.seq) else {
        return false;
    };
    let elapsed = candidate.validation_time - source.validation_time;
    let within_runs = run_delta <= args.align_window_runs;
    let within_secs = elapsed.whole_seconds().abs() <= args.align_window_secs;
    within_runs || within_secs
}
/// Captures the seq and run id of `sample` together with the given `side`.
fn occurrence(sample: &SequenceSample, side: Side) -> EventOccurrence {
    let seq = sample.raw.seq;
    let run_id = sample.raw.run_id.clone();
    EventOccurrence { side, seq, run_id }
}

View File

@ -0,0 +1,119 @@
use std::path::PathBuf;
/// Parsed command-line options of the sequence triage tool.
#[derive(Debug, PartialEq, Eq)]
pub(super) struct Args {
/// Value of `--left-sequence` (required).
pub(super) left_sequence: PathBuf,
/// Value of `--right-sequence` (required).
pub(super) right_sequence: PathBuf,
/// Value of `--out-dir` (required).
pub(super) out_dir: PathBuf,
/// Value of `--align-window-runs`; the parser defaults it to 2.
pub(super) align_window_runs: u32,
/// Value of `--align-window-secs`; the parser defaults it to 1800.
pub(super) align_window_secs: i64,
/// Value of `--sample-limit`; the parser defaults it to 200.
pub(super) sample_limit: usize,
/// Value of `--warmup-samples`; the parser defaults it to 1.
pub(super) warmup_samples: usize,
/// Value of `--cooldown-samples`; the parser defaults it to 1.
pub(super) cooldown_samples: usize,
/// Value of `--timeline-sample-limit`; the parser defaults it to 0.
pub(super) timeline_sample_limit: usize,
}
/// Returns the one-line usage text listing every supported option.
pub(super) fn usage() -> &'static str {
    concat!(
        "Usage: sequence_triage_ccr_cir",
        " --left-sequence <path>",
        " --right-sequence <path>",
        " --out-dir <path>",
        " [--align-window-runs <n>]",
        " [--align-window-secs <n>]",
        " [--sample-limit <n>]",
        " [--warmup-samples <n>]",
        " [--cooldown-samples <n>]",
        " [--timeline-sample-limit <n>]",
    )
}
/// Parses the command line (`argv[0]` is skipped as the program name).
///
/// # Errors
///
/// Returns a message when an option lacks its value, a numeric value does not
/// parse, an unknown argument is seen, or a required option is missing.
/// `-h` / `--help` also return `Err`, carrying only the usage text.
pub(super) fn parse_args(argv: &[String]) -> Result<Args, String> {
    let mut left_sequence = None;
    let mut right_sequence = None;
    let mut out_dir = None;
    let mut align_window_runs = 2u32;
    let mut align_window_secs = 1800i64;
    let mut sample_limit = 200usize;
    let mut warmup_samples = 1usize;
    let mut cooldown_samples = 1usize;
    let mut timeline_sample_limit = 0usize;

    let mut index = 1usize;
    while index < argv.len() {
        let flag = argv[index].as_str();
        match flag {
            "--left-sequence" => {
                left_sequence = Some(PathBuf::from(flag_value(argv, &mut index, flag)?));
            }
            "--right-sequence" => {
                right_sequence = Some(PathBuf::from(flag_value(argv, &mut index, flag)?));
            }
            "--out-dir" => {
                out_dir = Some(PathBuf::from(flag_value(argv, &mut index, flag)?));
            }
            "--align-window-runs" => {
                align_window_runs = parsed_flag_value(argv, &mut index, flag)?;
            }
            "--align-window-secs" => {
                align_window_secs = parsed_flag_value(argv, &mut index, flag)?;
            }
            "--sample-limit" => {
                sample_limit = parsed_flag_value(argv, &mut index, flag)?;
            }
            "--warmup-samples" => {
                warmup_samples = parsed_flag_value(argv, &mut index, flag)?;
            }
            "--cooldown-samples" => {
                cooldown_samples = parsed_flag_value(argv, &mut index, flag)?;
            }
            "--timeline-sample-limit" => {
                timeline_sample_limit = parsed_flag_value(argv, &mut index, flag)?;
            }
            "-h" | "--help" => return Err(usage().to_string()),
            other => return Err(format!("unknown argument: {other}\n{}", usage())),
        }
        index += 1;
    }

    Ok(Args {
        left_sequence: left_sequence
            .ok_or_else(|| format!("--left-sequence is required\n{}", usage()))?,
        right_sequence: right_sequence
            .ok_or_else(|| format!("--right-sequence is required\n{}", usage()))?,
        out_dir: out_dir.ok_or_else(|| format!("--out-dir is required\n{}", usage()))?,
        align_window_runs,
        align_window_secs,
        sample_limit,
        warmup_samples,
        cooldown_samples,
        timeline_sample_limit,
    })
}

/// Advances `index` to the argument following `flag` and returns it, or a
/// "`<flag>` requires a value" error when the command line ends there.
fn flag_value<'a>(argv: &'a [String], index: &mut usize, flag: &str) -> Result<&'a str, String> {
    *index += 1;
    argv.get(*index)
        .map(String::as_str)
        .ok_or_else(|| format!("{flag} requires a value"))
}

/// Like `flag_value`, but also parses the value, reporting
/// "invalid `<flag>`: `<value>`" when parsing fails.
fn parsed_flag_value<T: std::str::FromStr>(
    argv: &[String],
    index: &mut usize,
    flag: &str,
) -> Result<T, String> {
    let value = flag_value(argv, index, flag)?;
    value
        .parse::<T>()
        .map_err(|_| format!("invalid {flag}: {value}"))
}

View File

@ -0,0 +1,33 @@
use std::path::{Path, PathBuf};
use time::OffsetDateTime;
use time::format_description::well_known::Rfc3339;
/// Reads the whole file at `path`, turning an I/O failure into a message that
/// names the path.
pub(super) fn read_file(path: &Path) -> Result<Vec<u8>, String> {
    match std::fs::read(path) {
        Ok(bytes) => Ok(bytes),
        Err(e) => Err(format!("read file failed: {}: {e}", path.display())),
    }
}
/// Returns `path` unchanged when it is absolute, otherwise `base_dir` joined
/// with `path`.
pub(super) fn resolve_path(base_dir: &Path, path: &Path) -> PathBuf {
    if !path.is_absolute() {
        return base_dir.join(path);
    }
    path.to_path_buf()
}
/// Parses an RFC 3339 timestamp, reporting the offending text on failure.
pub(super) fn parse_rfc3339(value: &str) -> Result<OffsetDateTime, String> {
    match OffsetDateTime::parse(value, &Rfc3339) {
        Ok(parsed) => Ok(parsed),
        Err(e) => Err(format!("parse RFC3339 failed: {value}: {e}")),
    }
}
/// Formats a timestamp as RFC 3339, falling back to its `Display` form when
/// RFC 3339 formatting fails.
pub(super) fn format_time(value: OffsetDateTime) -> String {
    match value.format(&Rfc3339) {
        Ok(text) => text,
        Err(_) => value.to_string(),
    }
}
/// Converts a path to an owned string, replacing invalid UTF-8 lossily.
pub(super) fn path_string(path: &Path) -> String {
    String::from(path.to_string_lossy())
}
/// Builds the `<uri>|<hash>` key that identifies one object revision.
pub(super) fn object_hash_key(uri: &str, hash: &str) -> String {
    [uri, hash].join("|")
}

View File

@ -0,0 +1,144 @@
use std::collections::{BTreeMap, BTreeSet};
use std::path::Path;
use crate::ccr::{decode_ccr_compare_views, decode_content_info};
use crate::cir::decode_cir;
use super::io::{object_hash_key, parse_rfc3339, read_file, resolve_path};
use super::model::{SequenceItemRaw, SequenceSample, Side};
/// Loads a sequence JSONL file and decodes the CIR and CCR artifacts of every
/// sample it lists.
///
/// Blank lines are skipped. Relative artifact paths are resolved against the
/// directory of `path`. The returned samples are sorted by ascending `seq`.
///
/// # Errors
///
/// Returns a message when the file cannot be read, a line is not valid JSON,
/// an item has a `schemaVersion` other than 1, a `seq` value repeats, an item
/// has a status other than `success`, an artifact cannot be read or decoded, a
/// validation time is not RFC 3339, or an item's `side` field disagrees with
/// `side`.
pub(super) fn load_sequence(path: &Path, side: Side) -> Result<Vec<SequenceSample>, String> {
    let base_dir = path.parent().unwrap_or_else(|| Path::new("."));
    let text = std::fs::read_to_string(path)
        .map_err(|e| format!("read sequence failed: {}: {e}", path.display()))?;
    let mut samples = Vec::new();
    let mut seen_seq = BTreeSet::new();
    for (line_index, line) in text.lines().enumerate() {
        let line = line.trim();
        if line.is_empty() {
            continue;
        }
        let raw: SequenceItemRaw = serde_json::from_str(line).map_err(|e| {
            format!(
                "parse sequence JSONL failed: {}:{}: {e}",
                path.display(),
                line_index + 1
            )
        })?;
        // A missing schema version is treated as version 1.
        if raw.schema_version.unwrap_or(1) != 1 {
            return Err(format!(
                "unsupported sequence item schemaVersion in {}:{}",
                path.display(),
                line_index + 1
            ));
        }
        if !seen_seq.insert(raw.seq) {
            return Err(format!("duplicate seq {} in {}", raw.seq, path.display()));
        }
        if let Some(status) = &raw.status
            && status != "success"
        {
            return Err(format!(
                "sequence sample {} has non-success status: {status}",
                raw.run_id
            ));
        }
        let cir_path = resolve_path(base_dir, &raw.cir_path);
        let ccr_path = resolve_path(base_dir, &raw.ccr_path);
        let cir = decode_cir(&read_file(&cir_path)?).map_err(|e| {
            format!(
                "decode CIR failed for sample {} ({}): {e}",
                raw.run_id,
                cir_path.display()
            )
        })?;
        let ccr = decode_content_info(&read_file(&ccr_path)?).map_err(|e| {
            format!(
                "decode CCR failed for sample {} ({}): {e}",
                raw.run_id,
                ccr_path.display()
            )
        })?;
        // Prefer the validation time from the sequence item; fall back to the CIR's.
        let validation_time = raw
            .validation_time
            .as_deref()
            .map(parse_rfc3339)
            .transpose()?
            .unwrap_or(cir.validation_time);
        let objects = cir
            .objects
            .iter()
            .map(|item| (item.rsync_uri.clone(), hex::encode(&item.sha256)))
            .collect::<BTreeMap<_, _>>();
        let object_uris = objects.keys().cloned().collect::<BTreeSet<_>>();
        let object_hashes = objects
            .iter()
            .map(|(uri, hash)| object_hash_key(uri, hash))
            .collect::<BTreeSet<_>>();
        let rejects = cir
            .rejected_objects
            .iter()
            .map(|item| item.object_uri.clone())
            .collect::<BTreeSet<_>>();
        // Trust-anchor key: `<TA rsync URI>|<TAL URI>|<TAL sha256>|<TA cert sha256>`.
        let trust_anchors = cir
            .trust_anchors
            .iter()
            .map(|item| {
                format!(
                    "{}|{}|{}|{}",
                    item.ta_rsync_uri,
                    item.tal_uri,
                    hex::encode(crate::cir::sha256(&item.tal_bytes)),
                    hex::encode(&item.ta_certificate_sha256)
                )
            })
            .collect::<BTreeSet<_>>();
        let (vrps, vaps) = decode_ccr_compare_views(&ccr).map_err(|e| {
            format!(
                "decode CCR compare views failed for sample {} ({}): {e}",
                raw.run_id,
                ccr_path.display()
            )
        })?;
        let vrps = vrps
            .into_iter()
            .map(|row| format!("{}|{}|{}", row.asn, row.ip_prefix, row.max_length))
            .collect::<BTreeSet<_>>();
        let vaps = vaps
            .into_iter()
            .map(|row| format!("{}|{}", row.customer_asn, row.providers))
            .collect::<BTreeSet<_>>();
        samples.push(SequenceSample {
            raw,
            validation_time,
            ccr_path,
            cir_path,
            objects,
            object_uris,
            object_hashes,
            rejects,
            trust_anchors,
            vrps,
            vaps,
        });
    }
    // Duplicate seq values were rejected above, so sorting alone guarantees a
    // strictly increasing order; no separate monotonicity check is required.
    samples.sort_by_key(|sample| sample.raw.seq);
    if samples.iter().any(|sample| {
        sample
            .raw
            .side
            .as_deref()
            .is_some_and(|item| item != side.as_str())
    }) {
        return Err(format!(
            "sequence side field does not match expected side: {}",
            side.as_str()
        ));
    }
    Ok(samples)
}

View File

@ -0,0 +1,173 @@
use std::collections::{BTreeMap, BTreeSet};
use std::path::PathBuf;
use serde::Deserialize;
use serde_json::Value;
use time::OffsetDateTime;
/// One item of a sequence JSONL file, deserialized with camelCase field names.
#[derive(Clone, Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(super) struct SequenceItemRaw {
/// Schema version of the item; the loader treats a missing value as 1 and
/// rejects anything other than 1.
pub(super) schema_version: Option<u32>,
pub(super) rp_id: String,
/// Side label; when present the loader requires it to equal the side the
/// sequence is loaded for.
pub(super) side: Option<String>,
/// Sequence number; the loader rejects duplicates and sorts samples by it.
pub(super) seq: u32,
/// Run identifier, echoed in error messages and reports.
pub(super) run_id: String,
pub(super) sync_mode: Option<String>,
/// Run status; when present the loader requires it to be `success`.
pub(super) status: Option<String>,
pub(super) start_time: Option<String>,
pub(super) finish_time: Option<String>,
/// RFC 3339 validation time; when absent the loader uses the CIR's value.
pub(super) validation_time: Option<String>,
/// Path of the CCR artifact; relative paths are resolved against the
/// sequence file's directory by the loader.
pub(super) ccr_path: PathBuf,
/// Path of the CIR artifact; relative paths are resolved against the
/// sequence file's directory by the loader.
pub(super) cir_path: PathBuf,
pub(super) ccr_sha256: Option<String>,
pub(super) cir_sha256: Option<String>,
pub(super) wall_ms: Option<u64>,
pub(super) max_rss_kb: Option<u64>,
pub(super) vrps: Option<u64>,
pub(super) vaps: Option<u64>,
}
/// A fully loaded sequence item: the raw JSONL record plus the key sets
/// derived from its decoded CIR and CCR artifacts.
#[derive(Clone, Debug)]
pub(super) struct SequenceSample {
/// The JSONL record this sample was built from.
pub(super) raw: SequenceItemRaw,
/// Validation time from the record, or from the CIR when the record has none.
pub(super) validation_time: OffsetDateTime,
/// Resolved path of the CCR artifact.
pub(super) ccr_path: PathBuf,
/// Resolved path of the CIR artifact.
pub(super) cir_path: PathBuf,
/// CIR objects: rsync URI mapped to the hex-encoded SHA-256.
pub(super) objects: BTreeMap<String, String>,
/// The URIs (keys) of `objects`.
pub(super) object_uris: BTreeSet<String>,
/// `<uri>|<hash>` keys, one per entry of `objects`.
pub(super) object_hashes: BTreeSet<String>,
/// URIs of the CIR's rejected objects.
pub(super) rejects: BTreeSet<String>,
/// Trust-anchor keys of the form
/// `<TA rsync URI>|<TAL URI>|<TAL sha256 hex>|<TA certificate sha256 hex>`.
pub(super) trust_anchors: BTreeSet<String>,
/// VRP rows from the CCR as `<asn>|<ip prefix>|<max length>`.
pub(super) vrps: BTreeSet<String>,
/// VAP rows from the CCR as `<customer asn>|<providers>`.
pub(super) vaps: BTreeSet<String>,
}
/// Which of the two compared sequences a sample or event belongs to.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(super) enum Side {
/// The sequence given as the left input.
Left,
/// The sequence given as the right input.
Right,
}
impl Side {
    /// Returns the lowercase label of this side: `"left"` or `"right"`.
    pub(super) fn as_str(self) -> &'static str {
        match self {
            Self::Left => "left",
            Self::Right => "right",
        }
    }
}
/// Identifies a single occurrence by side, sequence number and run id.
/// NOTE(review): no user of this type is visible in this part of the file —
/// confirm its role in the analysis module.
#[derive(Clone, Debug)]
pub(super) struct EventOccurrence {
pub(super) side: Side,
pub(super) seq: u32,
pub(super) run_id: String,
}
/// One classified finding kept as an example in the raw analysis output.
///
/// Serialized field by field into the report and the samples JSONL file.
#[derive(Clone, Debug)]
pub(super) struct SampleRecord {
pub(super) classification: &'static str,
pub(super) event_type: &'static str,
pub(super) key: String,
pub(super) source_side: Side,
pub(super) source_seq: u32,
pub(super) source_run_id: String,
/// Sequence number of the matching sample, if any.
/// NOTE(review): presumably the peer-side sample — confirm in the analysis module.
pub(super) matched_seq: Option<u32>,
/// Run id of the matching sample, if any.
pub(super) matched_run_id: Option<String>,
pub(super) note: String,
}
/// Per-classification totals of the raw analysis.
#[derive(Clone, Debug, Default)]
pub(super) struct ClassStats {
/// Reported as `count` in the JSON output.
pub(super) total: usize,
/// Example records emitted with the classification.
/// NOTE(review): presumably capped by the sample limit — confirm where records are added.
pub(super) samples: Vec<SampleRecord>,
}
/// Result of the raw analysis: statistics keyed by classification name.
#[derive(Clone, Debug, Default)]
pub(super) struct AnalysisResult {
pub(super) stats: BTreeMap<&'static str, ClassStats>,
}
/// One difference event consumed by the adjusted analysis.
///
/// Identifies the source sample (side, seq, run id) the event was observed on.
/// NOTE(review): `raw_class` presumably carries the classification assigned by
/// the raw analysis — confirm where events are built.
#[derive(Clone, Debug)]
pub(super) struct DiffEvent {
pub(super) event_type: &'static str,
pub(super) raw_class: &'static str,
pub(super) key: String,
pub(super) source_side: Side,
pub(super) source_seq: u32,
pub(super) source_run_id: String,
}
/// Position of a sample relative to the ends of its sequence.
/// NOTE(review): presumably `Leading`/`Trailing` mark the warmup/cooldown
/// edges and `Stable` everything in between — confirm in the adjusted analysis.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(super) enum EdgePosition {
Leading,
Stable,
Trailing,
}
/// One classified finding kept as an example in the adjusted analysis output.
///
/// Serialized field by field under `adjusted.classificationCounts[].samples`.
#[derive(Clone, Debug)]
pub(super) struct AdjustedRecord {
pub(super) classification: &'static str,
pub(super) event_type: &'static str,
pub(super) key: String,
pub(super) source_side: Side,
pub(super) source_seq: u32,
pub(super) source_run_id: String,
pub(super) note: String,
}
/// Per-classification totals of the adjusted analysis.
#[derive(Clone, Debug, Default)]
pub(super) struct AdjustedClassStats {
/// Reported as `occurrences`.
pub(super) total: usize,
/// Distinct keys seen; only the set size is reported (`uniqueKeys`).
pub(super) unique_keys: BTreeSet<String>,
/// Example records emitted with the classification.
pub(super) samples: Vec<AdjustedRecord>,
}
/// Result of the adjusted analysis, reported under the `adjusted` key of the output.
#[derive(Clone, Debug, Default)]
pub(super) struct AdjustedAnalysis {
/// Number of input events handed to the adjusted analysis.
pub(super) raw_persistent_occurrences: usize,
/// Number of unique input events.
pub(super) raw_persistent_unique_keys: usize,
pub(super) edge_filtered_occurrences: usize,
pub(super) edge_filtered_unique_keys: usize,
pub(super) adjusted_stable_occurrences: usize,
pub(super) adjusted_stable_unique_keys: usize,
/// Statistics keyed by adjusted classification name.
pub(super) stats: BTreeMap<&'static str, AdjustedClassStats>,
/// Pre-built JSON values emitted unchanged as `uriTimelineSamples`.
pub(super) uri_timeline_samples: Vec<Value>,
/// Pre-built JSON values emitted unchanged as `stableObjectGroups`.
pub(super) stable_object_groups: Vec<Value>,
}
/// One sandwich finding: a peer sample that lies strictly between two adjacent
/// source samples but disagrees with a value both source samples share.
#[derive(Clone, Debug)]
pub(super) struct SandwichRecord {
pub(super) classification: &'static str,
/// Kind of value compared, e.g. `object`, `reject_uri`, `vrp_output`, `vap_output`.
pub(super) set_type: &'static str,
pub(super) key: String,
pub(super) source_side: Side,
pub(super) source_start_seq: u32,
pub(super) source_start_run_id: String,
pub(super) source_end_seq: u32,
pub(super) source_end_run_id: String,
pub(super) peer_seq: u32,
pub(super) peer_run_id: String,
/// Value held by the source side at both interval endpoints.
pub(super) source_value: Option<String>,
/// Differing value on the peer side; `None` when the peer lacks the key.
pub(super) peer_value: Option<String>,
/// Validation times, already formatted as strings.
pub(super) source_start_time: String,
pub(super) peer_time: String,
pub(super) source_end_time: String,
pub(super) note: String,
}
/// Per-classification totals of the sandwich analysis.
#[derive(Clone, Debug, Default)]
pub(super) struct SandwichClassStats {
/// Number of findings recorded for the classification.
pub(super) total: usize,
/// Distinct finding keys; only the set size is reported (`uniqueKeys`).
pub(super) unique_keys: BTreeSet<String>,
/// Example records; filled only while below the configured sample limit.
pub(super) samples: Vec<SandwichRecord>,
}
/// Result of the sandwich analysis, reported under the `sandwich` key of the output.
#[derive(Clone, Debug, Default)]
pub(super) struct SandwichAnalysis {
/// Total number of findings across all classifications.
pub(super) total_occurrences: usize,
/// Distinct `classification|set_type|side|key` strings across all findings.
pub(super) unique_keys: BTreeSet<String>,
/// Number of findings per set type.
pub(super) by_set_type: BTreeMap<&'static str, usize>,
/// Statistics keyed by classification name.
pub(super) stats: BTreeMap<&'static str, SandwichClassStats>,
}

View File

@ -0,0 +1,408 @@
use std::collections::BTreeSet;
use std::path::Path;
use serde_json::{Value, json};
use super::args::Args;
use super::io::{format_time, path_string};
use super::model::{
AdjustedAnalysis, AdjustedRecord, AnalysisResult, SampleRecord, SandwichAnalysis,
SandwichRecord, SequenceSample,
};
pub(super) fn build_output(
args: &Args,
left: &[SequenceSample],
right: &[SequenceSample],
result: &AnalysisResult,
adjusted: &AdjustedAnalysis,
sandwich: &SandwichAnalysis,
) -> Value {
let classifications = result
.stats
.iter()
.map(|(class, stats)| {
json!({
"classification": class,
"count": stats.total,
"samples": stats.samples.iter().map(sample_to_json).collect::<Vec<_>>(),
})
})
.collect::<Vec<_>>();
let adjusted_classifications = adjusted
.stats
.iter()
.map(|(class, stats)| {
json!({
"classification": class,
"occurrences": stats.total,
"uniqueKeys": stats.unique_keys.len(),
"samples": stats.samples.iter().map(adjusted_sample_to_json).collect::<Vec<_>>(),
})
})
.collect::<Vec<_>>();
let sandwich_classifications = sandwich
.stats
.iter()
.map(|(class, stats)| {
json!({
"classification": class,
"occurrences": stats.total,
"uniqueKeys": stats.unique_keys.len(),
"samples": stats.samples.iter().map(sandwich_sample_to_json).collect::<Vec<_>>(),
})
})
.collect::<Vec<_>>();
json!({
"schemaVersion": 1,
"generatedBy": "sequence_triage_ccr_cir",
"inputContract": "left-right-sequence-jsonl-with-ccr-cir-artifacts",
"parameters": {
"leftSequence": path_string(&args.left_sequence),
"rightSequence": path_string(&args.right_sequence),
"alignWindowRuns": args.align_window_runs,
"alignWindowSecs": args.align_window_secs,
"sampleLimit": args.sample_limit,
"warmupSamples": args.warmup_samples,
"cooldownSamples": args.cooldown_samples,
"timelineSampleLimit": if args.timeline_sample_limit == 0 { args.sample_limit } else { args.timeline_sample_limit },
},
"left": sequence_summary(left),
"right": sequence_summary(right),
"classificationCounts": classifications,
"totals": {
"resolvedTemporalLike": count_class(result, "TEMPORAL_LAG_RESOLVED") + count_class(result, "CONTENT_ROLLOVER_RESOLVED"),
"persistent": count_prefix(result, "PERSISTENT_"),
"unclassified": count_class(result, "UNCLASSIFIED_INSUFFICIENT_WINDOW"),
},
"adjusted": {
"warmupSamples": args.warmup_samples,
"cooldownSamples": args.cooldown_samples,
"rawPersistent": {
"occurrences": adjusted.raw_persistent_occurrences,
"uniqueKeys": adjusted.raw_persistent_unique_keys,
},
"edgeFilteredPersistent": {
"occurrences": adjusted.edge_filtered_occurrences,
"uniqueKeys": adjusted.edge_filtered_unique_keys,
},
"adjustedStablePersistent": {
"occurrences": adjusted.adjusted_stable_occurrences,
"uniqueKeys": adjusted.adjusted_stable_unique_keys,
},
"classificationCounts": adjusted_classifications,
"uriTimelineSamples": adjusted.uri_timeline_samples,
"stableObjectGroups": adjusted.stable_object_groups,
"interpretation": {
"rawPersistentMeaning": "raw persistent keeps the original #043 single-event matching semantics.",
"edgeFilteredMeaning": "edge-filtered persistent keeps only non-warmup and non-cooldown source occurrences.",
"adjustedStableMeaning": "adjusted stable persistent keeps non-edge findings after URI-level rollover and TA projection filtering.",
"stableObjectGroupsMeaning": "STABLE_OBJECT_SET_DIVERGENCE events are additionally collapsed by source CIR, publication point, and physical object URI so object_uri/object_hash duplicate event views do not inflate physical-object counts.",
}
},
"sandwich": {
"strictTimeWindow": true,
"method": "For each side, use two adjacent source samples as a stable interval. If source_start.time < peer.time < source_end.time and the source value is identical at both interval endpoints, the peer sample is expected to contain the same value.",
"totals": {
"occurrences": sandwich.total_occurrences,
"uniqueKeys": sandwich.unique_keys.len(),
},
"bySetType": sandwich.by_set_type,
"classificationCounts": sandwich_classifications,
"interpretation": {
"missingStableObject": "The source side proves a URI/hash is stable across an interval that contains the peer sample, but the peer sample has no such URI.",
"hashMismatchStableObject": "The source side proves a URI/hash is stable across an interval that contains the peer sample, but the peer sample has the same URI with a different hash.",
"missingStableReject": "The source side consistently rejects the URI across an interval that contains the peer sample, but the peer sample does not reject it.",
"missingStableOutput": "The source side consistently outputs a VRP/VAP key across an interval that contains the peer sample, but the peer sample does not output it."
}
},
"interpretation": {
"temporalResolvedMeaning": "Event appeared only on one side at one sample but aligned in the peer sequence later.",
"persistentMeaning": "Event did not align within the configured run/time window; inspect as a candidate RP behavior difference or persistent sync/input difference.",
"limits": [
"Sequence triage only reads sequence JSONL plus referenced CCR/CIR files.",
"It does not read report.json, logs, repo-bytes DB, cache, mirror, or raw objects for root cause proof.",
"Resolved temporal findings reduce false positives but do not prove the exact external repository update time."
]
}
})
}
fn sequence_summary(samples: &[SequenceSample]) -> Value {
json!({
"sampleCount": samples.len(),
"rpIds": samples.iter().map(|sample| sample.raw.rp_id.clone()).collect::<BTreeSet<_>>(),
"firstSeq": samples.first().map(|sample| sample.raw.seq),
"lastSeq": samples.last().map(|sample| sample.raw.seq),
"firstValidationTime": samples.first().map(|sample| format_time(sample.validation_time)),
"lastValidationTime": samples.last().map(|sample| format_time(sample.validation_time)),
"samples": samples.iter().map(|sample| json!({
"seq": sample.raw.seq,
"runId": sample.raw.run_id,
"syncMode": sample.raw.sync_mode,
"startTime": sample.raw.start_time,
"finishTime": sample.raw.finish_time,
"validationTime": format_time(sample.validation_time),
"status": sample.raw.status,
"ccrPath": path_string(&sample.ccr_path),
"cirPath": path_string(&sample.cir_path),
"ccrSha256": sample.raw.ccr_sha256,
"cirSha256": sample.raw.cir_sha256,
"wallMs": sample.raw.wall_ms,
"maxRssKb": sample.raw.max_rss_kb,
"vrps": sample.raw.vrps.or(Some(sample.vrps.len() as u64)),
"vaps": sample.raw.vaps.or(Some(sample.vaps.len() as u64)),
"objectCount": sample.object_uris.len(),
"rejectCount": sample.rejects.len(),
"trustAnchorCount": sample.trust_anchors.len(),
})).collect::<Vec<_>>(),
})
}
fn sample_to_json(sample: &SampleRecord) -> Value {
json!({
"classification": sample.classification,
"eventType": sample.event_type,
"key": sample.key,
"sourceSide": sample.source_side.as_str(),
"sourceSeq": sample.source_seq,
"sourceRunId": sample.source_run_id,
"matchedSeq": sample.matched_seq,
"matchedRunId": sample.matched_run_id,
"note": sample.note,
})
}
fn adjusted_sample_to_json(sample: &AdjustedRecord) -> Value {
json!({
"classification": sample.classification,
"eventType": sample.event_type,
"key": sample.key,
"sourceSide": sample.source_side.as_str(),
"sourceSeq": sample.source_seq,
"sourceRunId": sample.source_run_id,
"note": sample.note,
})
}
fn sandwich_sample_to_json(sample: &SandwichRecord) -> Value {
json!({
"classification": sample.classification,
"setType": sample.set_type,
"key": sample.key,
"sourceSide": sample.source_side.as_str(),
"sourceStartSeq": sample.source_start_seq,
"sourceStartRunId": sample.source_start_run_id,
"sourceEndSeq": sample.source_end_seq,
"sourceEndRunId": sample.source_end_run_id,
"peerSeq": sample.peer_seq,
"peerRunId": sample.peer_run_id,
"sourceValue": sample.source_value,
"peerValue": sample.peer_value,
"sourceStartTime": sample.source_start_time,
"peerTime": sample.peer_time,
"sourceEndTime": sample.source_end_time,
"note": sample.note,
})
}
/// Returns the total recorded for exactly `class`, or 0 when the
/// classification never occurred.
fn count_class(result: &AnalysisResult, class: &'static str) -> usize {
    match result.stats.get(class) {
        Some(stats) => stats.total,
        None => 0,
    }
}
/// Sums the totals of every classification whose name starts with `prefix`.
fn count_prefix(result: &AnalysisResult, prefix: &str) -> usize {
    let mut total = 0;
    for (class, stats) in &result.stats {
        if class.starts_with(prefix) {
            total += stats.total;
        }
    }
    total
}
/// Writes `value` to `path` as pretty-printed JSON followed by a newline.
///
/// # Errors
/// Returns the serializer's message when serialization fails, or a
/// `write JSON failed: <path>: <cause>` message when the file cannot be written.
pub(super) fn write_json(path: &Path, value: &Value) -> Result<(), String> {
    let mut body = serde_json::to_string_pretty(value).map_err(|err| err.to_string())?;
    body.push('\n');
    std::fs::write(path, body)
        .map_err(|err| format!("write JSON failed: {}: {err}", path.display()))
}
/// Renders the report produced by `build_output` as a Markdown summary and
/// writes it to `path`.
///
/// Values missing from `output` (or of an unexpected JSON type) are rendered
/// as `0` or as an empty string rather than causing an error. The
/// "Stable Object Groups" section is emitted only when there is at least one
/// group.
///
/// # Errors
/// Returns a `write markdown failed: <path>: <cause>` message when the file
/// cannot be written.
pub(super) fn write_markdown(path: &Path, output: &Value) -> Result<(), String> {
    /// Unsigned number, or 0 when absent / not an unsigned integer.
    fn count(value: &Value) -> u64 {
        value.as_u64().unwrap_or(0)
    }
    /// String contents, or "" when absent / not a string.
    fn text(value: &Value) -> &str {
        value.as_str().unwrap_or("")
    }
    /// Table row shared by the adjusted and sandwich classification tables.
    fn occurrence_row(item: &Value) -> String {
        format!(
            "| `{}` | {} | {} |",
            text(&item["classification"]),
            count(&item["occurrences"]),
            count(&item["uniqueKeys"])
        )
    }

    let parameters = &output["parameters"];
    let adjusted = &output["adjusted"];
    let sandwich = &output["sandwich"];

    let mut lines = vec![
        "# CCR/CIR Sequence Triage Summary".to_string(),
        String::new(),
        format!("- `generatedBy`: `{}`", text(&output["generatedBy"])),
        format!("- `leftSamples`: `{}`", count(&output["left"]["sampleCount"])),
        format!("- `rightSamples`: `{}`", count(&output["right"]["sampleCount"])),
        format!("- `alignWindowRuns`: `{}`", count(&parameters["alignWindowRuns"])),
        // The seconds window is read as a signed value, unlike the other counters.
        format!(
            "- `alignWindowSecs`: `{}`",
            parameters["alignWindowSecs"].as_i64().unwrap_or(0)
        ),
        format!("- `warmupSamples`: `{}`", count(&adjusted["warmupSamples"])),
        format!("- `cooldownSamples`: `{}`", count(&adjusted["cooldownSamples"])),
        String::new(),
        "## Classification Counts".to_string(),
        String::new(),
        "| Classification | Count |".to_string(),
        "|---|---:|".to_string(),
    ];
    if let Some(classes) = output["classificationCounts"].as_array() {
        lines.extend(classes.iter().map(|item| {
            format!(
                "| `{}` | {} |",
                text(&item["classification"]),
                count(&item["count"])
            )
        }));
    }

    lines.extend([
        String::new(),
        "## Adjusted Boundary / Rollover Classification".to_string(),
        String::new(),
        format!(
            "- `rawPersistent`: `{}` occurrences / `{}` unique keys",
            count(&adjusted["rawPersistent"]["occurrences"]),
            count(&adjusted["rawPersistent"]["uniqueKeys"])
        ),
        format!(
            "- `edgeFilteredPersistent`: `{}` occurrences / `{}` unique keys",
            count(&adjusted["edgeFilteredPersistent"]["occurrences"]),
            count(&adjusted["edgeFilteredPersistent"]["uniqueKeys"])
        ),
        format!(
            "- `adjustedStablePersistent`: `{}` occurrences / `{}` unique keys",
            count(&adjusted["adjustedStablePersistent"]["occurrences"]),
            count(&adjusted["adjustedStablePersistent"]["uniqueKeys"])
        ),
        String::new(),
        "| Adjusted Classification | Occurrences | Unique Keys |".to_string(),
        "|---|---:|---:|".to_string(),
    ]);
    if let Some(classes) = adjusted["classificationCounts"].as_array() {
        lines.extend(classes.iter().map(occurrence_row));
    }

    let stable_groups = adjusted["stableObjectGroups"]
        .as_array()
        .filter(|groups| !groups.is_empty());
    if let Some(groups) = stable_groups {
        lines.extend([
            String::new(),
            "## Stable Object Groups".to_string(),
            String::new(),
            "| Source | CIR | Publication Point | Events | Physical Objects |".to_string(),
            "|---|---|---|---:|---:|".to_string(),
        ]);
        lines.extend(groups.iter().map(|group| {
            format!(
                "| `{}/seq{}/{}` | `{}` | `{}` | {} | {} |",
                text(&group["sourceSide"]),
                count(&group["sourceSeq"]),
                text(&group["sourceRunId"]),
                text(&group["sourceCirPath"]),
                text(&group["publicationPoint"]),
                count(&group["eventCount"]),
                count(&group["physicalObjectCount"]),
            )
        }));
    }

    lines.extend([
        String::new(),
        "## Sandwich Anomaly Check".to_string(),
        String::new(),
        format!(
            "- `occurrences`: `{}`",
            count(&sandwich["totals"]["occurrences"])
        ),
        format!(
            "- `uniqueKeys`: `{}`",
            count(&sandwich["totals"]["uniqueKeys"])
        ),
        "- Rule: source side has two adjacent stable samples, and peer sample timestamp is strictly between them.".to_string(),
        String::new(),
        "| Sandwich Classification | Occurrences | Unique Keys |".to_string(),
        "|---|---:|---:|".to_string(),
    ]);
    if let Some(classes) = sandwich["classificationCounts"].as_array() {
        lines.extend(classes.iter().map(occurrence_row));
    }

    lines.extend([
        String::new(),
        "## Interpretation".to_string(),
        String::new(),
        "- `TEMPORAL_LAG_RESOLVED` / `CONTENT_ROLLOVER_RESOLVED` 表示差异在后续采样中对齐,优先视为采样时刻或仓库滚动窗口差异。".to_string(),
        "- `PERSISTENT_*` 表示配置窗口内仍未对齐,才是后续人工排查的高价值候选。".to_string(),
        "- `EDGE_*` 表示差异落在序列首尾,缺少前置或后续观察,不能直接当作实现差异。".to_string(),
        "- `STABLE_*` 表示经过首尾边界过滤和 URI-level rollover 过滤后仍存在的稳定候选。".to_string(),
    ]);

    let mut body = lines.join("\n");
    body.push('\n');
    std::fs::write(path, body)
        .map_err(|err| format!("write markdown failed: {}: {err}", path.display()))
}
/// Writes every retained raw-analysis sample to `path`, one compact JSON
/// object per line, in classification order.
///
/// # Errors
/// Returns the serializer's message when a sample cannot be serialized, or a
/// `write samples failed: <path>: <cause>` message when the file cannot be written.
pub(super) fn write_samples_jsonl(path: &Path, result: &AnalysisResult) -> Result<(), String> {
    let mut body = String::new();
    let all_samples = result.stats.values().flat_map(|stats| stats.samples.iter());
    for sample in all_samples {
        let line =
            serde_json::to_string(&sample_to_json(sample)).map_err(|err| err.to_string())?;
        body.push_str(&line);
        body.push('\n');
    }
    std::fs::write(path, body)
        .map_err(|err| format!("write samples failed: {}: {err}", path.display()))
}

View File

@ -0,0 +1,261 @@
use std::collections::BTreeSet;
use super::args::Args;
use super::io::format_time;
use super::model::{SandwichAnalysis, SandwichRecord, SequenceSample, Side};
pub(super) fn build_sandwich_analysis(
args: &Args,
left: &[SequenceSample],
right: &[SequenceSample],
) -> SandwichAnalysis {
let mut analysis = SandwichAnalysis::default();
analyze_sandwich_objects(&mut analysis, Side::Left, left, right, args);
analyze_sandwich_objects(&mut analysis, Side::Right, right, left, args);
analyze_sandwich_sets(
&mut analysis,
"reject_uri",
"PEER_MISSING_STABLE_REJECT",
Side::Left,
left,
right,
|sample| &sample.rejects,
args,
);
analyze_sandwich_sets(
&mut analysis,
"reject_uri",
"PEER_MISSING_STABLE_REJECT",
Side::Right,
right,
left,
|sample| &sample.rejects,
args,
);
analyze_sandwich_sets(
&mut analysis,
"vrp_output",
"PEER_MISSING_STABLE_OUTPUT",
Side::Left,
left,
right,
|sample| &sample.vrps,
args,
);
analyze_sandwich_sets(
&mut analysis,
"vrp_output",
"PEER_MISSING_STABLE_OUTPUT",
Side::Right,
right,
left,
|sample| &sample.vrps,
args,
);
analyze_sandwich_sets(
&mut analysis,
"vap_output",
"PEER_MISSING_STABLE_OUTPUT",
Side::Left,
left,
right,
|sample| &sample.vaps,
args,
);
analyze_sandwich_sets(
&mut analysis,
"vap_output",
"PEER_MISSING_STABLE_OUTPUT",
Side::Right,
right,
left,
|sample| &sample.vaps,
args,
);
analysis
}
/// Sandwich check for objects (URI -> hash).
///
/// For every pair of adjacent source samples with strictly increasing
/// validation time, each object whose hash is the same in both samples is
/// compared against every peer sample whose validation time lies strictly
/// inside the interval. A peer sample lacking the URI yields
/// `PEER_MISSING_STABLE_OBJECT`; a peer sample with another hash yields
/// `PEER_HASH_MISMATCH_STABLE_OBJECT`.
fn analyze_sandwich_objects(
    analysis: &mut SandwichAnalysis,
    source_side: Side,
    source: &[SequenceSample],
    peer: &[SequenceSample],
    args: &Args,
) {
    for pair in source.windows(2) {
        let (source_start, source_end) = (&pair[0], &pair[1]);
        if source_start.validation_time >= source_end.validation_time {
            continue;
        }
        let peers = peer_samples_between(peer, source_start, source_end);
        if peers.is_empty() {
            continue;
        }
        // Only objects unchanged across the interval are considered stable.
        let stable_objects = source_start
            .objects
            .iter()
            .filter(|(uri, hash)| source_end.objects.get(*uri) == Some(*hash));
        for (uri, source_hash) in stable_objects {
            for peer_sample in &peers {
                let peer_hash = peer_sample.objects.get(uri);
                let (classification, note) = match peer_hash {
                    Some(hash) if hash == source_hash => continue,
                    Some(_) => (
                        "PEER_HASH_MISMATCH_STABLE_OBJECT",
                        "source interval has stable object hash; peer sample has same URI with another hash",
                    ),
                    None => (
                        "PEER_MISSING_STABLE_OBJECT",
                        "source interval has stable object hash; peer sample misses the URI",
                    ),
                };
                let record = sandwich_record(
                    classification,
                    "object",
                    uri.clone(),
                    source_side,
                    source_start,
                    source_end,
                    peer_sample,
                    Some(source_hash.clone()),
                    peer_hash.cloned(),
                    note,
                );
                analysis.add(classification, record, args.sample_limit);
            }
        }
    }
}
/// Sandwich check for a plain key set (rejected URIs, VRP or VAP outputs).
///
/// For every pair of adjacent source samples with strictly increasing
/// validation time, each key present in both samples is looked up in every
/// peer sample whose validation time lies strictly inside the interval; a
/// peer sample lacking the key is recorded under `classification`.
///
/// * `set_type` – label stored in the record (e.g. `reject_uri`).
/// * `extract` – returns the set to compare from a sample.
/// * `args` – supplies the sample limit for retained example records.
// Same trade-off as `sandwich_record`: the parameters are independent inputs
// and bundling them into a struct would only add indirection.
#[allow(clippy::too_many_arguments)]
fn analyze_sandwich_sets<F>(
    analysis: &mut SandwichAnalysis,
    set_type: &'static str,
    classification: &'static str,
    source_side: Side,
    source: &[SequenceSample],
    peer: &[SequenceSample],
    extract: F,
    args: &Args,
) where
    F: for<'a> Fn(&'a SequenceSample) -> &'a BTreeSet<String>,
{
    for pair in source.windows(2) {
        let (source_start, source_end) = (&pair[0], &pair[1]);
        if source_start.validation_time >= source_end.validation_time {
            continue;
        }
        let peers = peer_samples_between(peer, source_start, source_end);
        if peers.is_empty() {
            continue;
        }
        // Keys present at both endpoints, visited in ascending order.
        let stable_keys = extract(source_start).intersection(extract(source_end));
        for key in stable_keys {
            for peer_sample in &peers {
                if extract(peer_sample).contains(key) {
                    continue;
                }
                let record = sandwich_record(
                    classification,
                    set_type,
                    key.clone(),
                    source_side,
                    source_start,
                    source_end,
                    peer_sample,
                    Some(key.clone()),
                    None,
                    "source interval has a stable key; peer sample misses the key",
                );
                analysis.add(classification, record, args.sample_limit);
            }
        }
    }
}
/// Returns, in their original order, the peer samples whose validation time
/// lies strictly between those of `source_start` and `source_end`.
fn peer_samples_between<'a>(
    peer: &'a [SequenceSample],
    source_start: &SequenceSample,
    source_end: &SequenceSample,
) -> Vec<&'a SequenceSample> {
    let mut inside = Vec::new();
    for sample in peer {
        let after_start = source_start.validation_time < sample.validation_time;
        let before_end = sample.validation_time < source_end.validation_time;
        if after_start && before_end {
            inside.push(sample);
        }
    }
    inside
}
/// Builds a `SandwichRecord` from the three samples involved in a finding.
///
/// Sequence numbers and run ids are copied from the raw entries of the
/// samples, and the validation times are stored already formatted.
#[allow(clippy::too_many_arguments)]
fn sandwich_record(
    classification: &'static str,
    set_type: &'static str,
    key: String,
    source_side: Side,
    source_start: &SequenceSample,
    source_end: &SequenceSample,
    peer_sample: &SequenceSample,
    source_value: Option<String>,
    peer_value: Option<String>,
    note: &str,
) -> SandwichRecord {
    let (start_raw, end_raw, peer_raw) = (&source_start.raw, &source_end.raw, &peer_sample.raw);
    let source_start_time = format_time(source_start.validation_time);
    let peer_time = format_time(peer_sample.validation_time);
    let source_end_time = format_time(source_end.validation_time);
    SandwichRecord {
        classification,
        set_type,
        key,
        source_side,
        source_start_seq: start_raw.seq,
        source_start_run_id: start_raw.run_id.clone(),
        source_end_seq: end_raw.seq,
        source_end_run_id: end_raw.run_id.clone(),
        peer_seq: peer_raw.seq,
        peer_run_id: peer_raw.run_id.clone(),
        source_value,
        peer_value,
        source_start_time,
        peer_time,
        source_end_time,
        note: note.to_owned(),
    }
}
impl SandwichAnalysis {
    /// Records one finding under `class`.
    ///
    /// Updates the overall and per-set-type occurrence counters, registers
    /// the finding's unique key both globally and for the classification,
    /// and keeps `record` as an example only while fewer than `sample_limit`
    /// examples are stored for that classification.
    fn add(&mut self, class: &'static str, record: SandwichRecord, sample_limit: usize) {
        // Format the key once; it is needed by both key sets.
        let unique_key = sandwich_unique_key(&record);
        self.total_occurrences += 1;
        self.unique_keys.insert(unique_key.clone());
        *self.by_set_type.entry(record.set_type).or_default() += 1;
        let stats = self.stats.entry(class).or_default();
        stats.total += 1;
        stats.unique_keys.insert(unique_key);
        if stats.samples.len() < sample_limit {
            stats.samples.push(record);
        }
    }
}
/// Builds the de-duplication key of a finding:
/// `classification|set_type|side|key`.
fn sandwich_unique_key(record: &SandwichRecord) -> String {
    let parts = [
        record.classification,
        record.set_type,
        record.source_side.as_str(),
        record.key.as_str(),
    ];
    parts.join("|")
}