// rpki/src/tools/rpki_artifact_metrics.rs

use std::collections::{BTreeMap, BTreeSet};
use std::fs;
use std::io::{Read, Write};
use std::net::{TcpListener, TcpStream};
use std::path::{Path, PathBuf};
use std::sync::{Arc, RwLock};
use std::thread;
use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};

use crate::ccr::decode_content_info;
use crate::cir::decode_cir;
use serde::Serialize;
use serde_json::{Value, json};
use sha2::{Digest, Sha256};

const LARGE_PP_OBJECT_THRESHOLDS: &[u64] = &[10, 50, 100, 500, 1000, 5000, 10000, 50000];
const PP_SYNC_SECONDS_BUCKETS: &[f64] = &[0.1, 0.5, 1.0, 5.0, 10.0, 30.0, 60.0, 120.0, 300.0];

#[derive(Clone, Debug, PartialEq, Eq)]
struct Args {
    run_root: PathBuf,
    listen: String,
    poll_secs: u64,
    instance: String,
    once: bool,
    out_metrics: Option<PathBuf>,
    out_status: Option<PathBuf>,
}

fn usage() -> &'static str {
    "Usage: rpki_artifact_metrics --run-root <path> [--listen <addr:port>] [--poll-secs <n>] [--instance <name>] [--once] [--out-metrics <path>] [--out-status <path>]"
}

pub fn main_entry() -> Result<(), String> {
    real_main()
}

fn real_main() -> Result<(), String> {
    let args = parse_args(&std::env::args().collect::<Vec<_>>())?;
    if args.once {
        let snapshot = scan_run_root(&args.run_root, &args.instance)?;
        let metrics = render_metrics(&snapshot);
        let status = render_status_json(&snapshot)?;
        if let Some(path) = args.out_metrics.as_ref() {
            write_file(path, metrics.as_bytes())?;
        } else {
            print!("{metrics}");
        }
        if let Some(path) = args.out_status.as_ref() {
            write_file(path, status.as_bytes())?;
        }
        return Ok(());
    }

    let shared = Arc::new(RwLock::new(scan_run_root(&args.run_root, &args.instance)?));
    let scanner = Arc::clone(&shared);
    let run_root = args.run_root.clone();
    let instance = args.instance.clone();
    let poll_secs = args.poll_secs.max(1);
    thread::spawn(move || {
        loop {
            thread::sleep(Duration::from_secs(poll_secs));
            let previous_snapshot = scanner.read().expect("metrics lock poisoned").clone();
            let next =
                match scan_run_root_incremental(&run_root, &instance, Some(&previous_snapshot)) {
                    Ok(snapshot) => snapshot,
                    Err(err) => {
                        let mut previous = scanner.write().expect("metrics lock poisoned");
                        previous
                            .service
                            .parse_errors
                            .push(format!("scan failed: {err}"));
                        previous.service.last_reload_success = false;
                        previous.service.last_scan_timestamp_seconds = unix_now_seconds();
                        continue;
                    }
                };
            *scanner.write().expect("metrics lock poisoned") = next;
        }
    });

    serve_http(&args.listen, shared)
}

fn parse_args(argv: &[String]) -> Result<Args, String> {
    let mut run_root = None;
    let mut listen = "127.0.0.1:9556".to_string();
    let mut poll_secs = 10u64;
    let mut instance = "ours-rp".to_string();
    let mut once = false;
    let mut out_metrics = None;
    let mut out_status = None;
    let mut index = 1usize;
    while index < argv.len() {
        match argv[index].as_str() {
            "--run-root" => {
                index += 1;
                run_root = Some(PathBuf::from(value_at(argv, index, "--run-root")?));
            }
            "--listen" => {
                index += 1;
                listen = value_at(argv, index, "--listen")?.to_string();
            }
            "--poll-secs" => {
                index += 1;
                let value = value_at(argv, index, "--poll-secs")?;
                poll_secs = value
                    .parse::<u64>()
                    .map_err(|_| format!("invalid --poll-secs: {value}"))?;
            }
            "--instance" => {
                index += 1;
                instance = value_at(argv, index, "--instance")?.to_string();
            }
            "--once" => once = true,
            "--out-metrics" => {
                index += 1;
                out_metrics = Some(PathBuf::from(value_at(argv, index, "--out-metrics")?));
            }
            "--out-status" => {
                index += 1;
                out_status = Some(PathBuf::from(value_at(argv, index, "--out-status")?));
            }
            "-h" | "--help" => return Err(usage().to_string()),
            other => return Err(format!("unknown argument: {other}\n{}", usage())),
        }
        index += 1;
    }
    Ok(Args {
        run_root: run_root.ok_or_else(|| format!("--run-root is required\n{}", usage()))?,
        listen,
        poll_secs,
        instance,
        once,
        out_metrics,
        out_status,
    })
}

fn value_at<'a>(argv: &'a [String], index: usize, flag: &str) -> Result<&'a str, String> {
    argv.get(index)
        .map(|s| s.as_str())
        .ok_or_else(|| format!("{flag} requires a value"))
}

#[derive(Clone, Debug, Default, Serialize)]
#[serde(rename_all = "camelCase")]
struct MetricsSnapshot {
    instance: String,
    service: ServiceMetrics,
    runs: RunScanSummary,
    latest_run: Option<LatestRunMetrics>,
    cumulative: CumulativeMetrics,
    repo_stats: Vec<RepoMetrics>,
    object_counts: BTreeMap<(String, String), u64>,
    large_pp_counts: BTreeMap<u64, u64>,
    pp_sync_histograms: BTreeMap<String, Histogram>,
    top_repos_by_sync_duration: Vec<TopRepo>,
    top_pp_by_object_count: Vec<TopPublicationPoint>,
    top_pp_by_sync_duration: Vec<TopPublicationPoint>,
    cir: Option<CirMetrics>,
    ccr: Option<CcrMetrics>,
}

#[derive(Clone, Debug, Default, Serialize)]
#[serde(rename_all = "camelCase")]
struct ServiceMetrics {
    last_scan_timestamp_seconds: f64,
    last_scan_duration_seconds: f64,
    last_reload_success: bool,
    parse_errors: Vec<String>,
    run_root: String,
    runs_root: String,
}

#[derive(Clone, Debug, Default, Serialize)]
#[serde(rename_all = "camelCase")]
struct RunScanSummary {
    known: u64,
    success: u64,
    failed: u64,
    partial: u64,
    consecutive_failures: u64,
}

#[derive(Clone, Debug, Default, Serialize)]
#[serde(rename_all = "camelCase")]
struct CumulativeMetrics {
    completed_success_total: u64,
    completed_failed_total: u64,
    observed_duration_seconds_sum: f64,
    observed_duration_seconds_count: u64,
    observed_download_bytes_total: u64,
}

#[derive(Clone, Debug, Default, Serialize)]
#[serde(rename_all = "camelCase")]
struct LatestRunMetrics {
    run_seq: u64,
    run_id: String,
    run_dir: String,
    status: String,
    sync_mode: String,
    snapshot_reason: Option<String>,
    started_at: Option<String>,
    finished_at: Option<String>,
    start_timestamp_seconds: Option<f64>,
    finish_timestamp_seconds: Option<f64>,
    wall_seconds: f64,
    user_cpu_seconds: Option<f64>,
    system_cpu_seconds: Option<f64>,
    cpu_percent: Option<f64>,
    max_rss_bytes: Option<u64>,
    exit_code: Option<i64>,
    vrps: u64,
    vrps_unique: Option<u64>,
    vaps: u64,
    publication_points: u64,
    warnings: u64,
    tree_instances_processed: Option<u64>,
    tree_instances_failed: Option<u64>,
    stage_seconds: BTreeMap<String, f64>,
    repo_sync_phase: BTreeMap<String, CountDuration>,
    repo_terminal_state: BTreeMap<String, CountDuration>,
    download_events: Option<u64>,
    download_bytes: Option<u64>,
    artifact_sizes: BTreeMap<String, u64>,
    state_path_sizes: BTreeMap<String, PathSize>,
}

#[derive(Clone, Debug, Default, Serialize)]
#[serde(rename_all = "camelCase")]
struct CountDuration {
    count: u64,
    duration_seconds_total: f64,
}

#[derive(Clone, Debug, Default, Serialize)]
#[serde(rename_all = "camelCase")]
struct PathSize {
    total_size_bytes: u64,
    file_count: u64,
    dir_count: u64,
}

#[derive(Clone, Debug, Default, Serialize)]
#[serde(rename_all = "camelCase")]
struct RepoMetrics {
    repo_id: String,
    uri: String,
    host: String,
    transport: String,
    publication_points: u64,
    sync_success: bool,
    download_bytes: u64,
    duration_seconds_sum: f64,
    duration_seconds_max: f64,
    duration_seconds_avg: f64,
    phase_counts: BTreeMap<String, u64>,
    terminal_state_counts: BTreeMap<String, u64>,
}

#[derive(Clone, Debug, Default, Serialize)]
#[serde(rename_all = "camelCase")]
struct TopRepo {
    rank: usize,
    repo_id: String,
    uri: String,
    host: String,
    transport: String,
    duration_ms_max: u64,
    duration_ms_sum: u64,
    publication_points: u64,
}

#[derive(Clone, Debug, Default, Serialize)]
#[serde(rename_all = "camelCase")]
struct TopPublicationPoint {
    rank: usize,
    pp_id: String,
    repo_id: String,
    uri: String,
    repo_uri: String,
    host: String,
    transport: String,
    object_count: u64,
    sync_duration_ms: u64,
    terminal_state: String,
    phase: String,
}

#[derive(Clone, Debug, Serialize)]
#[serde(rename_all = "camelCase")]
struct Histogram {
    buckets: Vec<f64>,
    counts: Vec<u64>,
    sum: f64,
    count: u64,
}

impl Default for Histogram {
    fn default() -> Self {
        Self {
            buckets: Vec::new(),
            counts: Vec::new(),
            sum: 0.0,
            count: 0,
        }
    }
}

impl Histogram {
    fn new(buckets: &[f64]) -> Self {
        Self {
            buckets: buckets.to_vec(),
            counts: vec![0; buckets.len() + 1],
            sum: 0.0,
            count: 0,
        }
    }

    fn observe(&mut self, value: f64) {
        self.sum += value;
        self.count += 1;
        let mut placed = false;
        for (index, bucket) in self.buckets.iter().enumerate() {
            if value <= *bucket {
                self.counts[index] += 1;
                placed = true;
                break;
            }
        }
        if !placed {
            let last = self.counts.len() - 1;
            self.counts[last] += 1;
        }
    }

    fn cumulative_counts(&self) -> Vec<u64> {
        let mut out = Vec::with_capacity(self.counts.len());
        let mut running = 0u64;
        for count in &self.counts {
            running += *count;
            out.push(running);
        }
        out
    }
}

#[derive(Clone, Debug, Default, Serialize)]
#[serde(rename_all = "camelCase")]
struct CirMetrics {
    version: u32,
    objects: u64,
    trust_anchors: u64,
    rejected_objects: u64,
    reject_list_sha256: String,
    objects_by_type: BTreeMap<String, u64>,
    rejected_objects_by_type: BTreeMap<String, u64>,
}

#[derive(Clone, Debug, Default, Serialize)]
#[serde(rename_all = "camelCase")]
struct CcrMetrics {
    version: u32,
    state_present: BTreeMap<String, bool>,
    state_items: BTreeMap<String, u64>,
    state_digests: BTreeMap<String, String>,
}

#[derive(Clone, Debug)]
struct RunRecord {
    path: PathBuf,
    status: String,
    summary: Option<Value>,
    meta: Option<Value>,
}

fn scan_run_root(input_root: &Path, instance: &str) -> Result<MetricsSnapshot, String> {
    scan_run_root_incremental(input_root, instance, None)
}

fn scan_run_root_incremental(
    input_root: &Path,
    instance: &str,
    previous: Option<&MetricsSnapshot>,
) -> Result<MetricsSnapshot, String> {
    let started = Instant::now();
    let runs_root = resolve_runs_root(input_root);
    let mut snapshot = MetricsSnapshot {
        instance: instance.to_string(),
        service: ServiceMetrics {
            run_root: input_root.display().to_string(),
            runs_root: runs_root.display().to_string(),
            ..ServiceMetrics::default()
        },
        ..MetricsSnapshot::default()
    };

    let records = collect_run_records(&runs_root, &mut snapshot.service.parse_errors)?;
    snapshot.runs.known = records.len() as u64;
    for record in &records {
        match record.status.as_str() {
            "success" => snapshot.runs.success += 1,
            "failed" | "spawn_failed" => snapshot.runs.failed += 1,
            _ => snapshot.runs.partial += 1,
        }
    }
    snapshot.runs.consecutive_failures = consecutive_failures(&records);
    snapshot.cumulative.completed_success_total = snapshot.runs.success;
    snapshot.cumulative.completed_failed_total = snapshot.runs.failed;

    for record in records.iter().filter(|record| record.status == "success") {
        if let Some(summary) = record.summary.as_ref() {
            let wall_seconds = json_u64(summary, &["wallMs"]).unwrap_or(0) as f64 / 1000.0;
            snapshot.cumulative.observed_duration_seconds_sum += wall_seconds;
            snapshot.cumulative.observed_duration_seconds_count += 1;
            if let Some(bytes) = json_u64(summary, &["stageTiming", "download_bytes_total"]) {
                snapshot.cumulative.observed_download_bytes_total = snapshot
                    .cumulative
                    .observed_download_bytes_total
                    .saturating_add(bytes);
            }
        }
    }

    if let Some(latest) = records
        .iter()
        .rev()
        .find(|record| record.status == "success")
    {
        if can_reuse_latest_metrics(latest, previous) {
            reuse_latest_artifact_metrics(previous.expect("checked previous"), &mut snapshot);
        } else {
            build_latest_metrics(latest, &mut snapshot);
            let _ = crate::memory_telemetry::malloc_trim_probe();
        }
    }

    snapshot.service.last_scan_timestamp_seconds = unix_now_seconds();
    snapshot.service.last_scan_duration_seconds = started.elapsed().as_secs_f64();
    snapshot.service.last_reload_success = snapshot.service.parse_errors.is_empty();
    Ok(snapshot)
}

fn can_reuse_latest_metrics(record: &RunRecord, previous: Option<&MetricsSnapshot>) -> bool {
    let Some(previous) = previous else {
        return false;
    };
    let Some(previous_latest) = previous.latest_run.as_ref() else {
        return false;
    };
    if previous_latest.run_dir != record.path.display().to_string() {
        return false;
    }
    if previous_latest.status != record.status {
        return false;
    }
    let summary = record.summary.as_ref();
    let meta = record.meta.as_ref();
    let finished_at = summary
        .and_then(|v| json_str(v, &["finishedAtRfc3339Utc"]))
        .or_else(|| meta.and_then(|v| json_str(v, &["completed_at_rfc3339_utc"])));
    let wall_seconds = summary.and_then(|v| json_u64(v, &["wallMs"])).unwrap_or(0) as f64 / 1000.0;
    previous_latest.finished_at.as_deref() == finished_at
        && (previous_latest.wall_seconds - wall_seconds).abs() < f64::EPSILON
}

fn reuse_latest_artifact_metrics(previous: &MetricsSnapshot, snapshot: &mut MetricsSnapshot) {
    snapshot.latest_run = previous.latest_run.clone();
    snapshot.repo_stats = previous.repo_stats.clone();
    snapshot.object_counts = previous.object_counts.clone();
    snapshot.large_pp_counts = previous.large_pp_counts.clone();
    snapshot.pp_sync_histograms = previous.pp_sync_histograms.clone();
    snapshot.top_repos_by_sync_duration = previous.top_repos_by_sync_duration.clone();
    snapshot.top_pp_by_object_count = previous.top_pp_by_object_count.clone();
    snapshot.top_pp_by_sync_duration = previous.top_pp_by_sync_duration.clone();
    snapshot.cir = previous.cir.clone();
    snapshot.ccr = previous.ccr.clone();
}

fn resolve_runs_root(input_root: &Path) -> PathBuf {
    let runs = input_root.join("runs");
    if runs.is_dir() {
        runs
    } else {
        input_root.to_path_buf()
    }
}

fn collect_run_records(
    runs_root: &Path,
    errors: &mut Vec<String>,
) -> Result<Vec<RunRecord>, String> {
    let mut records = Vec::new();
    if !runs_root.is_dir() {
        return Err(format!(
            "runs root is not a directory: {}",
            runs_root.display()
        ));
    }
    let entries = fs::read_dir(runs_root)
        .map_err(|e| format!("read runs root failed: {}: {e}", runs_root.display()))?;
    for entry in entries {
        let entry = entry.map_err(|e| format!("read runs entry failed: {e}"))?;
        let path = entry.path();
        if !path.is_dir() {
            continue;
        }
        let Some(name) = path.file_name().and_then(|name| name.to_str()) else {
            continue;
        };
        if !name.starts_with("run_") {
            continue;
        }
        let summary = read_json_optional(&path.join("run-summary.json"), errors);
        let meta = read_json_optional(&path.join("run-meta.json"), errors);
        let status = classify_run_status(&summary, &meta, &path);
        records.push(RunRecord {
            path,
            status,
            summary,
            meta,
        });
    }
    records.sort_by(|left, right| left.path.cmp(&right.path));
    Ok(records)
}

fn classify_run_status(summary: &Option<Value>, meta: &Option<Value>, path: &Path) -> String {
    let summary_status = summary.as_ref().and_then(|v| json_str(v, &["status"]));
    let meta_status = meta.as_ref().and_then(|v| json_str(v, &["status"]));
    if summary_status == Some("success") && meta_status == Some("success") {
        return "success".to_string();
    }
    if matches!(summary_status, Some("failed" | "spawn_failed"))
        || matches!(meta_status, Some("failed" | "spawn_failed"))
    {
        return "failed".to_string();
    }
    if path.join("run-summary.json").exists() || path.join("run-meta.json").exists() {
        "partial".to_string()
    } else {
        "missing_metadata".to_string()
    }
}

fn consecutive_failures(records: &[RunRecord]) -> u64 {
    let mut count = 0u64;
    for record in records.iter().rev() {
        if record.status == "success" {
            break;
        }
        count += 1;
    }
    count
}

fn read_json_optional(path: &Path, errors: &mut Vec<String>) -> Option<Value> {
    if !path.exists() {
        return None;
    }
    match fs::read(path)
        .ok()
        .and_then(|bytes| serde_json::from_slice::<Value>(&bytes).ok())
    {
        Some(value) => Some(value),
        None => {
            errors.push(format!("parse json failed: {}", path.display()));
            None
        }
    }
}

fn build_latest_metrics(record: &RunRecord, snapshot: &mut MetricsSnapshot) {
    let summary = record.summary.as_ref();
    let meta = record.meta.as_ref();
    let run_seq = summary
        .and_then(|v| json_u64(v, &["runSeq"]))
        .or_else(|| meta.and_then(|v| json_u64(v, &["run_index"])))
        .unwrap_or_else(|| run_index_from_path(&record.path).unwrap_or(0));
    let run_id = summary
        .and_then(|v| json_str(v, &["runId"]))
        .or_else(|| meta.and_then(|v| json_str(v, &["run_id"])))
        .unwrap_or_else(|| {
            record
                .path
                .file_name()
                .and_then(|n| n.to_str())
                .unwrap_or("unknown")
        })
        .to_string();
    let sync_mode = meta
        .and_then(|v| json_str(v, &["sync_mode"]))
        .unwrap_or("unknown")
        .to_string();
    let snapshot_reason = meta
        .and_then(|v| json_str(v, &["snapshot_reason"]))
        .map(|s| s.to_string());
    let started_at = summary
        .and_then(|v| json_str(v, &["startedAtRfc3339Utc"]))
        .or_else(|| meta.and_then(|v| json_str(v, &["started_at_rfc3339_utc"])))
        .map(|s| s.to_string());
    let finished_at = summary
        .and_then(|v| json_str(v, &["finishedAtRfc3339Utc"]))
        .or_else(|| meta.and_then(|v| json_str(v, &["completed_at_rfc3339_utc"])))
        .map(|s| s.to_string());
    let wall_seconds = summary.and_then(|v| json_u64(v, &["wallMs"])).unwrap_or(0) as f64 / 1000.0;

    let mut latest = LatestRunMetrics {
        run_seq,
        run_id,
        run_dir: record.path.display().to_string(),
        status: record.status.clone(),
        sync_mode,
        snapshot_reason,
        started_at: started_at.clone(),
        finished_at: finished_at.clone(),
        start_timestamp_seconds: started_at.as_deref().and_then(parse_rfc3339_to_unix),
        finish_timestamp_seconds: finished_at.as_deref().and_then(parse_rfc3339_to_unix),
        wall_seconds,
        user_cpu_seconds: summary.and_then(|v| json_f64(v, &["processMetrics", "userSeconds"])),
        system_cpu_seconds: summary.and_then(|v| json_f64(v, &["processMetrics", "systemSeconds"])),
        cpu_percent: summary.and_then(|v| json_f64(v, &["processMetrics", "cpuPercent"])),
        max_rss_bytes: summary
            .and_then(|v| json_u64(v, &["processMetrics", "maxRssKb"]))
            .map(|kb| kb.saturating_mul(1024)),
        exit_code: summary.and_then(|v| json_i64(v, &["exitCode"])),
        ..LatestRunMetrics::default()
    };

    if let Some(summary) = summary {
        latest.vrps = json_u64(summary, &["reportCounts", "vrps"]).unwrap_or(0);
        latest.vaps = json_u64(summary, &["reportCounts", "aspas"]).unwrap_or(0);
        latest.publication_points =
            json_u64(summary, &["reportCounts", "publicationPoints"]).unwrap_or(0);
        latest.warnings = json_u64(summary, &["reportCounts", "warnings"]).unwrap_or(0);
        latest.tree_instances_processed =
            json_u64(summary, &["reportCounts", "treeInstancesProcessed"]);
        latest.tree_instances_failed = json_u64(summary, &["reportCounts", "treeInstancesFailed"]);
        latest.stage_seconds = extract_stage_seconds(summary.get("stageTiming"));
        latest.repo_sync_phase =
            extract_count_duration_map(summary.pointer("/repoSyncStats/by_phase"));
        latest.repo_terminal_state =
            extract_count_duration_map(summary.pointer("/repoSyncStats/by_terminal_state"));
        latest.download_events = json_u64(summary, &["stageTiming", "download_event_count"]);
        latest.download_bytes = json_u64(summary, &["stageTiming", "download_bytes_total"]);
        latest.artifact_sizes = extract_artifact_sizes(summary.get("artifacts"));
        latest.state_path_sizes = extract_path_sizes(summary.get("pathStats"));
    }

    parse_report(&record.path.join("report.json"), snapshot, &mut latest);
    latest.vrps_unique = count_vrp_csv_unique_keys_opt(
        &record.path.join("vrps.csv"),
        &mut snapshot.service.parse_errors,
    );
    parse_cir(&record.path.join("input.cir"), snapshot);
    parse_ccr(&record.path.join("result.ccr"), snapshot);
    snapshot.latest_run = Some(latest);
}

fn count_vrp_csv_unique_keys_opt(path: &Path, errors: &mut Vec<String>) -> Option<u64> {
    if !path.exists() {
        return None;
    }
    match count_vrp_csv_unique_keys(path) {
        Ok(count) => Some(count),
        Err(err) => {
            errors.push(err);
            None
        }
    }
}

fn count_vrp_csv_unique_keys(path: &Path) -> Result<u64, String> {
    let content = fs::read_to_string(path)
        .map_err(|e| format!("read VRP CSV failed: {}: {e}", path.display()))?;
    let mut unique = BTreeSet::new();
    for (index, line) in data_csv_lines(&content).enumerate() {
        if index == 0 {
            continue;
        }
        let columns = split_csv_simple(line);
        if columns.len() < 3 {
            return Err(format!(
                "invalid VRP CSV row in {}: expected at least 3 columns, got {}",
                path.display(),
                columns.len()
            ));
        }
        unique.insert((
            columns[0].to_string(),
            columns[1].to_string(),
            columns[2].to_string(),
        ));
    }
    Ok(unique.len() as u64)
}

fn data_csv_lines(content: &str) -> impl Iterator<Item = &str> {
    content
        .lines()
        .map(str::trim)
        .filter(|line| !line.is_empty())
        .filter(|line| !line.starts_with('#'))
}

fn split_csv_simple(line: &str) -> Vec<&str> {
    line.split(',').map(str::trim).collect()
}

fn parse_report(path: &Path, snapshot: &mut MetricsSnapshot, latest: &mut LatestRunMetrics) {
    if !path.exists() {
        return;
    }
    let Ok(bytes) = fs::read(path) else {
        snapshot
            .service
            .parse_errors
            .push(format!("read report.json failed: {}", path.display()));
        return;
    };
    let Ok(report) = serde_json::from_slice::<Value>(&bytes) else {
        snapshot
            .service
            .parse_errors
            .push(format!("parse report.json failed: {}", path.display()));
        return;
    };
    if latest.vrps == 0 {
        latest.vrps = report
            .get("vrps")
            .and_then(|v| v.as_array())
            .map(|a| a.len() as u64)
            .unwrap_or(0);
    }
    if latest.vaps == 0 {
        latest.vaps = report
            .get("aspas")
            .and_then(|v| v.as_array())
            .map(|a| a.len() as u64)
            .unwrap_or(0);
    }
    latest.warnings = latest.warnings.max(
        report
            .pointer("/tree/warnings")
            .and_then(|v| v.as_array())
            .map(|a| a.len() as u64)
            .unwrap_or(0),
    );
    if let Some(processed) = json_u64(&report, &["tree", "instances_processed"]) {
        latest.tree_instances_processed = Some(processed);
    }
    if let Some(failed) = json_u64(&report, &["tree", "instances_failed"]) {
        latest.tree_instances_failed = Some(failed);
    }
    if latest.repo_sync_phase.is_empty() {
        latest.repo_sync_phase =
            extract_count_duration_map(report.pointer("/repo_sync_stats/by_phase"));
    }
    if latest.repo_terminal_state.is_empty() {
        latest.repo_terminal_state =
            extract_count_duration_map(report.pointer("/repo_sync_stats/by_terminal_state"));
    }
    if let Some(pps) = report.get("publication_points").and_then(|v| v.as_array()) {
        latest.publication_points = pps.len() as u64;
        extract_publication_point_metrics(pps, report.get("downloads"), snapshot);
    }
}

fn extract_publication_point_metrics(
    pps: &[Value],
    downloads: Option<&Value>,
    snapshot: &mut MetricsSnapshot,
) {
    let mut repos: BTreeMap<String, RepoMetrics> = BTreeMap::new();
    let mut pp_by_object_count = Vec::<TopPublicationPoint>::new();
    let mut pp_by_sync_duration = Vec::<TopPublicationPoint>::new();
    let mut large_pp_counts = BTreeMap::<u64, u64>::new();
    let mut pp_sync_histograms = BTreeMap::<String, Histogram>::new();
    let mut object_counts = BTreeMap::<(String, String), u64>::new();

    for pp in pps {
        let pp_uri = json_str(pp, &["publication_point_rsync_uri"])
            .or_else(|| json_str(pp, &["manifest_rsync_uri"]))
            .or_else(|| json_str(pp, &["rsync_base_uri"]))
            .unwrap_or("unknown");
        let repo_uri = json_str(pp, &["rrdp_notification_uri"])
            .or_else(|| json_str(pp, &["rsync_base_uri"]))
            .or_else(|| json_str(pp, &["publication_point_rsync_uri"]))
            .unwrap_or(pp_uri);
        let repo_id = short_sha256(repo_uri);
        let pp_id = short_sha256(pp_uri);
        let host = uri_host(repo_uri);
        let transport = json_str(pp, &["repo_sync_source"])
            .or_else(|| json_str(pp, &["source"]))
            .map(normalize_transport)
            .unwrap_or_else(|| infer_transport(repo_uri));
        let duration_ms = json_u64(pp, &["repo_sync_duration_ms"]).unwrap_or(0);
        let duration_seconds = duration_ms as f64 / 1000.0;
        let phase = json_str(pp, &["repo_sync_phase"])
            .unwrap_or("unknown")
            .to_string();
        let terminal_state = json_str(pp, &["repo_terminal_state"])
            .unwrap_or("unknown")
            .to_string();
        let object_count = pp
            .get("objects")
            .and_then(|v| v.as_array())
            .map(|a| a.len() as u64)
            .unwrap_or(0);

        let repo = repos.entry(repo_id.clone()).or_insert_with(|| RepoMetrics {
            repo_id: repo_id.clone(),
            uri: repo_uri.to_string(),
            host: host.clone(),
            transport: transport.clone(),
            sync_success: true,
            ..RepoMetrics::default()
        });
        repo.publication_points += 1;
        repo.duration_seconds_sum += duration_seconds;
        repo.duration_seconds_max = repo.duration_seconds_max.max(duration_seconds);
        if !is_success_terminal_state(&terminal_state) {
            repo.sync_success = false;
        }
        *repo.phase_counts.entry(phase.clone()).or_default() += 1;
        *repo
            .terminal_state_counts
            .entry(terminal_state.clone())
            .or_default() += 1;

        for threshold in LARGE_PP_OBJECT_THRESHOLDS {
            if object_count > *threshold {
                *large_pp_counts.entry(*threshold).or_default() += 1;
            }
        }
        pp_sync_histograms
            .entry(transport.clone())
            .or_insert_with(|| Histogram::new(PP_SYNC_SECONDS_BUCKETS))
            .observe(duration_seconds);

        if let Some(objects) = pp.get("objects").and_then(|v| v.as_array()) {
            for object in objects {
                let kind = json_str(object, &["kind"]).unwrap_or("unknown").to_string();
                let result = json_str(object, &["result"])
                    .unwrap_or("unknown")
                    .to_string();
                *object_counts.entry((kind, result)).or_default() += 1;
            }
        }

        let top = TopPublicationPoint {
            rank: 0,
            pp_id,
            repo_id,
            uri: pp_uri.to_string(),
            repo_uri: repo_uri.to_string(),
            host,
            transport,
            object_count,
            sync_duration_ms: duration_ms,
            terminal_state,
            phase,
        };
        pp_by_object_count.push(top.clone());
        pp_by_sync_duration.push(top);
    }

    let mut repo_stats = repos.into_values().collect::<Vec<_>>();
    assign_download_bytes_to_repos(&mut repo_stats, downloads);
    for repo in &mut repo_stats {
        if repo.publication_points > 0 {
            repo.duration_seconds_avg = repo.duration_seconds_sum / repo.publication_points as f64;
        }
    }
    let mut top_repos = repo_stats
        .iter()
        .map(|repo| TopRepo {
            rank: 0,
            repo_id: repo.repo_id.clone(),
            uri: repo.uri.clone(),
            host: repo.host.clone(),
            transport: repo.transport.clone(),
            duration_ms_max: (repo.duration_seconds_max * 1000.0).round() as u64,
            duration_ms_sum: (repo.duration_seconds_sum * 1000.0).round() as u64,
            publication_points: repo.publication_points,
        })
        .collect::<Vec<_>>();
    top_repos.sort_by(|a, b| b.duration_ms_max.cmp(&a.duration_ms_max));
    top_repos.truncate(20);
    for (index, item) in top_repos.iter_mut().enumerate() {
        item.rank = index + 1;
    }

    pp_by_object_count.sort_by(|a, b| b.object_count.cmp(&a.object_count));
    pp_by_object_count.truncate(20);
    for (index, item) in pp_by_object_count.iter_mut().enumerate() {
        item.rank = index + 1;
    }
    pp_by_sync_duration.sort_by(|a, b| b.sync_duration_ms.cmp(&a.sync_duration_ms));
    pp_by_sync_duration.truncate(20);
    for (index, item) in pp_by_sync_duration.iter_mut().enumerate() {
        item.rank = index + 1;
    }

    snapshot.repo_stats = repo_stats;
    snapshot.object_counts = object_counts;
    snapshot.large_pp_counts = large_pp_counts;
    snapshot.pp_sync_histograms = pp_sync_histograms;
    snapshot.top_repos_by_sync_duration = top_repos;
    snapshot.top_pp_by_object_count = pp_by_object_count;
    snapshot.top_pp_by_sync_duration = pp_by_sync_duration;
}

/// Attributes the bytes of each download event to the repository its URI maps to.
///
/// `downloads` must be a JSON array of events; `None` or any other JSON shape
/// leaves `repos` untouched. Events without a `uri`, with a missing or zero
/// `bytes` value, or whose URI matches no repository are skipped. Per-repository
/// totals saturate rather than overflow.
fn assign_download_bytes_to_repos(repos: &mut [RepoMetrics], downloads: Option<&Value>) {
    let Some(events) = downloads.and_then(|value| value.as_array()) else {
        return;
    };
    for event in events {
        let Some(uri) = json_str(event, &["uri"]) else {
            continue;
        };
        let bytes = match json_u64(event, &["bytes"]) {
            Some(bytes) if bytes != 0 => bytes,
            _ => continue,
        };
        let Some(index) = find_repo_for_download(repos, uri) else {
            continue;
        };
        let repo = &mut repos[index];
        repo.download_bytes = repo.download_bytes.saturating_add(bytes);
    }
}

/// Picks the index of the repository a downloaded `uri` should be charged to.
///
/// Resolution order:
/// 1. a repository whose URI equals `uri` exactly;
/// 2. for `rsync://` URIs, the repository whose URI is the longest prefix of
///    `uri` (the later repository wins a tie);
/// 3. otherwise, among repositories on the same host whose URI starts with
///    `http`: the sole candidate if there is exactly one, else the candidate
///    with the greatest `common_prefix_len` against `uri`, provided it is
///    non-zero (again the later candidate wins a tie).
///
/// Returns `None` when no rule yields a repository.
fn find_repo_for_download(repos: &[RepoMetrics], uri: &str) -> Option<usize> {
    if let Some(exact) = repos.iter().position(|repo| repo.uri == uri) {
        return Some(exact);
    }

    if uri.starts_with("rsync://") {
        // Track (index, prefix length); `>=` keeps the last of equally long prefixes.
        let mut best: Option<(usize, usize)> = None;
        for (index, repo) in repos.iter().enumerate() {
            if !uri.starts_with(&repo.uri) {
                continue;
            }
            let prefix_len = repo.uri.len();
            if best.map_or(true, |(_, best_len)| prefix_len >= best_len) {
                best = Some((index, prefix_len));
            }
        }
        return best.map(|(index, _)| index);
    }

    let wanted_host = uri_host(uri);
    let same_host = repos
        .iter()
        .enumerate()
        .filter(|(_, repo)| repo.host == wanted_host && repo.uri.starts_with("http"))
        .map(|(index, _)| index)
        .collect::<Vec<_>>();
    match same_host.as_slice() {
        // A single repository on the host is accepted without a prefix check.
        [only] => Some(*only),
        several => several
            .iter()
            .copied()
            .max_by_key(|&index| common_prefix_len(&repos[index].uri, uri))
            .filter(|&index| common_prefix_len(&repos[index].uri, uri) > 0),
    }
}

/// Returns `true` when `state` is one of the terminal states treated as a
/// successful sync: `fresh`, `cached`, `reused` or `valid` (case-sensitive).
fn is_success_terminal_state(state: &str) -> bool {
    const SUCCESS_STATES: [&str; 4] = ["fresh", "cached", "reused", "valid"];
    SUCCESS_STATES.contains(&state)
}

/// Loads the CIR artifact at `path` and stores its summary in `snapshot.cir`.
///
/// A missing file is silently ignored. A read or decode failure is recorded
/// in `snapshot.service.parse_errors` and leaves `snapshot.cir` unchanged.
fn parse_cir(path: &Path, snapshot: &mut MetricsSnapshot) {
    if !path.exists() {
        return;
    }
    let decoded = match fs::read(path) {
        Ok(bytes) => decode_cir(&bytes).map_err(|e| e.to_string()),
        Err(e) => Err(e.to_string()),
    };
    let cir = match decoded {
        Ok(cir) => cir,
        Err(err) => {
            snapshot
                .service
                .parse_errors
                .push(format!("decode CIR failed: {}: {err}", path.display()));
            return;
        }
    };

    // Tally accepted objects per type, keyed by what their URI maps to.
    let mut objects_by_type = BTreeMap::new();
    for accepted in &cir.objects {
        *objects_by_type
            .entry(object_type_from_uri(&accepted.rsync_uri))
            .or_default() += 1;
    }
    // Same tally for the rejected list.
    let mut rejected_objects_by_type = BTreeMap::new();
    for rejected in &cir.rejected_objects {
        *rejected_objects_by_type
            .entry(object_type_from_uri(&rejected.object_uri))
            .or_default() += 1;
    }

    snapshot.cir = Some(CirMetrics {
        version: cir.version,
        objects: cir.objects.len() as u64,
        trust_anchors: cir.trust_anchors.len() as u64,
        rejected_objects: cir.rejected_objects.len() as u64,
        reject_list_sha256: hex::encode(&cir.reject_list_sha256),
        objects_by_type,
        rejected_objects_by_type,
    });
}

/// Loads the CCR artifact at `path` and stores its summary in `snapshot.ccr`.
///
/// For each of the `mfts`, `vrps`, `vaps`, `tas` and `rks` sections the summary
/// records whether the section is present and, if so, its item count and
/// hex-encoded hash. A missing file is silently ignored; a read or decode
/// failure is recorded in `snapshot.service.parse_errors` and leaves
/// `snapshot.ccr` unchanged.
fn parse_ccr(path: &Path, snapshot: &mut MetricsSnapshot) {
    if !path.exists() {
        return;
    }
    let decoded = match fs::read(path) {
        Ok(bytes) => decode_content_info(&bytes).map_err(|e| e.to_string()),
        Err(e) => Err(e.to_string()),
    };
    let content = match decoded {
        Ok(ccr) => ccr.content,
        Err(err) => {
            snapshot
                .service
                .parse_errors
                .push(format!("decode CCR failed: {}: {err}", path.display()));
            return;
        }
    };

    // One row per section: `Some((item count, hex digest))` when present.
    let sections = [
        (
            "mfts",
            content
                .mfts
                .as_ref()
                .map(|state| (state.mis.len() as u64, hex::encode(&state.hash))),
        ),
        (
            "vrps",
            content
                .vrps
                .as_ref()
                .map(|state| (state.rps.len() as u64, hex::encode(&state.hash))),
        ),
        (
            "vaps",
            content
                .vaps
                .as_ref()
                .map(|state| (state.aps.len() as u64, hex::encode(&state.hash))),
        ),
        (
            "tas",
            content
                .tas
                .as_ref()
                .map(|state| (state.skis.len() as u64, hex::encode(&state.hash))),
        ),
        (
            "rks",
            content
                .rks
                .as_ref()
                .map(|state| (state.rksets.len() as u64, hex::encode(&state.hash))),
        ),
    ];

    let mut state_present = BTreeMap::new();
    let mut state_items = BTreeMap::new();
    let mut state_digests = BTreeMap::new();
    for (name, summary) in sections {
        state_present.insert(name.to_string(), summary.is_some());
        if let Some((items, digest)) = summary {
            state_items.insert(name.to_string(), items);
            state_digests.insert(name.to_string(), digest);
        }
    }

    snapshot.ccr = Some(CcrMetrics {
        version: content.version,
        state_present,
        state_items,
        state_digests,
    });
}

/// Renders the whole snapshot as Prometheus text exposition output.
///
/// Service and cumulative run metrics come first, followed by the latest-run,
/// repository, object, publication-point, histogram, CIR and CCR sections
/// (the optional ones only when present in the snapshot).
fn render_metrics(snapshot: &MetricsSnapshot) -> String {
    let instance = snapshot.instance.as_str();
    let service = &snapshot.service;
    let runs = &snapshot.runs;
    let totals = &snapshot.cumulative;
    let by_instance = [label("instance", instance)];

    let mut text = String::new();
    let mut prom = PromWriter::new(&mut text);

    // Service-level gauges labelled by instance only.
    for (name, help, value) in [
        (
            "ours_rp_metrics_service_up",
            "Artifact metrics service is up",
            1.0,
        ),
        (
            "ours_rp_metrics_service_last_scan_timestamp_seconds",
            "Unix timestamp of the last artifact scan",
            service.last_scan_timestamp_seconds,
        ),
        (
            "ours_rp_metrics_service_last_scan_duration_seconds",
            "Duration of the last artifact scan",
            service.last_scan_duration_seconds,
        ),
        (
            "ours_rp_metrics_service_last_reload_success",
            "Whether the last artifact reload had no parse errors",
            bool_value(service.last_reload_success),
        ),
        (
            "ours_rp_metrics_service_parse_errors",
            "Current parse error count",
            service.parse_errors.len() as f64,
        ),
    ] {
        prom.gauge(name, help, &by_instance, value);
    }

    for (status, known) in [
        ("success", runs.success as f64),
        ("failed", runs.failed as f64),
        ("partial", runs.partial as f64),
    ] {
        prom.gauge(
            "ours_rp_metrics_service_known_runs",
            "Known run directories by status",
            &[label("instance", instance), label("status", status)],
            known,
        );
    }

    for (status, completed) in [
        ("success", totals.completed_success_total as f64),
        ("failed", totals.completed_failed_total as f64),
    ] {
        prom.counter(
            "ours_rp_run_completed_total",
            "Completed runs observed by the artifact metrics service",
            &[label("instance", instance), label("status", status)],
            completed,
        );
    }

    for (name, help, value) in [
        (
            "ours_rp_run_observed_duration_seconds_sum",
            "Observed wall duration sum for successful runs",
            totals.observed_duration_seconds_sum,
        ),
        (
            "ours_rp_run_observed_duration_seconds_count",
            "Observed wall duration count for successful runs",
            totals.observed_duration_seconds_count as f64,
        ),
        (
            "ours_rp_run_observed_download_bytes_total",
            "Observed download bytes across successful runs",
            totals.observed_download_bytes_total as f64,
        ),
    ] {
        prom.counter(name, help, &by_instance, value);
    }

    prom.gauge(
        "ours_rp_run_consecutive_failures",
        "Consecutive non-success runs at the end of the run list",
        &by_instance,
        runs.consecutive_failures as f64,
    );

    if let Some(latest) = snapshot.latest_run.as_ref() {
        render_latest_metrics(&mut prom, instance, latest);
    }
    render_repo_metrics(&mut prom, instance, &snapshot.repo_stats);
    render_failed_repo_metrics(&mut prom, instance, &snapshot.repo_stats);
    render_top_repo_metrics(&mut prom, instance, &snapshot.top_repos_by_sync_duration);
    render_object_metrics(&mut prom, instance, &snapshot.object_counts);
    render_large_pp_metrics(&mut prom, instance, &snapshot.large_pp_counts);
    render_top_publication_point_metrics(&mut prom, instance, &snapshot.top_pp_by_object_count);

    for (transport, histogram) in snapshot.pp_sync_histograms.iter() {
        let labels = [label("instance", instance), label("transport", transport)];
        prom.histogram(
            "ours_rp_publication_point_sync_duration_seconds",
            "Distribution of sync duration per publication point",
            &labels,
            histogram,
        );
    }

    if let Some(cir) = snapshot.cir.as_ref() {
        render_cir_metrics(&mut prom, instance, cir);
    }
    if let Some(ccr) = snapshot.ccr.as_ref() {
        render_ccr_metrics(&mut prom, instance, ccr);
    }
    text
}

fn render_latest_metrics(writer: &mut PromWriter<'_>, instance: &str, latest: &LatestRunMetrics) {
    writer.gauge(
        "ours_rp_run_sequence",
        "Latest successful run sequence",
        &[label("instance", instance)],
        latest.run_seq as f64,
    );
    writer.gauge(
        "ours_rp_run_success",
        "Whether the latest selected run is successful",
        &[label("instance", instance)],
        bool_value(latest.status == "success"),
    );
    writer.gauge(
        "ours_rp_run_sync_mode",
        "Latest run sync mode state",
        &[
            label("instance", instance),
            label("sync_mode", &latest.sync_mode),
        ],
        1.0,
    );
    if let Some(ts) = latest.start_timestamp_seconds {
        writer.gauge(
            "ours_rp_run_start_timestamp_seconds",
            "Latest run start timestamp",
            &[label("instance", instance)],
            ts,
        );
    }
    if let Some(ts) = latest.finish_timestamp_seconds {
        writer.gauge(
            "ours_rp_run_finish_timestamp_seconds",
            "Latest run finish timestamp",
            &[label("instance", instance)],
            ts,
        );
    }
    writer.gauge(
        "ours_rp_run_duration_seconds",
        "Latest run wall duration",
        &[label("instance", instance)],
        latest.wall_seconds,
    );
    if let Some(value) = latest.user_cpu_seconds {
        writer.gauge(
            "ours_rp_run_user_cpu_seconds",
            "Latest run user CPU seconds",
            &[label("instance", instance)],
            value,
        );
    }
    if let Some(value) = latest.system_cpu_seconds {
        writer.gauge(
            "ours_rp_run_system_cpu_seconds",
            "Latest run system CPU seconds",
            &[label("instance", instance)],
            value,
        );
    }
    if let Some(value) = latest.cpu_percent {
        writer.gauge(
            "ours_rp_run_cpu_percent",
            "Latest run CPU percent from GNU time",
            &[label("instance", instance)],
            value,
        );
    }
    if let Some(value) = latest.max_rss_bytes {
        writer.gauge(
            "ours_rp_run_max_rss_bytes",
            "Latest run maximum resident set size",
            &[label("instance", instance)],
            value as f64,
        );
    }
    if let Some(value) = latest.exit_code {
        writer.gauge(
            "ours_rp_run_exit_code",
            "Latest run exit code",
            &[label("instance", instance)],
            value as f64,
        );
    }
    writer.gauge(
        "ours_rp_vrps",
        "Latest run VRP count",
        &[label("instance", instance), label("kind", "total")],
        latest.vrps as f64,
    );
    if let Some(value) = latest.vrps_unique {
        writer.gauge(
            "ours_rp_vrps",
            "Latest run VRP count",
            &[label("instance", instance), label("kind", "unique")],
            value as f64,
        );
    }
    writer.gauge(
        "ours_rp_vaps",
        "Latest run VAP/ASPA count",
        &[label("instance", instance), label("kind", "total")],
        latest.vaps as f64,
    );
    writer.gauge(
        "ours_rp_publication_points",
        "Latest run publication point count",
        &[label("instance", instance)],
        latest.publication_points as f64,
    );
    writer.gauge(
        "ours_rp_warnings",
        "Latest run warning count",
        &[label("instance", instance)],
        latest.warnings as f64,
    );
    if let Some(value) = latest.tree_instances_processed {
        writer.gauge(
            "ours_rp_tree_instances",
            "Latest run tree instances by state",
            &[label("instance", instance), label("state", "processed")],
            value as f64,
        );
    }
    if let Some(value) = latest.tree_instances_failed {
        writer.gauge(
            "ours_rp_tree_instances",
            "Latest run tree instances by state",
            &[label("instance", instance), label("state", "failed")],
            value as f64,
        );
    }
    for (stage, value) in &latest.stage_seconds {
        writer.gauge(
            "ours_rp_run_stage_duration_seconds",
            "Latest run stage duration",
            &[label("instance", instance), label("stage", stage)],
            *value,
        );
    }
    for (phase, stat) in &latest.repo_sync_phase {
        writer.gauge(
            "ours_rp_repo_sync_phase_count",
            "Publication points by repo sync phase",
            &[label("instance", instance), label("phase", phase)],
            stat.count as f64,
        );
        writer.gauge(
            "ours_rp_repo_sync_phase_duration_seconds_total",
            "Repo sync phase cumulative duration in latest run",
            &[label("instance", instance), label("phase", phase)],
            stat.duration_seconds_total,
        );
    }
    for (state, stat) in &latest.repo_terminal_state {
        writer.gauge(
            "ours_rp_repo_terminal_state_count",
            "Publication points by terminal state",
            &[label("instance", instance), label("terminal_state", state)],
            stat.count as f64,
        );
        writer.gauge(
            "ours_rp_repo_terminal_state_duration_seconds_total",
            "Terminal state cumulative duration in latest run",
            &[label("instance", instance), label("terminal_state", state)],
            stat.duration_seconds_total,
        );
    }
    if let Some(value) = latest.download_events {
        writer.gauge(
            "ours_rp_download_events",
            "Latest run download event count",
            &[label("instance", instance)],
            value as f64,
        );
    }
    if let Some(value) = latest.download_bytes {
        writer.gauge(
            "ours_rp_download_bytes",
            "Latest run download bytes",
            &[label("instance", instance)],
            value as f64,
        );
    }
    for (artifact, size) in &latest.artifact_sizes {
        writer.gauge(
            "ours_rp_artifact_size_bytes",
            "Latest run artifact size",
            &[label("instance", instance), label("artifact", artifact)],
            *size as f64,
        );
    }
    for (path, stat) in &latest.state_path_sizes {
        writer.gauge(
            "ours_rp_state_path_size_bytes",
            "State path size",
            &[label("instance", instance), label("path", path)],
            stat.total_size_bytes as f64,
        );
        writer.gauge(
            "ours_rp_state_path_files",
            "State path file count",
            &[label("instance", instance), label("path", path)],
            stat.file_count as f64,
        );
    }
}

fn render_repo_metrics(writer: &mut PromWriter<'_>, instance: &str, repos: &[RepoMetrics]) {
    for repo in repos {
        let base = [
            label("instance", instance),
            label("repo_id", &repo.repo_id),
            label("host", &repo.host),
            label("uri", &repo.uri),
            label("transport", &repo.transport),
        ];
        writer.gauge("ours_rp_repository_info", "Repository metadata", &base, 1.0);
        writer.gauge(
            "ours_rp_repository_publication_points",
            "Publication points per repository",
            &base,
            repo.publication_points as f64,
        );
        writer.gauge(
            "ours_rp_repository_sync_success",
            "Whether repository sync is successful in the latest run",
            &base,
            bool_value(repo.sync_success),
        );
        writer.gauge(
            "ours_rp_repository_download_bytes",
            "Repository download bytes attributed from latest run download events",
            &base,
            repo.download_bytes as f64,
        );
        for (stat, value) in [
            ("sum", repo.duration_seconds_sum),
            ("max", repo.duration_seconds_max),
            ("avg", repo.duration_seconds_avg),
        ] {
            let labels = [
                label("instance", instance),
                label("repo_id", &repo.repo_id),
                label("host", &repo.host),
                label("transport", &repo.transport),
                label("stat", stat),
            ];
            writer.gauge(
                "ours_rp_repository_sync_duration_seconds",
                "Repository sync duration summary",
                &labels,
                value,
            );
        }
        for (phase, count) in &repo.phase_counts {
            let labels = [
                label("instance", instance),
                label("repo_id", &repo.repo_id),
                label("host", &repo.host),
                label("phase", phase),
            ];
            writer.gauge(
                "ours_rp_repository_sync_phase_publication_points",
                "Repository publication points by sync phase",
                &labels,
                *count as f64,
            );
        }
        for (state, count) in &repo.terminal_state_counts {
            let labels = [
                label("instance", instance),
                label("repo_id", &repo.repo_id),
                label("host", &repo.host),
                label("terminal_state", state),
            ];
            writer.gauge(
                "ours_rp_repository_terminal_state_publication_points",
                "Repository publication points by terminal state",
                &labels,
                *count as f64,
            );
        }
    }
}

/// Emits one gauge per repository that has a `rrdp_failed_rsync_failed` phase
/// entry; the sample value is the repository's maximum sync duration.
fn render_failed_repo_metrics(writer: &mut PromWriter<'_>, instance: &str, repos: &[RepoMetrics]) {
    const FAILED_PHASE: &str = "rrdp_failed_rsync_failed";

    let failed = repos
        .iter()
        .filter(|repo| repo.phase_counts.contains_key(FAILED_PHASE));
    for repo in failed {
        let labels = [
            label("instance", instance),
            label("repo_id", &repo.repo_id),
            label("host", &repo.host),
            label("phase", FAILED_PHASE),
            label("transport", &repo.transport),
            label("uri", &repo.uri),
        ];
        writer.gauge(
            "ours_rp_rrdp_rsync_failed_repository_duration_seconds",
            "Repositories whose RRDP and rsync sync both failed; value is max sync duration when available",
            &labels,
            repo.duration_seconds_max,
        );
    }
}

/// Emits one gauge per ranked repository; the value is the repository's
/// maximum sync duration converted from milliseconds to seconds.
fn render_top_repo_metrics(writer: &mut PromWriter<'_>, instance: &str, repos: &[TopRepo]) {
    for entry in repos {
        let rank = entry.rank.to_string();
        let publication_points = entry.publication_points.to_string();
        let seconds = entry.duration_ms_max as f64 / 1000.0;
        let labels = [
            label("instance", instance),
            label("rank", &rank),
            label("repo_id", &entry.repo_id),
            label("host", &entry.host),
            label("transport", &entry.transport),
            label("publication_points", &publication_points),
            label("uri", &entry.uri),
        ];
        writer.gauge(
            "ours_rp_top_repository_sync_duration_seconds",
            "Top repositories by max sync duration in latest run",
            &labels,
            seconds,
        );
    }
}

/// Emits one `ours_rp_objects` gauge per `(object_type, result)` key in `counts`.
fn render_object_metrics(
    writer: &mut PromWriter<'_>,
    instance: &str,
    counts: &BTreeMap<(String, String), u64>,
) {
    for (key, total) in counts.iter() {
        let (object_type, result) = key;
        let labels = [
            label("instance", instance),
            label("object_type", object_type),
            label("result", result),
        ];
        writer.gauge(
            "ours_rp_objects",
            "Latest run audited objects by type and result",
            &labels,
            *total as f64,
        );
    }
}

/// Emits one gauge per ranked publication point; the value is its object count.
fn render_top_publication_point_metrics(
    writer: &mut PromWriter<'_>,
    instance: &str,
    publication_points: &[TopPublicationPoint],
) {
    for pp in publication_points.iter() {
        let rank = pp.rank.to_string();
        let labels = [
            label("instance", instance),
            label("rank", &rank),
            label("pp_id", &pp.pp_id),
            label("repo_id", &pp.repo_id),
            label("host", &pp.host),
            label("transport", &pp.transport),
            label("terminal_state", &pp.terminal_state),
            label("phase", &pp.phase),
            label("uri", &pp.uri),
        ];
        writer.gauge(
            "ours_rp_top_publication_point_object_count",
            "Top publication points by object count in latest run",
            &labels,
            pp.object_count as f64,
        );
    }
}

/// Emits one gauge per entry of `LARGE_PP_OBJECT_THRESHOLDS`, using the count
/// stored under that threshold in `counts` or `0` when there is none.
fn render_large_pp_metrics(
    writer: &mut PromWriter<'_>,
    instance: &str,
    counts: &BTreeMap<u64, u64>,
) {
    for &threshold in LARGE_PP_OBJECT_THRESHOLDS {
        let above = counts.get(&threshold).map_or(0, |count| *count);
        let bound = threshold.to_string();
        let labels = [
            label("instance", instance),
            label("object_count_gt", &bound),
        ];
        writer.gauge(
            "ours_rp_large_publication_points",
            "Publication points with object count greater than threshold",
            &labels,
            above as f64,
        );
    }
}

/// Emits the CIR summary gauges: scalar counts first, then the per-type
/// breakdowns of accepted and rejected objects.
fn render_cir_metrics(writer: &mut PromWriter<'_>, instance: &str, cir: &CirMetrics) {
    let by_instance = [label("instance", instance)];
    // The digest counts as present when the stored hex string is 64 characters long.
    let digest_present = if cir.reject_list_sha256.len() == 64 {
        1.0
    } else {
        0.0
    };

    for (name, help, value) in [
        ("ours_rp_cir_version", "CIR version", cir.version as f64),
        ("ours_rp_cir_objects", "CIR object count", cir.objects as f64),
        (
            "ours_rp_cir_trust_anchors",
            "CIR trust anchor count",
            cir.trust_anchors as f64,
        ),
        (
            "ours_rp_cir_rejected_objects",
            "CIR rejected object count",
            cir.rejected_objects as f64,
        ),
        (
            "ours_rp_cir_reject_list_digest_present",
            "CIR reject list digest is present",
            digest_present,
        ),
    ] {
        writer.gauge(name, help, &by_instance, value);
    }

    for (object_type, total) in cir.objects_by_type.iter() {
        let labels = [
            label("instance", instance),
            label("object_type", object_type),
        ];
        writer.gauge(
            "ours_rp_cir_objects_by_type",
            "CIR object count by file type",
            &labels,
            *total as f64,
        );
    }
    for (object_type, total) in cir.rejected_objects_by_type.iter() {
        let labels = [
            label("instance", instance),
            label("object_type", object_type),
        ];
        writer.gauge(
            "ours_rp_cir_rejected_objects_by_type",
            "CIR rejected object count by file type",
            &labels,
            *total as f64,
        );
    }
}

/// Emits the CCR summary gauges: version, per-state presence, per-state item
/// counts, and a constant `1` for every state that has a recorded digest.
fn render_ccr_metrics(writer: &mut PromWriter<'_>, instance: &str, ccr: &CcrMetrics) {
    let by_instance = [label("instance", instance)];
    writer.gauge(
        "ours_rp_ccr_version",
        "CCR version",
        &by_instance,
        ccr.version as f64,
    );

    for (state, present) in ccr.state_present.iter() {
        let labels = [label("instance", instance), label("state", state)];
        writer.gauge(
            "ours_rp_ccr_state_present",
            "CCR state presence",
            &labels,
            bool_value(*present),
        );
    }
    for (state, items) in ccr.state_items.iter() {
        let labels = [label("instance", instance), label("state", state)];
        writer.gauge(
            "ours_rp_ccr_state_items",
            "CCR state item count",
            &labels,
            *items as f64,
        );
    }
    // Only the keys matter here: a digest entry existing is what is reported.
    for (state, _digest) in ccr.state_digests.iter() {
        let labels = [label("instance", instance), label("state", state)];
        writer.gauge(
            "ours_rp_ccr_state_digest_present",
            "CCR state digest presence",
            &labels,
            1.0,
        );
    }
}

/// Serialises the snapshot as a pretty-printed JSON status document.
///
/// # Errors
/// Returns the serializer's error message if the document cannot be rendered.
fn render_status_json(snapshot: &MetricsSnapshot) -> Result<String, String> {
    let document = json!({
        "schemaVersion": 1,
        "generatedBy": "rpki_artifact_metrics",
        "instance": snapshot.instance,
        "service": snapshot.service,
        "runs": snapshot.runs,
        "latestRun": snapshot.latest_run,
        "cir": snapshot.cir,
        "ccr": snapshot.ccr,
        "topRepositoriesBySyncDuration": snapshot.top_repos_by_sync_duration,
        "topPublicationPointsByObjectCount": snapshot.top_pp_by_object_count,
        "topPublicationPointsBySyncDuration": snapshot.top_pp_by_sync_duration,
    });
    match serde_json::to_string_pretty(&document) {
        Ok(rendered) => Ok(rendered),
        Err(err) => Err(err.to_string()),
    }
}

/// Incremental writer for Prometheus text exposition output.
struct PromWriter<'a> {
    /// Destination buffer that every header and sample line is appended to.
    out: &'a mut String,
    /// Metric names whose `# HELP` / `# TYPE` header has already been written,
    /// so each header is emitted at most once per writer.
    emitted_headers: BTreeSet<String>,
}

/// A borrowed label name/value pair attached to a metric sample.
#[derive(Clone, Debug)]
struct Label<'s> {
    /// Label name; written to the output as-is.
    key: &'s str,
    /// Label value; escaped by the writer before being quoted.
    value: &'s str,
}

/// Shorthand constructor for a [`Label`] borrowing both of its parts.
fn label<'s>(key: &'s str, value: &'s str) -> Label<'s> {
    Label { key, value }
}

impl<'a> PromWriter<'a> {
    /// Creates a writer that appends to `out` and has emitted no headers yet.
    fn new(out: &'a mut String) -> Self {
        Self {
            out,
            emitted_headers: BTreeSet::new(),
        }
    }

    /// Writes one sample of a `gauge` metric (plus its header on first use).
    fn gauge(&mut self, name: &str, help: &str, labels: &[Label<'_>], value: f64) {
        self.metric("gauge", name, help, labels, value);
    }

    /// Writes one sample of a `counter` metric (plus its header on first use).
    fn counter(&mut self, name: &str, help: &str, labels: &[Label<'_>], value: f64) {
        self.metric("counter", name, help, labels, value);
    }

    /// Writes the header for `name` if needed, then a single sample line.
    fn metric(
        &mut self,
        metric_type: &str,
        name: &str,
        help: &str,
        labels: &[Label<'_>],
        value: f64,
    ) {
        self.header(name, help, metric_type);
        self.write_sample(name, "", labels, &format_prom_value(value));
    }

    /// Writes a histogram: one `_bucket` line per cumulative count, then the
    /// `_sum` and `_count` lines.
    ///
    /// Cumulative counts beyond the configured bucket bounds are labelled
    /// `le="+Inf"`.
    fn histogram(
        &mut self,
        name: &str,
        help: &str,
        base_labels: &[Label<'_>],
        histogram: &Histogram,
    ) {
        self.header(name, help, "histogram");
        let cumulative = histogram.cumulative_counts();
        for (index, count) in cumulative.iter().enumerate() {
            let le = if index < histogram.buckets.len() {
                format_prom_value(histogram.buckets[index])
            } else {
                "+Inf".to_string()
            };
            let mut labels = base_labels.to_vec();
            labels.push(label("le", &le));
            self.write_sample(name, "_bucket", &labels, &count.to_string());
        }
        self.write_sample(name, "_sum", base_labels, &format_prom_value(histogram.sum));
        self.write_sample(name, "_count", base_labels, &histogram.count.to_string());
    }

    /// Writes the `# HELP` and `# TYPE` lines for `name` the first time the
    /// name is seen; later calls for the same name are no-ops.
    fn header(&mut self, name: &str, help: &str, metric_type: &str) {
        // Probe with the borrowed name first: this runs for every sample, and
        // allocating an owned key is only needed the first time a name appears.
        if self.emitted_headers.contains(name) {
            return;
        }
        self.emitted_headers.insert(name.to_string());
        self.out.push_str("# HELP ");
        self.out.push_str(name);
        self.out.push(' ');
        self.out.push_str(&escape_help(help));
        self.out.push('\n');
        self.out.push_str("# TYPE ");
        self.out.push_str(name);
        self.out.push(' ');
        self.out.push_str(metric_type);
        self.out.push('\n');
    }

    /// Appends one `name+suffix{labels} value` line to the output.
    fn write_sample(&mut self, name: &str, suffix: &str, labels: &[Label<'_>], value: &str) {
        self.out.push_str(name);
        self.out.push_str(suffix);
        write_labels(self.out, labels);
        self.out.push(' ');
        self.out.push_str(value);
        self.out.push('\n');
    }
}

/// Appends `{k1="v1",k2="v2",...}` to `out`, escaping each value.
/// Writes nothing at all when `labels` is empty.
fn write_labels(out: &mut String, labels: &[Label<'_>]) {
    // The first pair is introduced by the opening brace, every later one by a comma.
    let mut separator = '{';
    for pair in labels {
        out.push(separator);
        out.push_str(pair.key);
        out.push_str("=\"");
        out.push_str(&escape_label(pair.value));
        out.push('"');
        separator = ',';
    }
    if !labels.is_empty() {
        out.push('}');
    }
}

/// Binds `listen` and serves connections one at a time from the shared snapshot.
///
/// The snapshot is cloned per connection so the read lock is released before
/// the request is handled. Accept and request failures are logged to stderr
/// and do not stop the loop.
///
/// # Errors
/// Returns an error string if the listener cannot be bound.
///
/// # Panics
/// Panics if the snapshot lock is poisoned.
fn serve_http(listen: &str, shared: Arc<RwLock<MetricsSnapshot>>) -> Result<(), String> {
    let listener = match TcpListener::bind(listen) {
        Ok(listener) => listener,
        Err(e) => return Err(format!("bind failed: {listen}: {e}")),
    };
    for incoming in listener.incoming() {
        let mut stream = match incoming {
            Ok(stream) => stream,
            Err(err) => {
                eprintln!("accept failed: {err}");
                continue;
            }
        };
        let snapshot = shared.read().expect("metrics lock poisoned").clone();
        if let Err(err) = handle_http_stream(&mut stream, &snapshot) {
            eprintln!("http request failed: {err}");
        }
    }
    Ok(())
}

fn handle_http_stream(stream: &mut TcpStream, snapshot: &MetricsSnapshot) -> Result<(), String> {
    let mut buf = [0u8; 4096];
    let len = stream.read(&mut buf).map_err(|e| e.to_string())?;
    let req = String::from_utf8_lossy(&buf[..len]);
    let path = req
        .lines()
        .next()
        .and_then(|line| line.split_whitespace().nth(1))
        .unwrap_or("/");
    match path {
        "/metrics" => write_http_response(
            stream,
            "200 OK",
            "text/plain; version=0.0.4",
            &render_metrics(snapshot),
        ),
        "/status" => write_http_response(
            stream,
            "200 OK",
            "application/json",
            &render_status_json(snapshot)?,
        ),
        "/healthz" => write_http_response(stream, "200 OK", "text/plain", "ok\n"),
        _ => write_http_response(stream, "404 Not Found", "text/plain", "not found\n"),
    }
}

/// Writes a complete `HTTP/1.1` response (status line, headers, body) to `stream`.
///
/// The header block carries `Content-Type`, a `Content-Length` equal to the
/// byte length of `body`, and `Connection: close`. Header and body are sent
/// with two consecutive `write_all` calls.
///
/// # Errors
///
/// Returns the stringified I/O error of the first write that fails.
fn write_http_response(
    stream: &mut TcpStream,
    status: &str,
    content_type: &str,
    body: &str,
) -> Result<(), String> {
    let payload = body.as_bytes();
    let header = format!(
        "HTTP/1.1 {status}\r\nContent-Type: {content_type}\r\nContent-Length: {}\r\nConnection: close\r\n\r\n",
        payload.len()
    );
    stream
        .write_all(header.as_bytes())
        .and_then(|()| stream.write_all(payload))
        .map_err(|e| e.to_string())
}

/// Writes `bytes` to `path`, creating any missing parent directories first.
///
/// # Errors
///
/// Returns `create parent failed: ...` when the parent directory cannot be
/// created and `write failed: ...` when the file itself cannot be written.
fn write_file(path: &Path, bytes: &[u8]) -> Result<(), String> {
    if let Some(parent) = path.parent() {
        if let Err(e) = fs::create_dir_all(parent) {
            return Err(format!("create parent failed: {}: {e}", parent.display()));
        }
    }
    match fs::write(path, bytes) {
        Ok(()) => Ok(()),
        Err(e) => Err(format!("write failed: {}: {e}", path.display())),
    }
}

/// Converts the known `*_ms` fields of a stage-timing object into seconds.
///
/// Each recognised field that is present as an unsigned integer is emitted
/// under its stage name with the value divided by 1000. Missing fields are
/// skipped, and `None` input yields an empty map.
fn extract_stage_seconds(value: Option<&Value>) -> BTreeMap<String, f64> {
    // (JSON field, stage name) pairs; stage names are unique.
    const STAGE_FIELDS: &[(&str, &str)] = &[
        ("validation_ms", "validation"),
        ("report_build_ms", "report_build"),
        ("report_write_ms", "report_write"),
        ("ccr_build_ms", "ccr_build"),
        ("ccr_write_ms", "ccr_write"),
        ("compare_view_build_ms", "compare_view_build"),
        ("compare_view_write_ms", "compare_view_write"),
        ("cir_build_cir_ms", "cir_build"),
        ("cir_write_cir_ms", "cir_write"),
        ("cir_total_ms", "cir_total"),
        ("total_ms", "total"),
        ("repo_sync_ms_total", "repo_sync_total"),
        ("rrdp_download_ms_total", "rrdp_download_total"),
        ("rsync_download_ms_total", "rsync_download_total"),
    ];

    let Some(timing) = value else {
        return BTreeMap::new();
    };
    STAGE_FIELDS
        .iter()
        .filter_map(|&(field, stage)| {
            json_u64(timing, &[field]).map(|ms| (stage.to_string(), ms as f64 / 1000.0))
        })
        .collect()
}

/// Reads a JSON object of `{ name: { count, duration_ms_total } }` entries.
///
/// Every key of the object becomes one map entry. A missing or non-integer
/// `count` / `duration_ms_total` is treated as 0, and the duration is
/// converted from milliseconds to seconds. Anything that is not a JSON object
/// (including `None`) yields an empty map.
fn extract_count_duration_map(value: Option<&Value>) -> BTreeMap<String, CountDuration> {
    value
        .and_then(|v| v.as_object())
        .into_iter()
        .flatten()
        .map(|(name, entry)| {
            let count = json_u64(entry, &["count"]).unwrap_or(0);
            let millis = json_u64(entry, &["duration_ms_total"]).unwrap_or(0);
            (
                name.clone(),
                CountDuration {
                    count,
                    duration_seconds_total: millis as f64 / 1000.0,
                },
            )
        })
        .collect()
}

/// Sums artifact sizes per artifact name from a JSON array of artifact entries.
///
/// The name is the entry's `type`, otherwise the file name of its `path`,
/// otherwise `"unknown"`. The size is `sizeBytes`, otherwise `size`,
/// otherwise 0. Sizes of entries sharing a name are added together.
/// Non-array input (including `None`) yields an empty map.
fn extract_artifact_sizes(value: Option<&Value>) -> BTreeMap<String, u64> {
    let mut totals: BTreeMap<String, u64> = BTreeMap::new();
    let Some(items) = value.and_then(|v| v.as_array()) else {
        return totals;
    };
    for item in items {
        let artifact = match json_str(item, &["type"]) {
            Some(kind) => kind,
            None => json_str(item, &["path"])
                .and_then(|path| Path::new(path).file_name())
                .and_then(|name| name.to_str())
                .unwrap_or("unknown"),
        };
        let size = match json_u64(item, &["sizeBytes"]) {
            Some(bytes) => bytes,
            None => json_u64(item, &["size"]).unwrap_or(0),
        };
        *totals.entry(artifact.to_string()).or_default() += size;
    }
    totals
}

/// Builds a per-label `PathSize` map from a JSON array of path statistics.
///
/// Each entry is keyed by its `label` (or `"unknown"` when absent); the
/// `totalSizeBytes`, `fileCount` and `dirCount` fields default to 0. When two
/// entries share a label, the later one replaces the earlier one. Non-array
/// input (including `None`) yields an empty map.
fn extract_path_sizes(value: Option<&Value>) -> BTreeMap<String, PathSize> {
    let counter = |item: &Value, key: &str| json_u64(item, &[key]).unwrap_or(0);
    let mut sizes = BTreeMap::new();
    if let Some(items) = value.and_then(|v| v.as_array()) {
        for item in items {
            let label = json_str(item, &["label"]).unwrap_or("unknown");
            let stats = PathSize {
                total_size_bytes: counter(item, "totalSizeBytes"),
                file_count: counter(item, "fileCount"),
                dir_count: counter(item, "dirCount"),
            };
            sizes.insert(label.to_string(), stats);
        }
    }
    sizes
}

/// Parses the numeric run index out of a `run_<n>` directory name.
///
/// Returns `None` when the path has no UTF-8 file name, the name does not
/// start with `run_`, or the remainder does not parse as a `u64`.
fn run_index_from_path(path: &Path) -> Option<u64> {
    let name = path.file_name()?.to_str()?;
    let digits = name.strip_prefix("run_")?;
    digits.parse::<u64>().ok()
}

/// Follows `path` key by key from `value` and returns the string found there.
///
/// Returns `None` when any key is missing or the final node is not a string.
/// An empty `path` inspects `value` itself.
fn json_str<'a>(value: &'a Value, path: &[&str]) -> Option<&'a str> {
    path.iter()
        .try_fold(value, |node, key| node.get(*key))?
        .as_str()
}

/// Follows `path` key by key from `value` and returns the node as a `u64`.
///
/// Returns `None` when any key is missing or the final node is not
/// representable as an unsigned integer. An empty `path` inspects `value` itself.
fn json_u64(value: &Value, path: &[&str]) -> Option<u64> {
    path.iter()
        .try_fold(value, |node, key| node.get(*key))?
        .as_u64()
}

/// Follows `path` key by key from `value` and returns the node as an `i64`.
///
/// Returns `None` when any key is missing or the final node is not
/// representable as a signed integer. An empty `path` inspects `value` itself.
fn json_i64(value: &Value, path: &[&str]) -> Option<i64> {
    path.iter()
        .try_fold(value, |node, key| node.get(*key))?
        .as_i64()
}

/// Follows `path` key by key from `value` and returns the node as an `f64`.
///
/// `as_f64` is tried first; if it yields nothing, an unsigned integer node is
/// converted with a cast. Returns `None` when any key is missing or neither
/// conversion applies. An empty `path` inspects `value` itself.
fn json_f64(value: &Value, path: &[&str]) -> Option<f64> {
    let node = path.iter().try_fold(value, |node, key| node.get(*key))?;
    match node.as_f64() {
        Some(number) => Some(number),
        None => node.as_u64().map(|v| v as f64),
    }
}

/// Parses an RFC 3339 timestamp and returns its Unix timestamp as `f64`.
///
/// The result is built from the integer `unix_timestamp()`, so it carries
/// whole seconds only. Returns `None` when `value` does not parse.
fn parse_rfc3339_to_unix(value: &str) -> Option<f64> {
    match time::OffsetDateTime::parse(value, &time::format_description::well_known::Rfc3339) {
        Ok(parsed) => Some(parsed.unix_timestamp() as f64),
        Err(_) => None,
    }
}

/// Returns the current wall-clock time as fractional seconds since the Unix epoch.
///
/// Falls back to `0.0` when the system clock reports a time before the epoch.
fn unix_now_seconds() -> f64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_secs_f64(),
        Err(_) => 0.0,
    }
}

/// Maps a flag to a gauge value: `1.0` for `true`, `0.0` for `false`.
fn bool_value(value: bool) -> f64 {
    f64::from(u8::from(value))
}

/// Collapses a free-form transport description into a canonical name.
///
/// Matching is ASCII case-insensitive: anything mentioning `rrdp` or `https`
/// becomes `"rrdp"`, anything else mentioning `rsync` becomes `"rsync"`, and
/// every other input is returned lower-cased.
fn normalize_transport(value: &str) -> String {
    let lower = value.to_ascii_lowercase();
    let mentions = |needle: &str| lower.contains(needle);
    let canonical = if mentions("rrdp") || mentions("https") {
        Some("rrdp")
    } else if mentions("rsync") {
        Some("rsync")
    } else {
        None
    };
    match canonical {
        Some(name) => name.to_string(),
        None => lower,
    }
}

/// Derives the transport name from a URI's scheme prefix.
///
/// `http://` and `https://` map to `"rrdp"`, `rsync://` maps to `"rsync"`,
/// and anything else is `"unknown"`. The comparison is case-sensitive.
fn infer_transport(uri: &str) -> String {
    const SCHEMES: &[(&str, &str)] = &[
        ("http://", "rrdp"),
        ("https://", "rrdp"),
        ("rsync://", "rsync"),
    ];
    SCHEMES
        .iter()
        .find(|(prefix, _)| uri.starts_with(*prefix))
        .map_or("unknown", |&(_, transport)| transport)
        .to_string()
}

/// Extracts the authority part of a URI: the text between `://` and the next `/`.
///
/// When there is no `://`, the whole input is treated as starting at the
/// authority. An empty authority is reported as `"unknown"`. Ports or user
/// info, if present, are kept as part of the result.
fn uri_host(uri: &str) -> String {
    let rest = match uri.split_once("://") {
        Some((_, tail)) => tail,
        None => uri,
    };
    let authority = match rest.find('/') {
        Some(end) => &rest[..end],
        None => rest,
    };
    if authority.is_empty() {
        "unknown".to_string()
    } else {
        authority.to_string()
    }
}

/// Classifies an object URI by its file extension (ASCII case-insensitive).
///
/// Recognised suffixes are `.mft`, `.crl`, `.cer`, `.roa`, `.asa` and `.gbr`;
/// every other URI is classified as `"other"`.
fn object_type_from_uri(uri: &str) -> String {
    const KINDS: &[(&str, &str)] = &[
        (".mft", "manifest"),
        (".crl", "crl"),
        (".cer", "certificate"),
        (".roa", "roa"),
        (".asa", "aspa"),
        (".gbr", "gbr"),
    ];
    let lower = uri.to_ascii_lowercase();
    KINDS
        .iter()
        .find(|(suffix, _)| lower.ends_with(*suffix))
        .map_or("other", |&(_, kind)| kind)
        .to_string()
}

/// Returns the first 6 bytes of the SHA-256 of `value` as 12 lowercase hex characters.
fn short_sha256(value: &str) -> String {
    let mut hasher = Sha256::new();
    hasher.update(value.as_bytes());
    let digest = hasher.finalize();
    hex::encode(&digest[..6])
}

/// Counts how many leading bytes `left` and `right` have in common.
///
/// The comparison is byte-wise, so the result is a byte count and is not
/// guaranteed to fall on a character boundary for non-ASCII input.
fn common_prefix_len(left: &str, right: &str) -> usize {
    let (a, b) = (left.as_bytes(), right.as_bytes());
    a.iter()
        .zip(b)
        .position(|(x, y)| x != y)
        .unwrap_or_else(|| a.len().min(b.len()))
}

/// Formats a sample value for the Prometheus text exposition format.
///
/// * NaN and the infinities use the spellings `NaN`, `+Inf` and `-Inf`.
/// * Values without a fractional part are printed as plain integers.
/// * Everything else is printed with up to six decimal places, with trailing
///   zeros (and a dangling decimal point) removed.
fn format_prom_value(value: f64) -> String {
    if value.is_nan() {
        return "NaN".to_string();
    }
    if value.is_infinite() {
        // Rust's own formatting would yield `inf` / `-inf`; the exposition
        // format spells these `+Inf` / `-Inf`.
        let text = if value.is_sign_positive() {
            "+Inf"
        } else {
            "-Inf"
        };
        return text.to_string();
    }
    if value.fract() == 0.0 {
        format!("{value:.0}")
    } else {
        format!("{value:.6}")
            .trim_end_matches('0')
            .trim_end_matches('.')
            .to_string()
    }
}

/// Escapes a label value: backslash, line feed and double quote become
/// `\\`, `\n` and `\"` respectively; all other characters pass through.
fn escape_label(value: &str) -> String {
    let mut escaped = String::with_capacity(value.len());
    for ch in value.chars() {
        match ch {
            '\\' => escaped.push_str("\\\\"),
            '\n' => escaped.push_str("\\n"),
            '"' => escaped.push_str("\\\""),
            other => escaped.push(other),
        }
    }
    escaped
}

/// Escapes help text: backslash and line feed become `\\` and `\n`;
/// all other characters (including double quotes) pass through.
fn escape_help(value: &str) -> String {
    let mut escaped = String::with_capacity(value.len());
    for ch in value.chars() {
        match ch {
            '\\' => escaped.push_str("\\\\"),
            '\n' => escaped.push_str("\\n"),
            other => escaped.push(other),
        }
    }
    escaped
}

// Unit tests for argument parsing and for scanning a run root laid out as
// `<root>/runs/run_<n>/...` in a temporary directory.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ccr::model::CCR_VERSION_V0;
    use crate::ccr::{
        CcrContentInfo, CcrDigestAlgorithm, RpkiCanonicalCacheRepresentation, TrustAnchorState,
        encode_content_info,
    };
    use crate::cir::{
        CanonicalInputRepresentation, CirHashAlgorithm, CirObject, CirRejectedObject,
        CirTrustAnchor, compute_reject_list_sha256, encode_cir, sha256,
    };
    use tempfile::TempDir;

    // `--once` together with both output paths must parse; the first element
    // of the argument vector is the program name.
    #[test]
    fn parse_args_accepts_once_outputs() {
        let args = parse_args(&[
            "rpki_artifact_metrics".to_string(),
            "--run-root".to_string(),
            "root".to_string(),
            "--once".to_string(),
            "--out-metrics".to_string(),
            "metrics.prom".to_string(),
            "--out-status".to_string(),
            "status.json".to_string(),
        ])
        .expect("parse");
        assert!(args.once);
        assert_eq!(args.run_root, PathBuf::from("root"));
        assert_eq!(args.out_metrics.as_deref(), Some(Path::new("metrics.prom")));
    }

    // End-to-end check: writes one successful run (meta, summary, timing
    // files, VRP CSV, report, CIR and CCR) into a temp dir, scans it, and
    // verifies both the snapshot fields and the rendered metrics/status text.
    #[test]
    fn scan_fixture_exports_repo_pp_cir_and_ccr_metrics() {
        let td = TempDir::new().expect("tempdir");
        let run = td.path().join("runs/run_0001");
        fs::create_dir_all(&run).expect("create run");
        fs::write(
            run.join("run-meta.json"),
            r#"{"status":"success","run_index":1,"run_id":"run_0001","sync_mode":"snapshot","snapshot_reason":"first_run","started_at_rfc3339_utc":"2026-05-25T00:00:00Z","completed_at_rfc3339_utc":"2026-05-25T00:00:10Z"}"#,
        )
        .expect("meta");
        fs::write(
            run.join("run-summary.json"),
            r#"{"runSeq":1,"runId":"run_0001","runDir":"RUN","startedAtRfc3339Utc":"2026-05-25T00:00:00Z","finishedAtRfc3339Utc":"2026-05-25T00:00:10Z","wallMs":10000,"status":"success","exitCode":0,"processMetrics":{"userSeconds":2.5,"systemSeconds":1.5,"cpuPercent":40,"maxRssKb":1000},"stageTiming":{"validation_ms":7000,"total_ms":9000,"download_event_count":2,"download_bytes_total":1234},"reportCounts":{"vrps":3,"aspas":1,"publicationPoints":2,"warnings":0},"repoSyncStats":{"by_phase":{"rrdp_delta":{"count":2,"duration_ms_total":3000}},"by_terminal_state":{"fresh":{"count":2,"duration_ms_total":3000}}},"pathStats":[{"label":"work-db","totalSizeBytes":99,"fileCount":2,"dirCount":1}],"artifacts":[{"path":"report.json","sizeBytes":10}]}"#,
        )
        .expect("summary");
        fs::write(run.join("process-time.txt"), "time").expect("time");
        fs::write(run.join("stage-timing.json"), "{}").expect("stage");
        // Three VRP rows, two of which are identical.
        fs::write(
            run.join("vrps.csv"),
            "ASN,IP Prefix,Max Length,Trust Anchor,Expires\n\
             AS64496,192.0.2.0/24,24,ta,2026-05-25T01:00:00Z\n\
             AS64496,192.0.2.0/24,24,ta,2026-05-25T01:00:00Z\n\
             AS64497,2001:db8::/32,48,ta,2026-05-25T01:00:00Z\n",
        )
        .expect("vrps");
        // Report with two downloads (111 and 222 bytes) and two publication
        // points that share one RRDP notification URI; the first publication
        // point lists two objects, the second one.
        fs::write(
            run.join("report.json"),
            r#"{"tree":{"instances_processed":2,"instances_failed":0,"warnings":[]},"vrps":[{},{},{}],"aspas":[{}],"downloads":[{"kind":"rrdp_notification","uri":"https://repo.example/notify.xml","success":true,"duration_ms":100,"bytes":111},{"kind":"rrdp_delta","uri":"https://repo.example/session/1/delta.xml","success":true,"duration_ms":200,"bytes":222}],"publication_points":[{"rsync_base_uri":"rsync://repo.example/a/","manifest_rsync_uri":"rsync://repo.example/a/a.mft","publication_point_rsync_uri":"rsync://repo.example/a/","rrdp_notification_uri":"https://repo.example/notify.xml","repo_sync_source":"rrdp","repo_sync_phase":"rrdp_delta","repo_sync_duration_ms":1000,"repo_terminal_state":"fresh","objects":[{"kind":"roa","result":"ok"},{"kind":"manifest","result":"ok"}]},{"rsync_base_uri":"rsync://repo.example/b/","manifest_rsync_uri":"rsync://repo.example/b/b.mft","publication_point_rsync_uri":"rsync://repo.example/b/","rrdp_notification_uri":"https://repo.example/notify.xml","repo_sync_source":"rrdp","repo_sync_phase":"rrdp_delta","repo_sync_duration_ms":2000,"repo_terminal_state":"fresh","objects":[{"kind":"roa","result":"ok"}]}],"repo_sync_stats":{"publication_points_total":2,"by_phase":{"rrdp_delta":{"count":2,"duration_ms_total":3000}},"by_terminal_state":{"fresh":{"count":2,"duration_ms_total":3000}}}}"#,
        )
        .expect("report");
        fs::write(run.join("input.cir"), sample_cir()).expect("cir");
        fs::write(run.join("result.ccr"), sample_ccr()).expect("ccr");

        let snapshot = scan_run_root(td.path(), "test").expect("scan");
        assert_eq!(snapshot.runs.success, 1);
        // Both publication points are expected to collapse into one repository entry.
        assert_eq!(snapshot.repo_stats.len(), 1);
        assert!(snapshot.repo_stats[0].sync_success);
        // 333 equals the sum of the two `bytes` values in the report's `downloads`.
        assert_eq!(snapshot.repo_stats[0].download_bytes, 333);
        assert_eq!(snapshot.top_pp_by_object_count[0].object_count, 2);
        assert_eq!(snapshot.cir.as_ref().unwrap().objects, 1);
        assert_eq!(snapshot.ccr.as_ref().unwrap().state_items["tas"], 1);
        let metrics = render_metrics(&snapshot);
        assert!(metrics.contains("ours_rp_repository_info"));
        assert!(metrics.contains("ours_rp_repository_sync_success"));
        assert!(metrics.contains("ours_rp_repository_download_bytes"));
        assert!(metrics.contains("ours_rp_large_publication_points"));
        assert!(metrics.contains("ours_rp_cir_objects"));
        assert!(metrics.contains("ours_rp_ccr_state_items"));
        // Total vs. unique VRP counts; NOTE(review): the unique count of 2
        // presumably reflects the duplicated CSV row above - confirm against
        // the scanner.
        assert!(metrics.contains(r#"ours_rp_vrps{instance="test",kind="total"} 3"#));
        assert!(metrics.contains(r#"ours_rp_vrps{instance="test",kind="unique"} 2"#));
        let status = render_status_json(&snapshot).expect("status");
        assert!(status.contains("topPublicationPointsByObjectCount"));
        assert!(status.contains(r#""vrpsUnique": 2"#));
    }

    // A run directory whose meta file reports `running` must be counted as
    // partial and must not be selected as the latest run.
    #[test]
    fn partial_run_does_not_become_latest_success() {
        let td = TempDir::new().expect("tempdir");
        let run = td.path().join("runs/run_0001");
        fs::create_dir_all(&run).expect("create run");
        fs::write(run.join("run-meta.json"), r#"{"status":"running"}"#).expect("meta");
        let snapshot = scan_run_root(td.path(), "test").expect("scan");
        assert_eq!(snapshot.runs.partial, 1);
        assert!(snapshot.latest_run.is_none());
    }

    // Encodes a minimal CIR fixture: one object, one trust anchor and one
    // rejected object whose URI feeds the reject-list hash.
    fn sample_cir() -> Vec<u8> {
        let rejected = vec![CirRejectedObject {
            object_uri: "rsync://repo.example/a/bad.roa".to_string(),
            reason: Some("bad".to_string()),
        }];
        let cir = CanonicalInputRepresentation {
            version: crate::cir::CIR_VERSION_V3,
            hash_alg: CirHashAlgorithm::Sha256,
            validation_time: time::OffsetDateTime::parse(
                "2026-05-25T00:00:00Z",
                &time::format_description::well_known::Rfc3339,
            )
            .unwrap(),
            objects: vec![CirObject {
                rsync_uri: "rsync://repo.example/a/a.roa".to_string(),
                sha256: vec![1; 32],
            }],
            trust_anchors: vec![CirTrustAnchor {
                ta_rsync_uri: "rsync://repo.example/ta.cer".to_string(),
                tal_uri: "https://tal.example/tal.tal".to_string(),
                tal_bytes: b"rsync://repo.example/ta.cer\n\nAQID\n".to_vec(),
                ta_certificate_der: b"ta".to_vec(),
                ta_certificate_sha256: sha256(b"ta"),
            }],
            // Computed before `rejected` is moved into the struct below.
            reject_list_sha256: compute_reject_list_sha256(
                rejected.iter().map(|item| item.object_uri.as_str()),
            ),
            rejected_objects: rejected,
        };
        encode_cir(&cir).expect("encode cir")
    }

    // Encodes a minimal CCR fixture in which only the trust-anchor state
    // (`tas`) is populated; every other state is `None`.
    fn sample_ccr() -> Vec<u8> {
        let ci = CcrContentInfo::new(RpkiCanonicalCacheRepresentation {
            version: CCR_VERSION_V0,
            hash_alg: CcrDigestAlgorithm::Sha256,
            produced_at: time::OffsetDateTime::parse(
                "2026-05-25T00:00:00Z",
                &time::format_description::well_known::Rfc3339,
            )
            .unwrap(),
            mfts: None,
            vrps: None,
            vaps: None,
            tas: Some(TrustAnchorState {
                skis: vec![vec![1; 20]],
                hash: vec![2; 32],
            }),
            rks: None,
        });
        encode_content_info(&ci).expect("encode ccr")
    }
}