use std::collections::BTreeMap; use std::fs; use std::path::{Path, PathBuf}; use rocksdb::{DB, IteratorMode, Options}; use rpki::storage::{ ALL_COLUMN_FAMILY_NAMES, CF_MANIFEST_REPLAY_META, CF_RAW_BY_HASH, CF_REPOSITORY_VIEW, CF_ROA_CACHE_PROJECTION, CF_RRDP_SOURCE, CF_RRDP_SOURCE_MEMBER, CF_RRDP_URI_OWNER, CF_TRANSPORT_PREFETCH, CF_VCIR, column_family_descriptors, }; #[derive(Clone, Copy, Debug, PartialEq, Eq)] enum DbStatsMode { Estimate, Exact, } #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] enum CfGroup { CurrentRepositoryView, CurrentValidationState, CurrentRrdpState, LegacyCompatibility, } #[derive(Clone, Debug, Default, PartialEq, Eq)] struct CfStats { keys: u64, key_bytes: u64, value_bytes: u64, metadata_size_bytes: u64, metadata_file_count: u64, live_sst_size_bytes: u64, live_sst_files: u64, } impl CfStats { fn avg_key_bytes(&self) -> f64 { avg_bytes(self.key_bytes, self.keys) } fn avg_value_bytes(&self) -> f64 { avg_bytes(self.value_bytes, self.keys) } } #[derive(Clone, Debug, Default, PartialEq, Eq)] struct DbFileStats { total_size_bytes: u64, total_count: u64, sst_size_bytes: u64, sst_count: u64, blob_size_bytes: u64, blob_count: u64, log_size_bytes: u64, log_count: u64, manifest_size_bytes: u64, manifest_count: u64, other_size_bytes: u64, other_count: u64, } impl CfGroup { fn as_str(self) -> &'static str { match self { Self::CurrentRepositoryView => "current_repository_view", Self::CurrentValidationState => "current_validation_state", Self::CurrentRrdpState => "current_rrdp_state", Self::LegacyCompatibility => "legacy_compatibility", } } } fn avg_bytes(bytes: u64, keys: u64) -> f64 { if keys == 0 { 0.0 } else { bytes as f64 / keys as f64 } } fn usage() -> String { let bin = "db_stats"; format!( "\ Usage: {bin} --db [--exact] Options: --db RocksDB directory --exact Iterate to count keys and logical bytes (slower; default uses RocksDB estimates) --help Show this help Output: - legacy fields: =, group_=, sst_files= - cf..*: key/value bytes, RocksDB metadata, live SST size - group..*: grouped key/value bytes and physical SST metadata - db.files.*: DB directory file totals split by .sst/.blob/.log/MANIFEST/other Output groups: - current_repository_view: repository_view + raw_by_hash - current_validation_state: vcir + manifest_replay_meta + roa_cache_projection + transport_prefetch - current_rrdp_state: rrdp_source + rrdp_source_member + rrdp_uri_owner " ) } fn estimate_keys(db: &DB, cf_name: &str) -> Result, Box> { let cf = db .cf_handle(cf_name) .ok_or_else(|| format!("missing column family: {cf_name}"))?; Ok(db.property_int_value_cf(cf, "rocksdb.estimate-num-keys")?) } fn exact_logical_stats(db: &DB, cf_name: &str) -> Result> { let cf = db .cf_handle(cf_name) .ok_or_else(|| format!("missing column family: {cf_name}"))?; let mode = IteratorMode::Start; let mut stats = CfStats::default(); for res in db.iterator_cf(cf, mode) { let (key, value) = res?; stats.keys = stats.keys.saturating_add(1); stats.key_bytes = stats.key_bytes.saturating_add(key.len() as u64); stats.value_bytes = stats.value_bytes.saturating_add(value.len() as u64); } Ok(stats) } fn cf_metadata_stats(db: &DB, cf_name: &str) -> Result<(u64, u64), Box> { let cf = db .cf_handle(cf_name) .ok_or_else(|| format!("missing column family: {cf_name}"))?; let metadata = db.get_column_family_metadata_cf(cf); Ok((metadata.size, metadata.file_count as u64)) } fn live_sst_stats(db: &DB) -> Result, Box> { let mut stats = BTreeMap::new(); for file in db.live_files()? { let entry = stats.entry(file.column_family_name).or_insert((0u64, 0u64)); entry.0 = entry.0.saturating_add(file.size as u64); entry.1 = entry.1.saturating_add(1); } Ok(stats) } fn collect_db_file_stats(db_path: &Path) -> Result> { let mut stats = DbFileStats::default(); for entry in fs::read_dir(db_path)? { let entry = entry?; if !entry.file_type()?.is_file() { continue; } let size = entry.metadata()?.len(); let name = entry.file_name(); let name = name.to_string_lossy(); stats.total_size_bytes = stats.total_size_bytes.saturating_add(size); stats.total_count = stats.total_count.saturating_add(1); if name.ends_with(".sst") { stats.sst_size_bytes = stats.sst_size_bytes.saturating_add(size); stats.sst_count = stats.sst_count.saturating_add(1); } else if name.ends_with(".blob") { stats.blob_size_bytes = stats.blob_size_bytes.saturating_add(size); stats.blob_count = stats.blob_count.saturating_add(1); } else if name == "LOG" || name.starts_with("LOG.") || name.ends_with(".log") { stats.log_size_bytes = stats.log_size_bytes.saturating_add(size); stats.log_count = stats.log_count.saturating_add(1); } else if name.starts_with("MANIFEST-") { stats.manifest_size_bytes = stats.manifest_size_bytes.saturating_add(size); stats.manifest_count = stats.manifest_count.saturating_add(1); } else { stats.other_size_bytes = stats.other_size_bytes.saturating_add(size); stats.other_count = stats.other_count.saturating_add(1); } } Ok(stats) } fn cf_group(cf_name: &str) -> CfGroup { match cf_name { CF_REPOSITORY_VIEW | CF_RAW_BY_HASH => CfGroup::CurrentRepositoryView, CF_VCIR | CF_MANIFEST_REPLAY_META | CF_ROA_CACHE_PROJECTION | CF_TRANSPORT_PREFETCH => { CfGroup::CurrentValidationState } CF_RRDP_SOURCE | CF_RRDP_SOURCE_MEMBER | CF_RRDP_URI_OWNER => CfGroup::CurrentRrdpState, _ => CfGroup::LegacyCompatibility, } } fn summarize_counts<'a>( counts: impl IntoIterator, ) -> BTreeMap { let mut grouped = BTreeMap::new(); for (cf_name, count) in counts { *grouped.entry(cf_group(cf_name)).or_insert(0) += count; } grouped } fn summarize_cf_stats<'a>( stats: impl IntoIterator, ) -> BTreeMap { let mut grouped = BTreeMap::new(); for (cf_name, cf_stats) in stats { let entry = grouped .entry(cf_group(cf_name)) .or_insert_with(CfStats::default); entry.keys = entry.keys.saturating_add(cf_stats.keys); entry.key_bytes = entry.key_bytes.saturating_add(cf_stats.key_bytes); entry.value_bytes = entry.value_bytes.saturating_add(cf_stats.value_bytes); entry.metadata_size_bytes = entry .metadata_size_bytes .saturating_add(cf_stats.metadata_size_bytes); entry.metadata_file_count = entry .metadata_file_count .saturating_add(cf_stats.metadata_file_count); entry.live_sst_size_bytes = entry .live_sst_size_bytes .saturating_add(cf_stats.live_sst_size_bytes); entry.live_sst_files = entry.live_sst_files.saturating_add(cf_stats.live_sst_files); } grouped } fn mode_label(mode: DbStatsMode) -> &'static str { match mode { DbStatsMode::Estimate => "estimate", DbStatsMode::Exact => "exact", } } fn main() -> Result<(), Box> { let argv: Vec = std::env::args().collect(); if argv.iter().any(|a| a == "--help" || a == "-h") { print!("{}", usage()); return Ok(()); } let mut db_path: Option = None; let mut mode = DbStatsMode::Estimate; let mut i = 1usize; while i < argv.len() { match argv[i].as_str() { "--db" => { i += 1; let v = argv.get(i).ok_or("--db requires a value")?; db_path = Some(PathBuf::from(v)); } "--exact" => mode = DbStatsMode::Exact, other => return Err(format!("unknown argument: {other}\n\n{}", usage()).into()), } i += 1; } let db_path = db_path.ok_or_else(|| format!("--db is required\n\n{}", usage()))?; let mut opts = Options::default(); opts.create_if_missing(false); opts.create_missing_column_families(false); let db = DB::open_cf_descriptors(&opts, &db_path, column_family_descriptors())?; println!("db={}", db_path.display()); println!("mode={}", mode_label(mode)); println!( "logical_bytes_available={}", matches!(mode, DbStatsMode::Exact) ); let live_sst = live_sst_stats(&db)?; let mut per_cf = Vec::with_capacity(ALL_COLUMN_FAMILY_NAMES.len()); let mut total: u64 = 0; for &name in ALL_COLUMN_FAMILY_NAMES { let mut stats = match mode { DbStatsMode::Exact => exact_logical_stats(&db, name)?, DbStatsMode::Estimate => CfStats { keys: estimate_keys(&db, name)?.unwrap_or(0), ..CfStats::default() }, }; let (metadata_size_bytes, metadata_file_count) = cf_metadata_stats(&db, name)?; let (live_sst_size_bytes, live_sst_files) = live_sst.get(name).copied().unwrap_or((0, 0)); stats.metadata_size_bytes = metadata_size_bytes; stats.metadata_file_count = metadata_file_count; stats.live_sst_size_bytes = live_sst_size_bytes; stats.live_sst_files = live_sst_files; total = total.saturating_add(stats.keys); println!("{name}={}", stats.keys); per_cf.push((name, stats)); } println!("total={total}"); for (group, count) in summarize_counts(per_cf.iter().map(|(name, stats)| (*name, stats.keys))) { println!("group_{}={count}", group.as_str()); } for (name, stats) in &per_cf { println!("cf.{name}.keys={}", stats.keys); println!("cf.{name}.key_bytes={}", stats.key_bytes); println!("cf.{name}.value_bytes={}", stats.value_bytes); println!("cf.{name}.avg_key_bytes={:.2}", stats.avg_key_bytes()); println!("cf.{name}.avg_value_bytes={:.2}", stats.avg_value_bytes()); println!( "cf.{name}.metadata_size_bytes={}", stats.metadata_size_bytes ); println!( "cf.{name}.metadata_file_count={}", stats.metadata_file_count ); println!( "cf.{name}.live_sst_size_bytes={}", stats.live_sst_size_bytes ); println!("cf.{name}.live_sst_files={}", stats.live_sst_files); } for (group, stats) in summarize_cf_stats(per_cf.iter().map(|(name, stats)| (*name, stats))) { let group = group.as_str(); println!("group.{group}.keys={}", stats.keys); println!("group.{group}.key_bytes={}", stats.key_bytes); println!("group.{group}.value_bytes={}", stats.value_bytes); println!( "group.{group}.metadata_size_bytes={}", stats.metadata_size_bytes ); println!( "group.{group}.metadata_file_count={}", stats.metadata_file_count ); println!( "group.{group}.live_sst_size_bytes={}", stats.live_sst_size_bytes ); println!("group.{group}.live_sst_files={}", stats.live_sst_files); } let live_sst_file_count = live_sst.values().map(|(_, count)| *count).sum::(); println!("sst_files={live_sst_file_count}"); let file_stats = collect_db_file_stats(&db_path)?; println!("db.files.total_size_bytes={}", file_stats.total_size_bytes); println!("db.files.total_count={}", file_stats.total_count); println!("db.files.sst_size_bytes={}", file_stats.sst_size_bytes); println!("db.files.sst_count={}", file_stats.sst_count); println!("db.files.blob_size_bytes={}", file_stats.blob_size_bytes); println!("db.files.blob_count={}", file_stats.blob_count); println!("db.files.log_size_bytes={}", file_stats.log_size_bytes); println!("db.files.log_count={}", file_stats.log_count); println!( "db.files.manifest_size_bytes={}", file_stats.manifest_size_bytes ); println!("db.files.manifest_count={}", file_stats.manifest_count); println!("db.files.other_size_bytes={}", file_stats.other_size_bytes); println!("db.files.other_count={}", file_stats.other_count); Ok(()) } #[cfg(test)] mod tests { use super::*; #[test] fn cf_group_classifies_current_and_legacy_keyspaces() { assert_eq!(cf_group(CF_REPOSITORY_VIEW), CfGroup::CurrentRepositoryView); assert_eq!(cf_group(CF_RAW_BY_HASH), CfGroup::CurrentRepositoryView); assert_eq!(cf_group(CF_VCIR), CfGroup::CurrentValidationState); assert_eq!( cf_group(CF_MANIFEST_REPLAY_META), CfGroup::CurrentValidationState ); assert_eq!( cf_group(CF_ROA_CACHE_PROJECTION), CfGroup::CurrentValidationState ); assert_eq!( cf_group(CF_TRANSPORT_PREFETCH), CfGroup::CurrentValidationState ); assert_eq!(cf_group(CF_RRDP_SOURCE), CfGroup::CurrentRrdpState); assert_eq!(cf_group(CF_RRDP_URI_OWNER), CfGroup::CurrentRrdpState); assert_eq!(cf_group("unknown_legacy"), CfGroup::LegacyCompatibility); } #[test] fn summarize_counts_accumulates_by_group() { let grouped = summarize_counts([ (CF_REPOSITORY_VIEW, 5), (CF_RAW_BY_HASH, 7), (CF_VCIR, 11), (CF_MANIFEST_REPLAY_META, 13), (CF_ROA_CACHE_PROJECTION, 17), (CF_RRDP_SOURCE_MEMBER, 19), ]); assert_eq!(grouped.get(&CfGroup::CurrentRepositoryView), Some(&12)); assert_eq!(grouped.get(&CfGroup::CurrentValidationState), Some(&41)); assert_eq!(grouped.get(&CfGroup::CurrentRrdpState), Some(&19)); assert_eq!(grouped.get(&CfGroup::LegacyCompatibility), None); } #[test] fn summarize_cf_stats_accumulates_bytes_and_physical_stats_by_group() { let repo = CfStats { keys: 2, key_bytes: 20, value_bytes: 200, metadata_size_bytes: 50, metadata_file_count: 1, live_sst_size_bytes: 40, live_sst_files: 1, }; let vcir = CfStats { keys: 3, key_bytes: 30, value_bytes: 300, metadata_size_bytes: 60, metadata_file_count: 2, live_sst_size_bytes: 55, live_sst_files: 2, }; let replay_meta = CfStats { keys: 5, key_bytes: 50, value_bytes: 500, metadata_size_bytes: 70, metadata_file_count: 3, live_sst_size_bytes: 65, live_sst_files: 3, }; let grouped = summarize_cf_stats([ (CF_REPOSITORY_VIEW, &repo), (CF_VCIR, &vcir), (CF_MANIFEST_REPLAY_META, &replay_meta), ]); assert_eq!(grouped.get(&CfGroup::CurrentRepositoryView), Some(&repo)); assert_eq!( grouped.get(&CfGroup::CurrentValidationState), Some(&CfStats { keys: 8, key_bytes: 80, value_bytes: 800, metadata_size_bytes: 130, metadata_file_count: 5, live_sst_size_bytes: 120, live_sst_files: 5, }) ); } #[test] fn collect_db_file_stats_splits_rocksdb_file_types() { let td = tempfile::tempdir().expect("tempdir"); std::fs::write(td.path().join("000001.sst"), [0u8; 10]).expect("sst"); std::fs::write(td.path().join("000002.blob"), [0u8; 20]).expect("blob"); std::fs::write(td.path().join("000003.log"), [0u8; 30]).expect("wal"); std::fs::write(td.path().join("LOG.old.1"), [0u8; 40]).expect("log"); std::fs::write(td.path().join("MANIFEST-000004"), [0u8; 50]).expect("manifest"); std::fs::write(td.path().join("CURRENT"), [0u8; 60]).expect("other"); std::fs::create_dir(td.path().join("subdir")).expect("subdir"); let stats = collect_db_file_stats(td.path()).expect("stats"); assert_eq!(stats.sst_size_bytes, 10); assert_eq!(stats.sst_count, 1); assert_eq!(stats.blob_size_bytes, 20); assert_eq!(stats.blob_count, 1); assert_eq!(stats.log_size_bytes, 70); assert_eq!(stats.log_count, 2); assert_eq!(stats.manifest_size_bytes, 50); assert_eq!(stats.manifest_count, 1); assert_eq!(stats.other_size_bytes, 60); assert_eq!(stats.other_count, 1); assert_eq!(stats.total_size_bytes, 210); assert_eq!(stats.total_count, 6); } #[test] fn usage_mentions_grouped_output_and_exact_mode() { let text = usage(); assert!(text.contains("--exact"), "{text}"); assert!(text.contains("current_validation_state"), "{text}"); assert!(text.contains("current_rrdp_state"), "{text}"); assert!(text.contains("db.files.*"), "{text}"); } }