From 26aec5ff355ee5fe30b9964ad3deb737bc3f844f Mon Sep 17 00:00:00 2001 From: yuyr Date: Mon, 27 Apr 2026 18:00:34 +0800 Subject: [PATCH] =?UTF-8?q?20260427=5F3=20=E5=A2=9E=E5=BC=BAdb=5Fstats?= =?UTF-8?q?=E5=BD=92=E5=9B=A0work-db=E4=BD=93=E7=A7=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/bin/db_stats.rs | 317 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 301 insertions(+), 16 deletions(-) diff --git a/src/bin/db_stats.rs b/src/bin/db_stats.rs index 2f46c79..ac50f22 100644 --- a/src/bin/db_stats.rs +++ b/src/bin/db_stats.rs @@ -1,5 +1,6 @@ use std::collections::BTreeMap; -use std::path::PathBuf; +use std::fs; +use std::path::{Path, PathBuf}; use rocksdb::{DB, IteratorMode, Options}; use rpki::storage::{ @@ -21,6 +22,43 @@ enum CfGroup { LegacyCompatibility, } +#[derive(Clone, Debug, Default, PartialEq, Eq)] +struct CfStats { + keys: u64, + key_bytes: u64, + value_bytes: u64, + metadata_size_bytes: u64, + metadata_file_count: u64, + live_sst_size_bytes: u64, + live_sst_files: u64, +} + +impl CfStats { + fn avg_key_bytes(&self) -> f64 { + avg_bytes(self.key_bytes, self.keys) + } + + fn avg_value_bytes(&self) -> f64 { + avg_bytes(self.value_bytes, self.keys) + } +} + +#[derive(Clone, Debug, Default, PartialEq, Eq)] +struct DbFileStats { + total_size_bytes: u64, + total_count: u64, + sst_size_bytes: u64, + sst_count: u64, + blob_size_bytes: u64, + blob_count: u64, + log_size_bytes: u64, + log_count: u64, + manifest_size_bytes: u64, + manifest_count: u64, + other_size_bytes: u64, + other_count: u64, +} + impl CfGroup { fn as_str(self) -> &'static str { match self { @@ -32,6 +70,14 @@ impl CfGroup { } } +fn avg_bytes(bytes: u64, keys: u64) -> f64 { + if keys == 0 { + 0.0 + } else { + bytes as f64 / keys as f64 + } +} + fn usage() -> String { let bin = "db_stats"; format!( @@ -41,9 +87,15 @@ Usage: Options: --db RocksDB directory - --exact Iterate to count keys (slower; default uses RocksDB estimates) 
+ --exact Iterate to count keys and logical bytes (slower; default uses RocksDB estimates) --help Show this help +Output: + - legacy fields: <cf_name>=<keys>, group_<group>=<keys>, sst_files=<count> + - cf.<cf_name>.*: key/value bytes, RocksDB metadata, live SST size + - group.<group>.*: grouped key/value bytes and physical SST metadata + - db.files.*: DB directory file totals split by .sst/.blob/.log/MANIFEST/other + Output groups: - current_repository_view: repository_view + raw_by_hash - current_validation_state: vcir + audit_rule_index @@ -59,17 +111,72 @@ fn estimate_keys(db: &DB, cf_name: &str) -> Result<Option<u64>, Box<dyn std::error::Error>> { -fn exact_keys(db: &DB, cf_name: &str) -> Result<u64, Box<dyn std::error::Error>> { +fn exact_logical_stats(db: &DB, cf_name: &str) -> Result<CfStats, Box<dyn std::error::Error>> { let cf = db .cf_handle(cf_name) .ok_or_else(|| format!("missing column family: {cf_name}"))?; let mode = IteratorMode::Start; - let mut count = 0u64; + let mut stats = CfStats::default(); for res in db.iterator_cf(cf, mode) { - res?; - count += 1; + let (key, value) = res?; + stats.keys = stats.keys.saturating_add(1); + stats.key_bytes = stats.key_bytes.saturating_add(key.len() as u64); + stats.value_bytes = stats.value_bytes.saturating_add(value.len() as u64); } - Ok(count) + Ok(stats) +} + +fn cf_metadata_stats(db: &DB, cf_name: &str) -> Result<(u64, u64), Box<dyn std::error::Error>> { + let cf = db + .cf_handle(cf_name) + .ok_or_else(|| format!("missing column family: {cf_name}"))?; + let metadata = db.get_column_family_metadata_cf(cf); + Ok((metadata.size, metadata.file_count as u64)) +} + +fn live_sst_stats(db: &DB) -> Result<BTreeMap<String, (u64, u64)>, Box<dyn std::error::Error>> { + let mut stats = BTreeMap::new(); + for file in db.live_files()? { + let entry = stats.entry(file.column_family_name).or_insert((0u64, 0u64)); + entry.0 = entry.0.saturating_add(file.size as u64); + entry.1 = entry.1.saturating_add(1); + } + Ok(stats) +} + +fn collect_db_file_stats(db_path: &Path) -> Result<DbFileStats, Box<dyn std::error::Error>> { + let mut stats = DbFileStats::default(); + for entry in fs::read_dir(db_path)?
{ + let entry = entry?; + if !entry.file_type()?.is_file() { + continue; + } + + let size = entry.metadata()?.len(); + let name = entry.file_name(); + let name = name.to_string_lossy(); + + stats.total_size_bytes = stats.total_size_bytes.saturating_add(size); + stats.total_count = stats.total_count.saturating_add(1); + + if name.ends_with(".sst") { + stats.sst_size_bytes = stats.sst_size_bytes.saturating_add(size); + stats.sst_count = stats.sst_count.saturating_add(1); + } else if name.ends_with(".blob") { + stats.blob_size_bytes = stats.blob_size_bytes.saturating_add(size); + stats.blob_count = stats.blob_count.saturating_add(1); + } else if name == "LOG" || name.starts_with("LOG.") || name.ends_with(".log") { + stats.log_size_bytes = stats.log_size_bytes.saturating_add(size); + stats.log_count = stats.log_count.saturating_add(1); + } else if name.starts_with("MANIFEST-") { + stats.manifest_size_bytes = stats.manifest_size_bytes.saturating_add(size); + stats.manifest_count = stats.manifest_count.saturating_add(1); + } else { + stats.other_size_bytes = stats.other_size_bytes.saturating_add(size); + stats.other_count = stats.other_count.saturating_add(1); + } + } + Ok(stats) } fn cf_group(cf_name: &str) -> CfGroup { @@ -91,6 +198,31 @@ fn summarize_counts<'a>( grouped } +fn summarize_cf_stats<'a>( + stats: impl IntoIterator<Item = (&'a str, &'a CfStats)>, +) -> BTreeMap<CfGroup, CfStats> { + let mut grouped = BTreeMap::new(); + for (cf_name, cf_stats) in stats { + let entry = grouped + .entry(cf_group(cf_name)) + .or_insert_with(CfStats::default); + entry.keys = entry.keys.saturating_add(cf_stats.keys); + entry.key_bytes = entry.key_bytes.saturating_add(cf_stats.key_bytes); + entry.value_bytes = entry.value_bytes.saturating_add(cf_stats.value_bytes); + entry.metadata_size_bytes = entry + .metadata_size_bytes + .saturating_add(cf_stats.metadata_size_bytes); + entry.metadata_file_count = entry + .metadata_file_count + .saturating_add(cf_stats.metadata_file_count); + entry.live_sst_size_bytes = entry + 
.live_sst_size_bytes + .saturating_add(cf_stats.live_sst_size_bytes); + entry.live_sst_files = entry.live_sst_files.saturating_add(cf_stats.live_sst_files); + } + grouped +} + fn mode_label(mode: DbStatsMode) -> &'static str { match mode { DbStatsMode::Estimate => "estimate", @@ -131,26 +263,100 @@ fn main() -> Result<(), Box<dyn std::error::Error>> { println!("db={}", db_path.display()); println!("mode={}", mode_label(mode)); + println!( + "logical_bytes_available={}", + matches!(mode, DbStatsMode::Exact) + ); + let live_sst = live_sst_stats(&db)?; let mut per_cf = Vec::with_capacity(ALL_COLUMN_FAMILY_NAMES.len()); let mut total: u64 = 0; for &name in ALL_COLUMN_FAMILY_NAMES { - let n = match mode { - DbStatsMode::Exact => exact_keys(&db, name)?, - DbStatsMode::Estimate => estimate_keys(&db, name)?.unwrap_or(0), + let mut stats = match mode { + DbStatsMode::Exact => exact_logical_stats(&db, name)?, + DbStatsMode::Estimate => CfStats { + keys: estimate_keys(&db, name)?.unwrap_or(0), + ..CfStats::default() + }, }; - total = total.saturating_add(n); - per_cf.push((name, n)); - println!("{name}={n}"); + + let (metadata_size_bytes, metadata_file_count) = cf_metadata_stats(&db, name)?; + let (live_sst_size_bytes, live_sst_files) = live_sst.get(name).copied().unwrap_or((0, 0)); + stats.metadata_size_bytes = metadata_size_bytes; + stats.metadata_file_count = metadata_file_count; + stats.live_sst_size_bytes = live_sst_size_bytes; + stats.live_sst_files = live_sst_files; + + total = total.saturating_add(stats.keys); + println!("{name}={}", stats.keys); + per_cf.push((name, stats)); } println!("total={total}"); - for (group, count) in summarize_counts(per_cf.iter().copied()) { + for (group, count) in summarize_counts(per_cf.iter().map(|(name, stats)| (*name, stats.keys))) { println!("group_{}={count}", group.as_str()); } - let live = db.live_files()?; - println!("sst_files={}", live.len()); + for (name, stats) in &per_cf { + println!("cf.{name}.keys={}", stats.keys); + 
println!("cf.{name}.key_bytes={}", stats.key_bytes); + println!("cf.{name}.value_bytes={}", stats.value_bytes); + println!("cf.{name}.avg_key_bytes={:.2}", stats.avg_key_bytes()); + println!("cf.{name}.avg_value_bytes={:.2}", stats.avg_value_bytes()); + println!( + "cf.{name}.metadata_size_bytes={}", + stats.metadata_size_bytes + ); + println!( + "cf.{name}.metadata_file_count={}", + stats.metadata_file_count + ); + println!( + "cf.{name}.live_sst_size_bytes={}", + stats.live_sst_size_bytes + ); + println!("cf.{name}.live_sst_files={}", stats.live_sst_files); + } + + for (group, stats) in summarize_cf_stats(per_cf.iter().map(|(name, stats)| (*name, stats))) { + let group = group.as_str(); + println!("group.{group}.keys={}", stats.keys); + println!("group.{group}.key_bytes={}", stats.key_bytes); + println!("group.{group}.value_bytes={}", stats.value_bytes); + println!( + "group.{group}.metadata_size_bytes={}", + stats.metadata_size_bytes + ); + println!( + "group.{group}.metadata_file_count={}", + stats.metadata_file_count + ); + println!( + "group.{group}.live_sst_size_bytes={}", + stats.live_sst_size_bytes + ); + println!("group.{group}.live_sst_files={}", stats.live_sst_files); + } + + let live_sst_file_count = live_sst.values().map(|(_, count)| *count).sum::<u64>(); + println!("sst_files={live_sst_file_count}"); + + let file_stats = collect_db_file_stats(&db_path)?; + println!("db.files.total_size_bytes={}", file_stats.total_size_bytes); + println!("db.files.total_count={}", file_stats.total_count); + println!("db.files.sst_size_bytes={}", file_stats.sst_size_bytes); + println!("db.files.sst_count={}", file_stats.sst_count); + println!("db.files.blob_size_bytes={}", file_stats.blob_size_bytes); + println!("db.files.blob_count={}", file_stats.blob_count); + println!("db.files.log_size_bytes={}", file_stats.log_size_bytes); + println!("db.files.log_count={}", file_stats.log_count); + println!( + "db.files.manifest_size_bytes={}", + file_stats.manifest_size_bytes + ); + 
println!("db.files.manifest_count={}", file_stats.manifest_count); + println!("db.files.other_size_bytes={}", file_stats.other_size_bytes); + println!("db.files.other_count={}", file_stats.other_count); Ok(()) } @@ -189,11 +395,90 @@ mod tests { assert_eq!(grouped.get(&CfGroup::LegacyCompatibility), None); } + #[test] + fn summarize_cf_stats_accumulates_bytes_and_physical_stats_by_group() { + let repo = CfStats { + keys: 2, + key_bytes: 20, + value_bytes: 200, + metadata_size_bytes: 50, + metadata_file_count: 1, + live_sst_size_bytes: 40, + live_sst_files: 1, + }; + let vcir = CfStats { + keys: 3, + key_bytes: 30, + value_bytes: 300, + metadata_size_bytes: 60, + metadata_file_count: 2, + live_sst_size_bytes: 55, + live_sst_files: 2, + }; + let audit = CfStats { + keys: 5, + key_bytes: 50, + value_bytes: 500, + metadata_size_bytes: 70, + metadata_file_count: 3, + live_sst_size_bytes: 65, + live_sst_files: 3, + }; + + let grouped = summarize_cf_stats([ + (CF_REPOSITORY_VIEW, &repo), + (CF_VCIR, &vcir), + (CF_AUDIT_RULE_INDEX, &audit), + ]); + + assert_eq!(grouped.get(&CfGroup::CurrentRepositoryView), Some(&repo)); + assert_eq!( + grouped.get(&CfGroup::CurrentValidationState), + Some(&CfStats { + keys: 8, + key_bytes: 80, + value_bytes: 800, + metadata_size_bytes: 130, + metadata_file_count: 5, + live_sst_size_bytes: 120, + live_sst_files: 5, + }) + ); + } + + #[test] + fn collect_db_file_stats_splits_rocksdb_file_types() { + let td = tempfile::tempdir().expect("tempdir"); + std::fs::write(td.path().join("000001.sst"), [0u8; 10]).expect("sst"); + std::fs::write(td.path().join("000002.blob"), [0u8; 20]).expect("blob"); + std::fs::write(td.path().join("000003.log"), [0u8; 30]).expect("wal"); + std::fs::write(td.path().join("LOG.old.1"), [0u8; 40]).expect("log"); + std::fs::write(td.path().join("MANIFEST-000004"), [0u8; 50]).expect("manifest"); + std::fs::write(td.path().join("CURRENT"), [0u8; 60]).expect("other"); + 
std::fs::create_dir(td.path().join("subdir")).expect("subdir"); + + let stats = collect_db_file_stats(td.path()).expect("stats"); + + assert_eq!(stats.sst_size_bytes, 10); + assert_eq!(stats.sst_count, 1); + assert_eq!(stats.blob_size_bytes, 20); + assert_eq!(stats.blob_count, 1); + assert_eq!(stats.log_size_bytes, 70); + assert_eq!(stats.log_count, 2); + assert_eq!(stats.manifest_size_bytes, 50); + assert_eq!(stats.manifest_count, 1); + assert_eq!(stats.other_size_bytes, 60); + assert_eq!(stats.other_count, 1); + assert_eq!(stats.total_size_bytes, 210); + assert_eq!(stats.total_count, 6); + } + #[test] fn usage_mentions_grouped_output_and_exact_mode() { let text = usage(); assert!(text.contains("--exact"), "{text}"); assert!(text.contains("current_validation_state"), "{text}"); assert!(text.contains("current_rrdp_state"), "{text}"); + assert!(text.contains("db.files.*"), "{text}"); } }