// rpki/src/bin/db_stats.rs
//
// 497 lines
// 17 KiB
// Rust

use std::collections::BTreeMap;
use std::fs;
use std::path::{Path, PathBuf};
use rocksdb::{DB, IteratorMode, Options};
use rpki::storage::{
ALL_COLUMN_FAMILY_NAMES, CF_MANIFEST_REPLAY_META, CF_RAW_BY_HASH, CF_REPOSITORY_VIEW,
CF_ROA_CACHE_PROJECTION, CF_RRDP_SOURCE, CF_RRDP_SOURCE_MEMBER, CF_RRDP_URI_OWNER,
CF_TRANSPORT_PREFETCH, CF_VCIR, column_family_descriptors,
};
/// How per-column-family key statistics are gathered.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
enum DbStatsMode {
    /// Read the `rocksdb.estimate-num-keys` property; logical byte totals
    /// are left at zero.
    Estimate,
    /// Iterate every entry to count keys and sum key/value lengths.
    Exact,
}
/// Reporting bucket that a column family is rolled up into.
///
/// The variant order is significant: the derived `Ord` determines the order
/// in which groups are emitted when they are used as `BTreeMap` keys.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd)]
enum CfGroup {
    /// Column families holding the repository view and raw objects by hash.
    CurrentRepositoryView,
    /// Column families holding validation results and related projections.
    CurrentValidationState,
    /// Column families holding RRDP source bookkeeping.
    CurrentRrdpState,
    /// Any column family that is not assigned to one of the groups above.
    LegacyCompatibility,
}
/// Logical and physical statistics for one column family (or one group of
/// column families once aggregated).
#[derive(Debug, Default, Clone, Eq, PartialEq)]
struct CfStats {
    /// Number of keys: an exact count or a RocksDB estimate, depending on mode.
    keys: u64,
    /// Sum of key lengths in bytes (only filled by exact iteration).
    key_bytes: u64,
    /// Sum of value lengths in bytes (only filled by exact iteration).
    value_bytes: u64,
    /// `size` reported by the column family metadata.
    metadata_size_bytes: u64,
    /// `file_count` reported by the column family metadata.
    metadata_file_count: u64,
    /// Summed size of the live files attributed to this column family.
    live_sst_size_bytes: u64,
    /// Number of live files attributed to this column family.
    live_sst_files: u64,
}
impl CfStats {
    /// Mean key length in bytes; `0.0` when there are no keys.
    fn avg_key_bytes(&self) -> f64 {
        let Self { key_bytes, keys, .. } = self;
        avg_bytes(*key_bytes, *keys)
    }

    /// Mean value length in bytes; `0.0` when there are no keys.
    fn avg_value_bytes(&self) -> f64 {
        let Self {
            value_bytes, keys, ..
        } = self;
        avg_bytes(*value_bytes, *keys)
    }
}
/// Size and count totals for the regular files found directly inside the DB
/// directory, split by file kind.
#[derive(Debug, Default, Clone, Eq, PartialEq)]
struct DbFileStats {
    /// Bytes across every regular file.
    total_size_bytes: u64,
    /// Number of regular files.
    total_count: u64,
    /// Bytes in `*.sst` files.
    sst_size_bytes: u64,
    /// Number of `*.sst` files.
    sst_count: u64,
    /// Bytes in `*.blob` files.
    blob_size_bytes: u64,
    /// Number of `*.blob` files.
    blob_count: u64,
    /// Bytes in `LOG`, `LOG.*` and `*.log` files.
    log_size_bytes: u64,
    /// Number of `LOG`, `LOG.*` and `*.log` files.
    log_count: u64,
    /// Bytes in `MANIFEST-*` files.
    manifest_size_bytes: u64,
    /// Number of `MANIFEST-*` files.
    manifest_count: u64,
    /// Bytes in files that match none of the kinds above.
    other_size_bytes: u64,
    /// Number of files that match none of the kinds above.
    other_count: u64,
}
impl CfGroup {
fn as_str(self) -> &'static str {
match self {
Self::CurrentRepositoryView => "current_repository_view",
Self::CurrentValidationState => "current_validation_state",
Self::CurrentRrdpState => "current_rrdp_state",
Self::LegacyCompatibility => "legacy_compatibility",
}
}
}
/// Returns `bytes / keys` as a float, or `0.0` when `keys` is zero so that an
/// empty column family never produces a NaN or infinity.
fn avg_bytes(bytes: u64, keys: u64) -> f64 {
    match keys {
        0 => 0.0,
        count => bytes as f64 / count as f64,
    }
}
/// Builds the help text shown for `--help` and appended to argument errors.
///
/// The literal's lines must stay at column 0: any indentation added inside
/// the string would become part of the printed text.
fn usage() -> String {
    format!(
        "\
Usage:
{bin} --db <path> [--exact]
Options:
--db <path> RocksDB directory
--exact Iterate to count keys and logical bytes (slower; default uses RocksDB estimates)
--help Show this help
Output:
- legacy fields: <cf>=<keys>, group_<name>=<keys>, sst_files=<n>
- cf.<name>.*: key/value bytes, RocksDB metadata, live SST size
- group.<name>.*: grouped key/value bytes and physical SST metadata
- db.files.*: DB directory file totals split by .sst/.blob/.log/MANIFEST/other
Output groups:
- current_repository_view: repository_view + raw_by_hash
- current_validation_state: vcir + manifest_replay_meta + roa_cache_projection + transport_prefetch
- current_rrdp_state: rrdp_source + rrdp_source_member + rrdp_uri_owner
",
        bin = "db_stats"
    )
}
/// Reads the `rocksdb.estimate-num-keys` property for the column family
/// `cf_name`.
///
/// # Errors
/// Fails when the column family handle does not exist or when the property
/// lookup itself returns an error. An absent value (`None`) is passed
/// through from RocksDB unchanged.
fn estimate_keys(db: &DB, cf_name: &str) -> Result<Option<u64>, Box<dyn std::error::Error>> {
    let cf = match db.cf_handle(cf_name) {
        Some(handle) => handle,
        None => return Err(format!("missing column family: {cf_name}").into()),
    };
    let estimate = db.property_int_value_cf(cf, "rocksdb.estimate-num-keys")?;
    Ok(estimate)
}
fn exact_logical_stats(db: &DB, cf_name: &str) -> Result<CfStats, Box<dyn std::error::Error>> {
let cf = db
.cf_handle(cf_name)
.ok_or_else(|| format!("missing column family: {cf_name}"))?;
let mode = IteratorMode::Start;
let mut stats = CfStats::default();
for res in db.iterator_cf(cf, mode) {
let (key, value) = res?;
stats.keys = stats.keys.saturating_add(1);
stats.key_bytes = stats.key_bytes.saturating_add(key.len() as u64);
stats.value_bytes = stats.value_bytes.saturating_add(value.len() as u64);
}
Ok(stats)
}
/// Returns `(size, file_count)` taken from the column family metadata of
/// `cf_name`.
///
/// # Errors
/// Fails when the column family handle does not exist.
fn cf_metadata_stats(db: &DB, cf_name: &str) -> Result<(u64, u64), Box<dyn std::error::Error>> {
    let cf = match db.cf_handle(cf_name) {
        Some(handle) => handle,
        None => return Err(format!("missing column family: {cf_name}").into()),
    };
    let meta = db.get_column_family_metadata_cf(cf);
    let file_count = meta.file_count as u64;
    Ok((meta.size, file_count))
}
/// Groups the database's live files by column family name.
///
/// Each map value is `(total size in bytes, number of files)`. Column
/// families without any live file have no entry.
///
/// # Errors
/// Fails when listing the live files fails.
fn live_sst_stats(db: &DB) -> Result<BTreeMap<String, (u64, u64)>, Box<dyn std::error::Error>> {
    let mut per_cf: BTreeMap<String, (u64, u64)> = BTreeMap::new();
    for live_file in db.live_files()? {
        let (bytes, files) = per_cf
            .entry(live_file.column_family_name)
            .or_insert((0, 0));
        *bytes = bytes.saturating_add(live_file.size as u64);
        *files = files.saturating_add(1);
    }
    Ok(per_cf)
}
/// Scans the top level of `db_path` and totals file sizes and counts by kind.
///
/// Only regular files are considered; directories and other entry types are
/// skipped and the scan does not recurse. Kinds are decided by file name:
/// `*.sst`, `*.blob`, logs (`LOG`, `LOG.*`, `*.log`), `MANIFEST-*`, and
/// everything else.
///
/// # Errors
/// Fails when the directory cannot be read or when an entry's type or
/// metadata cannot be obtained.
fn collect_db_file_stats(db_path: &Path) -> Result<DbFileStats, Box<dyn std::error::Error>> {
    let mut totals = DbFileStats::default();
    for dir_entry in fs::read_dir(db_path)? {
        let dir_entry = dir_entry?;
        let is_regular_file = dir_entry.file_type()?.is_file();
        if !is_regular_file {
            continue;
        }
        let len = dir_entry.metadata()?.len();
        let os_name = dir_entry.file_name();
        let lossy = os_name.to_string_lossy();
        let file_name: &str = lossy.as_ref();

        // Pick the (bytes, count) pair for this file's kind; the first
        // matching rule wins.
        let (kind_bytes, kind_count) = if file_name.ends_with(".sst") {
            (&mut totals.sst_size_bytes, &mut totals.sst_count)
        } else if file_name.ends_with(".blob") {
            (&mut totals.blob_size_bytes, &mut totals.blob_count)
        } else if file_name == "LOG"
            || file_name.starts_with("LOG.")
            || file_name.ends_with(".log")
        {
            (&mut totals.log_size_bytes, &mut totals.log_count)
        } else if file_name.starts_with("MANIFEST-") {
            (&mut totals.manifest_size_bytes, &mut totals.manifest_count)
        } else {
            (&mut totals.other_size_bytes, &mut totals.other_count)
        };
        *kind_bytes = kind_bytes.saturating_add(len);
        *kind_count = kind_count.saturating_add(1);

        totals.total_size_bytes = totals.total_size_bytes.saturating_add(len);
        totals.total_count = totals.total_count.saturating_add(1);
    }
    Ok(totals)
}
/// Maps a column family name to its reporting group.
///
/// Names that belong to none of the current groups fall into
/// `CfGroup::LegacyCompatibility`.
fn cf_group(cf_name: &str) -> CfGroup {
    if matches!(cf_name, CF_REPOSITORY_VIEW | CF_RAW_BY_HASH) {
        return CfGroup::CurrentRepositoryView;
    }
    if matches!(
        cf_name,
        CF_VCIR | CF_MANIFEST_REPLAY_META | CF_ROA_CACHE_PROJECTION | CF_TRANSPORT_PREFETCH
    ) {
        return CfGroup::CurrentValidationState;
    }
    if matches!(
        cf_name,
        CF_RRDP_SOURCE | CF_RRDP_SOURCE_MEMBER | CF_RRDP_URI_OWNER
    ) {
        return CfGroup::CurrentRrdpState;
    }
    CfGroup::LegacyCompatibility
}
/// Sums per-column-family key counts into per-group totals.
///
/// `counts` yields `(column family name, key count)` pairs. Groups that
/// receive no column family are absent from the result. Totals saturate at
/// `u64::MAX` instead of overflowing, matching the other accumulators in
/// this file.
fn summarize_counts<'a>(
    counts: impl IntoIterator<Item = (&'a str, u64)>,
) -> BTreeMap<CfGroup, u64> {
    let mut grouped: BTreeMap<CfGroup, u64> = BTreeMap::new();
    for (cf_name, count) in counts {
        let total = grouped.entry(cf_group(cf_name)).or_insert(0);
        *total = total.saturating_add(count);
    }
    grouped
}
/// Adds up per-column-family statistics into per-group statistics.
///
/// `stats` yields `(column family name, statistics)` pairs. Every field is
/// accumulated with saturating addition; groups that receive no column
/// family are absent from the result.
fn summarize_cf_stats<'a>(
    stats: impl IntoIterator<Item = (&'a str, &'a CfStats)>,
) -> BTreeMap<CfGroup, CfStats> {
    stats.into_iter().fold(
        BTreeMap::<CfGroup, CfStats>::new(),
        |mut by_group, (cf_name, item)| {
            let acc = by_group.entry(cf_group(cf_name)).or_default();
            acc.keys = acc.keys.saturating_add(item.keys);
            acc.key_bytes = acc.key_bytes.saturating_add(item.key_bytes);
            acc.value_bytes = acc.value_bytes.saturating_add(item.value_bytes);
            acc.metadata_size_bytes = acc
                .metadata_size_bytes
                .saturating_add(item.metadata_size_bytes);
            acc.metadata_file_count = acc
                .metadata_file_count
                .saturating_add(item.metadata_file_count);
            acc.live_sst_size_bytes = acc
                .live_sst_size_bytes
                .saturating_add(item.live_sst_size_bytes);
            acc.live_sst_files = acc.live_sst_files.saturating_add(item.live_sst_files);
            by_group
        },
    )
}
/// Returns the lowercase label printed on the `mode=` output line.
fn mode_label(mode: DbStatsMode) -> &'static str {
    match mode {
        DbStatsMode::Exact => "exact",
        DbStatsMode::Estimate => "estimate",
    }
}
/// Entry point: parses the command line, opens the database and prints
/// `key=value` statistics lines to stdout.
///
/// Output order: header (`db`, `mode`, `logical_bytes_available`), the legacy
/// per-column-family key counts, `total`, the legacy `group_*` counts, the
/// detailed `cf.*` lines, the detailed `group.*` lines, `sst_files`, and
/// finally the `db.files.*` directory totals.
///
/// # Errors
/// Returns an error for an unknown argument, a missing `--db` value, a
/// database that cannot be opened, or any failure while collecting
/// statistics.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let args: Vec<String> = std::env::args().collect();

    // Help takes priority over every other argument, wherever it appears.
    let wants_help = args
        .iter()
        .any(|arg| matches!(arg.as_str(), "--help" | "-h"));
    if wants_help {
        print!("{}", usage());
        return Ok(());
    }

    let mut db_path: Option<PathBuf> = None;
    let mut mode = DbStatsMode::Estimate;
    let mut rest = args.iter().skip(1);
    while let Some(arg) = rest.next() {
        match arg.as_str() {
            "--db" => {
                let value = rest.next().ok_or("--db requires a value")?;
                db_path = Some(PathBuf::from(value));
            }
            "--exact" => mode = DbStatsMode::Exact,
            other => return Err(format!("unknown argument: {other}\n\n{}", usage()).into()),
        }
    }
    let db_path = match db_path {
        Some(path) => path,
        None => return Err(format!("--db is required\n\n{}", usage()).into()),
    };

    // Never create a database or column families as a side effect of
    // inspecting one.
    let mut open_opts = Options::default();
    open_opts.create_if_missing(false);
    open_opts.create_missing_column_families(false);
    let db = DB::open_cf_descriptors(&open_opts, &db_path, column_family_descriptors())?;

    println!("db={}", db_path.display());
    println!("mode={}", mode_label(mode));
    println!(
        "logical_bytes_available={}",
        mode == DbStatsMode::Exact
    );

    let live_sst = live_sst_stats(&db)?;
    let mut per_cf = Vec::with_capacity(ALL_COLUMN_FAMILY_NAMES.len());
    let mut total: u64 = 0;
    for &name in ALL_COLUMN_FAMILY_NAMES {
        // Logical figures first: exact iteration, or just the key estimate.
        let logical = match mode {
            DbStatsMode::Estimate => CfStats {
                keys: estimate_keys(&db, name)?.unwrap_or(0),
                ..CfStats::default()
            },
            DbStatsMode::Exact => exact_logical_stats(&db, name)?,
        };
        let (metadata_size_bytes, metadata_file_count) = cf_metadata_stats(&db, name)?;
        let (live_sst_size_bytes, live_sst_files) =
            live_sst.get(name).copied().unwrap_or((0, 0));
        let stats = CfStats {
            metadata_size_bytes,
            metadata_file_count,
            live_sst_size_bytes,
            live_sst_files,
            ..logical
        };

        total = total.saturating_add(stats.keys);
        println!("{name}={}", stats.keys);
        per_cf.push((name, stats));
    }
    println!("total={total}");

    let legacy_group_counts =
        summarize_counts(per_cf.iter().map(|(name, stats)| (*name, stats.keys)));
    for (group, count) in legacy_group_counts {
        println!("group_{}={count}", group.as_str());
    }

    for (name, stats) in &per_cf {
        println!("cf.{name}.keys={}", stats.keys);
        println!("cf.{name}.key_bytes={}", stats.key_bytes);
        println!("cf.{name}.value_bytes={}", stats.value_bytes);
        println!("cf.{name}.avg_key_bytes={:.2}", stats.avg_key_bytes());
        println!("cf.{name}.avg_value_bytes={:.2}", stats.avg_value_bytes());
        println!(
            "cf.{name}.metadata_size_bytes={}",
            stats.metadata_size_bytes
        );
        println!(
            "cf.{name}.metadata_file_count={}",
            stats.metadata_file_count
        );
        println!(
            "cf.{name}.live_sst_size_bytes={}",
            stats.live_sst_size_bytes
        );
        println!("cf.{name}.live_sst_files={}", stats.live_sst_files);
    }

    let grouped_stats = summarize_cf_stats(per_cf.iter().map(|(name, stats)| (*name, stats)));
    for (group, stats) in grouped_stats {
        let group = group.as_str();
        println!("group.{group}.keys={}", stats.keys);
        println!("group.{group}.key_bytes={}", stats.key_bytes);
        println!("group.{group}.value_bytes={}", stats.value_bytes);
        println!(
            "group.{group}.metadata_size_bytes={}",
            stats.metadata_size_bytes
        );
        println!(
            "group.{group}.metadata_file_count={}",
            stats.metadata_file_count
        );
        println!(
            "group.{group}.live_sst_size_bytes={}",
            stats.live_sst_size_bytes
        );
        println!("group.{group}.live_sst_files={}", stats.live_sst_files);
    }

    let live_sst_file_count: u64 = live_sst.values().map(|&(_, files)| files).sum();
    println!("sst_files={live_sst_file_count}");

    let file_stats = collect_db_file_stats(&db_path)?;
    println!("db.files.total_size_bytes={}", file_stats.total_size_bytes);
    println!("db.files.total_count={}", file_stats.total_count);
    println!("db.files.sst_size_bytes={}", file_stats.sst_size_bytes);
    println!("db.files.sst_count={}", file_stats.sst_count);
    println!("db.files.blob_size_bytes={}", file_stats.blob_size_bytes);
    println!("db.files.blob_count={}", file_stats.blob_count);
    println!("db.files.log_size_bytes={}", file_stats.log_size_bytes);
    println!("db.files.log_count={}", file_stats.log_count);
    println!(
        "db.files.manifest_size_bytes={}",
        file_stats.manifest_size_bytes
    );
    println!("db.files.manifest_count={}", file_stats.manifest_count);
    println!("db.files.other_size_bytes={}", file_stats.other_size_bytes);
    println!("db.files.other_count={}", file_stats.other_count);
    Ok(())
}
#[cfg(test)]
mod tests {
    //! Unit tests for the pure helpers of the `db_stats` binary. Nothing here
    //! opens a RocksDB instance; the file-scan test works on a temp directory.

    use super::*;

    /// Every column family named in `cf_group` lands in its intended group,
    /// and unknown names fall back to the legacy group.
    #[test]
    fn cf_group_classifies_current_and_legacy_keyspaces() {
        assert_eq!(cf_group(CF_REPOSITORY_VIEW), CfGroup::CurrentRepositoryView);
        assert_eq!(cf_group(CF_RAW_BY_HASH), CfGroup::CurrentRepositoryView);
        assert_eq!(cf_group(CF_VCIR), CfGroup::CurrentValidationState);
        assert_eq!(
            cf_group(CF_MANIFEST_REPLAY_META),
            CfGroup::CurrentValidationState
        );
        assert_eq!(
            cf_group(CF_ROA_CACHE_PROJECTION),
            CfGroup::CurrentValidationState
        );
        assert_eq!(
            cf_group(CF_TRANSPORT_PREFETCH),
            CfGroup::CurrentValidationState
        );
        assert_eq!(cf_group(CF_RRDP_SOURCE), CfGroup::CurrentRrdpState);
        // Previously the only grouped column family without a direct assertion.
        assert_eq!(cf_group(CF_RRDP_SOURCE_MEMBER), CfGroup::CurrentRrdpState);
        assert_eq!(cf_group(CF_RRDP_URI_OWNER), CfGroup::CurrentRrdpState);
        assert_eq!(cf_group("unknown_legacy"), CfGroup::LegacyCompatibility);
    }

    /// Key counts are summed per group, and untouched groups stay absent.
    #[test]
    fn summarize_counts_accumulates_by_group() {
        let grouped = summarize_counts([
            (CF_REPOSITORY_VIEW, 5),
            (CF_RAW_BY_HASH, 7),
            (CF_VCIR, 11),
            (CF_MANIFEST_REPLAY_META, 13),
            (CF_ROA_CACHE_PROJECTION, 17),
            (CF_RRDP_SOURCE_MEMBER, 19),
        ]);
        assert_eq!(grouped.get(&CfGroup::CurrentRepositoryView), Some(&12));
        assert_eq!(grouped.get(&CfGroup::CurrentValidationState), Some(&41));
        assert_eq!(grouped.get(&CfGroup::CurrentRrdpState), Some(&19));
        assert_eq!(grouped.get(&CfGroup::LegacyCompatibility), None);
    }

    /// Every `CfStats` field is accumulated when grouping.
    #[test]
    fn summarize_cf_stats_accumulates_bytes_and_physical_stats_by_group() {
        let repo = CfStats {
            keys: 2,
            key_bytes: 20,
            value_bytes: 200,
            metadata_size_bytes: 50,
            metadata_file_count: 1,
            live_sst_size_bytes: 40,
            live_sst_files: 1,
        };
        let vcir = CfStats {
            keys: 3,
            key_bytes: 30,
            value_bytes: 300,
            metadata_size_bytes: 60,
            metadata_file_count: 2,
            live_sst_size_bytes: 55,
            live_sst_files: 2,
        };
        let replay_meta = CfStats {
            keys: 5,
            key_bytes: 50,
            value_bytes: 500,
            metadata_size_bytes: 70,
            metadata_file_count: 3,
            live_sst_size_bytes: 65,
            live_sst_files: 3,
        };
        let grouped = summarize_cf_stats([
            (CF_REPOSITORY_VIEW, &repo),
            (CF_VCIR, &vcir),
            (CF_MANIFEST_REPLAY_META, &replay_meta),
        ]);
        assert_eq!(grouped.get(&CfGroup::CurrentRepositoryView), Some(&repo));
        assert_eq!(
            grouped.get(&CfGroup::CurrentValidationState),
            Some(&CfStats {
                keys: 8,
                key_bytes: 80,
                value_bytes: 800,
                metadata_size_bytes: 130,
                metadata_file_count: 5,
                live_sst_size_bytes: 120,
                live_sst_files: 5,
            })
        );
    }

    /// Averages are `0.0` for an empty column family rather than NaN.
    #[test]
    fn averages_are_zero_without_keys() {
        let empty = CfStats::default();
        assert_eq!(empty.avg_key_bytes(), 0.0);
        assert_eq!(empty.avg_value_bytes(), 0.0);
        assert_eq!(avg_bytes(10, 4), 2.5);
    }

    /// Files are bucketed by name, and directories are ignored.
    #[test]
    fn collect_db_file_stats_splits_rocksdb_file_types() {
        let td = tempfile::tempdir().expect("tempdir");
        std::fs::write(td.path().join("000001.sst"), [0u8; 10]).expect("sst");
        std::fs::write(td.path().join("000002.blob"), [0u8; 20]).expect("blob");
        std::fs::write(td.path().join("000003.log"), [0u8; 30]).expect("wal");
        std::fs::write(td.path().join("LOG.old.1"), [0u8; 40]).expect("log");
        std::fs::write(td.path().join("MANIFEST-000004"), [0u8; 50]).expect("manifest");
        std::fs::write(td.path().join("CURRENT"), [0u8; 60]).expect("other");
        std::fs::create_dir(td.path().join("subdir")).expect("subdir");
        let stats = collect_db_file_stats(td.path()).expect("stats");
        assert_eq!(stats.sst_size_bytes, 10);
        assert_eq!(stats.sst_count, 1);
        assert_eq!(stats.blob_size_bytes, 20);
        assert_eq!(stats.blob_count, 1);
        assert_eq!(stats.log_size_bytes, 70);
        assert_eq!(stats.log_count, 2);
        assert_eq!(stats.manifest_size_bytes, 50);
        assert_eq!(stats.manifest_count, 1);
        assert_eq!(stats.other_size_bytes, 60);
        assert_eq!(stats.other_count, 1);
        assert_eq!(stats.total_size_bytes, 210);
        assert_eq!(stats.total_count, 6);
    }

    /// The help text mentions the key flags and output sections.
    #[test]
    fn usage_mentions_grouped_output_and_exact_mode() {
        let text = usage();
        assert!(text.contains("--exact"), "{text}");
        assert!(text.contains("current_validation_state"), "{text}");
        assert!(text.contains("current_rrdp_state"), "{text}");
        assert!(text.contains("db.files.*"), "{text}");
    }
}