20260427_3 增强db_stats归因work-db体积
This commit is contained in:
parent
87275b5c57
commit
26aec5ff35
@ -1,5 +1,6 @@
|
|||||||
use std::collections::BTreeMap;
|
use std::collections::BTreeMap;
|
||||||
use std::path::PathBuf;
|
use std::fs;
|
||||||
|
use std::path::{Path, PathBuf};
|
||||||
|
|
||||||
use rocksdb::{DB, IteratorMode, Options};
|
use rocksdb::{DB, IteratorMode, Options};
|
||||||
use rpki::storage::{
|
use rpki::storage::{
|
||||||
@ -21,6 +22,43 @@ enum CfGroup {
|
|||||||
LegacyCompatibility,
|
LegacyCompatibility,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Logical and physical size counters for one column family, or for a group
/// of column families once they have been summed together.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
struct CfStats {
    /// Number of keys (exact when iterated, otherwise a RocksDB estimate).
    keys: u64,
    /// Sum of key lengths in bytes; stays 0 unless the keys were iterated.
    key_bytes: u64,
    /// Sum of value lengths in bytes; stays 0 unless the keys were iterated.
    value_bytes: u64,
    /// `size` reported by the column family metadata.
    metadata_size_bytes: u64,
    /// `file_count` reported by the column family metadata.
    metadata_file_count: u64,
    /// Summed size of the live SST files attributed to this column family.
    live_sst_size_bytes: u64,
    /// Number of live SST files attributed to this column family.
    live_sst_files: u64,
}
|
||||||
|
|
||||||
|
impl CfStats {
|
||||||
|
fn avg_key_bytes(&self) -> f64 {
|
||||||
|
avg_bytes(self.key_bytes, self.keys)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn avg_value_bytes(&self) -> f64 {
|
||||||
|
avg_bytes(self.value_bytes, self.keys)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Size and count totals for the regular files found directly inside the DB
/// directory, split by file kind. Each file is counted in the totals and in
/// exactly one of the per-kind buckets.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
struct DbFileStats {
    /// Bytes across every regular file in the directory.
    total_size_bytes: u64,
    /// Number of regular files in the directory.
    total_count: u64,
    /// Bytes in files whose name ends with `.sst`.
    sst_size_bytes: u64,
    /// Number of files whose name ends with `.sst`.
    sst_count: u64,
    /// Bytes in files whose name ends with `.blob`.
    blob_size_bytes: u64,
    /// Number of files whose name ends with `.blob`.
    blob_count: u64,
    /// Bytes in `LOG`, `LOG.*` and `*.log` files.
    log_size_bytes: u64,
    /// Number of `LOG`, `LOG.*` and `*.log` files.
    log_count: u64,
    /// Bytes in files whose name starts with `MANIFEST-`.
    manifest_size_bytes: u64,
    /// Number of files whose name starts with `MANIFEST-`.
    manifest_count: u64,
    /// Bytes in files that match none of the kinds above.
    other_size_bytes: u64,
    /// Number of files that match none of the kinds above.
    other_count: u64,
}
|
||||||
|
|
||||||
impl CfGroup {
|
impl CfGroup {
|
||||||
fn as_str(self) -> &'static str {
|
fn as_str(self) -> &'static str {
|
||||||
match self {
|
match self {
|
||||||
@ -32,6 +70,14 @@ impl CfGroup {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the mean number of bytes per key.
///
/// A key count of zero yields `0.0` so callers never see a NaN or infinity.
fn avg_bytes(bytes: u64, keys: u64) -> f64 {
    match keys {
        0 => 0.0,
        count => bytes as f64 / count as f64,
    }
}
|
||||||
|
|
||||||
fn usage() -> String {
|
fn usage() -> String {
|
||||||
let bin = "db_stats";
|
let bin = "db_stats";
|
||||||
format!(
|
format!(
|
||||||
@ -41,9 +87,15 @@ Usage:
|
|||||||
|
|
||||||
Options:
|
Options:
|
||||||
--db <path> RocksDB directory
|
--db <path> RocksDB directory
|
||||||
--exact Iterate to count keys (slower; default uses RocksDB estimates)
|
--exact Iterate to count keys and logical bytes (slower; default uses RocksDB estimates)
|
||||||
--help Show this help
|
--help Show this help
|
||||||
|
|
||||||
|
Output:
|
||||||
|
- legacy fields: <cf>=<keys>, group_<name>=<keys>, sst_files=<n>
|
||||||
|
- cf.<name>.*: key/value bytes, RocksDB metadata, live SST size
|
||||||
|
- group.<name>.*: grouped key/value bytes and physical SST metadata
|
||||||
|
- db.files.*: DB directory file totals split by .sst/.blob/.log/MANIFEST/other
|
||||||
|
|
||||||
Output groups:
|
Output groups:
|
||||||
- current_repository_view: repository_view + raw_by_hash
|
- current_repository_view: repository_view + raw_by_hash
|
||||||
- current_validation_state: vcir + audit_rule_index
|
- current_validation_state: vcir + audit_rule_index
|
||||||
@ -59,17 +111,72 @@ fn estimate_keys(db: &DB, cf_name: &str) -> Result<Option<u64>, Box<dyn std::err
|
|||||||
Ok(db.property_int_value_cf(cf, "rocksdb.estimate-num-keys")?)
|
Ok(db.property_int_value_cf(cf, "rocksdb.estimate-num-keys")?)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn exact_keys(db: &DB, cf_name: &str) -> Result<u64, Box<dyn std::error::Error>> {
|
fn exact_logical_stats(db: &DB, cf_name: &str) -> Result<CfStats, Box<dyn std::error::Error>> {
|
||||||
let cf = db
|
let cf = db
|
||||||
.cf_handle(cf_name)
|
.cf_handle(cf_name)
|
||||||
.ok_or_else(|| format!("missing column family: {cf_name}"))?;
|
.ok_or_else(|| format!("missing column family: {cf_name}"))?;
|
||||||
let mode = IteratorMode::Start;
|
let mode = IteratorMode::Start;
|
||||||
let mut count = 0u64;
|
let mut stats = CfStats::default();
|
||||||
for res in db.iterator_cf(cf, mode) {
|
for res in db.iterator_cf(cf, mode) {
|
||||||
res?;
|
let (key, value) = res?;
|
||||||
count += 1;
|
stats.keys = stats.keys.saturating_add(1);
|
||||||
|
stats.key_bytes = stats.key_bytes.saturating_add(key.len() as u64);
|
||||||
|
stats.value_bytes = stats.value_bytes.saturating_add(value.len() as u64);
|
||||||
}
|
}
|
||||||
Ok(count)
|
Ok(stats)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn cf_metadata_stats(db: &DB, cf_name: &str) -> Result<(u64, u64), Box<dyn std::error::Error>> {
|
||||||
|
let cf = db
|
||||||
|
.cf_handle(cf_name)
|
||||||
|
.ok_or_else(|| format!("missing column family: {cf_name}"))?;
|
||||||
|
let metadata = db.get_column_family_metadata_cf(cf);
|
||||||
|
Ok((metadata.size, metadata.file_count as u64))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn live_sst_stats(db: &DB) -> Result<BTreeMap<String, (u64, u64)>, Box<dyn std::error::Error>> {
|
||||||
|
let mut stats = BTreeMap::new();
|
||||||
|
for file in db.live_files()? {
|
||||||
|
let entry = stats.entry(file.column_family_name).or_insert((0u64, 0u64));
|
||||||
|
entry.0 = entry.0.saturating_add(file.size as u64);
|
||||||
|
entry.1 = entry.1.saturating_add(1);
|
||||||
|
}
|
||||||
|
Ok(stats)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn collect_db_file_stats(db_path: &Path) -> Result<DbFileStats, Box<dyn std::error::Error>> {
|
||||||
|
let mut stats = DbFileStats::default();
|
||||||
|
for entry in fs::read_dir(db_path)? {
|
||||||
|
let entry = entry?;
|
||||||
|
if !entry.file_type()?.is_file() {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
let size = entry.metadata()?.len();
|
||||||
|
let name = entry.file_name();
|
||||||
|
let name = name.to_string_lossy();
|
||||||
|
|
||||||
|
stats.total_size_bytes = stats.total_size_bytes.saturating_add(size);
|
||||||
|
stats.total_count = stats.total_count.saturating_add(1);
|
||||||
|
|
||||||
|
if name.ends_with(".sst") {
|
||||||
|
stats.sst_size_bytes = stats.sst_size_bytes.saturating_add(size);
|
||||||
|
stats.sst_count = stats.sst_count.saturating_add(1);
|
||||||
|
} else if name.ends_with(".blob") {
|
||||||
|
stats.blob_size_bytes = stats.blob_size_bytes.saturating_add(size);
|
||||||
|
stats.blob_count = stats.blob_count.saturating_add(1);
|
||||||
|
} else if name == "LOG" || name.starts_with("LOG.") || name.ends_with(".log") {
|
||||||
|
stats.log_size_bytes = stats.log_size_bytes.saturating_add(size);
|
||||||
|
stats.log_count = stats.log_count.saturating_add(1);
|
||||||
|
} else if name.starts_with("MANIFEST-") {
|
||||||
|
stats.manifest_size_bytes = stats.manifest_size_bytes.saturating_add(size);
|
||||||
|
stats.manifest_count = stats.manifest_count.saturating_add(1);
|
||||||
|
} else {
|
||||||
|
stats.other_size_bytes = stats.other_size_bytes.saturating_add(size);
|
||||||
|
stats.other_count = stats.other_count.saturating_add(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(stats)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn cf_group(cf_name: &str) -> CfGroup {
|
fn cf_group(cf_name: &str) -> CfGroup {
|
||||||
@ -91,6 +198,31 @@ fn summarize_counts<'a>(
|
|||||||
grouped
|
grouped
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn summarize_cf_stats<'a>(
|
||||||
|
stats: impl IntoIterator<Item = (&'a str, &'a CfStats)>,
|
||||||
|
) -> BTreeMap<CfGroup, CfStats> {
|
||||||
|
let mut grouped = BTreeMap::new();
|
||||||
|
for (cf_name, cf_stats) in stats {
|
||||||
|
let entry = grouped
|
||||||
|
.entry(cf_group(cf_name))
|
||||||
|
.or_insert_with(CfStats::default);
|
||||||
|
entry.keys = entry.keys.saturating_add(cf_stats.keys);
|
||||||
|
entry.key_bytes = entry.key_bytes.saturating_add(cf_stats.key_bytes);
|
||||||
|
entry.value_bytes = entry.value_bytes.saturating_add(cf_stats.value_bytes);
|
||||||
|
entry.metadata_size_bytes = entry
|
||||||
|
.metadata_size_bytes
|
||||||
|
.saturating_add(cf_stats.metadata_size_bytes);
|
||||||
|
entry.metadata_file_count = entry
|
||||||
|
.metadata_file_count
|
||||||
|
.saturating_add(cf_stats.metadata_file_count);
|
||||||
|
entry.live_sst_size_bytes = entry
|
||||||
|
.live_sst_size_bytes
|
||||||
|
.saturating_add(cf_stats.live_sst_size_bytes);
|
||||||
|
entry.live_sst_files = entry.live_sst_files.saturating_add(cf_stats.live_sst_files);
|
||||||
|
}
|
||||||
|
grouped
|
||||||
|
}
|
||||||
|
|
||||||
fn mode_label(mode: DbStatsMode) -> &'static str {
|
fn mode_label(mode: DbStatsMode) -> &'static str {
|
||||||
match mode {
|
match mode {
|
||||||
DbStatsMode::Estimate => "estimate",
|
DbStatsMode::Estimate => "estimate",
|
||||||
@ -131,26 +263,100 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|||||||
|
|
||||||
println!("db={}", db_path.display());
|
println!("db={}", db_path.display());
|
||||||
println!("mode={}", mode_label(mode));
|
println!("mode={}", mode_label(mode));
|
||||||
|
println!(
|
||||||
|
"logical_bytes_available={}",
|
||||||
|
matches!(mode, DbStatsMode::Exact)
|
||||||
|
);
|
||||||
|
|
||||||
|
let live_sst = live_sst_stats(&db)?;
|
||||||
let mut per_cf = Vec::with_capacity(ALL_COLUMN_FAMILY_NAMES.len());
|
let mut per_cf = Vec::with_capacity(ALL_COLUMN_FAMILY_NAMES.len());
|
||||||
let mut total: u64 = 0;
|
let mut total: u64 = 0;
|
||||||
for &name in ALL_COLUMN_FAMILY_NAMES {
|
for &name in ALL_COLUMN_FAMILY_NAMES {
|
||||||
let n = match mode {
|
let mut stats = match mode {
|
||||||
DbStatsMode::Exact => exact_keys(&db, name)?,
|
DbStatsMode::Exact => exact_logical_stats(&db, name)?,
|
||||||
DbStatsMode::Estimate => estimate_keys(&db, name)?.unwrap_or(0),
|
DbStatsMode::Estimate => CfStats {
|
||||||
|
keys: estimate_keys(&db, name)?.unwrap_or(0),
|
||||||
|
..CfStats::default()
|
||||||
|
},
|
||||||
};
|
};
|
||||||
total = total.saturating_add(n);
|
|
||||||
per_cf.push((name, n));
|
let (metadata_size_bytes, metadata_file_count) = cf_metadata_stats(&db, name)?;
|
||||||
println!("{name}={n}");
|
let (live_sst_size_bytes, live_sst_files) = live_sst.get(name).copied().unwrap_or((0, 0));
|
||||||
|
stats.metadata_size_bytes = metadata_size_bytes;
|
||||||
|
stats.metadata_file_count = metadata_file_count;
|
||||||
|
stats.live_sst_size_bytes = live_sst_size_bytes;
|
||||||
|
stats.live_sst_files = live_sst_files;
|
||||||
|
|
||||||
|
total = total.saturating_add(stats.keys);
|
||||||
|
println!("{name}={}", stats.keys);
|
||||||
|
per_cf.push((name, stats));
|
||||||
}
|
}
|
||||||
println!("total={total}");
|
println!("total={total}");
|
||||||
|
|
||||||
for (group, count) in summarize_counts(per_cf.iter().copied()) {
|
for (group, count) in summarize_counts(per_cf.iter().map(|(name, stats)| (*name, stats.keys))) {
|
||||||
println!("group_{}={count}", group.as_str());
|
println!("group_{}={count}", group.as_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
let live = db.live_files()?;
|
for (name, stats) in &per_cf {
|
||||||
println!("sst_files={}", live.len());
|
println!("cf.{name}.keys={}", stats.keys);
|
||||||
|
println!("cf.{name}.key_bytes={}", stats.key_bytes);
|
||||||
|
println!("cf.{name}.value_bytes={}", stats.value_bytes);
|
||||||
|
println!("cf.{name}.avg_key_bytes={:.2}", stats.avg_key_bytes());
|
||||||
|
println!("cf.{name}.avg_value_bytes={:.2}", stats.avg_value_bytes());
|
||||||
|
println!(
|
||||||
|
"cf.{name}.metadata_size_bytes={}",
|
||||||
|
stats.metadata_size_bytes
|
||||||
|
);
|
||||||
|
println!(
|
||||||
|
"cf.{name}.metadata_file_count={}",
|
||||||
|
stats.metadata_file_count
|
||||||
|
);
|
||||||
|
println!(
|
||||||
|
"cf.{name}.live_sst_size_bytes={}",
|
||||||
|
stats.live_sst_size_bytes
|
||||||
|
);
|
||||||
|
println!("cf.{name}.live_sst_files={}", stats.live_sst_files);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (group, stats) in summarize_cf_stats(per_cf.iter().map(|(name, stats)| (*name, stats))) {
|
||||||
|
let group = group.as_str();
|
||||||
|
println!("group.{group}.keys={}", stats.keys);
|
||||||
|
println!("group.{group}.key_bytes={}", stats.key_bytes);
|
||||||
|
println!("group.{group}.value_bytes={}", stats.value_bytes);
|
||||||
|
println!(
|
||||||
|
"group.{group}.metadata_size_bytes={}",
|
||||||
|
stats.metadata_size_bytes
|
||||||
|
);
|
||||||
|
println!(
|
||||||
|
"group.{group}.metadata_file_count={}",
|
||||||
|
stats.metadata_file_count
|
||||||
|
);
|
||||||
|
println!(
|
||||||
|
"group.{group}.live_sst_size_bytes={}",
|
||||||
|
stats.live_sst_size_bytes
|
||||||
|
);
|
||||||
|
println!("group.{group}.live_sst_files={}", stats.live_sst_files);
|
||||||
|
}
|
||||||
|
|
||||||
|
let live_sst_file_count = live_sst.values().map(|(_, count)| *count).sum::<u64>();
|
||||||
|
println!("sst_files={live_sst_file_count}");
|
||||||
|
|
||||||
|
let file_stats = collect_db_file_stats(&db_path)?;
|
||||||
|
println!("db.files.total_size_bytes={}", file_stats.total_size_bytes);
|
||||||
|
println!("db.files.total_count={}", file_stats.total_count);
|
||||||
|
println!("db.files.sst_size_bytes={}", file_stats.sst_size_bytes);
|
||||||
|
println!("db.files.sst_count={}", file_stats.sst_count);
|
||||||
|
println!("db.files.blob_size_bytes={}", file_stats.blob_size_bytes);
|
||||||
|
println!("db.files.blob_count={}", file_stats.blob_count);
|
||||||
|
println!("db.files.log_size_bytes={}", file_stats.log_size_bytes);
|
||||||
|
println!("db.files.log_count={}", file_stats.log_count);
|
||||||
|
println!(
|
||||||
|
"db.files.manifest_size_bytes={}",
|
||||||
|
file_stats.manifest_size_bytes
|
||||||
|
);
|
||||||
|
println!("db.files.manifest_count={}", file_stats.manifest_count);
|
||||||
|
println!("db.files.other_size_bytes={}", file_stats.other_size_bytes);
|
||||||
|
println!("db.files.other_count={}", file_stats.other_count);
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
@ -189,11 +395,90 @@ mod tests {
|
|||||||
assert_eq!(grouped.get(&CfGroup::LegacyCompatibility), None);
|
assert_eq!(grouped.get(&CfGroup::LegacyCompatibility), None);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
fn summarize_cf_stats_accumulates_bytes_and_physical_stats_by_group() {
    // One column family alone in its group, and two that share a group.
    let repo = CfStats {
        keys: 2,
        key_bytes: 20,
        value_bytes: 200,
        metadata_size_bytes: 50,
        metadata_file_count: 1,
        live_sst_size_bytes: 40,
        live_sst_files: 1,
    };
    let vcir = CfStats {
        keys: 3,
        key_bytes: 30,
        value_bytes: 300,
        metadata_size_bytes: 60,
        metadata_file_count: 2,
        live_sst_size_bytes: 55,
        live_sst_files: 2,
    };
    let audit = CfStats {
        keys: 5,
        key_bytes: 50,
        value_bytes: 500,
        metadata_size_bytes: 70,
        metadata_file_count: 3,
        live_sst_size_bytes: 65,
        live_sst_files: 3,
    };

    let inputs = [
        (CF_REPOSITORY_VIEW, &repo),
        (CF_VCIR, &vcir),
        (CF_AUDIT_RULE_INDEX, &audit),
    ];
    let grouped = summarize_cf_stats(inputs);

    // A single-member group must come back unchanged.
    assert_eq!(grouped.get(&CfGroup::CurrentRepositoryView), Some(&repo));

    // The shared group must be the field-wise sum of its two members.
    let expected_validation = CfStats {
        keys: vcir.keys + audit.keys,
        key_bytes: vcir.key_bytes + audit.key_bytes,
        value_bytes: vcir.value_bytes + audit.value_bytes,
        metadata_size_bytes: vcir.metadata_size_bytes + audit.metadata_size_bytes,
        metadata_file_count: vcir.metadata_file_count + audit.metadata_file_count,
        live_sst_size_bytes: vcir.live_sst_size_bytes + audit.live_sst_size_bytes,
        live_sst_files: vcir.live_sst_files + audit.live_sst_files,
    };
    assert_eq!(
        grouped.get(&CfGroup::CurrentValidationState),
        Some(&expected_validation)
    );
}
|
||||||
|
|
||||||
|
#[test]
fn collect_db_file_stats_splits_rocksdb_file_types() {
    let dir = tempfile::tempdir().expect("tempdir");

    // (file name, size in bytes, label used if the write fails)
    let fixtures = [
        ("000001.sst", 10usize, "sst"),
        ("000002.blob", 20, "blob"),
        ("000003.log", 30, "wal"),
        ("LOG.old.1", 40, "log"),
        ("MANIFEST-000004", 50, "manifest"),
        ("CURRENT", 60, "other"),
    ];
    for (file_name, len, label) in fixtures {
        std::fs::write(dir.path().join(file_name), vec![0u8; len]).expect(label);
    }
    // Directories must not be counted at all.
    std::fs::create_dir(dir.path().join("subdir")).expect("subdir");

    let stats = collect_db_file_stats(dir.path()).expect("stats");

    let expected = DbFileStats {
        total_size_bytes: 210,
        total_count: 6,
        sst_size_bytes: 10,
        sst_count: 1,
        blob_size_bytes: 20,
        blob_count: 1,
        // 000003.log (30) and LOG.old.1 (40) share the log bucket.
        log_size_bytes: 70,
        log_count: 2,
        manifest_size_bytes: 50,
        manifest_count: 1,
        other_size_bytes: 60,
        other_count: 1,
    };
    assert_eq!(stats, expected);
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn usage_mentions_grouped_output_and_exact_mode() {
|
fn usage_mentions_grouped_output_and_exact_mode() {
|
||||||
let text = usage();
|
let text = usage();
|
||||||
assert!(text.contains("--exact"), "{text}");
|
assert!(text.contains("--exact"), "{text}");
|
||||||
assert!(text.contains("current_validation_state"), "{text}");
|
assert!(text.contains("current_validation_state"), "{text}");
|
||||||
assert!(text.contains("current_rrdp_state"), "{text}");
|
assert!(text.contains("current_rrdp_state"), "{text}");
|
||||||
|
assert!(text.contains("db.files.*"), "{text}");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user