// rpki/src/storage/pp_cache_index.rs
//
// Listing metadata: 469 lines, 15 KiB, Rust.

use std::collections::{BTreeMap, HashMap};
use std::fs::{self, File};
use std::io::Write;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::{Instant, SystemTime, UNIX_EPOCH};
use memmap2::Mmap;
use serde::Serialize;
use super::{StorageError, StorageResult};
/// Magic bytes expected at the very start of every index file (12 bytes, NUL-padded).
const MAGIC: &[u8; 12] = b"RPKIPPIDX\0\0\0";
/// Format version, stored as a little-endian `u32` at byte offset 12 (right after the magic).
const VERSION: u32 = 1;
/// Size in bytes of the fixed header; the writer zero-pads the header up to this length.
const HEADER_LEN: usize = 64;
/// Size in bytes of one entry-table record:
/// `u64` key offset, `u32` key length, `u64` value offset, `u32` value length.
const ENTRY_LEN: usize = 24;
/// Summary of one index load, returned alongside the loaded index or index set.
#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize)]
pub struct PpCacheIndexLoadStats {
/// Where the index came from; the loaders in this file always report `"mmap"`.
pub source: String,
/// Number of entries loaded (summed over all files when loading a set).
pub entries: usize,
/// Total length in bytes of the stored values (keys are not counted).
pub bytes: usize,
/// Size in bytes of the index file(s) on disk.
pub file_bytes: u64,
/// Wall-clock time spent loading, in milliseconds.
pub load_ms: u64,
}
/// Summary of one index write, returned by the `write_pp_cache_index_*` functions.
#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize)]
pub struct PpCacheIndexRefreshStats {
/// Outcome label; the writers in this file set `"written"` or `"segment_written"`.
pub state: String,
/// Always 0 when produced by the writers in this file.
pub old_entries: usize,
/// Always 0 when produced by the writers in this file.
pub dirty_entries: usize,
/// Number of distinct keys written to the new file.
pub new_entries: usize,
/// Size in bytes of the file that was written.
pub file_bytes: u64,
/// Wall-clock time spent writing, in milliseconds.
pub write_ms: u64,
}
/// Location of one value inside an index file's blob section.
#[derive(Clone, Debug, PartialEq, Eq)]
struct PpCacheIndexEntry {
/// Byte offset of the value, relative to the start of the blob section.
value_offset: usize,
/// Length of the value in bytes.
value_len: usize,
}
/// A single index file, memory-mapped read-only, with an in-memory key lookup table.
#[derive(Debug)]
pub struct PpCacheMmapIndex {
/// The mapped file; values returned by `get` are slices into this mapping.
mmap: Arc<Mmap>,
/// Maps each key to the location of its value within the blob section.
entries: HashMap<String, PpCacheIndexEntry>,
/// Sum of the value lengths of all entry-table records read at open time.
bytes: usize,
/// Length of the file as reported by its metadata when it was opened.
file_bytes: u64,
}
impl PpCacheMmapIndex {
pub fn open(path: &Path) -> StorageResult<Self> {
let file = File::open(path).map_err(|e| StorageError::RocksDb(e.to_string()))?;
let file_len = file
.metadata()
.map_err(|e| StorageError::RocksDb(e.to_string()))?
.len() as usize;
if file_len < HEADER_LEN {
return Err(StorageError::InvalidData {
entity: "publication_point_cache_mmap_index",
detail: "file too small".to_string(),
});
}
// SAFETY: The mmap is read-only, held by Arc for all returned slices, and the file
// format is bounds-checked before any slice is exposed.
let mmap = unsafe { Mmap::map(&file) }.map_err(|e| StorageError::RocksDb(e.to_string()))?;
if &mmap[0..MAGIC.len()] != MAGIC {
return Err(StorageError::InvalidData {
entity: "publication_point_cache_mmap_index",
detail: "invalid magic".to_string(),
});
}
let version = read_u32(&mmap, 12)?;
if version != VERSION {
return Err(StorageError::InvalidData {
entity: "publication_point_cache_mmap_index",
detail: format!("unsupported version {version}"),
});
}
let entry_count = read_u64(&mmap, 16)? as usize;
let entry_table_offset = read_u64(&mmap, 24)? as usize;
let entry_table_len = read_u64(&mmap, 32)? as usize;
let blob_offset = read_u64(&mmap, 40)? as usize;
let blob_len = read_u64(&mmap, 48)? as usize;
checked_range(file_len, entry_table_offset, entry_table_len)?;
checked_range(file_len, blob_offset, blob_len)?;
if entry_table_len != entry_count.saturating_mul(ENTRY_LEN) {
return Err(StorageError::InvalidData {
entity: "publication_point_cache_mmap_index",
detail: "entry table length mismatch".to_string(),
});
}
let mut entries = HashMap::with_capacity(entry_count);
let mut bytes = 0usize;
for offset in (entry_table_offset..entry_table_offset + entry_table_len).step_by(ENTRY_LEN)
{
let key_offset = read_u64(&mmap, offset)? as usize;
let key_len = read_u32(&mmap, offset + 8)? as usize;
let value_offset = read_u64(&mmap, offset + 12)? as usize;
let value_len = read_u32(&mmap, offset + 20)? as usize;
checked_range(blob_len, key_offset, key_len)?;
checked_range(blob_len, value_offset, value_len)?;
let key_start = blob_offset + key_offset;
let key_end = key_start + key_len;
let key = std::str::from_utf8(&mmap[key_start..key_end])
.map_err(|e| StorageError::InvalidData {
entity: "publication_point_cache_mmap_index.key",
detail: e.to_string(),
})?
.to_string();
bytes = bytes.saturating_add(value_len);
entries.insert(
key,
PpCacheIndexEntry {
value_offset,
value_len,
},
);
}
Ok(Self {
mmap: Arc::new(mmap),
entries,
bytes,
file_bytes: file_len as u64,
})
}
pub fn get(&self, manifest_rsync_uri: &str) -> Option<&[u8]> {
let entry = self.entries.get(manifest_rsync_uri)?;
let blob_offset = read_u64(self.mmap.as_ref(), 40).ok()? as usize;
let start = blob_offset + entry.value_offset;
let end = start + entry.value_len;
Some(&self.mmap[start..end])
}
pub fn entries(&self) -> usize {
self.entries.len()
}
pub fn bytes(&self) -> usize {
self.bytes
}
pub fn file_bytes(&self) -> u64 {
self.file_bytes
}
}
/// An ordered collection of index files that are searched one after another.
#[derive(Debug)]
pub struct PpCacheMmapIndexSet {
/// Indexes in lookup order; the first index containing a key supplies its value.
indexes: Vec<PpCacheMmapIndex>,
/// Sum of the entry counts of all indexes (a key present in several files is counted each time).
entries: usize,
/// Sum of the value byte counts of all indexes.
bytes: usize,
/// Sum of the file sizes of all indexes.
file_bytes: u64,
}
impl PpCacheMmapIndexSet {
    /// Looks `manifest_rsync_uri` up in each index in order and returns the first
    /// value found, or `None` when no index contains the key.
    pub fn get(&self, manifest_rsync_uri: &str) -> Option<&[u8]> {
        for index in &self.indexes {
            if let Some(value) = index.get(manifest_rsync_uri) {
                return Some(value);
            }
        }
        None
    }

    /// Total entry count summed over every index in the set.
    pub fn entries(&self) -> usize {
        let Self { entries, .. } = self;
        *entries
    }

    /// Total value bytes summed over every index in the set.
    pub fn bytes(&self) -> usize {
        let Self { bytes, .. } = self;
        *bytes
    }

    /// Total on-disk size summed over every index file in the set.
    pub fn file_bytes(&self) -> u64 {
        let Self { file_bytes, .. } = self;
        *file_bytes
    }
}
/// Derives the index directory that sits next to the database at `db_path`.
///
/// The result is `db_path` with its final component replaced by
/// `<name>.pp-cache-index`. When `db_path` has no final component, or that
/// component is not valid UTF-8, `work-db` is used as the name.
pub fn default_pp_cache_index_dir(db_path: &Path) -> PathBuf {
    const SUFFIX: &str = ".pp-cache-index";
    let base = match db_path.file_name().and_then(|name| name.to_str()) {
        Some(name) => name,
        None => "work-db",
    };
    let mut dir_name = String::with_capacity(base.len() + SUFFIX.len());
    dir_name.push_str(base);
    dir_name.push_str(SUFFIX);
    db_path.with_file_name(dir_name)
}
/// Opens the single index file at `path` and reports how the load went.
///
/// The reported `file_bytes` is the size recorded by the index when it opened the
/// file, so the statistics always describe the file that was actually mapped; no
/// second metadata lookup is made on the path.
///
/// # Errors
///
/// Propagates any error from [`PpCacheMmapIndex::open`].
pub fn load_pp_cache_mmap_index(
    path: &Path,
) -> StorageResult<(PpCacheMmapIndex, PpCacheIndexLoadStats)> {
    let started = Instant::now();
    let index = PpCacheMmapIndex::open(path)?;
    let stats = PpCacheIndexLoadStats {
        source: "mmap".to_string(),
        entries: index.entries(),
        bytes: index.bytes(),
        file_bytes: index.file_bytes(),
        load_ms: started.elapsed().as_millis() as u64,
    };
    Ok((index, stats))
}
pub fn load_pp_cache_mmap_index_set(
dir: &Path,
) -> StorageResult<(PpCacheMmapIndexSet, PpCacheIndexLoadStats)> {
let started = Instant::now();
let mut paths = Vec::new();
if dir.exists() {
let mut segments = fs::read_dir(dir)
.map_err(|e| StorageError::RocksDb(e.to_string()))?
.filter_map(Result::ok)
.map(|entry| entry.path())
.filter(|path| {
path.file_name()
.and_then(|name| name.to_str())
.is_some_and(|name| name.starts_with("segment-") && name.ends_with(".idx"))
})
.collect::<Vec<_>>();
segments.sort();
segments.reverse();
paths.extend(segments);
let current = dir.join("current.idx");
if current.exists() {
paths.push(current);
}
}
if paths.is_empty() {
return Err(StorageError::InvalidData {
entity: "publication_point_cache_mmap_index",
detail: "index file missing".to_string(),
});
}
let mut indexes = Vec::new();
let mut entries = 0usize;
let mut bytes = 0usize;
let mut file_bytes = 0u64;
for path in paths {
let (index, _) = load_pp_cache_mmap_index(&path)?;
entries = entries.saturating_add(index.entries());
bytes = bytes.saturating_add(index.bytes());
file_bytes = file_bytes.saturating_add(index.file_bytes());
indexes.push(index);
}
let set = PpCacheMmapIndexSet {
indexes,
entries,
bytes,
file_bytes,
};
let stats = PpCacheIndexLoadStats {
source: "mmap".to_string(),
entries: set.entries(),
bytes: set.bytes(),
file_bytes: set.file_bytes(),
load_ms: started.elapsed().as_millis() as u64,
};
Ok((set, stats))
}
pub fn write_pp_cache_index_segment<I>(
dir: &Path,
entries: I,
) -> StorageResult<PpCacheIndexRefreshStats>
where
I: IntoIterator<Item = (String, Vec<u8>)>,
{
fs::create_dir_all(dir).map_err(|e| StorageError::RocksDb(e.to_string()))?;
let current = dir.join("current.idx");
if !current.exists() {
return write_pp_cache_index_atomic(&current, entries);
}
let now = SystemTime::now()
.duration_since(UNIX_EPOCH)
.map_err(|e| StorageError::RocksDb(e.to_string()))?
.as_nanos();
let segment = dir.join(format!("segment-{now:020}-{}.idx", std::process::id()));
let mut stats = write_pp_cache_index_atomic(&segment, entries)?;
stats.state = "segment_written".to_string();
Ok(stats)
}
/// Writes `entries` to a temporary file and renames it over `path`.
///
/// Readers therefore see either the previous file or the complete new one. When
/// the same key appears more than once in `entries`, the last value wins. Keys are
/// written in sorted order.
///
/// The temporary file lives next to `path` and its name combines the target file
/// name, the process id and a per-process counter, so concurrent writers (other
/// processes, or other calls in this process) never share a temporary file. It is
/// removed again if writing or renaming fails.
///
/// The returned stats always report `old_entries` and `dirty_entries` as 0.
///
/// # Errors
///
/// Returns `StorageError::InvalidData` when `path` has no parent directory, and
/// propagates any error from creating the directory, writing the file, reading
/// its size or renaming it into place.
pub fn write_pp_cache_index_atomic<I>(
    path: &Path,
    entries: I,
) -> StorageResult<PpCacheIndexRefreshStats>
where
    I: IntoIterator<Item = (String, Vec<u8>)>,
{
    // Distinguishes temporary files created by concurrent calls in this process.
    static TMP_SEQ: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0);

    let started = Instant::now();
    let parent = path.parent().ok_or_else(|| StorageError::InvalidData {
        entity: "publication_point_cache_mmap_index.path",
        detail: "missing parent".to_string(),
    })?;
    fs::create_dir_all(parent).map_err(|e| StorageError::RocksDb(e.to_string()))?;

    // Explicit inserts make "last duplicate wins" a guarantee of this function.
    let mut ordered = BTreeMap::<String, Vec<u8>>::new();
    for (key, value) in entries {
        ordered.insert(key, value);
    }

    // The `.tmp` suffix keeps the loader (which only picks up `*.idx` names) from
    // ever treating a half-written file as an index.
    let seq = TMP_SEQ.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
    let mut tmp_name = path
        .file_name()
        .map(|name| name.to_os_string())
        .unwrap_or_default();
    tmp_name.push(format!(".{}.{}.tmp", std::process::id(), seq));
    let tmp_path = parent.join(tmp_name);

    let publish = || -> StorageResult<u64> {
        write_pp_cache_index_file(
            &tmp_path,
            ordered.iter().map(|(k, v)| (k.as_str(), v.as_slice())),
        )?;
        let file_bytes = fs::metadata(&tmp_path)
            .map_err(|e| StorageError::RocksDb(e.to_string()))?
            .len();
        fs::rename(&tmp_path, path).map_err(|e| StorageError::RocksDb(e.to_string()))?;
        Ok(file_bytes)
    };
    let file_bytes = match publish() {
        Ok(file_bytes) => file_bytes,
        Err(err) => {
            // Best-effort cleanup; the original failure is the error worth reporting.
            let _ = fs::remove_file(&tmp_path);
            return Err(err);
        }
    };

    Ok(PpCacheIndexRefreshStats {
        state: "written".to_string(),
        old_entries: 0,
        dirty_entries: 0,
        new_entries: ordered.len(),
        file_bytes,
        write_ms: started.elapsed().as_millis() as u64,
    })
}
/// Serializes `entries` into the index file format and writes them to `path`.
///
/// Layout: a `HEADER_LEN`-byte header (magic, version, entry count, entry-table
/// offset and length, blob offset and length, zero padding), then one
/// `ENTRY_LEN`-byte record per entry, then the blob holding each key immediately
/// followed by its value. All integers are little-endian, and record offsets are
/// relative to the start of the blob.
///
/// # Errors
///
/// Returns `StorageError::InvalidData` when a key or value is longer than
/// `u32::MAX` bytes (the record format stores lengths as `u32`), and
/// `StorageError::RocksDb` when the file cannot be created, written or synced.
fn write_pp_cache_index_file<'a, I>(path: &Path, entries: I) -> StorageResult<()>
where
    I: IntoIterator<Item = (&'a str, &'a [u8])>,
{
    let entries = entries.into_iter().collect::<Vec<_>>();
    let entry_table_offset = HEADER_LEN;
    let entry_table_len = entries.len() * ENTRY_LEN;
    let blob_offset = entry_table_offset + entry_table_len;

    let blob_capacity = entries
        .iter()
        .map(|(key, value)| key.len().saturating_add(value.len()))
        .fold(0usize, usize::saturating_add);
    let mut table = Vec::with_capacity(entry_table_len);
    let mut blob = Vec::with_capacity(blob_capacity);
    for (key, value) in &entries {
        // Lengths are stored as `u32`; refuse oversized items instead of letting a
        // truncating cast write a record that points at the wrong bytes.
        let key_len = u32::try_from(key.len()).map_err(|_| StorageError::InvalidData {
            entity: "publication_point_cache_mmap_index.key",
            detail: format!("key length {} exceeds u32::MAX", key.len()),
        })?;
        let value_len = u32::try_from(value.len()).map_err(|_| StorageError::InvalidData {
            entity: "publication_point_cache_mmap_index.value",
            detail: format!("value length {} exceeds u32::MAX", value.len()),
        })?;
        let key_offset = blob.len();
        blob.extend_from_slice(key.as_bytes());
        let value_offset = blob.len();
        blob.extend_from_slice(value);
        write_u64(&mut table, key_offset as u64);
        write_u32(&mut table, key_len);
        write_u64(&mut table, value_offset as u64);
        write_u32(&mut table, value_len);
    }

    let mut header = Vec::with_capacity(HEADER_LEN);
    header.extend_from_slice(MAGIC);
    write_u32(&mut header, VERSION);
    write_u64(&mut header, entries.len() as u64);
    write_u64(&mut header, entry_table_offset as u64);
    write_u64(&mut header, entry_table_len as u64);
    write_u64(&mut header, blob_offset as u64);
    write_u64(&mut header, blob.len() as u64);
    // Zero-pad the header to its fixed size.
    header.resize(HEADER_LEN, 0);

    let mut file = File::create(path).map_err(|e| StorageError::RocksDb(e.to_string()))?;
    file.write_all(&header)
        .map_err(|e| StorageError::RocksDb(e.to_string()))?;
    file.write_all(&table)
        .map_err(|e| StorageError::RocksDb(e.to_string()))?;
    file.write_all(&blob)
        .map_err(|e| StorageError::RocksDb(e.to_string()))?;
    // Flush data and metadata to disk before the caller renames the file into place.
    file.sync_all()
        .map_err(|e| StorageError::RocksDb(e.to_string()))?;
    Ok(())
}
/// Verifies that the range `offset..offset + len` lies within `0..=total`.
///
/// # Errors
///
/// Returns `StorageError::InvalidData` when `offset + len` overflows or exceeds
/// `total`.
fn checked_range(total: usize, offset: usize, len: usize) -> StorageResult<()> {
    match offset.checked_add(len) {
        Some(end) if end <= total => Ok(()),
        _ => Err(StorageError::InvalidData {
            entity: "publication_point_cache_mmap_index.range",
            detail: "out of bounds".to_string(),
        }),
    }
}
/// Reads a little-endian `u32` from `bytes` starting at `offset`.
///
/// # Errors
///
/// Returns the `checked_range` error when fewer than 4 bytes are available at
/// `offset`.
fn read_u32(bytes: &[u8], offset: usize) -> StorageResult<u32> {
    checked_range(bytes.len(), offset, 4)?;
    let mut raw = [0u8; 4];
    raw.copy_from_slice(&bytes[offset..offset + 4]);
    Ok(u32::from_le_bytes(raw))
}
/// Reads a little-endian `u64` from `bytes` starting at `offset`.
///
/// # Errors
///
/// Returns the `checked_range` error when fewer than 8 bytes are available at
/// `offset`.
fn read_u64(bytes: &[u8], offset: usize) -> StorageResult<u64> {
    checked_range(bytes.len(), offset, 8)?;
    let mut raw = [0u8; 8];
    raw.copy_from_slice(&bytes[offset..offset + 8]);
    Ok(u64::from_le_bytes(raw))
}
/// Appends `value` to `out` as 4 little-endian bytes.
fn write_u32(out: &mut Vec<u8>, value: u32) {
    let encoded: [u8; 4] = value.to_le_bytes();
    out.extend(encoded);
}
/// Appends `value` to `out` as 8 little-endian bytes.
fn write_u64(out: &mut Vec<u8>, value: u64) {
    let encoded: [u8; 8] = value.to_le_bytes();
    out.extend(encoded);
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Two entries written to one file can both be read back; an unknown key is absent.
    #[test]
    fn pp_cache_index_roundtrips_multiple_entries() {
        let dir = tempfile::tempdir().expect("tempdir");
        let path = dir.path().join("current.idx");
        let stats = write_pp_cache_index_atomic(
            &path,
            vec![
                ("rsync://example.test/a.mft".to_string(), b"aaa".to_vec()),
                ("rsync://example.test/b.mft".to_string(), b"bbbb".to_vec()),
            ],
        )
        .expect("write index");
        assert_eq!(stats.new_entries, 2);
        let (index, load) = load_pp_cache_mmap_index(&path).expect("load index");
        assert_eq!(load.entries, 2);
        assert_eq!(index.get("rsync://example.test/a.mft"), Some(&b"aaa"[..]));
        assert_eq!(index.get("rsync://example.test/b.mft"), Some(&b"bbbb"[..]));
        assert_eq!(index.get("rsync://example.test/missing.mft"), None);
    }

    /// A file shorter than the fixed header is rejected before the magic is inspected.
    #[test]
    fn pp_cache_index_rejects_truncated_file() {
        let dir = tempfile::tempdir().expect("tempdir");
        let path = dir.path().join("current.idx");
        fs::write(&path, b"bad").expect("write bad");
        let err = load_pp_cache_mmap_index(&path).expect_err("short index rejected");
        assert!(err.to_string().contains("file too small"));
    }

    /// A header-sized file whose first bytes are not the magic is rejected as such.
    #[test]
    fn pp_cache_index_rejects_bad_magic() {
        let dir = tempfile::tempdir().expect("tempdir");
        let path = dir.path().join("current.idx");
        fs::write(&path, vec![b'x'; HEADER_LEN]).expect("write bad");
        let err = load_pp_cache_mmap_index(&path).expect_err("bad index rejected");
        assert!(err.to_string().contains("invalid magic"));
    }

    /// When one write contains the same key twice, only the later value is stored.
    #[test]
    fn pp_cache_index_last_duplicate_wins() {
        let dir = tempfile::tempdir().expect("tempdir");
        let path = dir.path().join("current.idx");
        write_pp_cache_index_atomic(
            &path,
            vec![
                ("rsync://example.test/a.mft".to_string(), b"old".to_vec()),
                ("rsync://example.test/a.mft".to_string(), b"new".to_vec()),
            ],
        )
        .expect("write index");
        let (index, _) = load_pp_cache_mmap_index(&path).expect("load index");
        assert_eq!(index.entries(), 1);
        assert_eq!(index.get("rsync://example.test/a.mft"), Some(&b"new"[..]));
    }

    /// A segment written after `current.idx` shadows the base file's value for the
    /// same key, while the set's entry count still sums both files.
    #[test]
    fn pp_cache_index_set_prefers_newest_segment() {
        let dir = tempfile::tempdir().expect("tempdir");
        let current = dir.path().join("current.idx");
        write_pp_cache_index_atomic(
            &current,
            vec![("rsync://example.test/a.mft".to_string(), b"old".to_vec())],
        )
        .expect("write current index");
        write_pp_cache_index_segment(
            dir.path(),
            vec![("rsync://example.test/a.mft".to_string(), b"new".to_vec())],
        )
        .expect("write segment index");
        let (set, stats) = load_pp_cache_mmap_index_set(dir.path()).expect("load index set");
        assert_eq!(stats.entries, 2);
        assert_eq!(set.get("rsync://example.test/a.mft"), Some(&b"new"[..]));
    }
}