469 lines
15 KiB
Rust
469 lines
15 KiB
Rust
use std::collections::{BTreeMap, HashMap};
|
|
use std::fs::{self, File};
|
|
use std::io::Write;
|
|
use std::path::{Path, PathBuf};
|
|
use std::sync::Arc;
|
|
use std::time::{Instant, SystemTime, UNIX_EPOCH};
|
|
|
|
use memmap2::Mmap;
|
|
use serde::Serialize;
|
|
|
|
use super::{StorageError, StorageResult};
|
|
|
|
/// Magic bytes at offset 0 of every index file: ASCII `RPKIPPIDX` padded with three NUL
/// bytes to 12 bytes. `PpCacheMmapIndex::open` rejects files that do not start with it.
const MAGIC: &[u8; 12] = b"RPKIPPIDX\0\0\0";
|
|
/// On-disk format version, written as a little-endian `u32` directly after the magic
/// (byte offset 12). Files carrying any other value are rejected on open.
const VERSION: u32 = 1;
|
|
/// Fixed header size in bytes. The writer zero-pads the header to this length and starts
/// the entry table immediately after it; files shorter than this are rejected on open.
const HEADER_LEN: usize = 64;
|
|
/// Size in bytes of one entry-table record: key offset (`u64`), key length (`u32`),
/// value offset (`u64`) and value length (`u32`), all little-endian.
const ENTRY_LEN: usize = 24;
|
|
|
|
#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize)]
|
|
pub struct PpCacheIndexLoadStats {
|
|
pub source: String,
|
|
pub entries: usize,
|
|
pub bytes: usize,
|
|
pub file_bytes: u64,
|
|
pub load_ms: u64,
|
|
}
|
|
|
|
#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize)]
|
|
pub struct PpCacheIndexRefreshStats {
|
|
pub state: String,
|
|
pub old_entries: usize,
|
|
pub dirty_entries: usize,
|
|
pub new_entries: usize,
|
|
pub file_bytes: u64,
|
|
pub write_ms: u64,
|
|
}
|
|
|
|
/// Location of one cached value inside the blob region of a mapped index file.
#[derive(Debug, Clone, PartialEq, Eq)]
struct PpCacheIndexEntry {
    /// Byte offset of the value, relative to the start of the blob region.
    value_offset: usize,
    /// Length of the value in bytes.
    value_len: usize,
}
|
|
|
|
#[derive(Debug)]
|
|
pub struct PpCacheMmapIndex {
|
|
mmap: Arc<Mmap>,
|
|
entries: HashMap<String, PpCacheIndexEntry>,
|
|
bytes: usize,
|
|
file_bytes: u64,
|
|
}
|
|
|
|
impl PpCacheMmapIndex {
|
|
pub fn open(path: &Path) -> StorageResult<Self> {
|
|
let file = File::open(path).map_err(|e| StorageError::RocksDb(e.to_string()))?;
|
|
let file_len = file
|
|
.metadata()
|
|
.map_err(|e| StorageError::RocksDb(e.to_string()))?
|
|
.len() as usize;
|
|
if file_len < HEADER_LEN {
|
|
return Err(StorageError::InvalidData {
|
|
entity: "publication_point_cache_mmap_index",
|
|
detail: "file too small".to_string(),
|
|
});
|
|
}
|
|
// SAFETY: The mmap is read-only, held by Arc for all returned slices, and the file
|
|
// format is bounds-checked before any slice is exposed.
|
|
let mmap = unsafe { Mmap::map(&file) }.map_err(|e| StorageError::RocksDb(e.to_string()))?;
|
|
if &mmap[0..MAGIC.len()] != MAGIC {
|
|
return Err(StorageError::InvalidData {
|
|
entity: "publication_point_cache_mmap_index",
|
|
detail: "invalid magic".to_string(),
|
|
});
|
|
}
|
|
let version = read_u32(&mmap, 12)?;
|
|
if version != VERSION {
|
|
return Err(StorageError::InvalidData {
|
|
entity: "publication_point_cache_mmap_index",
|
|
detail: format!("unsupported version {version}"),
|
|
});
|
|
}
|
|
let entry_count = read_u64(&mmap, 16)? as usize;
|
|
let entry_table_offset = read_u64(&mmap, 24)? as usize;
|
|
let entry_table_len = read_u64(&mmap, 32)? as usize;
|
|
let blob_offset = read_u64(&mmap, 40)? as usize;
|
|
let blob_len = read_u64(&mmap, 48)? as usize;
|
|
checked_range(file_len, entry_table_offset, entry_table_len)?;
|
|
checked_range(file_len, blob_offset, blob_len)?;
|
|
if entry_table_len != entry_count.saturating_mul(ENTRY_LEN) {
|
|
return Err(StorageError::InvalidData {
|
|
entity: "publication_point_cache_mmap_index",
|
|
detail: "entry table length mismatch".to_string(),
|
|
});
|
|
}
|
|
|
|
let mut entries = HashMap::with_capacity(entry_count);
|
|
let mut bytes = 0usize;
|
|
for offset in (entry_table_offset..entry_table_offset + entry_table_len).step_by(ENTRY_LEN)
|
|
{
|
|
let key_offset = read_u64(&mmap, offset)? as usize;
|
|
let key_len = read_u32(&mmap, offset + 8)? as usize;
|
|
let value_offset = read_u64(&mmap, offset + 12)? as usize;
|
|
let value_len = read_u32(&mmap, offset + 20)? as usize;
|
|
checked_range(blob_len, key_offset, key_len)?;
|
|
checked_range(blob_len, value_offset, value_len)?;
|
|
let key_start = blob_offset + key_offset;
|
|
let key_end = key_start + key_len;
|
|
let key = std::str::from_utf8(&mmap[key_start..key_end])
|
|
.map_err(|e| StorageError::InvalidData {
|
|
entity: "publication_point_cache_mmap_index.key",
|
|
detail: e.to_string(),
|
|
})?
|
|
.to_string();
|
|
bytes = bytes.saturating_add(value_len);
|
|
entries.insert(
|
|
key,
|
|
PpCacheIndexEntry {
|
|
value_offset,
|
|
value_len,
|
|
},
|
|
);
|
|
}
|
|
Ok(Self {
|
|
mmap: Arc::new(mmap),
|
|
entries,
|
|
bytes,
|
|
file_bytes: file_len as u64,
|
|
})
|
|
}
|
|
|
|
pub fn get(&self, manifest_rsync_uri: &str) -> Option<&[u8]> {
|
|
let entry = self.entries.get(manifest_rsync_uri)?;
|
|
let blob_offset = read_u64(self.mmap.as_ref(), 40).ok()? as usize;
|
|
let start = blob_offset + entry.value_offset;
|
|
let end = start + entry.value_len;
|
|
Some(&self.mmap[start..end])
|
|
}
|
|
|
|
pub fn entries(&self) -> usize {
|
|
self.entries.len()
|
|
}
|
|
|
|
pub fn bytes(&self) -> usize {
|
|
self.bytes
|
|
}
|
|
|
|
pub fn file_bytes(&self) -> u64 {
|
|
self.file_bytes
|
|
}
|
|
}
|
|
|
|
#[derive(Debug)]
|
|
pub struct PpCacheMmapIndexSet {
|
|
indexes: Vec<PpCacheMmapIndex>,
|
|
entries: usize,
|
|
bytes: usize,
|
|
file_bytes: u64,
|
|
}
|
|
|
|
impl PpCacheMmapIndexSet {
|
|
pub fn get(&self, manifest_rsync_uri: &str) -> Option<&[u8]> {
|
|
self.indexes
|
|
.iter()
|
|
.find_map(|index| index.get(manifest_rsync_uri))
|
|
}
|
|
|
|
pub fn entries(&self) -> usize {
|
|
self.entries
|
|
}
|
|
|
|
pub fn bytes(&self) -> usize {
|
|
self.bytes
|
|
}
|
|
|
|
pub fn file_bytes(&self) -> u64 {
|
|
self.file_bytes
|
|
}
|
|
}
|
|
|
|
/// Derives the default index directory for a database path: a sibling of `db_path`
/// named `<file name>.pp-cache-index`.
///
/// When `db_path` has no final component, or that component is not valid UTF-8, the
/// name `work-db` is used in its place.
pub fn default_pp_cache_index_dir(db_path: &Path) -> PathBuf {
    let base = match db_path.file_name().and_then(|name| name.to_str()) {
        Some(name) => name,
        None => "work-db",
    };
    let mut dir_name = String::with_capacity(base.len() + ".pp-cache-index".len());
    dir_name.push_str(base);
    dir_name.push_str(".pp-cache-index");
    db_path.with_file_name(dir_name)
}
|
|
|
|
pub fn load_pp_cache_mmap_index(
|
|
path: &Path,
|
|
) -> StorageResult<(PpCacheMmapIndex, PpCacheIndexLoadStats)> {
|
|
let started = Instant::now();
|
|
let index = PpCacheMmapIndex::open(path)?;
|
|
let file_bytes = fs::metadata(path)
|
|
.map_err(|e| StorageError::RocksDb(e.to_string()))?
|
|
.len();
|
|
let stats = PpCacheIndexLoadStats {
|
|
source: "mmap".to_string(),
|
|
entries: index.entries(),
|
|
bytes: index.bytes(),
|
|
file_bytes,
|
|
load_ms: started.elapsed().as_millis() as u64,
|
|
};
|
|
Ok((index, stats))
|
|
}
|
|
|
|
pub fn load_pp_cache_mmap_index_set(
|
|
dir: &Path,
|
|
) -> StorageResult<(PpCacheMmapIndexSet, PpCacheIndexLoadStats)> {
|
|
let started = Instant::now();
|
|
let mut paths = Vec::new();
|
|
if dir.exists() {
|
|
let mut segments = fs::read_dir(dir)
|
|
.map_err(|e| StorageError::RocksDb(e.to_string()))?
|
|
.filter_map(Result::ok)
|
|
.map(|entry| entry.path())
|
|
.filter(|path| {
|
|
path.file_name()
|
|
.and_then(|name| name.to_str())
|
|
.is_some_and(|name| name.starts_with("segment-") && name.ends_with(".idx"))
|
|
})
|
|
.collect::<Vec<_>>();
|
|
segments.sort();
|
|
segments.reverse();
|
|
paths.extend(segments);
|
|
let current = dir.join("current.idx");
|
|
if current.exists() {
|
|
paths.push(current);
|
|
}
|
|
}
|
|
if paths.is_empty() {
|
|
return Err(StorageError::InvalidData {
|
|
entity: "publication_point_cache_mmap_index",
|
|
detail: "index file missing".to_string(),
|
|
});
|
|
}
|
|
|
|
let mut indexes = Vec::new();
|
|
let mut entries = 0usize;
|
|
let mut bytes = 0usize;
|
|
let mut file_bytes = 0u64;
|
|
for path in paths {
|
|
let (index, _) = load_pp_cache_mmap_index(&path)?;
|
|
entries = entries.saturating_add(index.entries());
|
|
bytes = bytes.saturating_add(index.bytes());
|
|
file_bytes = file_bytes.saturating_add(index.file_bytes());
|
|
indexes.push(index);
|
|
}
|
|
let set = PpCacheMmapIndexSet {
|
|
indexes,
|
|
entries,
|
|
bytes,
|
|
file_bytes,
|
|
};
|
|
let stats = PpCacheIndexLoadStats {
|
|
source: "mmap".to_string(),
|
|
entries: set.entries(),
|
|
bytes: set.bytes(),
|
|
file_bytes: set.file_bytes(),
|
|
load_ms: started.elapsed().as_millis() as u64,
|
|
};
|
|
Ok((set, stats))
|
|
}
|
|
|
|
pub fn write_pp_cache_index_segment<I>(
|
|
dir: &Path,
|
|
entries: I,
|
|
) -> StorageResult<PpCacheIndexRefreshStats>
|
|
where
|
|
I: IntoIterator<Item = (String, Vec<u8>)>,
|
|
{
|
|
fs::create_dir_all(dir).map_err(|e| StorageError::RocksDb(e.to_string()))?;
|
|
let current = dir.join("current.idx");
|
|
if !current.exists() {
|
|
return write_pp_cache_index_atomic(¤t, entries);
|
|
}
|
|
let now = SystemTime::now()
|
|
.duration_since(UNIX_EPOCH)
|
|
.map_err(|e| StorageError::RocksDb(e.to_string()))?
|
|
.as_nanos();
|
|
let segment = dir.join(format!("segment-{now:020}-{}.idx", std::process::id()));
|
|
let mut stats = write_pp_cache_index_atomic(&segment, entries)?;
|
|
stats.state = "segment_written".to_string();
|
|
Ok(stats)
|
|
}
|
|
|
|
pub fn write_pp_cache_index_atomic<I>(
|
|
path: &Path,
|
|
entries: I,
|
|
) -> StorageResult<PpCacheIndexRefreshStats>
|
|
where
|
|
I: IntoIterator<Item = (String, Vec<u8>)>,
|
|
{
|
|
let started = Instant::now();
|
|
let parent = path.parent().ok_or_else(|| StorageError::InvalidData {
|
|
entity: "publication_point_cache_mmap_index.path",
|
|
detail: "missing parent".to_string(),
|
|
})?;
|
|
fs::create_dir_all(parent).map_err(|e| StorageError::RocksDb(e.to_string()))?;
|
|
|
|
let mut ordered = BTreeMap::<String, Vec<u8>>::new();
|
|
for (key, value) in entries {
|
|
ordered.insert(key, value);
|
|
}
|
|
|
|
let tmp_path = parent.join("next.tmp");
|
|
write_pp_cache_index_file(
|
|
&tmp_path,
|
|
ordered.iter().map(|(k, v)| (k.as_str(), v.as_slice())),
|
|
)?;
|
|
let file_bytes = fs::metadata(&tmp_path)
|
|
.map_err(|e| StorageError::RocksDb(e.to_string()))?
|
|
.len();
|
|
fs::rename(&tmp_path, path).map_err(|e| StorageError::RocksDb(e.to_string()))?;
|
|
Ok(PpCacheIndexRefreshStats {
|
|
state: "written".to_string(),
|
|
old_entries: 0,
|
|
dirty_entries: 0,
|
|
new_entries: ordered.len(),
|
|
file_bytes,
|
|
write_ms: started.elapsed().as_millis() as u64,
|
|
})
|
|
}
|
|
|
|
fn write_pp_cache_index_file<'a, I>(path: &Path, entries: I) -> StorageResult<()>
|
|
where
|
|
I: IntoIterator<Item = (&'a str, &'a [u8])>,
|
|
{
|
|
let entries = entries.into_iter().collect::<Vec<_>>();
|
|
let entry_table_offset = HEADER_LEN;
|
|
let entry_table_len = entries.len() * ENTRY_LEN;
|
|
let blob_offset = entry_table_offset + entry_table_len;
|
|
let mut table = Vec::with_capacity(entry_table_len);
|
|
let mut blob = Vec::new();
|
|
for (key, value) in entries.iter() {
|
|
let key_offset = blob.len();
|
|
blob.extend_from_slice(key.as_bytes());
|
|
let value_offset = blob.len();
|
|
blob.extend_from_slice(value);
|
|
write_u64(&mut table, key_offset as u64);
|
|
write_u32(&mut table, key.len() as u32);
|
|
write_u64(&mut table, value_offset as u64);
|
|
write_u32(&mut table, value.len() as u32);
|
|
}
|
|
|
|
let mut header = Vec::with_capacity(HEADER_LEN);
|
|
header.extend_from_slice(MAGIC);
|
|
write_u32(&mut header, VERSION);
|
|
write_u64(&mut header, entries.len() as u64);
|
|
write_u64(&mut header, entry_table_offset as u64);
|
|
write_u64(&mut header, entry_table_len as u64);
|
|
write_u64(&mut header, blob_offset as u64);
|
|
write_u64(&mut header, blob.len() as u64);
|
|
header.resize(HEADER_LEN, 0);
|
|
|
|
let mut file = File::create(path).map_err(|e| StorageError::RocksDb(e.to_string()))?;
|
|
file.write_all(&header)
|
|
.map_err(|e| StorageError::RocksDb(e.to_string()))?;
|
|
file.write_all(&table)
|
|
.map_err(|e| StorageError::RocksDb(e.to_string()))?;
|
|
file.write_all(&blob)
|
|
.map_err(|e| StorageError::RocksDb(e.to_string()))?;
|
|
file.sync_all()
|
|
.map_err(|e| StorageError::RocksDb(e.to_string()))?;
|
|
Ok(())
|
|
}
|
|
|
|
fn checked_range(total: usize, offset: usize, len: usize) -> StorageResult<()> {
|
|
if offset.checked_add(len).is_none_or(|end| end > total) {
|
|
return Err(StorageError::InvalidData {
|
|
entity: "publication_point_cache_mmap_index.range",
|
|
detail: "out of bounds".to_string(),
|
|
});
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
/// Reads a little-endian `u32` from `bytes` at `offset`.
///
/// Fails with the `checked_range` error when fewer than four bytes are available there.
fn read_u32(bytes: &[u8], offset: usize) -> StorageResult<u32> {
    checked_range(bytes.len(), offset, 4)?;
    let mut raw = [0u8; 4];
    raw.copy_from_slice(&bytes[offset..offset + 4]);
    Ok(u32::from_le_bytes(raw))
}
|
|
|
|
/// Reads a little-endian `u64` from `bytes` at `offset`.
///
/// Fails with the `checked_range` error when fewer than eight bytes are available there.
fn read_u64(bytes: &[u8], offset: usize) -> StorageResult<u64> {
    checked_range(bytes.len(), offset, 8)?;
    let mut raw = [0u8; 8];
    raw.copy_from_slice(&bytes[offset..offset + 8]);
    Ok(u64::from_le_bytes(raw))
}
|
|
|
|
/// Appends `value` to `out` as four little-endian bytes.
fn write_u32(out: &mut Vec<u8>, value: u32) {
    out.extend(value.to_le_bytes());
}
|
|
|
|
/// Appends `value` to `out` as eight little-endian bytes.
fn write_u64(out: &mut Vec<u8>, value: u64) {
    out.extend(value.to_le_bytes());
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Two distinct keys written to one file can both be read back.
    #[test]
    fn pp_cache_index_roundtrips_multiple_entries() {
        let dir = tempfile::tempdir().expect("tempdir");
        let path = dir.path().join("current.idx");
        let stats = write_pp_cache_index_atomic(
            &path,
            vec![
                ("rsync://example.test/a.mft".to_string(), b"aaa".to_vec()),
                ("rsync://example.test/b.mft".to_string(), b"bbbb".to_vec()),
            ],
        )
        .expect("write index");
        assert_eq!(stats.new_entries, 2);

        let (index, load) = load_pp_cache_mmap_index(&path).expect("load index");
        assert_eq!(load.entries, 2);
        assert_eq!(index.get("rsync://example.test/a.mft"), Some(&b"aaa"[..]));
        assert_eq!(index.get("rsync://example.test/b.mft"), Some(&b"bbbb"[..]));
        assert_eq!(index.get("rsync://example.test/missing.mft"), None);
    }

    /// A file shorter than the fixed header is rejected before the magic is inspected.
    #[test]
    fn pp_cache_index_rejects_short_file() {
        let dir = tempfile::tempdir().expect("tempdir");
        let path = dir.path().join("current.idx");
        fs::write(&path, b"bad").expect("write bad");
        let err = load_pp_cache_mmap_index(&path).expect_err("short index rejected");
        assert!(err.to_string().contains("file too small"));
    }

    /// A header-sized file whose first bytes are not the magic is rejected.
    #[test]
    fn pp_cache_index_rejects_bad_magic() {
        let dir = tempfile::tempdir().expect("tempdir");
        let path = dir.path().join("current.idx");
        fs::write(&path, vec![0u8; HEADER_LEN]).expect("write bad");
        let err = load_pp_cache_mmap_index(&path).expect_err("bad index rejected");
        assert!(err.to_string().contains("invalid magic"));
    }

    /// When the same key is supplied twice, the later value is the one stored.
    #[test]
    fn pp_cache_index_last_duplicate_wins() {
        let dir = tempfile::tempdir().expect("tempdir");
        let path = dir.path().join("current.idx");
        write_pp_cache_index_atomic(
            &path,
            vec![
                ("rsync://example.test/a.mft".to_string(), b"old".to_vec()),
                ("rsync://example.test/a.mft".to_string(), b"new".to_vec()),
            ],
        )
        .expect("write index");

        let (index, _) = load_pp_cache_mmap_index(&path).expect("load index");
        assert_eq!(index.entries(), 1);
        assert_eq!(index.get("rsync://example.test/a.mft"), Some(&b"new"[..]));
    }

    /// A segment written after `current.idx` shadows the value in `current.idx`.
    #[test]
    fn pp_cache_index_set_prefers_newest_segment() {
        let dir = tempfile::tempdir().expect("tempdir");
        let current = dir.path().join("current.idx");
        write_pp_cache_index_atomic(
            &current,
            vec![("rsync://example.test/a.mft".to_string(), b"old".to_vec())],
        )
        .expect("write current index");
        write_pp_cache_index_segment(
            dir.path(),
            vec![("rsync://example.test/a.mft".to_string(), b"new".to_vec())],
        )
        .expect("write segment index");

        let (set, stats) = load_pp_cache_mmap_index_set(dir.path()).expect("load index set");
        // Entry counts are summed per file, so the shadowed key is counted twice.
        assert_eq!(stats.entries, 2);
        assert_eq!(set.get("rsync://example.test/a.mft"), Some(&b"new"[..]));
    }
}
|