Prefix operators bind tighter than `as` expressions, because `&a as *u32` should be valid syntax. I think it can actually do pointer arithmetic somewhat validly now?
151 lines
4.2 KiB
Rust
151 lines
4.2 KiB
Rust
use std::{collections::BTreeMap, hash::Hasher};
|
|
|
|
/// Half-open byte range `start..end` locating one entry inside a [`StringTable`]'s buffer.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub struct Index {
    /// Offset of the entry's first byte.
    pub start: u32,
    /// Offset one past the entry's last byte.
    pub end: u32,
}
|
|
|
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
|
pub enum ImmOrIndex {
|
|
U64(u64),
|
|
U32(u32),
|
|
Index(Index),
|
|
}
|
|
|
|
impl Index {
|
|
pub fn new(start: u32, end: u32) -> Self {
|
|
Self { start, end }
|
|
}
|
|
}
|
|
|
|
#[derive(Clone)]
|
|
pub struct StringTable {
|
|
bytes: Vec<u8>,
|
|
indices: BTreeMap<u64, Index>,
|
|
}
|
|
|
|
impl core::ops::Index<Index> for StringTable {
|
|
type Output = [u8];
|
|
|
|
fn index(&self, idx: Index) -> &Self::Output {
|
|
&self.bytes[idx.start as usize..idx.end as usize]
|
|
}
|
|
}
|
|
|
|
impl StringTable {
|
|
pub fn new() -> Self {
|
|
Self {
|
|
bytes: Vec::new(),
|
|
indices: BTreeMap::new(),
|
|
}
|
|
}
|
|
|
|
pub fn display_idx(&self, idx: ImmOrIndex) -> ImmOrIndexDisplay {
|
|
ImmOrIndexDisplay::new(self, idx)
|
|
}
|
|
|
|
pub fn count_bits(&self, idx: ImmOrIndex) -> u32 {
|
|
match idx {
|
|
ImmOrIndex::U64(v) => u64::BITS - v.leading_zeros(),
|
|
ImmOrIndex::U32(v) => u32::BITS - v.leading_zeros(),
|
|
ImmOrIndex::Index(idx) => {
|
|
let bytes = self.get_bytes(idx);
|
|
let ints = unsafe {
|
|
core::slice::from_raw_parts(bytes.as_ptr().cast::<u32>(), bytes.len() / 4)
|
|
};
|
|
crate::lexer::bigint::count_bits(ints)
|
|
}
|
|
}
|
|
}
|
|
|
|
pub fn get_str(&self, idx: Index) -> &str {
|
|
unsafe { core::str::from_utf8_unchecked(&self[idx]) }
|
|
}
|
|
|
|
pub fn get_bytes(&self, idx: Index) -> &[u8] {
|
|
&self[idx]
|
|
}
|
|
|
|
pub fn insert<B: AsRef<[u8]>>(&mut self, bytes: B) -> Index {
|
|
let bytes = bytes.as_ref();
|
|
let hash = {
|
|
let mut hasher = std::hash::DefaultHasher::new();
|
|
hasher.write(bytes);
|
|
hasher.finish()
|
|
};
|
|
|
|
if let Some(idx) = self.indices.get(&hash).cloned() {
|
|
idx
|
|
} else {
|
|
self.insert_inner(hash, bytes)
|
|
}
|
|
}
|
|
|
|
fn insert_inner(&mut self, hash: u64, bytes: &[u8]) -> Index {
|
|
let start = self.bytes.len();
|
|
self.bytes.extend(bytes);
|
|
let end = self.bytes.len();
|
|
let index = Index::new(start as u32, end as u32);
|
|
self.indices.insert(hash, index);
|
|
index
|
|
}
|
|
}
|
|
|
|
mod display {
|
|
use core::{fmt::Debug, str};
|
|
|
|
use super::*;
|
|
|
|
impl Debug for StringTable {
|
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
f.debug_list()
|
|
.entries(self.indices.iter().map(|(_, idx)| {
|
|
struct Test<'a> {
|
|
bytes: &'a [u8],
|
|
str: Option<&'a str>,
|
|
}
|
|
impl<'a> Debug for Test<'a> {
|
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
write!(f, "{{ bytes: {:x?}", self.bytes)?;
|
|
if let Some(str) = self.str {
|
|
write!(f, ", str: {}", str)?;
|
|
}
|
|
write!(f, " }}")
|
|
}
|
|
}
|
|
let bytes = self.get_bytes(*idx);
|
|
let str = str::from_utf8(bytes).ok();
|
|
Test { bytes, str }
|
|
}))
|
|
.finish()
|
|
}
|
|
}
|
|
|
|
pub struct ImmOrIndexDisplay<'table> {
|
|
table: &'table StringTable,
|
|
idx: ImmOrIndex,
|
|
}
|
|
|
|
impl<'table> ImmOrIndexDisplay<'table> {
|
|
pub fn new(table: &'table StringTable, idx: ImmOrIndex) -> Self {
|
|
Self { table, idx }
|
|
}
|
|
}
|
|
|
|
impl<'table> core::fmt::Display for ImmOrIndexDisplay<'table> {
|
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
match self.idx {
|
|
ImmOrIndex::U64(i) => write!(f, "0x{i:0>16x}"),
|
|
ImmOrIndex::U32(i) => write!(f, "0x{i:0>8x}"),
|
|
ImmOrIndex::Index(idx) => {
|
|
let bytes = self.table.get_bytes(idx);
|
|
write!(f, "{bytes:?}")
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
pub use display::ImmOrIndexDisplay;
|