SeaLang/src/string_table.rs
Janis 010e6d2bec LANGUAGE CHANGES
prefix binds tighter than as-expr, because &a as *u32 should be valid syntax
i think it can actually do pointer arithmetic somewhat validly now?
2024-08-25 03:09:54 +02:00

151 lines
4.2 KiB
Rust

use std::{collections::BTreeMap, hash::Hasher};
/// A half-open byte range `start..end` identifying one entry stored in a
/// [`StringTable`].
///
/// Indexing a `StringTable` with an `Index` yields the bytes
/// `bytes[start..end]` of the table's backing buffer.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Index {
/// Offset of the first byte of the entry in the table's byte buffer.
pub start: u32,
/// Offset one past the last byte of the entry in the table's byte buffer.
pub end: u32,
}
/// Either an immediate integer value or a reference to bytes stored in a
/// [`StringTable`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ImmOrIndex {
/// An immediate 64-bit value stored inline.
U64(u64),
/// An immediate 32-bit value stored inline.
U32(u32),
/// A byte range inside a `StringTable`.
// NOTE(review): `StringTable::count_bits` reads these bytes as `u32` words
// of a big integer -- presumably this variant holds values too wide for
// `U64`; confirm against the lexer.
Index(Index),
}
impl Index {
pub fn new(start: u32, end: u32) -> Self {
Self { start, end }
}
}
/// Append-only byte storage that deduplicates the entries inserted into it.
///
/// Entries are addressed by [`Index`] ranges into a single shared buffer.
#[derive(Clone)]
pub struct StringTable {
// All entries, concatenated back to back in insertion order.
bytes: Vec<u8>,
// Lookup from a 64-bit key derived from the hash of an entry's bytes to the
// range where that entry lives in `bytes`.
indices: BTreeMap<u64, Index>,
}
impl core::ops::Index<Index> for StringTable {
type Output = [u8];
fn index(&self, idx: Index) -> &Self::Output {
&self.bytes[idx.start as usize..idx.end as usize]
}
}
impl StringTable {
    /// Creates an empty table.
    pub fn new() -> Self {
        Self {
            bytes: Vec::new(),
            indices: BTreeMap::new(),
        }
    }

    /// Wraps `idx` in an adapter implementing `Display` that resolves table
    /// references against `self`.
    pub fn display_idx(&self, idx: ImmOrIndex) -> ImmOrIndexDisplay<'_> {
        ImmOrIndexDisplay::new(self, idx)
    }

    /// Returns the number of significant bits of the value denoted by `idx`.
    ///
    /// For immediates this is the bit width minus the leading zeros (so `0`
    /// has zero bits). For a table reference the entry's bytes are read as
    /// native-endian `u32` words and handed to
    /// `crate::lexer::bigint::count_bits`; trailing bytes that do not fill a
    /// whole word are ignored.
    pub fn count_bits(&self, idx: ImmOrIndex) -> u32 {
        match idx {
            ImmOrIndex::U64(v) => u64::BITS - v.leading_zeros(),
            ImmOrIndex::U32(v) => u32::BITS - v.leading_zeros(),
            ImmOrIndex::Index(idx) => {
                // The byte buffer is only 1-aligned and entries start at
                // arbitrary offsets, so the bytes must not be reinterpreted
                // in place as `&[u32]`. Copy them into properly aligned
                // words instead.
                let words: Vec<u32> = self
                    .get_bytes(idx)
                    .chunks_exact(4)
                    .map(|w| u32::from_ne_bytes([w[0], w[1], w[2], w[3]]))
                    .collect();
                crate::lexer::bigint::count_bits(words.as_slice())
            }
        }
    }

    /// Returns the entry addressed by `idx` as text.
    ///
    /// # Panics
    ///
    /// Panics if the entry is not valid UTF-8 (the table accepts arbitrary
    /// bytes, so this has to be checked) or if `idx` is out of bounds.
    pub fn get_str(&self, idx: Index) -> &str {
        core::str::from_utf8(&self[idx])
            .expect("StringTable::get_str: entry is not valid UTF-8")
    }

    /// Returns the raw bytes of the entry addressed by `idx`.
    ///
    /// # Panics
    ///
    /// Panics if `idx` does not lie inside the table.
    pub fn get_bytes(&self, idx: Index) -> &[u8] {
        &self[idx]
    }

    /// Stores `bytes` in the table and returns where they live.
    ///
    /// Inserting content that is already present returns the index of the
    /// existing copy instead of storing it again.
    ///
    /// # Panics
    ///
    /// Panics if the table would grow beyond what `u32` offsets can address.
    pub fn insert<B: AsRef<[u8]>>(&mut self, bytes: B) -> Index {
        let bytes = bytes.as_ref();
        let hash = {
            let mut hasher = std::hash::DefaultHasher::new();
            hasher.write(bytes);
            hasher.finish()
        };

        // A matching hash alone does not prove the contents match, so the
        // stored bytes are compared as well. On a collision, probe the next
        // key; entries are never removed, so a vacant key ends the search.
        let mut key = hash;
        loop {
            match self.indices.get(&key).copied() {
                Some(existing) if self.get_bytes(existing) == bytes => return existing,
                Some(_) => key = key.wrapping_add(1),
                None => return self.insert_inner(key, bytes),
            }
        }
    }

    /// Appends `bytes` to the buffer and registers the new entry under the
    /// key `hash`, replacing whatever was registered there before.
    ///
    /// # Panics
    ///
    /// Panics if the end offset of the new entry does not fit in a `u32`.
    fn insert_inner(&mut self, hash: u64, bytes: &[u8]) -> Index {
        let start = self.bytes.len();
        let end = start + bytes.len();
        // Check before mutating so offsets are never silently truncated.
        assert!(
            end <= u32::MAX as usize,
            "string table would grow to {} bytes, beyond what a u32 offset can address",
            end
        );
        self.bytes.extend_from_slice(bytes);
        let index = Index::new(start as u32, end as u32);
        self.indices.insert(hash, index);
        index
    }
}
/// Formatting support for [`StringTable`] and [`ImmOrIndex`].
mod display {
    use core::fmt::{self, Debug, Display, Formatter};

    use super::*;

    /// Debug view of a single table entry: its raw bytes in hex and, when the
    /// bytes are valid UTF-8, the text they spell.
    struct EntryDebug<'a> {
        bytes: &'a [u8],
        text: Option<&'a str>,
    }

    impl Debug for EntryDebug<'_> {
        fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
            write!(f, "{{ bytes: {:x?}", self.bytes)?;
            if let Some(text) = self.text {
                write!(f, ", str: {}", text)?;
            }
            write!(f, " }}")
        }
    }

    /// Lists every registered entry, in the iteration order of the lookup map.
    impl Debug for StringTable {
        fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
            let mut list = f.debug_list();
            for idx in self.indices.values() {
                let bytes = self.get_bytes(*idx);
                let text = core::str::from_utf8(bytes).ok();
                list.entry(&EntryDebug { bytes, text });
            }
            list.finish()
        }
    }

    /// Pairs an [`ImmOrIndex`] with the table needed to resolve it so the
    /// value can be printed with `Display`.
    pub struct ImmOrIndexDisplay<'table> {
        table: &'table StringTable,
        idx: ImmOrIndex,
    }

    impl<'table> ImmOrIndexDisplay<'table> {
        /// Creates a display adapter for `idx`, resolving references in `table`.
        pub fn new(table: &'table StringTable, idx: ImmOrIndex) -> Self {
            ImmOrIndexDisplay { table, idx }
        }
    }

    /// Immediates print as zero-padded hexadecimal (16 digits for `U64`,
    /// 8 for `U32`); table references print the entry's bytes in `Debug` form.
    impl Display for ImmOrIndexDisplay<'_> {
        fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
            let Self { table, idx } = self;
            match *idx {
                ImmOrIndex::U64(i) => write!(f, "0x{i:0>16x}"),
                ImmOrIndex::U32(i) => write!(f, "0x{i:0>8x}"),
                ImmOrIndex::Index(entry) => {
                    let bytes = table.get_bytes(entry);
                    write!(f, "{bytes:?}")
                }
            }
        }
    }
}
pub use display::ImmOrIndexDisplay;