InternPool bytes

Make intern::Index a bit safer/clearer to use.
Add bytes to InternPool.
Add more helper functions for interning numbers/bytes/strings.
This commit is contained in:
Janis 2024-12-22 23:52:39 +01:00
parent a3bee90ac3
commit 632729af52

View file

@ -73,6 +73,9 @@ pub enum Key<'a> {
String { String {
str: &'a str, str: &'a str,
}, },
Bytes {
bytes: &'a [u8],
},
SIntSmall { SIntSmall {
bits: i32, bits: i32,
}, },
@ -136,6 +139,7 @@ impl Hash for Key<'_> {
core::mem::discriminant(self).hash(state); core::mem::discriminant(self).hash(state);
match self { match self {
Key::String { str } => str.hash(state), Key::String { str } => str.hash(state),
Key::Bytes { bytes } => bytes.hash(state),
Key::SIntSmall { bits } => bits.hash(state), Key::SIntSmall { bits } => bits.hash(state),
Key::UIntSmall { bits } => bits.hash(state), Key::UIntSmall { bits } => bits.hash(state),
Key::SInt64 { bits } => bits.hash(state), Key::SInt64 { bits } => bits.hash(state),
@ -270,18 +274,24 @@ impl Item {
#[repr(transparent)] #[repr(transparent)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Index(pub u32); pub struct Index(u32);
impl Display for Index { impl Display for Index {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "#{}", self.0) write!(f, "#{}", self.index())
} }
} }
impl Index { impl Index {
/// Creates an `Index` wrapping the raw `u32` value `inner`.
///
/// No validation is performed on `inner` here.
pub fn new(inner: u32) -> Self {
Self(inner)
}
pub fn into_u32(self) -> u32 { pub fn into_u32(self) -> u32 {
unsafe { core::mem::transmute(self) } unsafe { core::mem::transmute(self) }
} }
pub fn from_u32(inner: u32) -> Self {
unsafe { core::mem::transmute(inner) }
}
pub fn as_u32(&self) -> &u32 { pub fn as_u32(&self) -> &u32 {
unsafe { core::mem::transmute(self) } unsafe { core::mem::transmute(self) }
} }
@ -370,6 +380,8 @@ static_keys!(
U128 => Key::UIntType { bit_width: 128 }, U128 => Key::UIntType { bit_width: 128 },
TRUE => Key::TrueValue, TRUE => Key::TrueValue,
FALSE => Key::FalseValue, FALSE => Key::FalseValue,
EMPTY_STRING => Key::String { str: "<this string is empty>" },
EMPTY_BYTES => Key::Bytes { bytes: &[] },
); );
impl InternPool { impl InternPool {
@ -826,6 +838,13 @@ impl InternPool {
let words_idx = self.extend_words([start, len]); let words_idx = self.extend_words([start, len]);
self.create_item(Tag::String, words_idx) self.create_item(Tag::String, words_idx)
} }
// Stores the byte slice through `extend_strings` and records the returned
// `start` together with the length as a two-word `[start, len]` entry.
// NOTE(review): the item is created with `Tag::String`, the same tag the
// `Key::String` arm uses. Confirm that `get_key` can still distinguish bytes
// from strings (a dedicated bytes tag may have been intended), because
// `get_bytes` asserts that the key read back is `Key::Bytes`.
Key::Bytes { bytes } => {
// NOTE(review): `as u32` silently truncates if `bytes.len()` exceeds `u32::MAX`.
let len = bytes.len() as u32;
let start = self.extend_strings(bytes);
let words_idx = self.extend_words([start, len]);
self.create_item(Tag::String, words_idx)
}
Key::SIntSmall { bits } => { Key::SIntSmall { bits } => {
self.create_item(Tag::SIntSmall, bits as u32) self.create_item(Tag::SIntSmall, bits as u32)
} }
@ -872,7 +891,7 @@ impl InternPool {
} }
Key::PointerType { pointee, flags } => { Key::PointerType { pointee, flags } => {
let flags = flags.pack(); let flags = flags.pack();
let i = self.extend_words([pointee.0, flags as u32]); let i = self.extend_words([pointee.index(), flags as u32]);
self.create_item(Tag::PointerType, i) self.create_item(Tag::PointerType, i)
} }
Key::ArrayType { Key::ArrayType {
@ -881,7 +900,8 @@ impl InternPool {
length, length,
} => { } => {
let flags = flags.pack(); let flags = flags.pack();
let i = self.extend_words([pointee.0, flags as u32, length]); let i =
self.extend_words([pointee.index(), flags as u32, length]);
self.create_item(Tag::ArrayType, i) self.create_item(Tag::ArrayType, i)
} }
Key::StructType { Key::StructType {
@ -922,7 +942,8 @@ impl InternPool {
let start = self.push_word(info.pack()); let start = self.push_word(info.pack());
self.extend_words([return_type.into_u32()]); self.extend_words([return_type.into_u32()]);
_ = self.extend_words(parameters.into_iter().map(|i| i.0)); _ = self
.extend_words(parameters.into_iter().map(|i| i.index()));
self.create_item(Tag::FunctionType, start) self.create_item(Tag::FunctionType, start)
} }
@ -951,7 +972,7 @@ impl InternPool {
let len = self.len(); let len = self.len();
self.tags.push(tag); self.tags.push(tag);
self.indices.push(index); self.indices.push(index);
Index(len) Index::new(len)
} }
pub fn get_key(&self, index: Index) -> Key { pub fn get_key(&self, index: Index) -> Key {
@ -1028,14 +1049,14 @@ impl InternPool {
} }
} }
Tag::PointerType => { Tag::PointerType => {
let pointee = Index(self.words[item.idx()]); let pointee = Index::new(self.words[item.idx()]);
let flags = let flags =
PointerFlags::unpack(self.words[item.idx() + 1] as u8); PointerFlags::unpack(self.words[item.idx() + 1] as u8);
Key::PointerType { pointee, flags } Key::PointerType { pointee, flags }
} }
Tag::ArrayType => { Tag::ArrayType => {
let pointee = Index(self.words[item.idx()]); let pointee = Index::new(self.words[item.idx()]);
let flags = let flags =
PointerFlags::unpack(self.words[item.idx() + 1] as u8); PointerFlags::unpack(self.words[item.idx() + 1] as u8);
let length = self.words[item.idx() + 2]; let length = self.words[item.idx() + 2];
@ -1047,7 +1068,7 @@ impl InternPool {
} }
} }
Tag::StructType => { Tag::StructType => {
let name = Index(self.words[item.idx()]); let name = Index::new(self.words[item.idx()]);
let decl = super::Index::new(self.words[item.idx() + 1]); let decl = super::Index::new(self.words[item.idx() + 1]);
let flags = StructFlags::unpack(self.words[item.idx() + 2]); let flags = StructFlags::unpack(self.words[item.idx() + 2]);
let fields = if flags.num_fields != 0 { let fields = if flags.num_fields != 0 {
@ -1059,7 +1080,7 @@ impl InternPool {
.iter() .iter()
.cloned() .cloned()
.array_chunks::<2>() .array_chunks::<2>()
.map(|[n, t]| (Index(n), Index(t))) .map(|[n, t]| (Index::new(n), Index::new(t)))
.collect::<Vec<_>>() .collect::<Vec<_>>()
} else { } else {
vec![] vec![]
@ -1081,7 +1102,7 @@ impl InternPool {
let end = start + len as usize; let end = start + len as usize;
let params = self.words[start..end] let params = self.words[start..end]
.iter() .iter()
.map(|&i| Index(i)) .map(|&i| Index::new(i))
.collect::<Vec<_>>(); .collect::<Vec<_>>();
( (
self.get_assume_present(&Key::SimpleType { self.get_assume_present(&Key::SimpleType {
@ -1090,12 +1111,12 @@ impl InternPool {
params, params,
) )
} else { } else {
let return_type = Index(self.words[item.idx() + 1]); let return_type = Index::new(self.words[item.idx() + 1]);
let start = item.idx() + 2; let start = item.idx() + 2;
let end = start + len as usize; let end = start + len as usize;
let params = self.words[start..end] let params = self.words[start..end]
.iter() .iter()
.map(|&i| Index(i)) .map(|&i| Index::new(i))
.collect::<Vec<_>>(); .collect::<Vec<_>>();
(return_type, params) (return_type, params)
}; };
@ -1131,6 +1152,31 @@ impl InternPool {
self.get_or_insert(key) self.get_or_insert(key)
} }
/// Looks up (via `get_or_insert`) the key for the unsigned integer `value`.
///
/// Values that fit in 32 bits are keyed as `Key::UIntSmall`; anything larger
/// is keyed as `Key::UInt64`.
pub fn get_unsigned_integer(&mut self, value: u64) -> Index {
    let fits_in_u32 = value <= u64::from(u32::MAX);
    let key = if fits_in_u32 {
        // Lossless: the check above guarantees `value` fits in a `u32`.
        Key::UIntSmall { bits: value as u32 }
    } else {
        Key::UInt64 { bits: value }
    };
    self.get_or_insert(key)
}
/// Returns the index for `bytes`, obtained by passing a `Key::Bytes` key to
/// `get_or_insert`.
pub fn get_bytes_index(&mut self, bytes: &[u8]) -> Index {
    let key = Key::Bytes { bytes };
    self.get_or_insert(key)
}
/// Looks up the index for `bytes` without mutating the pool.
///
/// Returns whatever `try_get_index` yields for the corresponding
/// `Key::Bytes` key.
pub fn try_get_bytes_index(&self, bytes: &[u8]) -> Option<Index> {
    let key = Key::Bytes { bytes };
    self.try_get_index(&key)
}
/// Convenience alias for `get_string_index`: forwards `str` unchanged and
/// returns the resulting index.
pub fn insert_string(&mut self, str: &str) -> Index {
self.get_string_index(str)
}
/// Convenience alias for `get_bytes_index`: forwards `bytes` unchanged and
/// returns the resulting index.
pub fn insert_bytes(&mut self, bytes: &[u8]) -> Index {
self.get_bytes_index(bytes)
}
pub fn get_string_index(&mut self, str: &str) -> Index { pub fn get_string_index(&mut self, str: &str) -> Index {
self.get_or_insert(Key::String { str }) self.get_or_insert(Key::String { str })
} }
@ -1287,8 +1333,16 @@ impl InternPool {
str str
} }
/// Returns the byte slice stored for `index`.
///
/// # Panics
///
/// Panics if the key found at `index` is not `Key::Bytes`.
pub fn get_bytes(&self, index: Index) -> &[u8] {
let key = self.get_key(index);
// Fail loudly when `index` does not refer to a bytes entry.
assert!(matches!(key, Key::Bytes { .. }));
// `variant!` is a project macro; presumably it destructures `key` and
// binds `bytes` in this scope — TODO confirm against its definition.
variant!(key => Key::Bytes { bytes });
bytes
}
fn check_bounds(&self, index: Index) -> Option<Index> { fn check_bounds(&self, index: Index) -> Option<Index> {
(index.0 < self.len()).then_some(index) ((index.index() as u32) < self.len()).then_some(index)
} }
fn get_item(&self, index: Index) -> Option<Item> { fn get_item(&self, index: Index) -> Option<Item> {