constants parsing

This commit is contained in:
janis 2025-10-01 17:56:02 +02:00
parent c270fe5add
commit 45cc444221
Signed by: janis
SSH key fingerprint: SHA256:bB1qbbqmDXZNT0KKD5c2Dfjg53JGhj7B3CFcLIzSqq8
4 changed files with 253 additions and 44 deletions

View file

@ -53,6 +53,15 @@ impl Radix {
_ => None,
}
}
pub fn from_token(token: Token) -> Option<Self> {
match token {
Token::IntegerHexConstant(_) => Some(Radix::Hex),
Token::IntegerBinConstant(_) => Some(Radix::Bin),
Token::IntegerOctConstant(_) => Some(Radix::Oct),
Token::IntegerConstant(_) => Some(Radix::Dec),
_ => None,
}
}
#[allow(unused)]
pub fn radix(self) -> u8 {
@ -263,7 +272,7 @@ fn parse_constant_inner(source: &mut Source) -> Result<ConstantKind> {
let exp = try_parse_exp_part(source)?.is_some();
// trailing FloatingType?
let floating = if source.next_if(|&c| c == 'f').is_some() {
let trailing_float_type = if source.next_if(|&c| c == 'f').is_some() {
let digits = source.next_tuple::<(char, char)>();
if !(digits == Some(('6', '4')) || digits == Some(('3', '2'))) {
// need either f64 or f32 here!
@ -274,12 +283,12 @@ fn parse_constant_inner(source: &mut Source) -> Result<ConstantKind> {
false
};
let token = match (dot, exp, floating) {
let token = match (dot, exp, trailing_float_type) {
(false, false, false) => ConstantKind::Integer,
(true, false, _) => ConstantKind::DotFloating,
(true, true, _) => ConstantKind::DotFloatingExp,
(false, true, _) => ConstantKind::FloatingExp,
(false, _, _) => ConstantKind::Floating,
(false, false, _) => ConstantKind::Floating,
};
Ok(token)

View file

@ -1,6 +1,6 @@
#![feature(slice_swap_unchecked, iter_collect_into, push_mut)]
mod is_things {
pub mod is_things {
/// True if `c` is considered a whitespace according to Rust language definition.
/// See [Rust language reference](https://doc.rust-lang.org/reference/whitespace.html)
/// for definitions of these classes.
@ -83,18 +83,18 @@ mod is_things {
macro_rules! tokens {
($vis:vis $ty_name:ident:
{
$($name2:ident),*
$($(#[$meta2:meta])* $name2:ident),*
},
{
$($name:ident => $lexeme:literal),*
$($(#[$meta:meta])* $name:ident => $lexeme:literal),*
}) => {
#[allow(dead_code)]
#[derive(Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd, Hash)]
$vis enum $ty_name<'a> {
$($name,
$($(#[$meta])* $name,
)*
$($name2(&'a str),)*
$($(#[$meta2])* $name2(&'a str),)*
}
impl ::core::fmt::Display for $ty_name<'_> {
@ -144,17 +144,27 @@ tokens!(pub Token: {
// Marker Token for any Comment
Comment,
DocComment,
// Marker Token for any pre-processing directive
/// Character constant, e.g. `'a'` or `'\n'`
CharConstant,
/// Decimal integer constant, e.g. `12345`
IntegerConstant,
/// Hexadecimal integer constant with leading `0x`, e.g. `0x1A3F`
IntegerHexConstant,
/// Binary integer constant with leading `0b`, e.g. `0b10101`
IntegerBinConstant,
/// Octal integer constant with leading `0o`, e.g. `0o7654`
IntegerOctConstant,
/// Simple floating point constant, e.g. `1f32`
FloatingConstant,
/// Simple floating point constant with exponent, e.g. `2e10f64`
FloatingExpConstant,
/// Floating point constant starting with a dot, e.g. `.5f32`
DotFloatingConstant,
/// Floating point constant starting with a dot and with an exponent, e.g. `.5e-2f64`
DotFloatingExpConstant,
/// String constant, e.g. `"hello, world!"`
StringConstant,
/// Identifier
Ident
},
// Lexical Tokens:
@ -178,6 +188,8 @@ tokens!(pub Token: {
Colon => ":",
Equal => "=",
// Keywords:
True => "true",
False => "false",
Void => "void",
Bool => "bool",
F32 => "f32",
@ -712,6 +724,7 @@ macro_rules! impl_token_sequence_list {
variadics_please::all_tuples_enumerated!(impl_token_sequence_list, 1, 15, T);
mod complex_tokens;
pub use complex_tokens::Radix;
#[cfg(test)]
mod tests {

View file

@ -0,0 +1,104 @@
use internment::Intern;
use itertools::Itertools;
use lexer::{Radix, Token, is_things};
use werkzeug::iter::NextIf;
use crate::{FloatType, InnerType, IntSize, Type, Value};
/// Parses the lexeme of a floating point constant into an interned value and
/// its type.
///
/// A trailing `f32` or `f64` suffix selects the float type and is stripped
/// before the remaining text is parsed. A lexeme without either suffix is
/// parsed as `f32`.
///
/// # Panics
///
/// Panics if the text (after removing the suffix, if any) is not accepted by
/// the standard library float parser.
pub(crate) fn parse_floating_constant(lexeme: &str) -> (Intern<Value>, Type) {
    // Only the branch that applies may parse: the un-suffixed fallback must
    // not run for suffixed constants, because the full lexeme (e.g. `1.5f32`)
    // is not itself a valid float string and would make the parse fail.
    let (value, ty) = if let Some(number) = lexeme.strip_suffix("f32") {
        let parsed = number
            .parse()
            .expect("f32 constant must have a valid floating point body");
        (Value::F32(parsed), FloatType::F32)
    } else if let Some(number) = lexeme.strip_suffix("f64") {
        let parsed = number
            .parse()
            .expect("f64 constant must have a valid floating point body");
        (Value::F64(parsed), FloatType::F64)
    } else {
        let parsed = lexeme
            .parse()
            .expect("unsuffixed floating constant must be a valid float");
        (Value::F32(parsed), FloatType::F32)
    };

    (
        Intern::new(value),
        Intern::new(InnerType::Float { float_type: ty }),
    )
}
/// Converts a numeric constant token into its interned value and type.
///
/// Floating point variants are handed to `parse_floating_constant`. Integer
/// variants are handed to `parse_integer_constant` together with the radix
/// matching the token variant; for hex, octal and binary constants the first
/// two bytes of the lexeme (the radix prefix) are skipped beforehand.
///
/// # Panics
///
/// Panics if `token` is not one of the numeric constant variants.
pub(crate) fn parse_constant(token: Token<'_>) -> (Intern<Value>, Type) {
    match token {
        Token::IntegerConstant(text) => parse_integer_constant(text, Radix::Dec),
        Token::IntegerHexConstant(text)
        | Token::IntegerOctConstant(text)
        | Token::IntegerBinConstant(text) => {
            let radix = Radix::from_token(token).unwrap();
            let digits = &text[2..];
            parse_integer_constant(digits, radix)
        }
        Token::FloatingConstant(text)
        | Token::FloatingExpConstant(text)
        | Token::DotFloatingConstant(text)
        | Token::DotFloatingExpConstant(text) => parse_floating_constant(text),
        _ => unreachable!(),
    }
}
/// Parses the digits and optional type suffix of an integer constant.
///
/// `lexeme` is the constant's text without any radix prefix. The leading run
/// of digits (valid for `radix`) is accumulated into a `Value::UInt`. If the
/// digits are followed by `u` or `i` and a decimal bit width, the returned
/// type is the matching `InnerType::Int`; otherwise (no suffix, a
/// non-numeric width, or a width that does not fit in `u16`) the type is
/// `InnerType::AnyInt`.
pub(crate) fn parse_integer_constant(lexeme: &str, radix: Radix) -> (Intern<Value>, Type) {
    let mut chars = lexeme.char_indices();

    // Consume the leading digits, leaving `chars` at the first other character.
    // NOTE(review): the run also stops at the first `_`, so `1_000` yields 1 and
    // any suffix after it is ignored — confirm whether separators should be skipped.
    let value = chars
        .take_while_ref(|&(_, c)| radix.is_digit()(c) && c != '_')
        .map(|(_, c)| radix.map_digit(c))
        // TODO: check overflow of the constant's value
        .fold(0u64, |acc, d| acc * radix.radix() as u64 + d as u64);
    let value = Intern::new(Value::UInt(value));

    let ty = chars
        .clone()
        .next_if(|&(_, c)| c == 'u' || c == 'i')
        // integral type and signed-ness
        .map(|(i, c)| (&lexeme[(i + 1)..], c == 'i'))
        .and_then(|(width, signed)| {
            // Checked arithmetic: an over-long width (e.g. `u70000`) is rejected
            // like any other invalid suffix instead of overflowing the `u16`.
            // NOTE(review): an empty width (`42u`) still yields `Bits(0)`.
            // TODO: error out on invalid type
            let bits = width.chars().try_fold(0u16, |bits, c| {
                if !is_things::is_digit(c) {
                    return None;
                }
                bits.checked_mul(10)?
                    .checked_add(Radix::Dec.map_digit(c) as u16)
            })?;
            Some(InnerType::Int {
                signed,
                size: IntSize::Bits(bits),
            })
        })
        .unwrap_or(InnerType::AnyInt);

    (value, Intern::new(ty))
}
/// Returns the interned type that corresponds to the variant of `value`.
///
/// Integer values map to the undetermined `AnyInt` / `AnyUInt` types; the
/// payload of the value is never inspected.
pub(crate) fn type_from_value(value: &Value) -> Type {
    Intern::new(match value {
        Value::Unit => InnerType::Unit,
        Value::Bool(_) => InnerType::Bool,
        Value::String(_) => InnerType::Str,
        Value::Int(_) => InnerType::AnyInt,
        Value::UInt(_) => InnerType::AnyUInt,
        Value::F32(_) => InnerType::Float {
            float_type: FloatType::F32,
        },
        Value::F64(_) => InnerType::Float {
            float_type: FloatType::F64,
        },
    })
}

View file

@ -1,3 +1,5 @@
use std::hash::Hash;
use internment::Intern;
use lexer::{Token, TokenConsumer, TokenItem, TokenItemIterator};
use logos::Logos;
@ -16,6 +18,12 @@ pub enum InnerType {
Bottom,
Unit,
Bool,
/// A signed integer constant; concrete type undetermined
AnyInt,
/// An unsigned integer constant; concrete type undetermined
AnyUInt,
/// A string slice
Str,
Int {
signed: bool,
size: IntSize,
@ -53,13 +61,36 @@ pub enum FloatType {
F64,
}
#[derive(Debug, Clone)]
#[derive(Debug, PartialEq, Clone)]
pub enum Value {
Bool(bool),
Int(i64),
UInt(u64),
Float(f64),
F64(f64),
F32(f32),
String(String),
Unit,
}
impl Eq for Value {}
impl Hash for Value {
    /// Feeds the variant discriminant and then the payload into `state`.
    ///
    /// Float payloads are hashed through the `werkzeug::util` helpers since
    /// `f32`/`f64` do not implement `Hash` themselves.
    // NOTE(review): equality on the float variants comes from the derived
    // `PartialEq`; confirm that the float hash helpers agree with it
    // (e.g. for `0.0` vs `-0.0`).
    fn hash<H>(&self, state: &mut H)
    where
        H: std::hash::Hasher,
    {
        core::mem::discriminant(self).hash(state);
        match self {
            Value::Unit => {}
            Value::Bool(flag) => flag.hash(state),
            Value::Int(signed) => signed.hash(state),
            Value::UInt(unsigned) => unsigned.hash(state),
            Value::String(text) => text.hash(state),
            Value::F32(float) => {
                werkzeug::util::hash_f32(state, float);
            }
            Value::F64(float) => {
                werkzeug::util::hash_f64(state, float);
            }
        }
    }
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
@ -99,7 +130,7 @@ pub enum AstNode {
},
Constant {
ty: Type,
value: Value,
value: Intern<Value>,
},
NoopExpr,
Stmt {
@ -331,9 +362,10 @@ struct ExtraToken<'a> {
pomelo! {
%include {
use super::AstNode;
use internment::Intern;
use super::{
Parameter, Ast, ParameterList, FunctionDecl, Type, InnerType,
FloatType, ExtraToken, Index, IntSize, Visibility,
FloatType, ExtraToken, Index, IntSize, Visibility, Value,
};
};
%extra_argument Ast;
@ -370,31 +402,44 @@ pomelo! {
extra.push(AstNode::Attributes { attrs: vec![idx] })
};
typ ::= Bool { internment::Intern::new(InnerType::Bool) };
typ ::= I1 { internment::Intern::new(InnerType::Int { signed: true, size: IntSize::Bits(1) }) };
typ ::= I8 { internment::Intern::new(InnerType::Int { signed: true, size: IntSize::Bits(8) }) };
typ ::= I16 { internment::Intern::new(InnerType::Int { signed: true, size: IntSize::Bits(16) }) };
typ ::= I32 { internment::Intern::new(InnerType::Int { signed: true, size: IntSize::Bits(32) }) };
typ ::= I64 { internment::Intern::new(InnerType::Int { signed: true, size: IntSize::Bits(64) }) };
typ ::= U1 { internment::Intern::new(InnerType::Int { signed: false, size: IntSize::Bits(1) }) };
typ ::= U8 { internment::Intern::new(InnerType::Int { signed: false, size: IntSize::Bits(8) }) };
typ ::= U16 { internment::Intern::new(InnerType::Int { signed: false, size: IntSize::Bits(16) }) };
typ ::= U32 { internment::Intern::new(InnerType::Int { signed: false, size: IntSize::Bits(32) }) };
typ ::= U64 { internment::Intern::new(InnerType::Int { signed: false, size: IntSize::Bits(64) }) };
typ ::= ISize { internment::Intern::new(InnerType::Int { signed: true, size: IntSize::Pointer }) };
typ ::= USize { internment::Intern::new(InnerType::Int { signed: false, size: IntSize::Pointer }) };
typ ::= F32 { internment::Intern::new(InnerType::Float { float_type: FloatType::F32 }) };
typ ::= F64 { internment::Intern::new(InnerType::Float { float_type: FloatType::F64 }) };
typ ::= Bang { internment::Intern::new(InnerType::Bottom) };
typ ::= unit { internment::Intern::new(InnerType::Unit) };
typ ::= Void { internment::Intern::new(InnerType::Unit) };
typ ::= Bool { Intern::new(InnerType::Bool) };
typ ::= I1 { Intern::new(InnerType::Int { signed: true, size: IntSize::Bits(1) }) };
typ ::= I8 { Intern::new(InnerType::Int { signed: true, size: IntSize::Bits(8) }) };
typ ::= I16 { Intern::new(InnerType::Int { signed: true, size: IntSize::Bits(16) }) };
typ ::= I32 { Intern::new(InnerType::Int { signed: true, size: IntSize::Bits(32) }) };
typ ::= I64 { Intern::new(InnerType::Int { signed: true, size: IntSize::Bits(64) }) };
typ ::= U1 { Intern::new(InnerType::Int { signed: false, size: IntSize::Bits(1) }) };
typ ::= U8 { Intern::new(InnerType::Int { signed: false, size: IntSize::Bits(8) }) };
typ ::= U16 { Intern::new(InnerType::Int { signed: false, size: IntSize::Bits(16) }) };
typ ::= U32 { Intern::new(InnerType::Int { signed: false, size: IntSize::Bits(32) }) };
typ ::= U64 { Intern::new(InnerType::Int { signed: false, size: IntSize::Bits(64) }) };
typ ::= ISize { Intern::new(InnerType::Int { signed: true, size: IntSize::Pointer }) };
typ ::= USize { Intern::new(InnerType::Int { signed: false, size: IntSize::Pointer }) };
typ ::= F32 { Intern::new(InnerType::Float { float_type: FloatType::F32 }) };
typ ::= F64 { Intern::new(InnerType::Float { float_type: FloatType::F64 }) };
typ ::= Bang { Intern::new(InnerType::Bottom) };
typ ::= unit { Intern::new(InnerType::Unit) };
typ ::= Void { Intern::new(InnerType::Unit) };
unit ::= LParen RParen;
%type immediate (Intern<Value>, Type);
immediate ::= unit { (Intern::new(Value::Unit), Intern::new(InnerType::Unit)) };
immediate ::= False { (Intern::new(Value::Bool(false)), Intern::new(InnerType::Bool)) };
immediate ::= True { (Intern::new(Value::Bool(true)), Intern::new(InnerType::Bool)) };
%type Constant lexer::Token<'a>;
immediate ::= Constant(token) {
crate::constants::parse_constant(token)
};
%type expr Index;
%type stmt Index;
%type stmts Vec<Index>;
expr ::= { extra.push(AstNode::NoopExpr)};
expr ::= immediate((value, ty)) {
extra.push(AstNode::Constant { ty, value })
};
stmt ::= Semi { extra.push(AstNode::NoopExpr) };
stmt ::= Comment(text) { extra.push(AstNode::Comment { text: text.to_string() }) };
stmt ::= expr(expr) Semi { extra.push(AstNode::Stmt { expr }) };
stmts ::= stmt(s) { vec![s] };
@ -403,10 +448,19 @@ pomelo! {
v.push(s);
v
};
block ::= LBrace stmts?(ss) RBrace {
%type block_inner (Vec<Index>, Option<Index>);
block_inner ::= {(vec![], None)};
block_inner ::= expr(expr) {(vec![], Some(expr))};
block_inner ::= stmts(ss) {(ss, None)};
block_inner ::= stmts(ss) expr(expr) {(ss, Some(expr))};
block ::= LBrace block_inner((ss, expr)) RBrace {
extra.push(AstNode::Block {
statements: ss.unwrap_or_default(),
expr: None })
statements: ss,
expr
})
};
%type vis Visibility;
@ -485,6 +539,8 @@ impl<'a> From<lexer::Token<'a>> for parser::Token<'a> {
Token::I16 => Self::I16,
Token::I32 => Self::I32,
Token::I64 => Self::I64,
Token::True => Self::True,
Token::False => Self::False,
Token::Const => todo!(), // Self::Const,
Token::Mutable => Self::Mutable,
Token::Volatile => todo!(),
@ -539,19 +595,21 @@ impl<'a> From<lexer::Token<'a>> for parser::Token<'a> {
Token::Eof(_) => todo!(),
Token::ParseError(_) => todo!(),
Token::CharConstant(_) => todo!(),
Token::IntegerConstant(_) => todo!(),
Token::IntegerHexConstant(_) => todo!(),
Token::IntegerBinConstant(_) => todo!(),
Token::IntegerOctConstant(_) => todo!(),
Token::FloatingConstant(_) => todo!(),
Token::FloatingExpConstant(_) => todo!(),
Token::DotFloatingConstant(_) => todo!(),
Token::DotFloatingExpConstant(_) => todo!(),
Token::IntegerConstant(_) => Self::Constant(value),
Token::IntegerHexConstant(_) => Self::Constant(value),
Token::IntegerBinConstant(_) => Self::Constant(value),
Token::IntegerOctConstant(_) => Self::Constant(value),
Token::FloatingConstant(_) => Self::Constant(value),
Token::FloatingExpConstant(_) => Self::Constant(value),
Token::DotFloatingConstant(_) => Self::Constant(value),
Token::DotFloatingExpConstant(_) => Self::Constant(value),
Token::StringConstant(_) => todo!(),
}
}
}
mod constants;
#[cfg(test)]
mod tests {
use crate::AstNode;
@ -561,6 +619,31 @@ mod tests {
eprintln!("Size of AstNode: {}", std::mem::size_of::<AstNode>());
}
/// Smoke test: feeds three functions whose bodies are a single constant
/// expression through the lexer and parser and dumps the resulting AST.
#[test]
fn parse_constant() {
    use crate::parser::{Parser, Token};

    let input = r#"
fn a() -> u32 {
42u32
}
fn b() -> u32 {
42i8
}
fn c() -> f32 {
42e4
}
"#;

    let mut parser = Parser::new(crate::Ast::new());
    for token in lexer::TokenIterator::new(input).map(Token::from) {
        parser.parse(token).unwrap();
    }

    let (_out, ast) = parser.end_of_input().unwrap();
    eprintln!("AST: {:#?}", ast);
}
#[test]
fn parse() {
use crate::parser::{Parser, Token};
@ -570,7 +653,7 @@ mod tests {
fn main(a: u32, b: u32) -> u32 {}
"#;
let mut lex = lexer::TokenIterator::new(input);
let mut mapped = lex.inspect(|t| eprintln!("{t:?}")).map(Token::from);
let mut mapped = lex.map(Token::from);
let mut ast = crate::Ast::new();
let mut parser = Parser::new(ast);
while let Some(token) = mapped.next() {