diff --git a/crates/lexer/src/complex_tokens.rs b/crates/lexer/src/complex_tokens.rs
index 52ec7c2..0fe68d2 100644
--- a/crates/lexer/src/complex_tokens.rs
+++ b/crates/lexer/src/complex_tokens.rs
@@ -53,6 +53,15 @@ impl Radix {
             _ => None,
         }
     }
+    pub fn from_token(token: Token) -> Option<Self> {
+        match token {
+            Token::IntegerHexConstant(_) => Some(Radix::Hex),
+            Token::IntegerBinConstant(_) => Some(Radix::Bin),
+            Token::IntegerOctConstant(_) => Some(Radix::Oct),
+            Token::IntegerConstant(_) => Some(Radix::Dec),
+            _ => None,
+        }
+    }
 
     #[allow(unused)]
     pub fn radix(self) -> u8 {
@@ -263,7 +272,7 @@ fn parse_constant_inner(source: &mut Source) -> Result {
     let exp = try_parse_exp_part(source)?.is_some();
 
     // trailing FloatingType?
-    let floating = if source.next_if(|&c| c == 'f').is_some() {
+    let trailing_float_type = if source.next_if(|&c| c == 'f').is_some() {
         let digits = source.next_tuple::<(char, char)>();
         if !(digits == Some(('6', '4')) || digits == Some(('3', '2'))) {
             // need either f64 or f32 here!
@@ -274,12 +283,12 @@ fn parse_constant_inner(source: &mut Source) -> Result {
         false
     };
 
-    let token = match (dot, exp, floating) {
+    let token = match (dot, exp, trailing_float_type) {
         (false, false, false) => ConstantKind::Integer,
         (true, false, _) => ConstantKind::DotFloating,
         (true, true, _) => ConstantKind::DotFloatingExp,
         (false, true, _) => ConstantKind::FloatingExp,
-        (false, _, _) => ConstantKind::Floating,
+        (false, false, _) => ConstantKind::Floating,
     };
 
     Ok(token)
diff --git a/crates/lexer/src/lib.rs b/crates/lexer/src/lib.rs
index 2b996ea..9bb36d8 100644
--- a/crates/lexer/src/lib.rs
+++ b/crates/lexer/src/lib.rs
@@ -1,6 +1,6 @@
 #![feature(slice_swap_unchecked, iter_collect_into, push_mut)]
 
-mod is_things {
+pub mod is_things {
     /// True if `c` is considered a whitespace according to Rust language definition.
     /// See [Rust language reference](https://doc.rust-lang.org/reference/whitespace.html)
     /// for definitions of these classes.
@@ -83,18 +83,18 @@ mod is_things {
 macro_rules! tokens {
     ($vis:vis $ty_name:ident:
         {
-            $($name2:ident),*
+            $($(#[$meta2:meta])* $name2:ident),*
         },
         {
-            $($name:ident => $lexeme:literal),*
+            $($(#[$meta:meta])* $name:ident => $lexeme:literal),*
         }) => {
 
         #[allow(dead_code)]
         #[derive(Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd, Hash)]
         $vis enum $ty_name<'a> {
-            $($name,
+            $($(#[$meta])* $name,
             )*
-            $($name2(&'a str),)*
+            $($(#[$meta2])* $name2(&'a str),)*
         }
 
         impl ::core::fmt::Display for $ty_name<'_> {
@@ -144,17 +144,27 @@ tokens!(pub Token: {
     // Marker Token for any Comment
     Comment,
     DocComment,
-    // Marker Token for any pre-processing directive
+    /// character constant, e.g. `'a'` or `'\n'`
     CharConstant,
+    /// Decimal integer constant, e.g. `12345`
     IntegerConstant,
+    /// Hexadecimal integer constant with leading `0x`, e.g. `0x1A3F`
     IntegerHexConstant,
+    /// Binary integer constant with leading `0b`, e.g. `0b10101`
     IntegerBinConstant,
+    /// Octal integer constant with leading `0o`, e.g. `0o7654`
     IntegerOctConstant,
+    /// Simple floating point constant, e.g. `1f32`
     FloatingConstant,
+    /// Simple floating point constant with exponent, e.g. `2e10f64`
     FloatingExpConstant,
+    /// Floating point constant starting with a dot, e.g. `.5f32`
     DotFloatingConstant,
+    /// Floating point constant starting with a dot and with an exponent, e.g. `.5e-2f64`
     DotFloatingExpConstant,
+    /// String constant, e.g. `"hello, world!"`
     StringConstant,
+    /// Identifier
     Ident
 },
 // Lexical Tokens:
@@ -178,6 +188,8 @@ tokens!(pub Token: {
     Colon => ":",
     Equal => "=",
     // Keywords:
+    True => "true",
+    False => "false",
     Void => "void",
     Bool => "bool",
     F32 => "f32",
@@ -712,6 +724,7 @@ macro_rules! impl_token_sequence_list {
 variadics_please::all_tuples_enumerated!(impl_token_sequence_list, 1, 15, T);
 
 mod complex_tokens;
+pub use complex_tokens::Radix;
 
 #[cfg(test)]
 mod tests {
diff --git a/crates/parser/src/constants.rs b/crates/parser/src/constants.rs
new file mode 100644
index 0000000..5216b69
--- /dev/null
+++ b/crates/parser/src/constants.rs
@@ -0,0 +1,120 @@
+use internment::Intern;
+use itertools::Itertools;
+use lexer::{Radix, Token, is_things};
+use werkzeug::iter::NextIf;
+
+use crate::{FloatType, InnerType, IntSize, Type, Value};
+
+/// Parses a floating-point lexeme into an interned value and its type.
+///
+/// A trailing `f32` or `f64` suffix selects the float type; a lexeme without a
+/// suffix is parsed as `f32`.
+///
+/// # Panics
+///
+/// Panics if the numeric part of `lexeme` cannot be parsed as a float.
+pub(crate) fn parse_floating_constant(lexeme: &str) -> (Intern<Value>, Type) {
+    let (value, ty) = lexeme
+        .strip_suffix("f32")
+        .map(|l| (Value::F32(l.parse().unwrap()), FloatType::F32))
+        .or_else(|| {
+            lexeme
+                .strip_suffix("f64")
+                .map(|l| (Value::F64(l.parse().unwrap()), FloatType::F64))
+        })
+        // Must stay lazy: `unwrap_or` would eagerly parse the whole lexeme,
+        // suffix included, and panic on every suffixed constant like `1f32`.
+        .unwrap_or_else(|| (Value::F32(lexeme.parse().unwrap()), FloatType::F32));
+
+    (
+        Intern::new(value),
+        Intern::new(InnerType::Float { float_type: ty }),
+    )
+}
+
+/// Converts a numeric constant token into an interned value and its type.
+///
+/// # Panics
+///
+/// Panics if `token` is not an integer or floating-point constant token.
+pub(crate) fn parse_constant(token: Token<'_>) -> (Intern<Value>, Type) {
+    match token {
+        Token::FloatingConstant(lexeme)
+        | Token::DotFloatingConstant(lexeme)
+        | Token::FloatingExpConstant(lexeme)
+        | Token::DotFloatingExpConstant(lexeme) => parse_floating_constant(lexeme),
+        Token::IntegerConstant(lexeme) => parse_integer_constant(lexeme, Radix::Dec),
+        Token::IntegerHexConstant(lexeme)
+        | Token::IntegerOctConstant(lexeme)
+        | Token::IntegerBinConstant(lexeme) => {
+            let radix = Radix::from_token(token).unwrap();
+            // Skip the two-character radix prefix (`0x`, `0o` or `0b`).
+            parse_integer_constant(&lexeme[2..], radix)
+        }
+        _ => unreachable!(),
+    }
+}
+
+/// Parses the digits of an integer lexeme (radix prefix already removed) and
+/// its optional type suffix.
+///
+/// Digits are consumed up to the first character that is not a digit of
+/// `radix` (or is `_`). A following `u<bits>` or `i<bits>` suffix yields a
+/// sized integer type; anything else yields `InnerType::AnyInt`.
+///
+/// Overflow of the value and of the bit width is not checked yet.
+pub(crate) fn parse_integer_constant(lexeme: &str, radix: Radix) -> (Intern<Value>, Type) {
+    let mut chars = lexeme.char_indices();
+    let digits = chars.take_while_ref(|&(_, c)| radix.is_digit()(c) && c != '_');
+
+    let value = digits
+        .map(|(_, c)| radix.map_digit(c))
+        .fold(0u64, |acc, d| acc * radix.radix() as u64 + d as u64);
+
+    let value = Intern::new(Value::UInt(value));
+
+    let ty = chars
+        .clone()
+        .next_if(|&(_, c)| c == 'u' || c == 'i')
+        // integral type and signed-ness
+        .map(|(i, c)| (&lexeme[(i + 1)..], c == 'i'))
+        .and_then(|(bits, signed)| {
+            let mut width = 0u16;
+            for c in bits.chars() {
+                if !is_things::is_digit(c) {
+                    // TODO: error out on invalid type
+                    return None;
+                }
+
+                // TODO: check overflow
+                width = width * 10 + Radix::Dec.map_digit(c) as u16;
+            }
+
+            Some(InnerType::Int {
+                signed,
+                size: IntSize::Bits(width),
+            })
+        })
+        .unwrap_or(InnerType::AnyInt);
+
+    (value, Intern::new(ty))
+}
+
+/// Returns the type implied by a bare value, without any suffix information.
+pub(crate) fn type_from_value(value: &Value) -> Type {
+    let inner = match value {
+        Value::F32(_) => InnerType::Float {
+            float_type: FloatType::F32,
+        },
+        Value::F64(_) => InnerType::Float {
+            float_type: FloatType::F64,
+        },
+        Value::Bool(_) => InnerType::Bool,
+        Value::Int(_) => InnerType::AnyInt,
+        Value::UInt(_) => InnerType::AnyUInt,
+        Value::String(_) => InnerType::Str,
+        Value::Unit => InnerType::Unit,
+    };
+
+    Intern::new(inner)
+}
diff --git a/crates/parser/src/lib.rs b/crates/parser/src/lib.rs
index b60f1d4..5b218a9 100644
--- a/crates/parser/src/lib.rs
+++ b/crates/parser/src/lib.rs
@@ -1,3 +1,5 @@
+use std::hash::Hash;
+
 use internment::Intern;
 use lexer::{Token, TokenConsumer, TokenItem, TokenItemIterator};
 use logos::Logos;
@@ -16,6 +18,12 @@ pub enum InnerType {
     Bottom,
     Unit,
     Bool,
+    /// A signed integer constant; concrete type undetermined
+    AnyInt,
+    /// An unsigned integer constant; concrete type undetermined
+    AnyUInt,
+    /// A string slice
+    Str,
     Int {
         signed: bool,
         size: IntSize,
@@ -53,13 +61,36 @@ pub enum FloatType {
     F64,
 }
 
-#[derive(Debug, Clone)]
+#[derive(Debug, PartialEq, Clone)]
 pub enum Value {
     Bool(bool),
     Int(i64),
     UInt(u64),
-    Float(f64),
+    F64(f64),
+    F32(f32),
     String(String),
+    Unit,
+}
+
+impl Eq for Value {}
+
+impl Hash for Value {
+    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
+        core::mem::discriminant(self).hash(state);
+        match self {
+            Value::Bool(b) => b.hash(state),
+            Value::Int(i) => i.hash(state),
+            Value::UInt(u) => u.hash(state),
+            Value::F64(f) => {
+                werkzeug::util::hash_f64(state, f);
+            }
+            Value::F32(f) => {
+                werkzeug::util::hash_f32(state, f);
+            }
+            Value::String(s) => s.hash(state),
+            Value::Unit => {}
+        }
+    }
 }
 
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
@@ -99,7 +130,7 @@ pub enum AstNode {
     },
     Constant {
         ty: Type,
-        value: Value,
+        value: Intern<Value>,
     },
     NoopExpr,
     Stmt {
@@ -331,9 +362,10 @@ struct ExtraToken<'a> {
 pomelo! {
     %include {
         use super::AstNode;
+        use internment::Intern;
         use super::{
             Parameter, Ast, ParameterList, FunctionDecl, Type, InnerType,
-            FloatType, ExtraToken, Index, IntSize, Visibility,
+            FloatType, ExtraToken, Index, IntSize, Visibility, Value,
         };
     };
     %extra_argument Ast;
@@ -370,31 +402,44 @@ pomelo! {
         extra.push(AstNode::Attributes { attrs: vec![idx] })
     };
 
-    typ ::= Bool { internment::Intern::new(InnerType::Bool) };
-    typ ::= I1 { internment::Intern::new(InnerType::Int { signed: true, size: IntSize::Bits(1) }) };
-    typ ::= I8 { internment::Intern::new(InnerType::Int { signed: true, size: IntSize::Bits(8) }) };
-    typ ::= I16 { internment::Intern::new(InnerType::Int { signed: true, size: IntSize::Bits(16) }) };
-    typ ::= I32 { internment::Intern::new(InnerType::Int { signed: true, size: IntSize::Bits(32) }) };
-    typ ::= I64 { internment::Intern::new(InnerType::Int { signed: true, size: IntSize::Bits(64) }) };
-    typ ::= U1 { internment::Intern::new(InnerType::Int { signed: false, size: IntSize::Bits(1) }) };
-    typ ::= U8 { internment::Intern::new(InnerType::Int { signed: false, size: IntSize::Bits(8) }) };
-    typ ::= U16 { internment::Intern::new(InnerType::Int { signed: false, size: IntSize::Bits(16) }) };
-    typ ::= U32 { internment::Intern::new(InnerType::Int { signed: false, size: IntSize::Bits(32) }) };
-    typ ::= U64 { internment::Intern::new(InnerType::Int { signed: false, size: IntSize::Bits(64) }) };
-    typ ::= ISize { internment::Intern::new(InnerType::Int { signed: true, size: IntSize::Pointer }) };
-    typ ::= USize { internment::Intern::new(InnerType::Int { signed: false, size: IntSize::Pointer }) };
-    typ ::= F32 { internment::Intern::new(InnerType::Float { float_type: FloatType::F32 }) };
-    typ ::= F64 { internment::Intern::new(InnerType::Float { float_type: FloatType::F64 }) };
-    typ ::= Bang { internment::Intern::new(InnerType::Bottom) };
-    typ ::= unit { internment::Intern::new(InnerType::Unit) };
-    typ ::= Void { internment::Intern::new(InnerType::Unit) };
+    typ ::= Bool { Intern::new(InnerType::Bool) };
+    typ ::= I1 { Intern::new(InnerType::Int { signed: true, size: IntSize::Bits(1) }) };
+    typ ::= I8 { Intern::new(InnerType::Int { signed: true, size: IntSize::Bits(8) }) };
+    typ ::= I16 { Intern::new(InnerType::Int { signed: true, size: IntSize::Bits(16) }) };
+    typ ::= I32 { Intern::new(InnerType::Int { signed: true, size: IntSize::Bits(32) }) };
+    typ ::= I64 { Intern::new(InnerType::Int { signed: true, size: IntSize::Bits(64) }) };
+    typ ::= U1 { Intern::new(InnerType::Int { signed: false, size: IntSize::Bits(1) }) };
+    typ ::= U8 { Intern::new(InnerType::Int { signed: false, size: IntSize::Bits(8) }) };
+    typ ::= U16 { Intern::new(InnerType::Int { signed: false, size: IntSize::Bits(16) }) };
+    typ ::= U32 { Intern::new(InnerType::Int { signed: false, size: IntSize::Bits(32) }) };
+    typ ::= U64 { Intern::new(InnerType::Int { signed: false, size: IntSize::Bits(64) }) };
+    typ ::= ISize { Intern::new(InnerType::Int { signed: true, size: IntSize::Pointer }) };
+    typ ::= USize { Intern::new(InnerType::Int { signed: false, size: IntSize::Pointer }) };
+    typ ::= F32 { Intern::new(InnerType::Float { float_type: FloatType::F32 }) };
+    typ ::= F64 { Intern::new(InnerType::Float { float_type: FloatType::F64 }) };
+    typ ::= Bang { Intern::new(InnerType::Bottom) };
+    typ ::= unit { Intern::new(InnerType::Unit) };
+    typ ::= Void { Intern::new(InnerType::Unit) };
 
     unit ::= LParen RParen;
 
+    %type immediate (Intern<Value>, Type);
+    immediate ::= unit { (Intern::new(Value::Unit), Intern::new(InnerType::Unit)) };
+    immediate ::= False { (Intern::new(Value::Bool(false)), Intern::new(InnerType::Bool)) };
+    immediate ::= True { (Intern::new(Value::Bool(true)), Intern::new(InnerType::Bool)) };
+    %type Constant lexer::Token<'a>;
+    immediate ::= Constant(token) {
+        crate::constants::parse_constant(token)
+    };
+
     %type expr Index;
     %type stmt Index;
     %type stmts Vec<Index>;
 
-    expr ::= { extra.push(AstNode::NoopExpr)};
+    expr ::= immediate((value, ty)) {
+        extra.push(AstNode::Constant { ty, value })
+    };
+    stmt ::= Semi { extra.push(AstNode::NoopExpr) };
+    stmt ::= Comment(text) { extra.push(AstNode::Comment { text: text.to_string() }) };
     stmt ::= expr(expr) Semi { extra.push(AstNode::Stmt { expr }) };
     stmts ::= stmt(s) { vec![s] };
@@ -403,10 +448,19 @@ pomelo! {
         v.push(s);
         v
     };
-    block ::= LBrace stmts?(ss) RBrace {
+
+    %type block_inner (Vec<Index>, Option<Index>);
+    block_inner ::= {(vec![], None)};
+    block_inner ::= expr(expr) {(vec![], Some(expr))};
+    block_inner ::= stmts(ss) {(ss, None)};
+    block_inner ::= stmts(ss) expr(expr) {(ss, Some(expr))};
+
+
+    block ::= LBrace block_inner((ss, expr)) RBrace {
         extra.push(AstNode::Block {
-            statements: ss.unwrap_or_default(),
-            expr: None })
+            statements: ss,
+            expr
+        })
     };
 
     %type vis Visibility;
@@ -485,6 +539,8 @@ impl<'a> From<Token<'a>> for parser::Token<'a> {
             Token::I16 => Self::I16,
             Token::I32 => Self::I32,
             Token::I64 => Self::I64,
+            Token::True => Self::True,
+            Token::False => Self::False,
             Token::Const => todo!(), // Self::Const,
             Token::Mutable => Self::Mutable,
             Token::Volatile => todo!(),
@@ -539,19 +595,21 @@ impl<'a> From<Token<'a>> for parser::Token<'a> {
             Token::Eof(_) => todo!(),
             Token::ParseError(_) => todo!(),
             Token::CharConstant(_) => todo!(),
-            Token::IntegerConstant(_) => todo!(),
-            Token::IntegerHexConstant(_) => todo!(),
-            Token::IntegerBinConstant(_) => todo!(),
-            Token::IntegerOctConstant(_) => todo!(),
-            Token::FloatingConstant(_) => todo!(),
-            Token::FloatingExpConstant(_) => todo!(),
-            Token::DotFloatingConstant(_) => todo!(),
-            Token::DotFloatingExpConstant(_) => todo!(),
+            Token::IntegerConstant(_) => Self::Constant(value),
+            Token::IntegerHexConstant(_) => Self::Constant(value),
+            Token::IntegerBinConstant(_) => Self::Constant(value),
+            Token::IntegerOctConstant(_) => Self::Constant(value),
+            Token::FloatingConstant(_) => Self::Constant(value),
+            Token::FloatingExpConstant(_) => Self::Constant(value),
+            Token::DotFloatingConstant(_) => Self::Constant(value),
+            Token::DotFloatingExpConstant(_) => Self::Constant(value),
             Token::StringConstant(_) => todo!(),
         }
     }
 }
 
+mod constants;
+
 #[cfg(test)]
 mod tests {
     use crate::AstNode;
@@ -561,6 +619,31 @@ mod tests {
         eprintln!("Size of AstNode: {}", std::mem::size_of::<AstNode>());
     }
 
+    #[test]
+    fn parse_constant() {
+        use crate::parser::{Parser, Token};
+        let input = r#"
+fn a() -> u32 {
+    42u32
+}
+fn b() -> u32 {
+    42i8
+}
+fn c() -> f32 {
+    42e4
+}
+"#;
+        let mut lex = lexer::TokenIterator::new(input);
+        let mut mapped = lex.map(Token::from);
+        let mut ast = crate::Ast::new();
+        let mut parser = Parser::new(ast);
+        while let Some(token) = mapped.next() {
+            parser.parse(token).unwrap();
+        }
+        let (out, ast) = parser.end_of_input().unwrap();
+        eprintln!("AST: {:#?}", ast);
+    }
+
     #[test]
     fn parse() {
         use crate::parser::{Parser, Token};
@@ -570,7 +653,7 @@ mod tests {
 fn main(a: u32, b: u32) -> u32 {}
 "#;
         let mut lex = lexer::TokenIterator::new(input);
-        let mut mapped = lex.inspect(|t| eprintln!("{t:?}")).map(Token::from);
+        let mut mapped = lex.map(Token::from);
         let mut ast = crate::Ast::new();
         let mut parser = Parser::new(ast);
         while let Some(token) = mapped.next() {