constants parsing
This commit is contained in:
parent
c270fe5add
commit
45cc444221
|
@ -53,6 +53,15 @@ impl Radix {
|
|||
_ => None,
|
||||
}
|
||||
}
|
||||
pub fn from_token(token: Token) -> Option<Self> {
|
||||
match token {
|
||||
Token::IntegerHexConstant(_) => Some(Radix::Hex),
|
||||
Token::IntegerBinConstant(_) => Some(Radix::Bin),
|
||||
Token::IntegerOctConstant(_) => Some(Radix::Oct),
|
||||
Token::IntegerConstant(_) => Some(Radix::Dec),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(unused)]
|
||||
pub fn radix(self) -> u8 {
|
||||
|
@ -263,7 +272,7 @@ fn parse_constant_inner(source: &mut Source) -> Result<ConstantKind> {
|
|||
let exp = try_parse_exp_part(source)?.is_some();
|
||||
|
||||
// trailing FloatingType?
|
||||
let floating = if source.next_if(|&c| c == 'f').is_some() {
|
||||
let trailing_float_type = if source.next_if(|&c| c == 'f').is_some() {
|
||||
let digits = source.next_tuple::<(char, char)>();
|
||||
if !(digits == Some(('6', '4')) || digits == Some(('3', '2'))) {
|
||||
// need either f64 or f32 here!
|
||||
|
@ -274,12 +283,12 @@ fn parse_constant_inner(source: &mut Source) -> Result<ConstantKind> {
|
|||
false
|
||||
};
|
||||
|
||||
let token = match (dot, exp, floating) {
|
||||
let token = match (dot, exp, trailing_float_type) {
|
||||
(false, false, false) => ConstantKind::Integer,
|
||||
(true, false, _) => ConstantKind::DotFloating,
|
||||
(true, true, _) => ConstantKind::DotFloatingExp,
|
||||
(false, true, _) => ConstantKind::FloatingExp,
|
||||
(false, _, _) => ConstantKind::Floating,
|
||||
(false, false, _) => ConstantKind::Floating,
|
||||
};
|
||||
|
||||
Ok(token)
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
#![feature(slice_swap_unchecked, iter_collect_into, push_mut)]
|
||||
|
||||
mod is_things {
|
||||
pub mod is_things {
|
||||
/// True if `c` is considered a whitespace according to Rust language definition.
|
||||
/// See [Rust language reference](https://doc.rust-lang.org/reference/whitespace.html)
|
||||
/// for definitions of these classes.
|
||||
|
@ -83,18 +83,18 @@ mod is_things {
|
|||
macro_rules! tokens {
|
||||
($vis:vis $ty_name:ident:
|
||||
{
|
||||
$($name2:ident),*
|
||||
$($(#[$meta2:meta])* $name2:ident),*
|
||||
},
|
||||
{
|
||||
$($name:ident => $lexeme:literal),*
|
||||
$($(#[$meta:meta])* $name:ident => $lexeme:literal),*
|
||||
}) => {
|
||||
|
||||
#[allow(dead_code)]
|
||||
#[derive(Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd, Hash)]
|
||||
$vis enum $ty_name<'a> {
|
||||
$($name,
|
||||
$($(#[$meta])* $name,
|
||||
)*
|
||||
$($name2(&'a str),)*
|
||||
$($(#[$meta2])* $name2(&'a str),)*
|
||||
}
|
||||
|
||||
impl ::core::fmt::Display for $ty_name<'_> {
|
||||
|
@ -144,17 +144,27 @@ tokens!(pub Token: {
|
|||
// Marker Token for any Comment
|
||||
Comment,
|
||||
DocComment,
|
||||
// Marker Token for any pre-processing directive
|
||||
/// character constant, e.g. `'a'` or `'\n'`
|
||||
CharConstant,
|
||||
/// Decimal integer constant, e.g. `12345`
|
||||
IntegerConstant,
|
||||
/// Hexadecimal integer constant with leading `0x`, e.g. `0x1A3F`
|
||||
IntegerHexConstant,
|
||||
/// Binary integer constant with leading `0b`, e.g. `0b10101`
|
||||
IntegerBinConstant,
|
||||
/// Octal integer constant with leading `0o`, e.g. `0o7654`
|
||||
IntegerOctConstant,
|
||||
/// Simple floating point constant, e.g. `1f32`
|
||||
FloatingConstant,
|
||||
/// Simple floating point constant with exponent, e.g. `2e10f64`
|
||||
FloatingExpConstant,
|
||||
/// Floating point constant starting with a dot, e.g. `.5f32`
|
||||
DotFloatingConstant,
|
||||
/// Floating point constant starting with a dot and with an exponent, e.g. `.5e-2f64`
|
||||
DotFloatingExpConstant,
|
||||
/// String constant, e.g. `"hello, world!"`
|
||||
StringConstant,
|
||||
/// Identifier
|
||||
Ident
|
||||
},
|
||||
// Lexical Tokens:
|
||||
|
@ -178,6 +188,8 @@ tokens!(pub Token: {
|
|||
Colon => ":",
|
||||
Equal => "=",
|
||||
// Keywords:
|
||||
True => "true",
|
||||
False => "false",
|
||||
Void => "void",
|
||||
Bool => "bool",
|
||||
F32 => "f32",
|
||||
|
@ -712,6 +724,7 @@ macro_rules! impl_token_sequence_list {
|
|||
|
||||
variadics_please::all_tuples_enumerated!(impl_token_sequence_list, 1, 15, T);
|
||||
mod complex_tokens;
|
||||
pub use complex_tokens::Radix;
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
|
104
crates/parser/src/constants.rs
Normal file
104
crates/parser/src/constants.rs
Normal file
|
@ -0,0 +1,104 @@
|
|||
use internment::Intern;
|
||||
use itertools::Itertools;
|
||||
use lexer::{Radix, Token, is_things};
|
||||
use werkzeug::iter::NextIf;
|
||||
|
||||
use crate::{FloatType, InnerType, IntSize, Type, Value};
|
||||
|
||||
pub(crate) fn parse_floating_constant(lexeme: &str) -> (Intern<Value>, Type) {
|
||||
let (value, ty) = lexeme
|
||||
.strip_suffix("f32")
|
||||
.map(|l| (Value::F32(l.parse().unwrap()), FloatType::F32))
|
||||
.or_else(|| {
|
||||
lexeme
|
||||
.strip_suffix("f64")
|
||||
.map(|l| (Value::F64(l.parse().unwrap()), FloatType::F64))
|
||||
})
|
||||
.unwrap_or((Value::F32(lexeme.parse().unwrap()), FloatType::F32));
|
||||
|
||||
(
|
||||
Intern::new(value),
|
||||
Intern::new(InnerType::Float { float_type: ty }),
|
||||
)
|
||||
}
|
||||
|
||||
pub(crate) fn parse_constant(token: Token<'_>) -> (Intern<Value>, Type) {
|
||||
let lexeme = match token {
|
||||
Token::FloatingConstant(lexeme)
|
||||
| Token::DotFloatingConstant(lexeme)
|
||||
| Token::FloatingExpConstant(lexeme)
|
||||
| Token::DotFloatingExpConstant(lexeme) => parse_floating_constant(lexeme),
|
||||
Token::IntegerConstant(lexeme) => parse_integer_constant(lexeme, Radix::Dec),
|
||||
Token::IntegerHexConstant(lexeme)
|
||||
| Token::IntegerOctConstant(lexeme)
|
||||
| Token::IntegerBinConstant(lexeme) => {
|
||||
let radix = Radix::from_token(token).unwrap();
|
||||
parse_integer_constant(&lexeme[2..], radix)
|
||||
}
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
lexeme
|
||||
}
|
||||
|
||||
pub(crate) fn parse_integer_constant(lexeme: &str, radix: Radix) -> (Intern<Value>, Type) {
|
||||
let mut chars = lexeme.char_indices();
|
||||
let digits = chars.take_while_ref(|&(_, c)| radix.is_digit()(c) && c != '_');
|
||||
|
||||
let value = digits
|
||||
.map(|(_, c)| radix.map_digit(c))
|
||||
.fold(0u64, |acc, d| acc * radix.radix() as u64 + d as u64);
|
||||
|
||||
let value = Intern::new(Value::UInt(value));
|
||||
|
||||
let ty = chars
|
||||
.clone()
|
||||
.next_if(|&(_, c)| c == 'u' || c == 'i')
|
||||
// integral type and signed-ness
|
||||
.map(|(i, c)| (&lexeme[(i + 1)..], c == 'i'))
|
||||
.map(|(bits, signed)| {
|
||||
let mut chars = bits.chars();
|
||||
let mut bits = 0u16;
|
||||
|
||||
let x = 'f: {
|
||||
while let Some(c) = chars.next() {
|
||||
if !is_things::is_digit(c) {
|
||||
break 'f None;
|
||||
}
|
||||
|
||||
// TODO: check overflow
|
||||
bits = bits * 10 + Radix::Dec.map_digit(c) as u16;
|
||||
}
|
||||
|
||||
Some(bits)
|
||||
};
|
||||
|
||||
// TODO: error out on invalid type
|
||||
x.map(|bits| InnerType::Int {
|
||||
signed,
|
||||
size: IntSize::Bits(bits),
|
||||
})
|
||||
})
|
||||
.flatten()
|
||||
.unwrap_or(InnerType::AnyInt);
|
||||
|
||||
(value, Intern::new(ty))
|
||||
}
|
||||
|
||||
pub(crate) fn type_from_value(value: &Value) -> Type {
|
||||
let inner = match value {
|
||||
Value::F32(_) => InnerType::Float {
|
||||
float_type: FloatType::F32,
|
||||
},
|
||||
Value::F64(_) => InnerType::Float {
|
||||
float_type: FloatType::F64,
|
||||
},
|
||||
Value::Bool(_) => InnerType::Bool,
|
||||
Value::Int(_) => InnerType::AnyInt,
|
||||
Value::UInt(_) => InnerType::AnyUInt,
|
||||
Value::String(_) => InnerType::Str,
|
||||
Value::Unit => InnerType::Unit,
|
||||
};
|
||||
|
||||
Intern::new(inner)
|
||||
}
|
|
@ -1,3 +1,5 @@
|
|||
use std::hash::Hash;
|
||||
|
||||
use internment::Intern;
|
||||
use lexer::{Token, TokenConsumer, TokenItem, TokenItemIterator};
|
||||
use logos::Logos;
|
||||
|
@ -16,6 +18,12 @@ pub enum InnerType {
|
|||
Bottom,
|
||||
Unit,
|
||||
Bool,
|
||||
/// A signed integer constant; concrete type undetermined
|
||||
AnyInt,
|
||||
/// An unsigned integer constant; concrete type undetermined
|
||||
AnyUInt,
|
||||
/// A string slice
|
||||
Str,
|
||||
Int {
|
||||
signed: bool,
|
||||
size: IntSize,
|
||||
|
@ -53,13 +61,36 @@ pub enum FloatType {
|
|||
F64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Debug, PartialEq, Clone)]
|
||||
pub enum Value {
|
||||
Bool(bool),
|
||||
Int(i64),
|
||||
UInt(u64),
|
||||
Float(f64),
|
||||
F64(f64),
|
||||
F32(f32),
|
||||
String(String),
|
||||
Unit,
|
||||
}
|
||||
|
||||
// Marker impl: asserts that `Value` equality is a full equivalence relation.
// NOTE(review): the derived `PartialEq` compares the `F32`/`F64` payloads with
// float `==`, so a NaN payload is not equal to itself — confirm that NaN values
// never reach code that relies on `Eq` (presumably the interner).
impl Eq for Value {}
|
||||
|
||||
impl Hash for Value {
|
||||
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
|
||||
core::mem::discriminant(self).hash(state);
|
||||
match self {
|
||||
Value::Bool(b) => b.hash(state),
|
||||
Value::Int(i) => i.hash(state),
|
||||
Value::UInt(u) => u.hash(state),
|
||||
Value::F64(f) => {
|
||||
werkzeug::util::hash_f64(state, f);
|
||||
}
|
||||
Value::F32(f) => {
|
||||
werkzeug::util::hash_f32(state, f);
|
||||
}
|
||||
Value::String(s) => s.hash(state),
|
||||
Value::Unit => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
||||
|
@ -99,7 +130,7 @@ pub enum AstNode {
|
|||
},
|
||||
Constant {
|
||||
ty: Type,
|
||||
value: Value,
|
||||
value: Intern<Value>,
|
||||
},
|
||||
NoopExpr,
|
||||
Stmt {
|
||||
|
@ -331,9 +362,10 @@ struct ExtraToken<'a> {
|
|||
pomelo! {
|
||||
%include {
|
||||
use super::AstNode;
|
||||
use internment::Intern;
|
||||
use super::{
|
||||
Parameter, Ast, ParameterList, FunctionDecl, Type, InnerType,
|
||||
FloatType, ExtraToken, Index, IntSize, Visibility,
|
||||
FloatType, ExtraToken, Index, IntSize, Visibility, Value,
|
||||
};
|
||||
};
|
||||
%extra_argument Ast;
|
||||
|
@ -370,31 +402,44 @@ pomelo! {
|
|||
extra.push(AstNode::Attributes { attrs: vec![idx] })
|
||||
};
|
||||
|
||||
typ ::= Bool { internment::Intern::new(InnerType::Bool) };
|
||||
typ ::= I1 { internment::Intern::new(InnerType::Int { signed: true, size: IntSize::Bits(1) }) };
|
||||
typ ::= I8 { internment::Intern::new(InnerType::Int { signed: true, size: IntSize::Bits(8) }) };
|
||||
typ ::= I16 { internment::Intern::new(InnerType::Int { signed: true, size: IntSize::Bits(16) }) };
|
||||
typ ::= I32 { internment::Intern::new(InnerType::Int { signed: true, size: IntSize::Bits(32) }) };
|
||||
typ ::= I64 { internment::Intern::new(InnerType::Int { signed: true, size: IntSize::Bits(64) }) };
|
||||
typ ::= U1 { internment::Intern::new(InnerType::Int { signed: false, size: IntSize::Bits(1) }) };
|
||||
typ ::= U8 { internment::Intern::new(InnerType::Int { signed: false, size: IntSize::Bits(8) }) };
|
||||
typ ::= U16 { internment::Intern::new(InnerType::Int { signed: false, size: IntSize::Bits(16) }) };
|
||||
typ ::= U32 { internment::Intern::new(InnerType::Int { signed: false, size: IntSize::Bits(32) }) };
|
||||
typ ::= U64 { internment::Intern::new(InnerType::Int { signed: false, size: IntSize::Bits(64) }) };
|
||||
typ ::= ISize { internment::Intern::new(InnerType::Int { signed: true, size: IntSize::Pointer }) };
|
||||
typ ::= USize { internment::Intern::new(InnerType::Int { signed: false, size: IntSize::Pointer }) };
|
||||
typ ::= F32 { internment::Intern::new(InnerType::Float { float_type: FloatType::F32 }) };
|
||||
typ ::= F64 { internment::Intern::new(InnerType::Float { float_type: FloatType::F64 }) };
|
||||
typ ::= Bang { internment::Intern::new(InnerType::Bottom) };
|
||||
typ ::= unit { internment::Intern::new(InnerType::Unit) };
|
||||
typ ::= Void { internment::Intern::new(InnerType::Unit) };
|
||||
typ ::= Bool { Intern::new(InnerType::Bool) };
|
||||
typ ::= I1 { Intern::new(InnerType::Int { signed: true, size: IntSize::Bits(1) }) };
|
||||
typ ::= I8 { Intern::new(InnerType::Int { signed: true, size: IntSize::Bits(8) }) };
|
||||
typ ::= I16 { Intern::new(InnerType::Int { signed: true, size: IntSize::Bits(16) }) };
|
||||
typ ::= I32 { Intern::new(InnerType::Int { signed: true, size: IntSize::Bits(32) }) };
|
||||
typ ::= I64 { Intern::new(InnerType::Int { signed: true, size: IntSize::Bits(64) }) };
|
||||
typ ::= U1 { Intern::new(InnerType::Int { signed: false, size: IntSize::Bits(1) }) };
|
||||
typ ::= U8 { Intern::new(InnerType::Int { signed: false, size: IntSize::Bits(8) }) };
|
||||
typ ::= U16 { Intern::new(InnerType::Int { signed: false, size: IntSize::Bits(16) }) };
|
||||
typ ::= U32 { Intern::new(InnerType::Int { signed: false, size: IntSize::Bits(32) }) };
|
||||
typ ::= U64 { Intern::new(InnerType::Int { signed: false, size: IntSize::Bits(64) }) };
|
||||
typ ::= ISize { Intern::new(InnerType::Int { signed: true, size: IntSize::Pointer }) };
|
||||
typ ::= USize { Intern::new(InnerType::Int { signed: false, size: IntSize::Pointer }) };
|
||||
typ ::= F32 { Intern::new(InnerType::Float { float_type: FloatType::F32 }) };
|
||||
typ ::= F64 { Intern::new(InnerType::Float { float_type: FloatType::F64 }) };
|
||||
typ ::= Bang { Intern::new(InnerType::Bottom) };
|
||||
typ ::= unit { Intern::new(InnerType::Unit) };
|
||||
typ ::= Void { Intern::new(InnerType::Unit) };
|
||||
|
||||
unit ::= LParen RParen;
|
||||
|
||||
%type immediate (Intern<Value>, Type);
|
||||
immediate ::= unit { (Intern::new(Value::Unit), Intern::new(InnerType::Unit)) };
|
||||
immediate ::= False { (Intern::new(Value::Bool(false)), Intern::new(InnerType::Bool)) };
|
||||
immediate ::= True { (Intern::new(Value::Bool(true)), Intern::new(InnerType::Bool)) };
|
||||
%type Constant lexer::Token<'a>;
|
||||
immediate ::= Constant(token) {
|
||||
crate::constants::parse_constant(token)
|
||||
};
|
||||
|
||||
%type expr Index;
|
||||
%type stmt Index;
|
||||
%type stmts Vec<Index>;
|
||||
expr ::= { extra.push(AstNode::NoopExpr)};
|
||||
expr ::= immediate((value, ty)) {
|
||||
extra.push(AstNode::Constant { ty, value })
|
||||
};
|
||||
stmt ::= Semi { extra.push(AstNode::NoopExpr) };
|
||||
stmt ::= Comment(text) { extra.push(AstNode::Comment { text: text.to_string() }) };
|
||||
stmt ::= expr(expr) Semi { extra.push(AstNode::Stmt { expr }) };
|
||||
|
||||
stmts ::= stmt(s) { vec![s] };
|
||||
|
@ -403,10 +448,19 @@ pomelo! {
|
|||
v.push(s);
|
||||
v
|
||||
};
|
||||
block ::= LBrace stmts?(ss) RBrace {
|
||||
|
||||
%type block_inner (Vec<Index>, Option<Index>);
|
||||
block_inner ::= {(vec![], None)};
|
||||
block_inner ::= expr(expr) {(vec![], Some(expr))};
|
||||
block_inner ::= stmts(ss) {(ss, None)};
|
||||
block_inner ::= stmts(ss) expr(expr) {(ss, Some(expr))};
|
||||
|
||||
|
||||
block ::= LBrace block_inner((ss, expr)) RBrace {
|
||||
extra.push(AstNode::Block {
|
||||
statements: ss.unwrap_or_default(),
|
||||
expr: None })
|
||||
statements: ss,
|
||||
expr
|
||||
})
|
||||
};
|
||||
|
||||
%type vis Visibility;
|
||||
|
@ -485,6 +539,8 @@ impl<'a> From<lexer::Token<'a>> for parser::Token<'a> {
|
|||
Token::I16 => Self::I16,
|
||||
Token::I32 => Self::I32,
|
||||
Token::I64 => Self::I64,
|
||||
Token::True => Self::True,
|
||||
Token::False => Self::False,
|
||||
Token::Const => todo!(), // Self::Const,
|
||||
Token::Mutable => Self::Mutable,
|
||||
Token::Volatile => todo!(),
|
||||
|
@ -539,19 +595,21 @@ impl<'a> From<lexer::Token<'a>> for parser::Token<'a> {
|
|||
Token::Eof(_) => todo!(),
|
||||
Token::ParseError(_) => todo!(),
|
||||
Token::CharConstant(_) => todo!(),
|
||||
Token::IntegerConstant(_) => todo!(),
|
||||
Token::IntegerHexConstant(_) => todo!(),
|
||||
Token::IntegerBinConstant(_) => todo!(),
|
||||
Token::IntegerOctConstant(_) => todo!(),
|
||||
Token::FloatingConstant(_) => todo!(),
|
||||
Token::FloatingExpConstant(_) => todo!(),
|
||||
Token::DotFloatingConstant(_) => todo!(),
|
||||
Token::DotFloatingExpConstant(_) => todo!(),
|
||||
Token::IntegerConstant(_) => Self::Constant(value),
|
||||
Token::IntegerHexConstant(_) => Self::Constant(value),
|
||||
Token::IntegerBinConstant(_) => Self::Constant(value),
|
||||
Token::IntegerOctConstant(_) => Self::Constant(value),
|
||||
Token::FloatingConstant(_) => Self::Constant(value),
|
||||
Token::FloatingExpConstant(_) => Self::Constant(value),
|
||||
Token::DotFloatingConstant(_) => Self::Constant(value),
|
||||
Token::DotFloatingExpConstant(_) => Self::Constant(value),
|
||||
Token::StringConstant(_) => todo!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
mod constants;
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::AstNode;
|
||||
|
@ -561,6 +619,31 @@ mod tests {
|
|||
eprintln!("Size of AstNode: {}", std::mem::size_of::<AstNode>());
|
||||
}
|
||||
|
||||
/// Smoke test: runs three functions whose bodies are a single integer or
/// floating-point constant through the lexer and the parser, then dumps the
/// resulting AST. It only checks that lexing and parsing succeed.
#[test]
fn parse_constant() {
    use crate::parser::{Parser, Token};

    let input = r#"
fn a() -> u32 {
42u32
}
fn b() -> u32 {
42i8
}
fn c() -> f32 {
42e4
}
"#;
    let tokens = lexer::TokenIterator::new(input).map(Token::from);
    let mut parser = Parser::new(crate::Ast::new());

    for token in tokens {
        parser.parse(token).unwrap();
    }

    // The parser's own output is not inspected here; only the AST is printed.
    let (_, ast) = parser.end_of_input().unwrap();
    eprintln!("AST: {:#?}", ast);
}
|
||||
|
||||
#[test]
|
||||
fn parse() {
|
||||
use crate::parser::{Parser, Token};
|
||||
|
@ -570,7 +653,7 @@ mod tests {
|
|||
fn main(a: u32, b: u32) -> u32 {}
|
||||
"#;
|
||||
let mut lex = lexer::TokenIterator::new(input);
|
||||
let mut mapped = lex.inspect(|t| eprintln!("{t:?}")).map(Token::from);
|
||||
let mut mapped = lex.map(Token::from);
|
||||
let mut ast = crate::Ast::new();
|
||||
let mut parser = Parser::new(ast);
|
||||
while let Some(token) = mapped.next() {
|
||||
|
|
Loading…
Reference in a new issue