new ast parser
This commit is contained in:
parent
44fa241a79
commit
45653380cf
|
@ -11,6 +11,7 @@ itertools = "0.13.0"
|
|||
log = "0.4.22"
|
||||
num-bigint = "0.4.6"
|
||||
num-traits = "0.2.19"
|
||||
ordered-float = "4.2.2"
|
||||
petgraph = "0.6.5"
|
||||
thiserror = "1.0.63"
|
||||
unicode-xid = "0.2.4"
|
||||
|
|
2638
src/ast2/mod.rs
2638
src/ast2/mod.rs
File diff suppressed because it is too large
Load diff
|
@ -26,7 +26,7 @@ fn main() {
|
|||
match tokens {
|
||||
Ok(tokens) => {
|
||||
for tok in tokens.iter() {
|
||||
println!("{}@[{}]", tok.token(), tok.source_location().start);
|
||||
println!("{}@[{}]", tok.token(), tok.source_location());
|
||||
}
|
||||
}
|
||||
Err((tokens, errors)) => {
|
||||
|
|
|
@ -167,3 +167,10 @@ macro_rules! variant {
|
|||
let $pattern = $value else { unreachable!() };
|
||||
};
|
||||
}
|
||||
|
||||
/// Combines two 32-bit halves into one 64-bit value.
///
/// `lo` supplies bits 0..=31 of the result and `hi` supplies bits 32..=63.
#[must_use]
pub const fn from_lo_hi_dwords(lo: u32, hi: u32) -> u64 {
    // Parenthesised explicitly: `<<` already binds tighter than `|`, but the
    // grouping should not depend on the reader remembering that.
    (lo as u64) | ((hi as u64) << 32)
}
|
||||
/// Splits a 64-bit value into its `(low, high)` 32-bit halves.
///
/// The first element holds bits 0..=31 of `qword`, the second bits 32..=63.
#[must_use]
pub const fn into_lo_hi_dwords(qword: u64) -> (u32, u32) {
    // Both `as u32` casts truncate on purpose: only the low 32 bits of each
    // operand are wanted.
    let lo = qword as u32;
    let hi = (qword >> 32) as u32;
    (lo, hi)
}
|
||||
|
|
|
@ -19,7 +19,7 @@ pub mod bigint {
|
|||
Self(vec![v])
|
||||
}
|
||||
pub fn from_u64(v: u64) -> BigInt {
|
||||
let (lo, hi) = into_lo_hi(v);
|
||||
let (lo, hi) = into_lo_hi_dwords(v);
|
||||
Self(vec![lo, hi])
|
||||
}
|
||||
|
||||
|
@ -120,7 +120,7 @@ pub mod bigint {
|
|||
|
||||
impl PartialEq<u64> for BigInt {
|
||||
fn eq(&self, other: &u64) -> bool {
|
||||
let (lo, hi) = into_lo_hi(*other);
|
||||
let (lo, hi) = into_lo_hi_dwords(*other);
|
||||
cmp_bigint(&self.0, &[lo, hi]) == Ordering::Equal
|
||||
}
|
||||
}
|
||||
|
@ -133,7 +133,7 @@ pub mod bigint {
|
|||
|
||||
impl PartialOrd<u64> for BigInt {
|
||||
fn partial_cmp(&self, other: &u64) -> Option<Ordering> {
|
||||
let (lo, hi) = into_lo_hi(*other);
|
||||
let (lo, hi) = into_lo_hi_dwords(*other);
|
||||
Some(cmp_bigint(&self.0, &[lo, hi]))
|
||||
}
|
||||
}
|
||||
|
@ -217,7 +217,7 @@ pub mod bigint {
|
|||
|
||||
impl AddAssign<u64> for BigInt {
|
||||
fn add_assign(&mut self, rhs: u64) {
|
||||
let (lo, hi) = into_lo_hi(rhs);
|
||||
let (lo, hi) = into_lo_hi_dwords(rhs);
|
||||
if hi == 0 {
|
||||
*self += lo;
|
||||
} else {
|
||||
|
@ -278,7 +278,7 @@ pub mod bigint {
|
|||
rhs.0.push(0);
|
||||
}
|
||||
|
||||
let (lo, hi) = into_lo_hi(self);
|
||||
let (lo, hi) = into_lo_hi_dwords(self);
|
||||
sub_bigint_in_right(&[lo, hi], &mut rhs.0);
|
||||
|
||||
rhs.normalised()
|
||||
|
@ -302,7 +302,7 @@ pub mod bigint {
|
|||
|
||||
impl SubAssign<u64> for BigInt {
|
||||
fn sub_assign(&mut self, rhs: u64) {
|
||||
let (lo, hi) = into_lo_hi(rhs);
|
||||
let (lo, hi) = into_lo_hi_dwords(rhs);
|
||||
while self.num_digits() < 2 {
|
||||
self.0.push(0);
|
||||
}
|
||||
|
@ -332,7 +332,7 @@ pub mod bigint {
|
|||
type Output = Self;
|
||||
|
||||
fn mul(self, rhs: u64) -> Self::Output {
|
||||
let (lo, hi) = into_lo_hi(rhs);
|
||||
let (lo, hi) = into_lo_hi_dwords(rhs);
|
||||
BigInt(mul_bigint(&self.0, &[lo, hi]))
|
||||
}
|
||||
}
|
||||
|
@ -357,7 +357,7 @@ pub mod bigint {
|
|||
type Output = Self;
|
||||
|
||||
fn div(self, rhs: u64) -> Self::Output {
|
||||
let (lo, hi) = into_lo_hi(rhs);
|
||||
let (lo, hi) = into_lo_hi_dwords(rhs);
|
||||
div_rem_bigint(self, BigInt([lo, hi].to_vec())).0
|
||||
}
|
||||
}
|
||||
|
@ -382,7 +382,7 @@ pub mod bigint {
|
|||
type Output = Self;
|
||||
|
||||
fn rem(self, rhs: u64) -> Self::Output {
|
||||
let (lo, hi) = into_lo_hi(rhs);
|
||||
let (lo, hi) = into_lo_hi_dwords(rhs);
|
||||
div_rem_bigint(self, BigInt([lo, hi].to_vec())).1
|
||||
}
|
||||
}
|
||||
|
@ -717,12 +717,7 @@ pub mod bigint {
|
|||
(divident.normalised(), rem)
|
||||
}
|
||||
|
||||
fn from_lo_hi(lo: u32, hi: u32) -> u64 {
|
||||
lo as u64 | (hi as u64) << 32
|
||||
}
|
||||
fn into_lo_hi(qword: u64) -> (u32, u32) {
|
||||
(qword as u32, (qword >> 32) as u32)
|
||||
}
|
||||
use crate::common::{from_lo_hi_dwords, into_lo_hi_dwords};
|
||||
|
||||
// from rust num_bigint
|
||||
/// Subtract a multiple.
|
||||
|
@ -740,10 +735,11 @@ pub mod bigint {
|
|||
// sum >= -(big_digit::MAX * big_digit::MAX) - big_digit::MAX
|
||||
// sum <= big_digit::MAX
|
||||
// Offsetting sum by (big_digit::MAX << big_digit::BITS) puts it in DoubleBigDigit range.
|
||||
let offset_sum = from_lo_hi(u32::MAX, *x) - u32::MAX as u64 + offset_carry as u64
|
||||
let offset_sum = from_lo_hi_dwords(u32::MAX, *x) - u32::MAX as u64
|
||||
+ offset_carry as u64
|
||||
- *y as u64 * c as u64;
|
||||
|
||||
let (new_x, new_offset_carry) = into_lo_hi(offset_sum);
|
||||
let (new_x, new_offset_carry) = into_lo_hi_dwords(offset_sum);
|
||||
offset_carry = new_offset_carry;
|
||||
*x = new_x;
|
||||
}
|
||||
|
@ -793,7 +789,7 @@ pub mod bigint {
|
|||
// q0 is too large if:
|
||||
// [a2,a1,a0] < q0 * [b1,b0]
|
||||
// (r << BITS) + a2 < q0 * b1
|
||||
while r <= u32::MAX as u64 && from_lo_hi(r as u32, a2) < q0 as u64 * b1 as u64 {
|
||||
while r <= u32::MAX as u64 && from_lo_hi_dwords(r as u32, a2) < q0 as u64 * b1 as u64 {
|
||||
q0 -= 1;
|
||||
r += b0 as u64;
|
||||
}
|
||||
|
|
240
src/lexer.rs
240
src/lexer.rs
|
@ -67,6 +67,21 @@ impl<'a> Chars<'a> {
|
|||
self.offset
|
||||
}
|
||||
|
||||
pub fn offset_to_source_location(&self, offset: u32) -> SourceLocation {
|
||||
let (start_l, start_c) = {
|
||||
let range = self.get_from_to(0, offset);
|
||||
range.chars().fold((1u32, 0u32), |(line, col), c| {
|
||||
if c == '\n' {
|
||||
(line + 1, 0)
|
||||
} else {
|
||||
(line, col + 1)
|
||||
}
|
||||
})
|
||||
};
|
||||
|
||||
SourceLocation::new(start_l, start_c)
|
||||
}
|
||||
|
||||
pub fn get_source_span(&self, start: u32, end: u32) -> std::ops::Range<SourceLocation> {
|
||||
let (start_l, start_c) = {
|
||||
let range = self.get_from_to(0, start);
|
||||
|
@ -153,7 +168,218 @@ pub struct TokenIterator<'a> {
|
|||
offset: usize,
|
||||
}
|
||||
|
||||
/// Coarse bucket for a signed delimiter counter.
///
/// Variants are declared in ascending numeric order, so the derived `Ord`
/// agrees with the order of the counts they stand for.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
enum DelimitorCondition {
    /// Count of `-2` or lower.
    BelowZero,
    /// Count of exactly `-1`.
    MinusOne,
    /// Count of exactly `0`.
    Zero,
    /// Count of exactly `1`.
    One,
    /// Count of `2` or higher.
    AboveZero,
}

impl DelimitorCondition {
    /// Buckets a signed count into its `DelimitorCondition`.
    fn from_i32(i: i32) -> Self {
        // Disjoint inclusive ranges: no arm overlaps another, so the result
        // does not depend on the order in which the arms are tried.
        match i {
            i32::MIN..=-2 => Self::BelowZero,
            -1 => Self::MinusOne,
            0 => Self::Zero,
            1 => Self::One,
            2..=i32::MAX => Self::AboveZero,
        }
    }
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
struct DelimitorConditions {
|
||||
braces: DelimitorCondition,
|
||||
parens: DelimitorCondition,
|
||||
brackets: DelimitorCondition,
|
||||
semis: Option<u32>,
|
||||
}
|
||||
impl DelimitorConditions {
|
||||
fn is_invalid(&self, cond: &Self) -> bool {
|
||||
self.braces < cond.braces
|
||||
|| self.parens < cond.parens
|
||||
|| self.brackets < cond.brackets
|
||||
|| self.semis > cond.semis
|
||||
}
|
||||
|
||||
fn from_i32s(braces: i32, parens: i32, brackets: i32, semis: Option<u32>) -> Self {
|
||||
Self {
|
||||
braces: DelimitorCondition::from_i32(braces),
|
||||
parens: DelimitorCondition::from_i32(parens),
|
||||
brackets: DelimitorCondition::from_i32(brackets),
|
||||
semis,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> TokenIterator<'a> {
|
||||
/// Skips tokens until the peeked token equals `token`.
///
/// The matching token is left unconsumed. Returns `Some(token)` on a match,
/// or `None` once `peek_token` has nothing left to offer.
pub fn advance_until_before_token(&mut self, token: Token) -> Option<Token> {
    loop {
        let upcoming = self.peek_token()?.token();
        if upcoming == token {
            return Some(token);
        }
        _ = self.next();
    }
}
|
||||
|
||||
/// Skips tokens until the peeked token is one of `tokens`.
///
/// The matching token is left unconsumed and returned as `Some`. Returns
/// `None` once `peek_token` has nothing left to offer.
pub fn advance_until_before_one_of(&mut self, tokens: &[Token]) -> Option<Token> {
    loop {
        let upcoming = self.peek_token()?.token();
        if tokens.contains(&upcoming) {
            return Some(upcoming);
        }
        _ = self.next();
    }
}
|
||||
|
||||
pub fn advance_past_end_of_braced(&mut self) -> Option<()> {
|
||||
use DelimitorCondition::*;
|
||||
self.advance_past_condition(DelimitorConditions {
|
||||
braces: MinusOne,
|
||||
parens: Zero,
|
||||
brackets: Zero,
|
||||
semis: None,
|
||||
})
|
||||
}
|
||||
pub fn advance_past_end_of_bracketed(&mut self) -> Option<()> {
|
||||
use DelimitorCondition::*;
|
||||
self.advance_past_condition(DelimitorConditions {
|
||||
braces: Zero,
|
||||
parens: Zero,
|
||||
brackets: MinusOne,
|
||||
semis: None,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn advance_past_semi(&mut self) -> Option<()> {
|
||||
use DelimitorCondition::*;
|
||||
self.advance_past_condition(DelimitorConditions {
|
||||
braces: Zero,
|
||||
parens: Zero,
|
||||
brackets: Zero,
|
||||
semis: Some(1),
|
||||
})
|
||||
}
|
||||
|
||||
pub fn advance_past_end_of_parens(&mut self) -> Option<()> {
|
||||
use DelimitorCondition::*;
|
||||
self.advance_past_condition(DelimitorConditions {
|
||||
braces: Zero,
|
||||
parens: MinusOne,
|
||||
brackets: Zero,
|
||||
semis: None,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn advance_until_start_of_braced(&mut self) -> Option<()> {
|
||||
use DelimitorCondition::*;
|
||||
self.advance_until_condition(DelimitorConditions {
|
||||
braces: One,
|
||||
parens: Zero,
|
||||
brackets: Zero,
|
||||
semis: None,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn advance_until_start_of_parens(&mut self) -> Option<()> {
|
||||
use DelimitorCondition::*;
|
||||
self.advance_until_condition(DelimitorConditions {
|
||||
braces: Zero,
|
||||
parens: One,
|
||||
brackets: Zero,
|
||||
semis: None,
|
||||
})
|
||||
}
|
||||
pub fn advance_until_end_of_parens(&mut self) -> Option<()> {
|
||||
use DelimitorCondition::*;
|
||||
self.advance_until_condition(DelimitorConditions {
|
||||
braces: Zero,
|
||||
parens: MinusOne,
|
||||
brackets: Zero,
|
||||
semis: None,
|
||||
})
|
||||
}
|
||||
|
||||
/// Advances like `advance_past_condition`, then moves `offset` back by one.
///
/// Returns `None`, without touching `offset`, when `advance_past_condition`
/// fails.
fn advance_until_condition(&mut self, cond: DelimitorConditions) -> Option<()> {
    self.advance_past_condition(cond)?;
    // `saturating_sub` only returns the decremented value; it has to be
    // stored back, otherwise this behaves exactly like the "past" variant.
    // NOTE(review): assumes `offset` is the position advanced by `next()`, so
    // that stepping it back un-consumes the last token — confirm.
    self.offset = self.offset.saturating_sub(1);
    Some(())
}
|
||||
|
||||
fn advance_past_condition(&mut self, cond: DelimitorConditions) -> Option<()> {
|
||||
let mut braces = 0;
|
||||
let mut parens = 0;
|
||||
let mut brackets = 0;
|
||||
let mut semis = 0;
|
||||
while let Some(next) = self.next() {
|
||||
match next.token() {
|
||||
Token::OpenBrace => {
|
||||
braces += 1;
|
||||
}
|
||||
Token::CloseBrace => {
|
||||
braces -= 1;
|
||||
}
|
||||
Token::OpenParens => {
|
||||
parens += 1;
|
||||
}
|
||||
Token::CloseParens => {
|
||||
parens -= 1;
|
||||
}
|
||||
Token::OpenSquareBracket => {
|
||||
brackets += 1;
|
||||
}
|
||||
Token::CloseSquareBracket => {
|
||||
brackets -= 1;
|
||||
}
|
||||
Token::Semi => {
|
||||
semis += 1;
|
||||
}
|
||||
_ => { /* nada */ }
|
||||
}
|
||||
|
||||
let current =
|
||||
DelimitorConditions::from_i32s(braces, parens, brackets, cond.semis.map(|_| semis));
|
||||
if cond == current {
|
||||
return Some(());
|
||||
}
|
||||
|
||||
if current.is_invalid(&cond) {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
/// Consumes a run of tokens drawn from `tokens`, each at most once.
///
/// Stops, leaving the peeked token unconsumed, at the first token that is
/// not in `tokens`, at the first token that was already consumed by this
/// call, or when `peek_token` has nothing left.
///
/// Returns one flag per entry of `tokens`, in the same order: `true` when
/// that token was consumed.
pub fn eat_all_zero_or_once(&mut self, tokens: &[Token]) -> Vec<bool> {
    let mut seen = vec![false; tokens.len()];

    while let Some(upcoming) = self.peek_token() {
        let kind = upcoming.token();
        // A token outside the set ends the run. Without this exit the loop
        // would peek the same token forever.
        let Some(slot) = tokens.iter().position(|&candidate| candidate == kind) else {
            break;
        };
        if seen[slot] {
            break;
        }
        seen[slot] = true;
        // Actually eat the token so the next iteration looks at a new one.
        _ = self.next();
    }

    seen
}
|
||||
|
||||
/// Location of the token the iterator would yield next.
///
/// Works on a clone, so the iterator itself does not advance. When the clone
/// yields nothing, returns the location of byte offset `num_bytes()` in the
/// source instead.
pub fn current_source_location(&self) -> SourceLocation {
    match self.clone().next() {
        Some(item) => item.source_location(),
        None => {
            let source = &self.tokenizer.source;
            source.offset_to_source_location(source.num_bytes() as u32)
        }
    }
}
|
||||
|
||||
pub fn expect_token(&mut self, token: Token) -> crate::parser::Result<TokenItem<'a>> {
|
||||
self.next_if(|item| item.token() == token)
|
||||
.ok_or(crate::parser::Error::ExpectedTokenNotFound(token))
|
||||
|
@ -209,6 +435,9 @@ impl Display for SourceLocation {
|
|||
}
|
||||
|
||||
impl SourceLocation {
|
||||
pub fn invalid() -> Self {
|
||||
Self::new(u32::MAX, u32::MAX)
|
||||
}
|
||||
pub fn new(line: u32, column: u32) -> Self {
|
||||
Self { line, column }
|
||||
}
|
||||
|
@ -244,6 +473,9 @@ impl SourceLocation {
|
|||
}
|
||||
|
||||
impl<'a> TokenItem<'a> {
|
||||
pub fn token_pos(&self) -> TokenPos {
|
||||
self.inner
|
||||
}
|
||||
pub fn token(&self) -> Token {
|
||||
self.inner.token
|
||||
}
|
||||
|
@ -254,11 +486,17 @@ impl<'a> TokenItem<'a> {
|
|||
.get_from_to(self.inner.start, self.inner.end)
|
||||
}
|
||||
|
||||
pub fn source_location(&self) -> std::ops::Range<SourceLocation> {
|
||||
pub fn source_location_range(&self) -> std::ops::Range<SourceLocation> {
|
||||
self.tokenizer
|
||||
.source
|
||||
.get_source_span(self.inner.start, self.inner.end)
|
||||
}
|
||||
|
||||
/// Location of this token's `start` offset, as computed by
/// `offset_to_source_location`.
pub fn source_location(&self) -> SourceLocation {
    let start_offset = self.inner.start;
    self.tokenizer.source.offset_to_source_location(start_offset)
}
|
||||
}
|
||||
|
||||
impl<'a> Iterator for TokenIterator<'a> {
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
bigint_helper_methods,
|
||||
map_try_insert,
|
||||
iter_intersperse,
|
||||
iter_array_chunks,
|
||||
int_roundings
|
||||
)]
|
||||
#![allow(unused_macros)]
|
||||
|
|
|
@ -11,7 +11,7 @@ use crate::{
|
|||
lexer::{Radix, TokenIterator},
|
||||
string_table::{ImmOrIndex, Index, StringTable},
|
||||
symbol_table::{SymbolKind, SymbolTable},
|
||||
tokens::Token,
|
||||
tokens::{Token, PRECEDENCE_MAP},
|
||||
};
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
|
@ -2462,26 +2462,3 @@ impl Tree {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
static PRECEDENCE_MAP: std::sync::LazyLock<HashMap<Token, u32>> = std::sync::LazyLock::new(|| {
|
||||
HashMap::from([
|
||||
(Token::PipePipe, 10),
|
||||
(Token::AmpersandAmpersand, 20),
|
||||
(Token::Pipe, 30),
|
||||
(Token::Caret, 40),
|
||||
(Token::Ampersand, 50),
|
||||
(Token::BangEqual, 60),
|
||||
(Token::EqualEqual, 60),
|
||||
(Token::LessEqual, 70),
|
||||
(Token::GreaterEqual, 70),
|
||||
(Token::Less, 70),
|
||||
(Token::Greater, 70),
|
||||
(Token::GreaterGreater, 80),
|
||||
(Token::LessLess, 80),
|
||||
(Token::Plus, 90),
|
||||
(Token::Minus, 90),
|
||||
(Token::Percent, 100),
|
||||
(Token::Star, 100),
|
||||
(Token::Slash, 100),
|
||||
])
|
||||
});
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
use std::collections::HashMap;
|
||||
|
||||
macro_rules! tokens {
|
||||
($vis:vis $ty_name:ident:
|
||||
{
|
||||
|
@ -96,7 +98,11 @@ tokens!(pub Token: {
|
|||
Bool => "bool",
|
||||
F32 => "f32",
|
||||
F64 => "f64",
|
||||
ISize => "isize",
|
||||
USize => "usize",
|
||||
Const => "const",
|
||||
Volatile => "volatile",
|
||||
Noalias => "noalias",
|
||||
Fn => "fn",
|
||||
Let => "let",
|
||||
Var => "var",
|
||||
|
@ -104,6 +110,13 @@ tokens!(pub Token: {
|
|||
As => "as",
|
||||
Else => "else",
|
||||
Return => "return",
|
||||
Struct => "struct",
|
||||
Type => "type",
|
||||
Union => "union",
|
||||
Enum => "enum",
|
||||
Packed => "packed",
|
||||
Extern => "extern",
|
||||
Pub => "pub",
|
||||
// Operators
|
||||
Dot => ".",
|
||||
MinusGreater => "->",
|
||||
|
@ -294,3 +307,27 @@ impl TokenPos {
|
|||
Self { token, start, end }
|
||||
}
|
||||
}
|
||||
|
||||
pub static PRECEDENCE_MAP: std::sync::LazyLock<HashMap<Token, u32>> =
|
||||
std::sync::LazyLock::new(|| {
|
||||
HashMap::from([
|
||||
(Token::PipePipe, 10),
|
||||
(Token::AmpersandAmpersand, 20),
|
||||
(Token::Pipe, 30),
|
||||
(Token::Caret, 40),
|
||||
(Token::Ampersand, 50),
|
||||
(Token::BangEqual, 60),
|
||||
(Token::EqualEqual, 60),
|
||||
(Token::LessEqual, 70),
|
||||
(Token::GreaterEqual, 70),
|
||||
(Token::Less, 70),
|
||||
(Token::Greater, 70),
|
||||
(Token::GreaterGreater, 80),
|
||||
(Token::LessLess, 80),
|
||||
(Token::Plus, 90),
|
||||
(Token::Minus, 90),
|
||||
(Token::Percent, 100),
|
||||
(Token::Star, 100),
|
||||
(Token::Slash, 100),
|
||||
])
|
||||
});
|
||||
|
|
Loading…
Reference in a new issue