new ast parser

This commit is contained in:
Janis 2024-09-09 15:01:53 +02:00
parent 44fa241a79
commit 45653380cf
9 changed files with 2939 additions and 44 deletions

View file

@ -11,6 +11,7 @@ itertools = "0.13.0"
log = "0.4.22"
num-bigint = "0.4.6"
num-traits = "0.2.19"
ordered-float = "4.2.2"
petgraph = "0.6.5"
thiserror = "1.0.63"
unicode-xid = "0.2.4"

File diff suppressed because it is too large Load diff

View file

@ -26,7 +26,7 @@ fn main() {
match tokens {
Ok(tokens) => {
for tok in tokens.iter() {
println!("{}@[{}]", tok.token(), tok.source_location().start);
println!("{}@[{}]", tok.token(), tok.source_location());
}
}
Err((tokens, errors)) => {

View file

@ -167,3 +167,10 @@ macro_rules! variant {
let $pattern = $value else { unreachable!() };
};
}
/// Combines two 32-bit halves into one 64-bit value.
///
/// `lo` becomes bits 0..32 of the result and `hi` becomes bits 32..64.
pub fn from_lo_hi_dwords(lo: u32, hi: u32) -> u64 {
    let upper = u64::from(hi) << 32;
    let lower = u64::from(lo);
    upper | lower
}
/// Splits a 64-bit value into its 32-bit halves.
///
/// Returns `(lo, hi)`, where `lo` holds bits 0..32 of `qword` and `hi` holds bits 32..64.
pub fn into_lo_hi_dwords(qword: u64) -> (u32, u32) {
    let lo = (qword & 0xFFFF_FFFF) as u32;
    let hi = (qword >> 32) as u32;
    (lo, hi)
}

View file

@ -19,7 +19,7 @@ pub mod bigint {
Self(vec![v])
}
pub fn from_u64(v: u64) -> BigInt {
let (lo, hi) = into_lo_hi(v);
let (lo, hi) = into_lo_hi_dwords(v);
Self(vec![lo, hi])
}
@ -120,7 +120,7 @@ pub mod bigint {
impl PartialEq<u64> for BigInt {
fn eq(&self, other: &u64) -> bool {
let (lo, hi) = into_lo_hi(*other);
let (lo, hi) = into_lo_hi_dwords(*other);
cmp_bigint(&self.0, &[lo, hi]) == Ordering::Equal
}
}
@ -133,7 +133,7 @@ pub mod bigint {
impl PartialOrd<u64> for BigInt {
fn partial_cmp(&self, other: &u64) -> Option<Ordering> {
let (lo, hi) = into_lo_hi(*other);
let (lo, hi) = into_lo_hi_dwords(*other);
Some(cmp_bigint(&self.0, &[lo, hi]))
}
}
@ -217,7 +217,7 @@ pub mod bigint {
impl AddAssign<u64> for BigInt {
fn add_assign(&mut self, rhs: u64) {
let (lo, hi) = into_lo_hi(rhs);
let (lo, hi) = into_lo_hi_dwords(rhs);
if hi == 0 {
*self += lo;
} else {
@ -278,7 +278,7 @@ pub mod bigint {
rhs.0.push(0);
}
let (lo, hi) = into_lo_hi(self);
let (lo, hi) = into_lo_hi_dwords(self);
sub_bigint_in_right(&[lo, hi], &mut rhs.0);
rhs.normalised()
@ -302,7 +302,7 @@ pub mod bigint {
impl SubAssign<u64> for BigInt {
fn sub_assign(&mut self, rhs: u64) {
let (lo, hi) = into_lo_hi(rhs);
let (lo, hi) = into_lo_hi_dwords(rhs);
while self.num_digits() < 2 {
self.0.push(0);
}
@ -332,7 +332,7 @@ pub mod bigint {
type Output = Self;
fn mul(self, rhs: u64) -> Self::Output {
let (lo, hi) = into_lo_hi(rhs);
let (lo, hi) = into_lo_hi_dwords(rhs);
BigInt(mul_bigint(&self.0, &[lo, hi]))
}
}
@ -357,7 +357,7 @@ pub mod bigint {
type Output = Self;
fn div(self, rhs: u64) -> Self::Output {
let (lo, hi) = into_lo_hi(rhs);
let (lo, hi) = into_lo_hi_dwords(rhs);
div_rem_bigint(self, BigInt([lo, hi].to_vec())).0
}
}
@ -382,7 +382,7 @@ pub mod bigint {
type Output = Self;
fn rem(self, rhs: u64) -> Self::Output {
let (lo, hi) = into_lo_hi(rhs);
let (lo, hi) = into_lo_hi_dwords(rhs);
div_rem_bigint(self, BigInt([lo, hi].to_vec())).1
}
}
@ -717,12 +717,7 @@ pub mod bigint {
(divident.normalised(), rem)
}
fn from_lo_hi(lo: u32, hi: u32) -> u64 {
lo as u64 | (hi as u64) << 32
}
fn into_lo_hi(qword: u64) -> (u32, u32) {
(qword as u32, (qword >> 32) as u32)
}
use crate::common::{from_lo_hi_dwords, into_lo_hi_dwords};
// from rust num_bigint
/// Subtract a multiple.
@ -740,10 +735,11 @@ pub mod bigint {
// sum >= -(big_digit::MAX * big_digit::MAX) - big_digit::MAX
// sum <= big_digit::MAX
// Offsetting sum by (big_digit::MAX << big_digit::BITS) puts it in DoubleBigDigit range.
let offset_sum = from_lo_hi(u32::MAX, *x) - u32::MAX as u64 + offset_carry as u64
let offset_sum = from_lo_hi_dwords(u32::MAX, *x) - u32::MAX as u64
+ offset_carry as u64
- *y as u64 * c as u64;
let (new_x, new_offset_carry) = into_lo_hi(offset_sum);
let (new_x, new_offset_carry) = into_lo_hi_dwords(offset_sum);
offset_carry = new_offset_carry;
*x = new_x;
}
@ -793,7 +789,7 @@ pub mod bigint {
// q0 is too large if:
// [a2,a1,a0] < q0 * [b1,b0]
// (r << BITS) + a2 < q0 * b1
while r <= u32::MAX as u64 && from_lo_hi(r as u32, a2) < q0 as u64 * b1 as u64 {
while r <= u32::MAX as u64 && from_lo_hi_dwords(r as u32, a2) < q0 as u64 * b1 as u64 {
q0 -= 1;
r += b0 as u64;
}

View file

@ -67,6 +67,21 @@ impl<'a> Chars<'a> {
self.offset
}
/// Translates an offset into the source text into a line/column location.
///
/// The text from the start of the source up to `offset` is scanned character by
/// character: lines are counted from 1, and the column is the number of characters
/// seen since the most recent `'\n'` (so it starts at 0).
///
/// NOTE(review): `offset` is presumably a byte offset, as accepted by `get_from_to` — confirm.
pub fn offset_to_source_location(&self, offset: u32) -> SourceLocation {
    let prefix = self.get_from_to(0, offset);

    let mut line = 1u32;
    let mut column = 0u32;
    for ch in prefix.chars() {
        if ch == '\n' {
            line += 1;
            column = 0;
        } else {
            column += 1;
        }
    }

    SourceLocation::new(line, column)
}
pub fn get_source_span(&self, start: u32, end: u32) -> std::ops::Range<SourceLocation> {
let (start_l, start_c) = {
let range = self.get_from_to(0, start);
@ -153,7 +168,218 @@ pub struct TokenIterator<'a> {
offset: usize,
}
/// Coarse classification of a signed delimiter nesting count.
///
/// The variants are declared from most negative to most positive, so the derived
/// ordering follows the sign and magnitude of the count they stand for.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
enum DelimitorCondition {
    /// Any count of -2 or less.
    BelowZero,
    /// Exactly -1.
    MinusOne,
    /// Exactly 0.
    Zero,
    /// Exactly 1.
    One,
    /// Any count of 2 or more.
    AboveZero,
}

impl DelimitorCondition {
    /// Maps a raw nesting count onto its classification.
    fn from_i32(i: i32) -> Self {
        match i {
            ..=-2 => Self::BelowZero,
            -1 => Self::MinusOne,
            0 => Self::Zero,
            1 => Self::One,
            2.. => Self::AboveZero,
        }
    }
}
/// Snapshot (or target) of the delimiter balance used while skipping tokens.
///
/// Holds one [`DelimitorCondition`] per delimiter kind plus an optional semicolon
/// count. `advance_past_condition` fills `semis` of its running snapshot only when
/// the target's `semis` is `Some`, so `None` means semicolons are not tracked.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct DelimitorConditions {
    braces: DelimitorCondition,
    parens: DelimitorCondition,
    brackets: DelimitorCondition,
    semis: Option<u32>,
}

impl DelimitorConditions {
    /// Tells whether this snapshot has overshot the target `cond`.
    ///
    /// That is the case when any delimiter classification is ordered below the
    /// target's, or when the semicolon count compares greater than the target's.
    fn is_invalid(&self, cond: &Self) -> bool {
        let delimiter_below_target = self.braces < cond.braces
            || self.parens < cond.parens
            || self.brackets < cond.brackets;
        let semis_above_target = self.semis > cond.semis;

        delimiter_below_target || semis_above_target
    }

    /// Builds a snapshot from raw nesting counts, classifying each one with
    /// [`DelimitorCondition::from_i32`]; `semis` is stored unchanged.
    fn from_i32s(braces: i32, parens: i32, brackets: i32, semis: Option<u32>) -> Self {
        let braces = DelimitorCondition::from_i32(braces);
        let parens = DelimitorCondition::from_i32(parens);
        let brackets = DelimitorCondition::from_i32(brackets);

        Self {
            braces,
            parens,
            brackets,
            semis,
        }
    }
}
impl<'a> TokenIterator<'a> {
/// Discards tokens until the next token to be read is `token`.
///
/// Returns `Some(token)` with the match left unconsumed, or `None` when the input
/// runs out before `token` is seen.
pub fn advance_until_before_token(&mut self, token: Token) -> Option<Token> {
    loop {
        // `?` ends the scan with `None` once nothing is left to peek at.
        let upcoming = self.peek_token()?.token();
        if upcoming == token {
            return Some(token);
        }
        _ = self.next();
    }
}
/// Discards tokens until the next token to be read is any member of `tokens`.
///
/// Returns the token that was found, left unconsumed, or `None` when the input runs
/// out first.
pub fn advance_until_before_one_of(&mut self, tokens: &[Token]) -> Option<Token> {
    loop {
        // `?` ends the scan with `None` once nothing is left to peek at.
        let upcoming = self.peek_token()?.token();
        if tokens.contains(&upcoming) {
            return Some(upcoming);
        }
        _ = self.next();
    }
}
pub fn advance_past_end_of_braced(&mut self) -> Option<()> {
use DelimitorCondition::*;
self.advance_past_condition(DelimitorConditions {
braces: MinusOne,
parens: Zero,
brackets: Zero,
semis: None,
})
}
pub fn advance_past_end_of_bracketed(&mut self) -> Option<()> {
use DelimitorCondition::*;
self.advance_past_condition(DelimitorConditions {
braces: Zero,
parens: Zero,
brackets: MinusOne,
semis: None,
})
}
pub fn advance_past_semi(&mut self) -> Option<()> {
use DelimitorCondition::*;
self.advance_past_condition(DelimitorConditions {
braces: Zero,
parens: Zero,
brackets: Zero,
semis: Some(1),
})
}
pub fn advance_past_end_of_parens(&mut self) -> Option<()> {
use DelimitorCondition::*;
self.advance_past_condition(DelimitorConditions {
braces: Zero,
parens: MinusOne,
brackets: Zero,
semis: None,
})
}
pub fn advance_until_start_of_braced(&mut self) -> Option<()> {
use DelimitorCondition::*;
self.advance_until_condition(DelimitorConditions {
braces: One,
parens: Zero,
brackets: Zero,
semis: None,
})
}
pub fn advance_until_start_of_parens(&mut self) -> Option<()> {
use DelimitorCondition::*;
self.advance_until_condition(DelimitorConditions {
braces: Zero,
parens: One,
brackets: Zero,
semis: None,
})
}
pub fn advance_until_end_of_parens(&mut self) -> Option<()> {
use DelimitorCondition::*;
self.advance_until_condition(DelimitorConditions {
braces: Zero,
parens: MinusOne,
brackets: Zero,
semis: None,
})
}
/// Advances until `cond` is reached, then steps back one position so that the
/// token which completed the condition is the next one to be read.
///
/// Returns `None`, leaving the iterator wherever `advance_past_condition`
/// stopped, when the condition cannot be reached.
fn advance_until_condition(&mut self, cond: DelimitorConditions) -> Option<()> {
    self.advance_past_condition(cond)?;
    // The result of `saturating_sub` must be stored back; discarding it left the
    // iterator positioned *after* the matching token.
    // NOTE(review): assumes `offset` is the index of the next token to yield — confirm.
    self.offset = self.offset.saturating_sub(1);
    Some(())
}
/// Consumes tokens until the running delimiter balance equals `cond`.
///
/// Net counts of `{`/`}`, `(`/`)` and `[`/`]` and a count of `;` are updated for
/// every consumed token. After each token the counts are classified and compared
/// with `cond`:
/// - equal: returns `Some(())`, the completing token having been consumed;
/// - [`DelimitorConditions::is_invalid`] holds: returns `None`.
///
/// Also returns `None` when the input ends first. Semicolons take part in the
/// comparison only when `cond.semis` is `Some`.
///
/// NOTE(review): for a target above `Zero` (e.g. `One`), the starting balance is
/// already ordered below the target, so the scan gives up after the first token
/// unless that token is the matching opener — confirm this is intended.
fn advance_past_condition(&mut self, cond: DelimitorConditions) -> Option<()> {
    let mut brace_depth = 0i32;
    let mut paren_depth = 0i32;
    let mut bracket_depth = 0i32;
    let mut semi_count = 0u32;

    while let Some(item) = self.next() {
        match item.token() {
            Token::OpenBrace => brace_depth += 1,
            Token::CloseBrace => brace_depth -= 1,
            Token::OpenParens => paren_depth += 1,
            Token::CloseParens => paren_depth -= 1,
            Token::OpenSquareBracket => bracket_depth += 1,
            Token::CloseSquareBracket => bracket_depth -= 1,
            Token::Semi => semi_count += 1,
            _ => {}
        }

        // Only carry the semicolon count when the target asks for one.
        let tracked_semis = cond.semis.map(|_| semi_count);
        let seen =
            DelimitorConditions::from_i32s(brace_depth, paren_depth, bracket_depth, tracked_semis);

        if seen == cond {
            return Some(());
        }
        if seen.is_invalid(&cond) {
            return None;
        }
    }

    None
}
/// Consumes a leading run of tokens taken from `tokens`, each at most once.
///
/// Returns one flag per entry of `tokens`, in the same order, telling whether that
/// token was consumed. Scanning stops at the end of input, at the first token that
/// is not in `tokens`, or at a repeat of an already consumed token; in the latter
/// two cases the stopping token is left unconsumed.
///
/// The previous implementation only peeked and never advanced, so it looped forever
/// on a non-matching token and consumed nothing on a matching one.
pub fn eat_all_zero_or_once(&mut self, tokens: &[Token]) -> Vec<bool> {
    let mut seen = vec![false; tokens.len()];

    while let Some(next) = self.peek_token() {
        let upcoming = next.token();
        let Some(pos) = tokens.iter().position(|&t| t == upcoming) else {
            // Not one of the requested tokens: leave it for the caller.
            break;
        };
        if seen[pos] {
            // Second occurrence: "zero or once" is exceeded, so stop before it.
            break;
        }
        seen[pos] = true;
        _ = self.next();
    }

    seen
}
/// Reports the source location of the next token without advancing this iterator.
///
/// The lookup runs on a clone of the iterator. When that clone yields no token, the
/// location of the offset `num_bytes()` of the source is returned instead.
pub fn current_source_location(&self) -> SourceLocation {
    match self.clone().next() {
        Some(item) => item.source_location(),
        None => {
            let source = &self.tokenizer.source;
            source.offset_to_source_location(source.num_bytes() as u32)
        }
    }
}
pub fn expect_token(&mut self, token: Token) -> crate::parser::Result<TokenItem<'a>> {
self.next_if(|item| item.token() == token)
.ok_or(crate::parser::Error::ExpectedTokenNotFound(token))
@ -209,6 +435,9 @@ impl Display for SourceLocation {
}
impl SourceLocation {
pub fn invalid() -> Self {
Self::new(u32::MAX, u32::MAX)
}
/// Creates a location from the given `line` and `column`, stored as passed.
pub fn new(line: u32, column: u32) -> Self {
Self { line, column }
}
@ -244,6 +473,9 @@ impl SourceLocation {
}
impl<'a> TokenItem<'a> {
/// Returns the `TokenPos` wrapped by this item.
pub fn token_pos(&self) -> TokenPos {
self.inner
}
/// Returns the `token` field of the wrapped `TokenPos`.
pub fn token(&self) -> Token {
self.inner.token
}
@ -254,11 +486,17 @@ impl<'a> TokenItem<'a> {
.get_from_to(self.inner.start, self.inner.end)
}
pub fn source_location(&self) -> std::ops::Range<SourceLocation> {
/// Returns the span of source locations for this token, obtained by passing the
/// token's `start` and `end` offsets to the source's `get_source_span`.
pub fn source_location_range(&self) -> std::ops::Range<SourceLocation> {
    let source = &self.tokenizer.source;
    let (start, end) = (self.inner.start, self.inner.end);
    source.get_source_span(start, end)
}
/// Returns the source location of this token's `start` offset.
pub fn source_location(&self) -> SourceLocation {
    let source = &self.tokenizer.source;
    source.offset_to_source_location(self.inner.start)
}
}
impl<'a> Iterator for TokenIterator<'a> {

View file

@ -6,6 +6,7 @@
bigint_helper_methods,
map_try_insert,
iter_intersperse,
iter_array_chunks,
int_roundings
)]
#![allow(unused_macros)]

View file

@ -11,7 +11,7 @@ use crate::{
lexer::{Radix, TokenIterator},
string_table::{ImmOrIndex, Index, StringTable},
symbol_table::{SymbolKind, SymbolTable},
tokens::Token,
tokens::{Token, PRECEDENCE_MAP},
};
#[derive(Debug, thiserror::Error)]
@ -2462,26 +2462,3 @@ impl Tree {
}
}
}
static PRECEDENCE_MAP: std::sync::LazyLock<HashMap<Token, u32>> = std::sync::LazyLock::new(|| {
HashMap::from([
(Token::PipePipe, 10),
(Token::AmpersandAmpersand, 20),
(Token::Pipe, 30),
(Token::Caret, 40),
(Token::Ampersand, 50),
(Token::BangEqual, 60),
(Token::EqualEqual, 60),
(Token::LessEqual, 70),
(Token::GreaterEqual, 70),
(Token::Less, 70),
(Token::Greater, 70),
(Token::GreaterGreater, 80),
(Token::LessLess, 80),
(Token::Plus, 90),
(Token::Minus, 90),
(Token::Percent, 100),
(Token::Star, 100),
(Token::Slash, 100),
])
});

View file

@ -1,3 +1,5 @@
use std::collections::HashMap;
macro_rules! tokens {
($vis:vis $ty_name:ident:
{
@ -96,7 +98,11 @@ tokens!(pub Token: {
Bool => "bool",
F32 => "f32",
F64 => "f64",
ISize => "isize",
USize => "usize",
Const => "const",
Volatile => "volatile",
Noalias => "noalias",
Fn => "fn",
Let => "let",
Var => "var",
@ -104,6 +110,13 @@ tokens!(pub Token: {
As => "as",
Else => "else",
Return => "return",
Struct => "struct",
Type => "type",
Union => "union",
Enum => "enum",
Packed => "packed",
Extern => "extern",
Pub => "pub",
// Operators
Dot => ".",
MinusGreater => "->",
@ -294,3 +307,27 @@ impl TokenPos {
Self { token, start, end }
}
}
/// Precedence value for each binary operator token, built on first access.
///
/// Operators listed on the same row share a precedence value.
/// NOTE(review): presumably a larger value binds tighter — confirm against the parser.
pub static PRECEDENCE_MAP: std::sync::LazyLock<HashMap<Token, u32>> =
    std::sync::LazyLock::new(|| {
        const LEVELS: &[(u32, &[Token])] = &[
            (10, &[Token::PipePipe]),
            (20, &[Token::AmpersandAmpersand]),
            (30, &[Token::Pipe]),
            (40, &[Token::Caret]),
            (50, &[Token::Ampersand]),
            (60, &[Token::BangEqual, Token::EqualEqual]),
            (
                70,
                &[
                    Token::LessEqual,
                    Token::GreaterEqual,
                    Token::Less,
                    Token::Greater,
                ],
            ),
            (80, &[Token::GreaterGreater, Token::LessLess]),
            (90, &[Token::Plus, Token::Minus]),
            (100, &[Token::Percent, Token::Star, Token::Slash]),
        ];

        LEVELS
            .iter()
            .flat_map(|&(precedence, operators)| {
                operators.iter().map(move |&operator| (operator, precedence))
            })
            .collect()
    });