From e8934b8ccc13b2e0d81dd642131eb111cf80ac35 Mon Sep 17 00:00:00 2001
From: Janis
Date: Tue, 6 Aug 2024 20:57:19 +0200
Subject: [PATCH] initial commit

---
 .gitignore     |   2 +
 Cargo.toml     |  10 ++
 grammar.bnf    |  95 +++++++++++
 rust-toolchain |   1 +
 src/common.rs  | 160 ++++++++++++++++++
 src/lexer.rs   | 442 +++++++++++++++++++++++++++++++++++++++++++++++++
 src/lib.rs     |   5 +
 src/tokens.rs  | 236 ++++++++++++++++++++++++++
 8 files changed, 951 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 Cargo.toml
 create mode 100644 grammar.bnf
 create mode 100644 rust-toolchain
 create mode 100644 src/common.rs
 create mode 100644 src/lexer.rs
 create mode 100644 src/lib.rs
 create mode 100644 src/tokens.rs

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..4fffb2f
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+/target
+/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..1fb4d26
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,10 @@
+[package]
+name = "compiler"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+itertools = "0.13.0"
+log = "0.4.22"
+thiserror = "1.0.63"
+unicode-xid = "0.2.4"
diff --git a/grammar.bnf b/grammar.bnf
new file mode 100644
index 0000000..86296ff
--- /dev/null
+++ b/grammar.bnf
@@ -0,0 +1,95 @@
+# cool language called sea:
+
+<dec_digits> ::= ...
+<bin_digits> ::= ...
+<oct_digits> ::= ...
+<hex_digits> ::= ...
+<id_start> ::= <XID_Start> | '_'
+<id_continue> ::= <XID_Continue> | <digit> | '_'
+<ident> ::= <id_start>
+          | <ident> <id_continue>
+
+<program> ::= <declarations>
+
+<declarations> ::= <declaration> | <declarations> <declaration>
+
+<function_decl> ::= fn <ident> '(' <parameter_list>,? ')' (-> <type>)? <block>
+
+<parameter_list> ::= <parameter>
+                   | <parameter_list> , <parameter>
+<parameter> ::= <ident> : <type>
+
+<block> ::= <statement>*
+          | <expr>
+
+<statement> ::= <return_stmt>
+              | <var_decl_stmt>
+              | <expr_stmt>
+              | ';'
+<return_stmt> ::= return <expr>? ';'
+<var_decl_stmt> ::= <var_decl> ';'
+<expr_stmt> ::= <expr> ';'
+<assignment_op> ::= |= | &= | ^= | /= | *= | %= | <<= | >>= | += | -= | =
+
+<expr> ::= <logical_or_expr>
+
+<logical_or_expr> ::= <logical_and_expr>
+                    | <logical_or_expr> || <logical_and_expr>
+<logical_and_expr> ::= <bitwise_or_expr>
+                     | <logical_and_expr> && <bitwise_or_expr>
+<bitwise_or_expr> ::= <bitwise_xor_expr>
+                    | <bitwise_or_expr> '|' <bitwise_xor_expr>
+<bitwise_xor_expr> ::= <bitwise_and_expr>
+                     | <bitwise_xor_expr> ^ <bitwise_and_expr>
+<bitwise_and_expr> ::= <equality_expr>
+                     | <bitwise_and_expr> & <equality_expr>
+<equality_expr> ::= <relational_expr>
+                  | <equality_expr> (!= | ==) <relational_expr>
+<relational_expr> ::= <shift_expr>
+                    | <relational_expr> (< | > | <= | >=) <shift_expr>
+<shift_expr> ::= <additive_expr>
+               | <shift_expr> (<< | >>) <additive_expr>
+<additive_expr> ::= <multiplicative_expr>
+                  | <additive_expr> (+ | -) <multiplicative_expr>
+<multiplicative_expr> ::= <unary_expr>
+                        | <multiplicative_expr> (* | / | %) <unary_expr>
+
+<unary_expr> ::= <unary_op> <cast_expr>
+               | <cast_expr>
+<unary_op> ::= ! | - | + | & | *
+
+<cast_expr> ::= <primary_expr>
+              | <cast_expr> as <type>
+
+<primary_expr> ::= <ident>
+                 | <constant>
+                 | '(' <expr> ')'
+
+<var_decl> ::= (let | var) <ident> (':' <type>)? ( = <expr>)?
+
+<type> ::= <primitive_type>
+         | <pointer_type>
+         | <ident>
+<pointer_type> ::= '*' 'const'? <type>
+<primitive_type> ::= bool
+                   | <integral_type>
+                   | <floating_type>
+                   | void
+
+<integral_type> ::= ('u' | 'i') <dec_digits>
+
+<floating_type> ::= 'f'('32' | '64')
+
+<constant> ::= <integer_constant>
+             | <floating_constant>
+<integer_constant> ::= <dec_digits> <integral_type>?
+                     | '0x' <hex_digits> <integral_type>?
+                     | '0b' <bin_digits> <integral_type>?
+                     | '0o' <oct_digits> <integral_type>?
+
+<floating_constant> ::= <dec_digits> <floating_type>?
+                      | '.' <dec_digits> <exp_part>? <floating_type>?
+                      | <dec_digits> '.' <dec_digits>? <exp_part>? <floating_type>?
+
+<exp_part> ::= ('e' | 'E') ('-' | '+')? <dec_digits>
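+
+# An illustrative sample of sea against the grammar above (hand-written and
+# not validated; assumes blocks are brace-delimited, which the '{' / '}'
+# tokens in src/tokens.rs suggest):
+#
+#   fn scale(x: u32, factor: f32) -> f32 {
+#       let scaled: f32 = x as f32 * factor;
+#       return scaled;
+#   }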
diff --git a/rust-toolchain b/rust-toolchain
new file mode 100644
index 0000000..bf867e0
--- /dev/null
+++ b/rust-toolchain
@@ -0,0 +1 @@
+nightly
diff --git a/src/common.rs b/src/common.rs
new file mode 100644
index 0000000..21e486c
--- /dev/null
+++ b/src/common.rs
@@ -0,0 +1,160 @@
+#![allow(unused)]
+/// True if `c` is considered whitespace according to the Rust language
+/// definition. See the
+/// [Rust language reference](https://doc.rust-lang.org/reference/whitespace.html)
+/// for definitions of these classes.
+pub fn is_whitespace(c: char) -> bool {
+    // This is Pattern_White_Space.
+    //
+    // Note that this set is stable (ie, it doesn't change with different
+    // Unicode versions), so it's ok to just hard-code the values.
+
+    matches!(
+        c,
+        // Usual ASCII suspects
+        '\u{0009}'   // \t
+        | '\u{000A}' // \n
+        | '\u{000B}' // vertical tab
+        | '\u{000C}' // form feed
+        | '\u{000D}' // \r
+        | '\u{0020}' // space
+
+        // NEXT LINE from latin1
+        | '\u{0085}'
+
+        // Bidi markers
+        | '\u{200E}' // LEFT-TO-RIGHT MARK
+        | '\u{200F}' // RIGHT-TO-LEFT MARK
+
+        // Dedicated whitespace characters from Unicode
+        | '\u{2028}' // LINE SEPARATOR
+        | '\u{2029}' // PARAGRAPH SEPARATOR
+    )
+}
+
+/// True if `c` is valid as a first character of an identifier.
+/// See the
+/// [Rust language reference](https://doc.rust-lang.org/reference/identifiers.html)
+/// for a formal definition of valid identifier names.
+pub fn is_id_start(c: char) -> bool {
+    // This is XID_Start OR '_' (which formally is not a XID_Start).
+    c == '_' || unicode_xid::UnicodeXID::is_xid_start(c)
+}
+
+/// True if `c` is valid as a non-first character of an identifier.
+/// See the
+/// [Rust language reference](https://doc.rust-lang.org/reference/identifiers.html)
+/// for a formal definition of valid identifier names.
+pub fn is_id_continue(c: char) -> bool {
+    unicode_xid::UnicodeXID::is_xid_continue(c)
+}
+
+/// True if the passed string is lexically an identifier.
+pub fn is_ident(string: &str) -> bool {
+    let mut chars = string.chars();
+    if let Some(start) = chars.next() {
+        is_id_start(start) && chars.all(is_id_continue)
+    } else {
+        false
+    }
+}
+
+pub fn is_digit(ch: char) -> bool {
+    ('0'..='9').contains(&ch)
+}
+
+pub fn is_bin_digit(ch: char) -> bool {
+    ch == '0' || ch == '1'
+}
+
+pub fn is_nonzero_digit(ch: char) -> bool {
+    ('1'..='9').contains(&ch)
+}
+
+pub fn is_oct_digit(ch: char) -> bool {
+    ('0'..='7').contains(&ch)
+}
+
+pub fn is_hex_digit(ch: char) -> bool {
+    ('0'..='9').contains(&ch) || ('a'..='f').contains(&ch) || ('A'..='F').contains(&ch)
+}
+
+/// Trait for only yielding the next item in the Iterator if it tests true
+/// for some predicate.
+pub trait NextIf: Iterator + Clone {
+    /// Yield the next item if `pred` returns `true`.
+    /// If `pred` returns `false` the Iterator is not advanced.
+    #[must_use]
+    fn next_if<F>(&mut self, pred: F) -> Option<Self::Item>
+    where
+        F: FnOnce(&Self::Item) -> bool,
+    {
+        let old = self.clone();
+        match self.next() {
+            Some(item) => {
+                if pred(&item) {
+                    Some(item)
+                } else {
+                    *self = old;
+                    None
+                }
+            }
+            None => None,
+        }
+    }
+    /// Yield the next item if `pred` returns `Some(T)`.
+    /// If `pred` returns `None` the Iterator is not advanced.
+    #[must_use]
+    fn next_if_map<T, F>(&mut self, pred: F) -> Option<T>
+    where
+        F: FnOnce(Self::Item) -> Option<T>,
+    {
+        let old = self.clone();
+        match self.next() {
+            Some(item) => match pred(item) {
+                None => {
+                    *self = old;
+                    None
+                }
+                some => some,
+            },
+            None => None,
+        }
+    }
+}
+
+impl<T> NextIf for T where T: Iterator + Clone {}
+
+pub trait FallibleParse: Iterator + Clone {
+    /// Consumes items from `self` if and only if `map` yields `Some`.
+    #[must_use]
+    fn try_parse<T, F>(&mut self, map: F) -> Option<T>
+    where
+        F: FnOnce(&mut Self) -> Option<T>,
+    {
+        // clone the iterator and keep the clone around
+        let old = self.clone();
+        match map(self) {
+            Some(result) => Some(result),
+            None => {
+                // the map function failed, restore the iterator and yield None.
+                *self = old;
+                None
+            }
+        }
+    }
+    /// Consumes items from `self` if and only if `map` yields `Ok`.
+    #[must_use]
+    fn try_parse_result<T, E, F>(&mut self, map: F) -> Result<T, E>
+    where
+        F: FnOnce(&mut Self) -> Result<T, E>,
+    {
+        // clone the iterator and keep the clone around
+        let old = self.clone();
+        match map(self) {
+            Ok(result) => Ok(result),
+            Err(e) => {
+                // the map function failed, restore the iterator and return the error.
+                *self = old;
+                Err(e)
+            }
+        }
+    }
+}
+
+impl<T> FallibleParse for T where T: Iterator + Clone {}
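+
+// Illustrative sketch (not part of the original API surface): how `NextIf`
+// and `FallibleParse` behave on a plain `str::Chars` iterator. Both traits
+// rely on `Clone` to snapshot the iterator and restore it on failure.
+#[cfg(test)]
+mod backtracking_examples {
+    use super::{FallibleParse, NextIf};
+
+    #[test]
+    fn next_if_only_advances_on_match() {
+        let mut chars = "a1".chars();
+        assert_eq!(chars.next_if(|&c| c.is_alphabetic()), Some('a'));
+        // predicate fails: the iterator is left untouched
+        assert_eq!(chars.next_if(|&c| c.is_alphabetic()), None);
+        assert_eq!(chars.next(), Some('1'));
+    }
+
+    #[test]
+    fn try_parse_restores_iterator_on_failure() {
+        let mut chars = "abc".chars();
+        // the closure consumes two items but then fails, so `chars` is restored
+        let failed: Option<()> = chars.try_parse(|it| {
+            it.next();
+            it.next();
+            None
+        });
+        assert_eq!(failed, None);
+        assert_eq!(chars.next(), Some('a'));
+    }
+}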
diff --git a/src/lexer.rs b/src/lexer.rs
new file mode 100644
index 0000000..0bfe059
--- /dev/null
+++ b/src/lexer.rs
@@ -0,0 +1,442 @@
+use crate::tokens::Token;
+use crate::tokens::TokenPos;
+use itertools::Itertools;
+
+use crate::common::FallibleParse;
+use crate::common::NextIf;
+
+#[derive(Debug, thiserror::Error)]
+pub enum LexerError {
+    #[error("{0}")]
+    StringError(String),
+    #[error("Exp part of floating constant had no digits.")]
+    FloatingConstantExpPartNoDigit,
+    #[error("Digits of a numerical constant may not start with an underscore.")]
+    NumericalConstantDigitLeadingUnderscore,
+    #[error("Numerical constant must have at least one digit.")]
+    NumericalConstantDigitNoDigit,
+    #[error("Integral type must be followed by at least one digit.")]
+    IntegralTypeExpectedDigit,
+    #[error("Floating constant has an invalid trailing type.")]
+    FloatingConstantInvalidTrailingType,
+    #[error("Invalid token.")]
+    InvalidToken,
+    #[error("Expected a valid identifier start character.")]
+    ExpectedIdStartForIdentifier,
+}
+
+pub type LexerResult<T> = core::result::Result<T, LexerError>;
+
+#[derive(Debug, Clone)]
+pub struct Chars<'a> {
+    bytes: &'a [u8],
+    offset: usize,
+}
+
+impl<'a> Chars<'a> {
+    pub fn as_str(&self) -> &str {
+        // SAFETY: `bytes` comes from a `&str` and `offset` always lies on a
+        // char boundary.
+        unsafe { core::str::from_utf8_unchecked(&self.bytes[self.offset..]) }
+    }
+    pub fn is_eof(&self) -> bool {
+        self.offset >= self.bytes.len()
+    }
+    pub fn peek(&self) -> Option<char> {
+        self.clone().next()
+    }
+
+    pub fn position(&self) -> u32 {
+        self.offset() as u32
+    }
+
+    pub fn offset(&self) -> usize {
+        self.offset
+    }
+
+    pub fn get_range(&self, start: u32, end: u32) -> &str {
+        // SAFETY: as above, positions handed out by this type always lie on
+        // char boundaries.
+        unsafe { core::str::from_utf8_unchecked(&self.bytes[start as usize..end as usize]) }
+    }
+
+    fn next_char(&mut self) -> Option<char> {
+        let ch = self.as_str().chars().next()?;
+        self.offset += ch.len_utf8();
+        Some(ch)
+    }
+}
+
+impl<'a> Iterator for Chars<'a> {
+    type Item = char;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        self.next_char()
+    }
+}
+
+#[derive(Debug, Clone)]
+pub struct Tokenizer<'a> {
+    source: Chars<'a>,
+    tokens: Vec<TokenPos>,
+}
+
+macro_rules! next_or_eof {
+    ($expr:expr) => {
+        match $expr.next() {
+            Some(c) => c,
+            None => {
+                return Ok(Token::Eof);
+            }
+        }
+    };
+    (?$expr:expr) => {
+        match $expr.peek() {
+            Some(c) => c,
+            None => {
+                return Ok(Token::Eof);
+            }
+        }
+    };
+}
+
+macro_rules! residual {
+    (ok: $expr:expr) => {
+        match $expr {
+            Ok(t) => t,
+            Err(e) => {
+                return Err(e);
+            }
+        }
+    };
+    (none: $expr:expr) => {
+        match $expr {
+            Ok(Some(t)) => {
+                return Ok(Some(t));
+            }
+            Ok(val) => val,
+            Err(e) => {
+                return Err(e);
+            }
+        }
+    };
+    (flatten: none: $expr:expr) => {
+        match $expr {
+            Ok(Some(t)) => {
+                return Ok(t);
+            }
+            Ok(val) => val,
+            Err(e) => {
+                return Err(e);
+            }
+        }
+    };
+    (some: $expr:expr) => {
+        match $expr {
+            Ok(Some(t)) => t,
+            Ok(None) => {
+                return Ok(None);
+            }
+            Err(e) => {
+                return Err(e);
+            }
+        }
+    };
+}
+
+impl<'a> Tokenizer<'a> {
+    fn push_token(&mut self, token: Token, start: u32, end: u32) -> LexerResult<()> {
+        self.tokens.push(TokenPos::new(token, start, end));
+
+        Ok(())
+    }
+
+    pub fn next_token(&mut self) -> LexerResult<()> {
+        // skip any leading whitespace
+        self.source
+            .take_while_ref(|&c| crate::common::is_whitespace(c))
+            .count();
+        let start = self.source.position();
+
+        let token = self.source.try_parse_result(|source| {
+            let a = try_parse_integral_type(source).map(|o| o.map(|_| Token::IntegralType));
+            residual!(none: a);
+
+            let mut peeking = source.clone();
+            match peeking.next() {
+                Some('0'..='9') => {
+                    return Ok(Some(parse_constant(source)?));
+                }
+                // a constant may also start with a `.`, as in `.5e-4`
+                Some('.') if peeking.next().map(|c| crate::common::is_digit(c)) == Some(true) => {
+                    return Ok(Some(parse_constant(source)?));
+                }
+                _ => {}
+            }
+
+            Ok(None)
+        });
+
+        if let Some(token) = token? {
+            return self.push_token(token, start, self.source.position());
+        }
+
+        // lexical tokens
+        let token = crate::tokens::LexemeParser::parse(self.source.clone());
+
+        if let Some(token) = token {
+            _ = self.source.advance_by(token.lexeme_len());
+
+            match token {
+                Token::SlashSlash | Token::SlashSlashSlash => {
+                    _ = self.push_token(token, start, self.source.position());
+                    let start = self.source.position();
+                    loop {
+                        // advance until either EOF or newline
+                        let Some(ch) = self.source.next() else {
+                            break;
+                        };
+                        if ch == '\n' {
+                            break;
+                        }
+                    }
+                    let end = self.source.position() - 1;
+                    return self.push_token(
+                        if token == Token::SlashSlash {
+                            Token::Comment
+                        } else {
+                            Token::DocComment
+                        },
+                        start,
+                        end,
+                    );
+                }
+                Token::SlashStar | Token::SlashStarStar => {
+                    let start = self.source.position();
+                    let mut end = self.source.position();
+
+                    let mut last = self.source.next();
+                    loop {
+                        // break out of the loop on EOF
+                        let Some(l) = last.replace(match self.source.next() {
+                            Some(ch) => ch,
+                            None => {
+                                break;
+                            }
+                        }) else {
+                            break;
+                        };
+
+                        // break out of the loop at the end of the comment
+                        if (l, last.unwrap()) == ('*', '/') {
+                            break;
+                        }
+                        end = self.source.position() - 1;
+                    }
+                    return self.push_token(
+                        if token == Token::SlashStar {
+                            Token::Comment
+                        } else {
+                            Token::DocComment
+                        },
+                        start,
+                        end,
+                    );
+                }
+                _ => {}
+            }
+
+            // a keyword immediately followed by identifier characters is an
+            // identifier, e.g. `constant`
+            if token.maybe_ident()
+                && self
+                    .source
+                    .take_while_ref(|&c| crate::common::is_id_continue(c))
+                    .count()
+                    > 0
+            {
+                return self.push_token(Token::Ident, start, self.source.position());
+            }
+
+            return self.push_token(token, start, self.source.position());
+        }
+
+        self.source
+            .next_if(|&c| crate::common::is_id_start(c))
+            .ok_or(LexerError::ExpectedIdStartForIdentifier)?;
+        self.source
+            .take_while_ref(|&c| crate::common::is_id_continue(c))
+            .count();
+
+        self.push_token(Token::Ident, start, self.source.position())
+    }
+}
+
+/// IntegralType <-
+///     ( 'u' | 'i' ) DIGITS+
+fn try_parse_integral_type(source: &mut Chars) -> LexerResult<Option<()>> {
+    if source.next_if(|&c| c == 'u' || c == 'i').is_none() {
+        return Ok(None);
+    }
+
+    if source
+        .take_while_ref(|&c| crate::common::is_digit(c))
+        .count()
+        == 0
+    {
+        return Err(LexerError::IntegralTypeExpectedDigit);
+    };
+
+    Ok(Some(()))
+}
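+
+// A quick sketch of the contract above (illustrative only): `u32` parses,
+// a bare `u` is an error, and anything else leaves the source untouched.
+#[cfg(test)]
+mod integral_type_tests {
+    use super::*;
+
+    fn chars(s: &str) -> Chars {
+        Chars { bytes: s.as_bytes(), offset: 0 }
+    }
+
+    #[test]
+    fn integral_type() {
+        assert_eq!(try_parse_integral_type(&mut chars("u32")).unwrap(), Some(()));
+        assert!(try_parse_integral_type(&mut chars("u")).is_err());
+        // not an integral type: nothing is consumed and `Ok(None)` is returned
+        let mut source = chars("foo");
+        assert_eq!(try_parse_integral_type(&mut source).unwrap(), None);
+        assert_eq!(source.position(), 0);
+    }
+}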
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+enum Radix {
+    Hex,
+    Bin,
+    Dec,
+    Oct,
+}
+
+impl Radix {
+    /// must be called with one of `['b','x','d','o']`
+    unsafe fn from_char_unchecked(c: char) -> Self {
+        match c.to_ascii_lowercase() {
+            'o' => Self::Oct,
+            'b' => Self::Bin,
+            'x' => Self::Hex,
+            'd' => Self::Dec,
+            _ => unreachable!(),
+        }
+    }
+    fn from_char(c: char) -> Option<Self> {
+        match c.to_ascii_lowercase() {
+            'o' => Some(Self::Oct),
+            'b' => Some(Self::Bin),
+            'x' => Some(Self::Hex),
+            'd' => Some(Self::Dec),
+            _ => None,
+        }
+    }
+    fn to_token(self) -> Token {
+        match self {
+            Radix::Hex => Token::IntegerHexConstant,
+            Radix::Bin => Token::IntegerBinConstant,
+            Radix::Oct => Token::IntegerOctConstant,
+            Radix::Dec => Token::IntegerConstant,
+        }
+    }
+    fn is_digit(self) -> fn(char) -> bool {
+        match self {
+            Radix::Hex => crate::common::is_hex_digit,
+            Radix::Bin => crate::common::is_bin_digit,
+            Radix::Oct => crate::common::is_oct_digit,
+            Radix::Dec => crate::common::is_digit,
+        }
+    }
+}
+
+/// where DIGIT is defined by radix:
+/// DIGITS <-
+///     if allow_leading_underscore: `_`* DIGIT (DIGIT|`_`)*
+///     else: DIGIT (DIGIT|`_`)*
+fn parse_digit_part(
+    source: &mut Chars,
+    allow_leading_underscore: bool,
+    radix: Radix,
+) -> LexerResult<()> {
+    let radix = radix.is_digit();
+
+    if allow_leading_underscore {
+        let _underscores = source.take_while_ref(|&c| c == '_').count();
+    }
+    let _need_digit = source.next_if(|&c| radix(c)).ok_or_else(|| {
+        if source.peek() == Some('_') {
+            LexerError::NumericalConstantDigitLeadingUnderscore
+        } else {
+            LexerError::NumericalConstantDigitNoDigit
+        }
+    })?;
+    let _rest = source.take_while_ref(|&c| radix(c) || c == '_').count();
+
+    Ok(())
+}
+
+/// returns `Err(e)` if it failed to parse.
+/// returns `Ok(None)` if no exp part was found.
+/// returns `Ok(Some(()))` if an exp part was found and parsed.
+///
+/// EXP_PART <-
+///     (`e`|`E`) (`-`|`+`)? DEC_DIGITS
+fn try_parse_exp_part(source: &mut Chars) -> LexerResult<Option<()>> {
+    if source.next_if(|&c| c.to_ascii_lowercase() == 'e').is_some() {
+        let _sign = source.next_if(|&c| c == '-' || c == '+');
+        if source
+            .take_while_ref(|&c| crate::common::is_digit(c))
+            .count()
+            == 0
+        {
+            // need digits following exp notation
+            Err(LexerError::FloatingConstantExpPartNoDigit)
+        } else {
+            Ok(Some(()))
+        }
+    } else {
+        Ok(None)
+    }
+}
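+
+// Sketch of the digit-part rules (illustrative): leading underscores are
+// only accepted when `allow_leading_underscore` is set, and the radix
+// decides which digits are valid.
+#[cfg(test)]
+mod digit_part_tests {
+    use super::*;
+
+    fn chars(s: &str) -> Chars {
+        Chars { bytes: s.as_bytes(), offset: 0 }
+    }
+
+    #[test]
+    fn digit_part() {
+        assert_eq!(Radix::from_char('x'), Some(Radix::Hex));
+        assert!(parse_digit_part(&mut chars("1_000"), false, Radix::Dec).is_ok());
+        assert!(parse_digit_part(&mut chars("_1"), false, Radix::Dec).is_err());
+        assert!(parse_digit_part(&mut chars("_1"), true, Radix::Dec).is_ok());
+        // `9` is not an octal digit
+        assert!(parse_digit_part(&mut chars("9"), false, Radix::Oct).is_err());
+    }
+}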
+
+/// CONSTANT <-
+///     DEC_DIGITS IntegralType?
+///     `0x` HEX_DIGITS IntegralType?
+///     `0b` BIN_DIGITS IntegralType?
+///     `0o` OCT_DIGITS IntegralType?
+///     DEC_DIGITS FloatingType?
+///     `.` DEC_DIGITS EXP_PART? FloatingType?
+///     DEC_DIGITS `.` DEC_DIGITS? EXP_PART? FloatingType?
+fn parse_constant(source: &mut Chars) -> LexerResult<Token> {
+    let zero = source.next_if(|&c| c == '0').is_some();
+    let radix = zero
+        .then(|| source.next_if_map(|c| Radix::from_char(c)))
+        .flatten();
+
+    if let Some(radix) = radix {
+        parse_digit_part(source, false, radix)?;
+        if source.peek().map(|c| c == 'u' || c == 'i') == Some(true) {
+            try_parse_integral_type(source)?;
+        }
+        return Ok(radix.to_token());
+    }
+
+    // if zero: the leading `0` already is a digit, so any further digits and
+    //          underscores are optional: (DIGIT|`_`)*
+    // else: DIGIT (DIGIT|`_`)*
+    if zero {
+        let _rest = source
+            .take_while_ref(|&c| crate::common::is_digit(c) || c == '_')
+            .count();
+    } else {
+        parse_digit_part(source, false, Radix::Dec)?;
+    }
+
+    if let Ok(Some(_)) = source.try_parse_result(|source| try_parse_integral_type(source)) {
+        return Ok(Token::IntegerConstant);
+    }
+
+    let dot = source.next_if(|&c| c == '.').is_some();
+
+    if dot {
+        parse_digit_part(source, false, Radix::Dec)?;
+    }
+
+    // parse exp notation
+    let exp = try_parse_exp_part(source)?.is_some();
+
+    // trailing FloatingType?
+    let floating = if source.next_if(|&c| c == 'f').is_some() {
+        let digits = source.next_tuple::<(char, char)>();
+        if !(digits == Some(('6', '4')) || digits == Some(('3', '2'))) {
+            // need either f64 or f32 here!
+            return Err(LexerError::FloatingConstantInvalidTrailingType);
+        }
+        true
+    } else {
+        false
+    };
+
+    let token = match (dot, exp, floating) {
+        (false, false, false) => Token::IntegerConstant,
+        (true, false, _) => Token::DotFloatingConstant,
+        (true, true, _) => Token::DotFloatingExpConstant,
+        (false, true, _) => Token::FloatingExpConstant,
+        (false, false, true) => Token::FloatingConstant,
+    };
+
+    Ok(token)
+}
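+
+// Illustrative check of `parse_constant` against the grammar comment above
+// (a sketch; marker token names as defined in src/tokens.rs):
+#[cfg(test)]
+mod constant_tests {
+    use super::*;
+
+    fn parse(s: &str) -> LexerResult<Token> {
+        parse_constant(&mut Chars { bytes: s.as_bytes(), offset: 0 })
+    }
+
+    #[test]
+    fn constants() {
+        assert_eq!(parse("123").unwrap(), Token::IntegerConstant);
+        assert_eq!(parse("0x1f").unwrap(), Token::IntegerHexConstant);
+        assert_eq!(parse("3.14").unwrap(), Token::DotFloatingConstant);
+        assert_eq!(parse("1e9").unwrap(), Token::FloatingExpConstant);
+        assert_eq!(parse("2.5e-3").unwrap(), Token::DotFloatingExpConstant);
+        assert_eq!(parse("1f32").unwrap(), Token::FloatingConstant);
+    }
+}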
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..1fdc1f2
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,5 @@
+#![feature(extract_if, iter_advance_by)]
+
+mod common;
+mod lexer;
+mod tokens;
diff --git a/src/tokens.rs b/src/tokens.rs
new file mode 100644
index 0000000..dad749c
--- /dev/null
+++ b/src/tokens.rs
@@ -0,0 +1,236 @@
+macro_rules! tokens {
+    ($vis:vis $ty_name:ident:
+     {
+         $($name2:ident),*
+     },
+     {
+         $($name:ident => $lexeme:literal),*
+     }) => {
+        #[allow(dead_code)]
+        #[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
+        $vis enum $ty_name {
+            $($name,)*
+            $($name2,)*
+        }
+
+        impl std::fmt::Display for $ty_name {
+            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+                match self {
+                    $(Self::$name => write!(f, "{}", $lexeme),)*
+                    $(Self::$name2 => write!(f, "<{}>", stringify!($name2))),*
+                }
+            }
+        }
+
+        #[allow(dead_code)]
+        impl $ty_name {
+            $vis fn lexeme(&self) -> Option<&'static str> {
+                match self {
+                    $(Self::$name => Some($lexeme),)*
+                    $(Self::$name2 => None),*
+                }
+            }
+
+            /// returns the number of chars in this lexeme
+            $vis fn lexeme_len(&self) -> usize {
+                self.lexeme().map(|lexeme| lexeme.chars().count()).unwrap_or(0)
+            }
+
+            $vis fn maybe_ident(&self) -> bool {
+                self.lexeme().map(|lexeme| crate::common::is_ident(lexeme)).unwrap_or(false)
+            }
+
+            $vis fn lexemes() -> &'static [(Self, &'static str)] {
+                &[
+                    $((Self::$name, $lexeme)),*
+                ]
+            }
+        }
+    };
+}
+
+tokens!(pub Token: {
+    Eof,
+    // Marker Token for any Comment
+    Comment,
+    DocComment,
+    // Marker Tokens for constants
+    CharConstant,
+    IntegerConstant,
+    IntegerHexConstant,
+    IntegerBinConstant,
+    IntegerOctConstant,
+    FloatingConstant,
+    FloatingExpConstant,
+    DotFloatingConstant,
+    DotFloatingExpConstant,
+    StringConstant,
+    IntegralType,
+    Ident
+},
+// Lexical Tokens:
+{
+    SlashSlash => "//",
+    SlashStar => "/*",
+    SlashStarStar => "/**",
+    StarSlash => "*/",
+    SlashSlashSlash => "///",
+    // Punctuation:
+    OpenParens => "(",
+    CloseParens => ")",
+    OpenBrace => "{",
+    CloseBrace => "}",
+    OpenSquareBracket => "[",
+    CloseSquareBracket => "]",
+    Semi => ";",
+    Comma => ",",
+    Elipsis3 => "...",
+    Elipsis2 => "..",
+    Colon => ":",
+    Equal => "=",
+    // Keywords:
+    Void => "void",
+    Bool => "bool",
+    F32 => "f32",
+    F64 => "f64",
+    Const => "const",
+    Fn => "fn",
+    Let => "let",
+    Var => "var",
+    If => "if",
+    As => "as",
+    Else => "else",
+    Return => "return",
+    // Operators
+    Dot => ".",
+    MinusGreater => "->",
+    Bang => "!",
+    Tilde => "~",
+    Plus => "+",
+    Minus => "-",
+    Star => "*",
+    Slash => "/",
+    Percent => "%",
+    Less => "<",
+    Greater => ">",
+    LessEqual => "<=",
+    GreaterEqual => ">=",
+    EqualEqual => "==",
+    BangEqual => "!=",
+    PipePipe => "||",
+    AmpersandAmpersand => "&&",
+    Ampersand => "&",
+    Caret => "^",
+    Pipe => "|",
+    LessLess => "<<",
+    GreaterGreater => ">>",
+    Question => "?",
+    PlusEqual => "+=",
+    MinusEqual => "-=",
+    StarEqual => "*=",
+    SlashEqual => "/=",
+    PercentEqual => "%=",
+    AmpersandEqual => "&=",
+    PipeEqual => "|=",
+    CaretEqual => "^=",
+    LessLessEqual => "<<=",
+    GreaterGreaterEqual => ">>="
+});
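+
+// Quick illustration of the API generated by the macro above (a sketch):
+#[cfg(test)]
+mod token_tests {
+    use super::Token;
+
+    #[test]
+    fn lexemes() {
+        assert_eq!(Token::Fn.lexeme(), Some("fn"));
+        assert_eq!(Token::Ident.lexeme(), None);
+        // keywords look like identifiers, operators do not
+        assert!(Token::Return.maybe_ident());
+        assert!(!Token::PlusEqual.maybe_ident());
+        assert_eq!(Token::LessLessEqual.lexeme_len(), 3);
+        assert_eq!(format!("{}", Token::Ident), "<Ident>");
+    }
+}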
+
+/// Helper type for parsing tokens that have a defined lexeme, such as `fn`,
+/// `f32`, `const`, etc. Tokens with variable lexemes, such as primitive
+/// integral types, constants or identifiers, are not parsed by this.
+pub struct LexemeParser {
+    lexemes: Vec<Token>,
+    candidates: Vec<Token>,
+    len: usize,
+}
+
+impl LexemeParser {
+    pub fn new() -> Self {
+        let lexemes = Token::lexemes()
+            .iter()
+            .map(|(tok, _)| tok.clone())
+            .collect::<Vec<_>>();
+
+        Self {
+            lexemes,
+            candidates: vec![],
+            len: 0,
+        }
+    }
+
+    pub fn finish(mut self) -> Option<Token> {
+        self.candidates.pop()
+    }
+
+    pub fn parse(mut tokens: impl Iterator<Item = char>) -> Option<Token> {
+        let mut this = Self::new();
+        loop {
+            let Some(ch) = tokens.next() else {
+                break;
+            };
+
+            if crate::common::is_whitespace(ch) {
+                break;
+            }
+
+            // `advance` yields `Some` once no further lexeme can match
+            if let Some(token) = this.advance(ch) {
+                return token;
+            }
+        }
+        this.finish()
+    }
+
+    /// accepts a char and returns `None` until it is done trying to parse the
+    /// longest `Token`. when finished, returns `Some(Some(Token))` if it
+    /// parsed one, or `Some(None)`.
+    pub fn advance(&mut self, ch: char) -> Option<Option<Token>> {
+        self.len += 1;
+
+        // advance match:
+        // keep tokens whose lexemes match the next char
+        self.lexemes.retain(|tok| {
+            // SAFETY: all of these tokens are lexical, and every character in
+            // them is represented by a single byte and we know they must be
+            // utf8/ascii.
+            unsafe {
+                char::from_u32_unchecked(tok.lexeme().unwrap().as_bytes()[self.len - 1] as u32)
+                    == ch
+            }
+        });
+
+        // A token has been successfully matched completely if it has not yet
+        // been removed from the lexeme list but the length of its lexeme is no
+        // greater than the number of chars we've received.
+        self.candidates.extend(self.lexemes.extract_if(|tok| {
+            // SAFETY: as above, all of the tokens in self.lexemes are
+            // lexical and consist only of single-byte characters.
+            tok.lexeme().unwrap().as_bytes().len() <= self.len
+        }));
+
+        // we prefer the longer match:
+        // that means that a+++++b doesn't parse, and a+++(++b) is a++ + ++b;
+        // `&&i` is also LogicalAnd i and not Ampersand Ampersand i.
+        // Somehow, this is also a gnu extension...
+
+        if self.lexemes.is_empty() {
+            // no further match is possible; return the longest candidate, if any
+            return Some(self.candidates.pop());
+        }
+
+        None
+    }
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub struct TokenPos {
+    pub token: Token,
+    pub start: u32,
+    pub end: u32,
+}
+
+impl TokenPos {
+    pub fn new(token: Token, start: u32, end: u32) -> Self {
+        Self { token, start, end }
+    }
+}
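+
+// Longest-match behaviour of `LexemeParser` in practice (illustrative sketch):
+#[cfg(test)]
+mod lexeme_parser_tests {
+    use super::{LexemeParser, Token};
+
+    #[test]
+    fn longest_match_wins() {
+        assert_eq!(LexemeParser::parse("+".chars()), Some(Token::Plus));
+        assert_eq!(LexemeParser::parse("+=".chars()), Some(Token::PlusEqual));
+        assert_eq!(LexemeParser::parse("<<=".chars()), Some(Token::LessLessEqual));
+        // `&&` is preferred over two `&`
+        assert_eq!(LexemeParser::parse("&&i".chars()), Some(Token::AmpersandAmpersand));
+        // not a lexical token at all
+        assert_eq!(LexemeParser::parse("identifier".chars()), None);
+    }
+}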