simple lexing error reporting
parent 69e67c882d
commit 02be9bdc26
Cargo.toml

@@ -4,6 +4,8 @@ version = "0.1.0"
 edition = "2021"
 
 [dependencies]
+ansi_term = "0.12.1"
+clap = "4.5.14"
 itertools = "0.13.0"
 log = "0.4.22"
 thiserror = "1.0.63"
src/bin/tokenizer.rs (new file)

@@ -0,0 +1,54 @@
+use std::{io::Read, path::PathBuf};
+
+use compiler::*;
+use lexer::SourceLocation;
+
+fn main() {
+    let cmd = clap::Command::new("sea-tokens").bin_name("sea-tokens").arg(
+        clap::Arg::new("input")
+            .short('i')
+            .help("sea source file.")
+            .value_parser(clap::builder::PathBufValueParser::new()),
+    );
+
+    let matches = cmd.get_matches();
+    let path = matches.get_one::<PathBuf>("input");
+    let source = path
+        .and_then(|p| std::fs::read(p).ok())
+        .or_else(|| {
+            let mut buf = Vec::new();
+            std::io::stdin().read_to_end(&mut buf).ok()?;
+            Some(buf)
+        })
+        .expect("no source bytes.");
+
+    let tokens = tokenize(&source);
+    match tokens {
+        Ok(tokens) => {
+            for tok in tokens.iter() {
+                println!("{}@[{}]", tok.token(), tok.source_location().start);
+            }
+        }
+        Err((tokens, errors)) => {
+            eprint!("{} errors while tokenizing ", errors.len());
+            match path {
+                Some(path) => {
+                    eprint!("{}", path.display());
+                }
+                None => {
+                    eprint!("stdin");
+                }
+            }
+            eprintln!(":");
+            for error in &errors {
+                let lines = tokens.src().get_lines(error.range.start, error.range.end);
+                let location = tokens
+                    .src()
+                    .get_source_span(error.range.start, error.range.end);
+
+                eprintln!("Error: {}", error.err);
+                SourceLocation::squiggle_line(location, lines);
+            }
+        }
+    }
+}
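Here `tok.source_location().start` prints through the `Display` impl added to `SourceLocation` below, so each position renders as `l:<line>,c:<column>`.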
src/lexer.rs

@@ -1,3 +1,5 @@
+use std::fmt::Display;
+
 use crate::tokens::Token;
 use crate::tokens::TokenPos;
 use itertools::Itertools;

@@ -11,18 +13,20 @@ pub enum Error {
     StringError(String),
     #[error("Exp part of floating constant had no digits.")]
     FloatingConstantExpPartNoDigit,
-    #[error("Dummy Message.")]
+    #[error("Constant cannot start with leading underscore '_'.")]
     NumericalConstantDigitLeadingUnderscore,
-    #[error("Dummy Message.")]
+    #[error("Expected digit here for constant.")]
     NumericalConstantDigitNoDigit,
-    #[error("Dummy Message.")]
+    #[error("Expected digit here for integer constant.")]
     IntegralTypeExpectedDigit,
-    #[error("Dummy Message.")]
+    #[error("Floating constant has invalid trailing type.")]
     FloatingConstantInvalidTrailingType,
-    #[error("Dummy Message.")]
+    #[error("Invalid token.")]
     InvalidToken,
-    #[error("Dummy Message.")]
+    #[error("Identifier starts with invalid character.")]
     ExpectedIdStartForIdentifier,
+    #[error("Unknown suffix in constant.")]
+    NumericalConstantUnknownSuffix,
 }
 
 pub type Result<T> = core::result::Result<T, Error>;

@@ -35,11 +39,22 @@ pub struct Chars<'a> {
 
 impl<'a> Chars<'a> {
     pub fn as_str(&self) -> &str {
-        unsafe { core::str::from_utf8_unchecked(&self.bytes[self.offset..]) }
+        let offset = self.offset.min(self.num_bytes());
+        unsafe { core::str::from_utf8_unchecked(&self.bytes[offset..]) }
     }
 
+    pub fn seek(&mut self, offset: u32) {
+        self.offset = offset as usize;
+    }
+
+    pub fn num_bytes(&self) -> usize {
+        self.bytes.len()
+    }
+
     pub fn is_eof(&self) -> bool {
         self.offset >= self.bytes.len()
     }
 
     pub fn peek(&self) -> Option<char> {
         self.clone().next()
     }

@@ -52,13 +67,9 @@ impl<'a> Chars<'a> {
         self.offset
     }
 
-    pub fn get_source_span(
-        &self,
-        start: u32,
-        end: u32,
-    ) -> std::ops::RangeInclusive<SourceLocation> {
+    pub fn get_source_span(&self, start: u32, end: u32) -> std::ops::Range<SourceLocation> {
         let (start_l, start_c) = {
-            let range = self.get_range(0, start);
+            let range = self.get_from_to(0, start);
             range.chars().fold((1u32, 0u32), |(line, col), c| {
                 if c == '\n' {
                     (line + 1, 0)

@@ -68,7 +79,7 @@ impl<'a> Chars<'a> {
             })
         };
         let (end_l, end_c) = {
-            let range = self.get_range(start, end);
+            let range = self.get_from_to(start, end);
             range.chars().fold((start_l, start_c), |(line, col), c| {
                 if c == '\n' {
                     (line + 1, 0)

@@ -78,13 +89,40 @@ impl<'a> Chars<'a> {
             })
         };
 
-        core::ops::RangeInclusive::new(
-            SourceLocation::new(start_l, start_c),
-            SourceLocation::new(end_l, end_c),
-        )
+        core::ops::Range {
+            start: SourceLocation::new(start_l, start_c),
+            end: SourceLocation::new(end_l, end_c),
+        }
     }
 
-    pub fn get_range(&self, start: u32, end: u32) -> &str {
+    pub fn get_lines(&self, start: u32, end: u32) -> &str {
+        let range = self.get_from_to(0, start);
+        let start = range
+            .char_indices()
+            .rev()
+            .skip_while(|&(_, c)| c != '\n')
+            .next()
+            .map(|(idx, c)| idx + c.len_utf8())
+            .unwrap_or(0);
+
+        let range = self.get_from_to(end, self.num_bytes() as u32);
+        let end = range
+            .char_indices()
+            .skip_while(|&(_, c)| c != '\n')
+            .next()
+            .map(|(idx, _)| idx as u32 + end)
+            .unwrap_or(self.num_bytes() as u32);
+
+        self.get_from_to(start as u32, end)
+    }
+
+    pub fn get_range(&self, range: core::ops::Range<u32>) -> &str {
+        unsafe {
+            core::str::from_utf8_unchecked(&self.bytes[range.start as usize..range.end as usize])
+        }
+    }
+
+    pub fn get_from_to(&self, start: u32, end: u32) -> &str {
         unsafe { core::str::from_utf8_unchecked(&self.bytes[start as usize..end as usize]) }
     }
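For intuition: `get_lines` widens a byte span to whole source lines so the error reporter can echo the complete offending lines. A standalone sketch of the same idea over a `&str` (the name `enclosing_lines` is hypothetical, not part of this commit):

    /// Expand [start, end) to the enclosing line boundaries of `src`.
    /// Assumes `start` and `end` fall on char boundaries.
    fn enclosing_lines(src: &str, start: usize, end: usize) -> &str {
        // byte just after the previous '\n', or the start of input
        let line_start = src[..start].rfind('\n').map(|i| i + 1).unwrap_or(0);
        // the next '\n' at or after `end` (exclusive), or the end of input
        let line_end = src[end..].find('\n').map(|i| i + end).unwrap_or(src.len());
        &src[line_start..line_end]
    }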
@@ -163,10 +201,45 @@ pub struct SourceLocation {
     pub column: u32,
 }
 
+impl Display for SourceLocation {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "l:{},c:{}", self.line, self.column)
+    }
+}
+
 impl SourceLocation {
     pub fn new(line: u32, column: u32) -> Self {
         Self { line, column }
     }
+
+    pub fn squiggle_line(this: core::ops::Range<Self>, lines: &str) {
+        let lines = lines.lines();
+        let squiggle_lines = this.end.line - this.start.line + 1;
+
+        for (i, line) in lines.enumerate() {
+            println!("{line}");
+            let squiggle_range = {
+                let start = if i == 0 { this.start.column } else { 0 };
+                let end = if i as u32 + 1 == squiggle_lines {
+                    this.end.column
+                } else {
+                    line.len() as u32
+                };
+                start..end
+            };
+
+            if !squiggle_range.is_empty() {
+                for _ in 0..squiggle_range.start {
+                    print!(" ");
+                }
+                print!("{}", ansi_term::Colour::Red.paint("^"));
+                for _ in squiggle_range.start..(squiggle_range.end - 1) {
+                    print!("{}", ansi_term::Colour::Red.paint("~"));
+                }
+                println!();
+            }
+        }
+    }
 }
 
 impl<'a> TokenItem<'a> {
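A minimal way to exercise the squiggle renderer by hand, assuming the crate is named `compiler` as in the new binary (the span is made up for illustration):

    use compiler::lexer::SourceLocation;

    fn main() {
        // underline columns 3..6 ("234") of a single line
        let span = SourceLocation::new(1, 3)..SourceLocation::new(1, 6);
        SourceLocation::squiggle_line(span, "fn 234test() {");
    }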
@@ -177,10 +250,10 @@ impl<'a> TokenItem<'a> {
     pub fn lexeme(&self) -> &str {
         self.tokenizer
             .source
-            .get_range(self.inner.start, self.inner.end)
+            .get_from_to(self.inner.start, self.inner.end)
     }
 
-    pub fn source_location(&self) -> std::ops::RangeInclusive<SourceLocation> {
+    pub fn source_location(&self) -> std::ops::Range<SourceLocation> {
         self.tokenizer
             .source
             .get_source_span(self.inner.start, self.inner.end)

@@ -275,6 +348,11 @@ macro_rules! residual {
     };
 }
 
+pub struct TokenizeError {
+    pub err: Error,
+    pub range: core::ops::Range<u32>,
+}
+
 impl<'a> Tokenizer<'a> {
     pub fn iter(&self) -> TokenIterator {
         TokenIterator {

@@ -283,6 +361,61 @@ impl<'a> Tokenizer<'a> {
         }
     }
 
+    pub fn src(&self) -> &Chars<'a> {
+        &self.source
+    }
+
+    pub fn new_with_errors(
+        bytes: &'a [u8],
+    ) -> core::result::Result<Self, (Self, Vec<TokenizeError>)> {
+        let mut this = Self {
+            source: Chars { bytes, offset: 0 },
+            tokens: Vec::new(),
+        };
+        let mut errors = Vec::new();
+
+        loop {
+            if this.source.is_eof() {
+                break;
+            }
+
+            let start = this.source.position();
+
+            match this.next_token() {
+                Ok(_) => {}
+                Err(e) => {
+                    // let is_quoted = this
+                    //     .source
+                    //     .get_range(start, this.source.bytes.len() as u32)
+                    //     .chars()
+                    //     .take_while_ref(|&c| crate::common::is_whitespace(c))
+                    //     .next()
+                    //     .map(|c| c == '\'' || c == '"')
+                    //     .unwrap_or(false);
+                    let end = this.source.position();
+
+                    if this.source.peek().map(|c| crate::common::is_whitespace(c)) != Some(true) {
+                        this.source
+                            .take_while_ref(|&c| !crate::common::is_whitespace(c))
+                            .count();
+                    }
+
+                    _ = this.push_token(Token::ParseError, start, end);
+                    errors.push(TokenizeError {
+                        err: e,
+                        range: start..end,
+                    });
+                }
+            }
+        }
+
+        if errors.is_empty() {
+            Ok(this)
+        } else {
+            Err((this, errors))
+        }
+    }
+
     pub fn new(bytes: &'a [u8]) -> Result<Tokenizer<'a>> {
         let mut this = Self {
             source: Chars { bytes, offset: 0 },
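The recovery contract in a sketch: on failure the caller still gets the partial token stream back alongside the collected errors (crate name `compiler` assumed; the input should trip the new suffix check):

    fn main() {
        match compiler::lexer::Tokenizer::new_with_errors(b"fn 234test() {}") {
            Ok(_) => println!("no lex errors"),
            Err((tokens, errors)) => {
                // each error carries the byte range of the offending lexeme
                for e in &errors {
                    eprintln!("{} at bytes {}..{}", e.err, e.range.start, e.range.end);
                }
                // the stream still contains ParseError placeholder tokens
                println!("{} tokens recovered", tokens.iter().count());
            }
        }
    }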
@@ -326,23 +459,16 @@ impl<'a> Tokenizer<'a> {
 
         let start = self.source.position();
 
-        let token = self.source.try_parse_result(|source| {
-            let a = try_parse_integral_type(source).map(|o| o.map(|_| Token::IntegralType));
-            residual!(none: a);
-
-            let mut peeking = source.clone();
-            match peeking.next() {
-                Some('0'..='9') => {
-                    return Ok(Some(parse_constant(source)?));
-                }
-                Some('.') if peeking.next().map(|c| ['b', 'x', 'o'].contains(&c)) == Some(true) => {
-                    return Ok(Some(parse_constant(source)?));
-                }
-                _ => {}
-            }
-
-            Ok(None)
-        })?;
+        let token = {
+            let mut peeking = self.source.clone();
+            match peeking.next() {
+                Some('0'..='9') => Some(parse_constant(&mut self.source)?),
+                Some('.') if peeking.next().map(|c| crate::common::is_digit(c)) == Some(true) => {
+                    Some(parse_constant(&mut self.source)?)
+                }
+                _ => None,
+            }
+        };
 
         if let Some(token) = token {
            return self.push_token(token, start, self.source.position());

@@ -623,7 +749,7 @@ fn try_parse_exp_part(source: &mut Chars) -> Result<Option<()>> {
 /// DEC_DIGITS FloatingType?
 /// `.` DEC_DIGITS EXP_PART? FloatingType?
 /// DEC_DIGITS `.` DEC_DIGITS? EXP_PART? FloatingType?
-fn parse_constant(source: &mut Chars) -> Result<Token> {
+fn parse_constant_inner(source: &mut Chars) -> Result<Token> {
     let zero = source.next_if(|&c| c == '0').is_some();
     let radix = zero
         .then(|| source.next_if_map(|c| Radix::from_char(c)))

@@ -680,3 +806,22 @@ fn parse_constant(source: &mut Chars) -> Result<Token> {
 
     Ok(token)
 }
+
+/// CONSTANT <-
+/// DEC_DIGITS IntegralType?
+/// `0x` HEX_DIGITS IntegralType?
+/// `0b` BIN_DIGITS IntegralType?
+/// `0o` OCT_DIGITS IntegralType?
+/// DEC_DIGITS FloatingType?
+/// `.` DEC_DIGITS EXP_PART? FloatingType?
+/// DEC_DIGITS `.` DEC_DIGITS? EXP_PART? FloatingType?
+fn parse_constant(source: &mut Chars) -> Result<Token> {
+    let constant = parse_constant_inner(source)?;
+    // the char following a constant must not be id_continue;
+    // EOF directly after a constant is fine
+    if source.peek().map(|c| crate::common::is_id_continue(c)) == Some(true) {
+        return Err(Error::NumericalConstantUnknownSuffix);
+    }
+
+    Ok(constant)
+}
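Per the grammar comment, the new wrapper rejects a constant that runs directly into identifier characters; illustrative expectations (inferred from the code above, not from a test run):

    // "123"    -> integer constant
    // "0x1f"   -> integer constant (hex)
    // "1.5e3"  -> floating constant
    // "123abc" -> Err(Error::NumericalConstantUnknownSuffix)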
src/lib.rs

@@ -1,8 +1,14 @@
 #![feature(extract_if, iter_advance_by)]
 #![allow(dead_code, unused_macros)]
 
-mod ast;
-mod common;
-mod lexer;
-mod parser;
-mod tokens;
+pub mod ast;
+pub mod common;
+pub mod lexer;
+pub mod parser;
+pub mod tokens;
+
+pub fn tokenize<'a>(
+    bytes: &'a [u8],
+) -> Result<lexer::Tokenizer<'a>, (lexer::Tokenizer<'a>, Vec<lexer::TokenizeError>)> {
+    lexer::Tokenizer::new_with_errors(bytes)
+}
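With the modules now public, external tooling can drive the lexer directly; a minimal sketch (assumes the crate is named `compiler`, matching the new binary's imports):

    fn main() {
        match compiler::tokenize(b"let x = 1;") {
            Ok(tokens) => println!("{} tokens", tokens.iter().count()),
            Err((_partial, errors)) => eprintln!("{} lex errors", errors.len()),
        }
    }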
src/parser.rs

@@ -4,6 +4,7 @@ use itertools::Itertools;
 
 use crate::{
     ast::{FloatingType, IntegralType, LetOrVar, Node, PrimitiveType, Tag},
+    common::NextIf,
     lexer::{Radix, TokenIterator},
     tokens::Token,
 };

@@ -18,6 +19,8 @@ pub enum Error {
     ExpectedTokenNotFound(Token),
     #[error("Dummy message.")]
     ExpectedLetOrVar,
+    #[error("Integral type is too wide (bits must fit in a u16).")]
+    IntegralTypeTooWide,
 }
 
 pub type Result<T> = core::result::Result<T, Error>;

@@ -91,6 +94,64 @@ impl Tree {
         node
     }
 
+    fn is_integral_type(lexeme: &str) -> Option<()> {
+        let mut iter = lexeme.chars();
+        iter.next_if(|&c| c == 'u' || c == 'i')?;
+        iter.next_if(|&c| crate::common::is_digit(c))?;
+        iter.take_while_ref(|&c| crate::common::is_digit(c)).count();
+        iter.next().is_none().then_some(())
+    }
+
+    // returns an option instead of a result because failure here means the
+    // lexeme is actually an identifier.
+    fn try_parse_integral_type(lexeme: &str) -> Result<Option<IntegralType>> {
+        let mut iter = lexeme.chars().peekable();
+        let signed = match iter.next() {
+            Some('u') => false,
+            Some('i') => true,
+            _ => {
+                return Ok(None);
+            }
+        };
+
+        // need 1 digit for an integral type
+        if iter.peek().map(|&c| crate::common::is_digit(c)) != Some(true) {
+            return Ok(None);
+        }
+
+        // need no nondigits after digits
+        if iter
+            .clone()
+            .skip_while(|&c| crate::common::is_digit(c))
+            .next()
+            .is_some()
+        {
+            return Ok(None);
+        }
+
+        let mut bits = 0u16;
+        loop {
+            let Some(digit) = iter.next().map(|c| c as u8 - b'0') else {
+                break;
+            };
+
+            match bits
+                .checked_mul(10)
+                .and_then(|bits| bits.checked_add(digit as u16))
+            {
+                Some(val) => {
+                    bits = val;
+                }
+                None => {
+                    // this IS an integral type, but it is bigger than u/i65535
+                    return Err(Error::IntegralTypeTooWide);
+                }
+            }
+        }
+
+        Ok(Some(IntegralType { signed, bits }))
+    }
+
     /// returns (signed, bits)
     fn parse_integral_type(lexeme: &str) -> IntegralType {
         let mut iter = lexeme.chars();
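Illustrative behavior of the new helper (expectations inferred from the code above, not from a test run):

    // Tree::try_parse_integral_type("u16")    -> Ok(Some(IntegralType { signed: false, bits: 16 }))
    // Tree::try_parse_integral_type("i8")     -> Ok(Some(IntegralType { signed: true,  bits: 8 }))
    // Tree::try_parse_integral_type("uint")   -> Ok(None)   // not digits: a plain identifier
    // Tree::try_parse_integral_type("u99999") -> Err(Error::IntegralTypeTooWide)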
@@ -164,10 +225,6 @@ impl Tree {
     pub fn parse_primitive_type(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
         let token = tokens.next().ok_or(Error::UnexpectedEndOfTokens)?;
         let prim = match token.token() {
-            Token::IntegralType => {
-                let int = Self::parse_integral_type(token.lexeme());
-                return Ok(self.push_tag(Tag::IntegralType(int)));
-            }
             Token::Void => PrimitiveType::Void,
             Token::Bool => PrimitiveType::Bool,
             Token::F32 => PrimitiveType::FloatingType(FloatingType::Binary32),

@@ -191,9 +248,15 @@ impl Tree {
     pub fn parse_typename(&mut self, tokens: &mut TokenIterator) -> Result<Node> {
         match tokens.peek_token_or_err()?.token() {
             Token::Star => self.parse_pointer(tokens),
-            Token::Ident => Ok(self.push_tag(Tag::Ident {
-                name: tokens.next().unwrap().lexeme().to_owned(),
-            })),
+            Token::Ident => {
+                let token = tokens.next().unwrap();
+                match Self::try_parse_integral_type(token.lexeme())? {
+                    Some(int) => Ok(self.push_tag(Tag::IntegralType(int))),
+                    None => Ok(self.push_tag(Tag::Ident {
+                        name: token.lexeme().to_owned(),
+                    })),
+                }
+            }
             _ => self.parse_primitive_type(tokens),
         }
     }
src/tokens.rs

@@ -53,6 +53,7 @@ macro_rules! tokens {
 
 tokens!(pub Token: {
     Eof,
+    ParseError,
     // Marker Token for any Comment
     Comment,
     DocComment,
tests/faulty/non_id_start.sea (new file)

@@ -0,0 +1,4 @@
+
+fn 234test() {
+    return 3;
+}