Add lexing of escaped characters, add parsing of characters

This commit is contained in:
Sofia 2025-07-22 14:53:36 +03:00
parent e220900ac3
commit 9b9398ac26
4 changed files with 41 additions and 10 deletions

View File

@ -32,6 +32,7 @@ pub enum TypeKind {
F80,
F128PPC,
Str,
Char,
Array(Box<TypeKind>, u64),
Custom(String),
Borrow(Box<TypeKind>, bool),
@ -44,7 +45,7 @@ pub enum Literal {
Decimal(f64),
Bool(bool),
String(String),
Char(String),
Char(char),
}
#[derive(Debug, Clone)]

View File

@ -61,6 +61,7 @@ impl Parse for Type {
"f128" => TypeKind::F128,
"f128ppc" => TypeKind::F128PPC,
"str" => TypeKind::Str,
"char" => TypeKind::Char,
_ => TypeKind::Custom(ident),
}
} else {
@ -177,6 +178,20 @@ impl Parse for PrimaryExpression {
stream.get_range().unwrap(),
)
}
Token::CharLit(v) => {
    stream.next(); // Consume
    // Count Unicode scalar values rather than bytes, so that a literal
    // holding one multi-byte character (e.g. 'ä') still counts as one char.
    let char_count = v.chars().count();
    if char_count == 0 {
        stream.expected_err("char to not be empty")?;
    } else if char_count > 1 {
        // Reject only literals holding more than one character; the
        // previous `> 0` bound rejected every non-empty literal.
        stream.expected_err("char to only have one char inside it")?;
    }
    Expression(
        // The literal was checked above to be non-empty, so `next()`
        // yields its first (and only) character.
        Kind::Literal(Literal::Char(v.chars().next().unwrap())),
        stream.get_range().unwrap(),
    )
}
Token::True => {
stream.next(); // Consume
Expression(

View File

@ -289,7 +289,7 @@ impl ast::Literal {
ast::Literal::Bool(v) => mir::Literal::Bool(*v),
ast::Literal::String(val) => mir::Literal::String(val.clone()),
ast::Literal::Decimal(v) => mir::Literal::Vague(mir::VagueLiteral::Decimal(*v)),
ast::Literal::Char(inner) => mir::Literal::Char(inner.chars().next().unwrap()),
ast::Literal::Char(inner) => mir::Literal::Char(*inner),
}
}
}
@ -326,6 +326,7 @@ impl From<ast::TypeKind> for mir::TypeKind {
ast::TypeKind::F80 => mir::TypeKind::F80,
ast::TypeKind::F128 => mir::TypeKind::F128,
ast::TypeKind::F128PPC => mir::TypeKind::F128PPC,
ast::TypeKind::Char => todo!(),
}
}
}

View File

@ -250,16 +250,21 @@ pub fn tokenize<T: Into<String>>(to_tokenize: T) -> Result<Vec<FullToken>, Error
}
'\"' | '\'' => {
let mut value = String::new();
let mut ignore_next = false;
let mut escape_next = false;
while cursor.first().is_some()
&& (cursor.first() != Some(*character) || ignore_next)
&& (cursor.first() != Some(*character) || escape_next)
{
if cursor.first() == Some('\\') && !ignore_next {
cursor.next(); // Consume backslash anjd always add next character
ignore_next = true;
if cursor.first() == Some('\\') && !escape_next {
cursor.next(); // Consume backslash and always add next character
escape_next = true;
} else {
ignore_next = false;
value += &cursor.next().unwrap().to_string();
let c = &cursor.next().unwrap();
if escape_next {
value += &escape_char(&c).to_string();
} else {
value += &c.to_string();
}
escape_next = false;
}
}
if cursor.first() == Some(*character) {
@ -268,7 +273,7 @@ pub fn tokenize<T: Into<String>>(to_tokenize: T) -> Result<Vec<FullToken>, Error
return Err(Error::MissingQuotation(position));
}
match character {
'\'' => Token::StringLit(value),
'\'' => Token::CharLit(value),
'\"' => Token::StringLit(value),
_ => unsafe { unreachable_unchecked() },
}
@ -356,6 +361,15 @@ pub fn tokenize<T: Into<String>>(to_tokenize: T) -> Result<Vec<FullToken>, Error
Ok(tokens)
}
/// Resolves the character written after a backslash to the character it
/// denotes.
///
/// `t`, `n` and `r` map to tab, line feed and carriage return respectively.
/// Every other character is returned unchanged, so an escaped backslash or
/// quote yields that same character.
fn escape_char(c: &char) -> char {
    // (escape letter, character it stands for)
    const ESCAPES: [(char, char); 3] = [('t', '\t'), ('n', '\n'), ('r', '\r')];

    ESCAPES
        .iter()
        .find(|(letter, _)| letter == c)
        .map_or(*c, |&(_, replacement)| replacement)
}
#[derive(thiserror::Error, Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub enum Error {
#[error("Invalid token '{}' ", .0)]