Add lexing of escaped characters, add parsing of characters

This commit is contained in:
Sofia 2025-07-22 14:53:36 +03:00
parent e220900ac3
commit 9b9398ac26
4 changed files with 41 additions and 10 deletions

View File

@ -32,6 +32,7 @@ pub enum TypeKind {
F80, F80,
F128PPC, F128PPC,
Str, Str,
Char,
Array(Box<TypeKind>, u64), Array(Box<TypeKind>, u64),
Custom(String), Custom(String),
Borrow(Box<TypeKind>, bool), Borrow(Box<TypeKind>, bool),
@ -44,7 +45,7 @@ pub enum Literal {
Decimal(f64), Decimal(f64),
Bool(bool), Bool(bool),
String(String), String(String),
Char(String), Char(char),
} }
#[derive(Debug, Clone)] #[derive(Debug, Clone)]

View File

@ -61,6 +61,7 @@ impl Parse for Type {
"f128" => TypeKind::F128, "f128" => TypeKind::F128,
"f128ppc" => TypeKind::F128PPC, "f128ppc" => TypeKind::F128PPC,
"str" => TypeKind::Str, "str" => TypeKind::Str,
"char" => TypeKind::Char,
_ => TypeKind::Custom(ident), _ => TypeKind::Custom(ident),
} }
} else { } else {
@ -177,6 +178,20 @@ impl Parse for PrimaryExpression {
stream.get_range().unwrap(), stream.get_range().unwrap(),
) )
} }
Token::CharLit(v) => {
    stream.next(); // Consume
    // A character literal must contain exactly one character. Count
    // Unicode scalar values rather than UTF-8 bytes so that a single
    // multi-byte character (e.g. 'é') is not mistaken for several.
    let char_count = v.chars().count();
    if char_count == 0 {
        stream.expected_err("char to not be empty")?;
    } else if char_count > 1 {
        // Previously this compared against 0, which flagged every
        // non-empty literal, including valid single-character ones.
        stream.expected_err("char to only have one char inside it")?;
    }
    // NOTE(review): the unwrap below assumes `expected_err(..)?` returned
    // early for the empty case — confirm against `expected_err`'s signature.
    Expression(
        Kind::Literal(Literal::Char(v.chars().next().unwrap())),
        stream.get_range().unwrap(),
    )
}
Token::True => { Token::True => {
stream.next(); // Consume stream.next(); // Consume
Expression( Expression(

View File

@ -289,7 +289,7 @@ impl ast::Literal {
ast::Literal::Bool(v) => mir::Literal::Bool(*v), ast::Literal::Bool(v) => mir::Literal::Bool(*v),
ast::Literal::String(val) => mir::Literal::String(val.clone()), ast::Literal::String(val) => mir::Literal::String(val.clone()),
ast::Literal::Decimal(v) => mir::Literal::Vague(mir::VagueLiteral::Decimal(*v)), ast::Literal::Decimal(v) => mir::Literal::Vague(mir::VagueLiteral::Decimal(*v)),
ast::Literal::Char(inner) => mir::Literal::Char(inner.chars().next().unwrap()), ast::Literal::Char(inner) => mir::Literal::Char(*inner),
} }
} }
} }
@ -326,6 +326,7 @@ impl From<ast::TypeKind> for mir::TypeKind {
ast::TypeKind::F80 => mir::TypeKind::F80, ast::TypeKind::F80 => mir::TypeKind::F80,
ast::TypeKind::F128 => mir::TypeKind::F128, ast::TypeKind::F128 => mir::TypeKind::F128,
ast::TypeKind::F128PPC => mir::TypeKind::F128PPC, ast::TypeKind::F128PPC => mir::TypeKind::F128PPC,
ast::TypeKind::Char => todo!(),
} }
} }
} }

View File

@ -250,16 +250,21 @@ pub fn tokenize<T: Into<String>>(to_tokenize: T) -> Result<Vec<FullToken>, Error
} }
'\"' | '\'' => { '\"' | '\'' => {
let mut value = String::new(); let mut value = String::new();
let mut ignore_next = false; let mut escape_next = false;
while cursor.first().is_some() while cursor.first().is_some()
&& (cursor.first() != Some(*character) || ignore_next) && (cursor.first() != Some(*character) || escape_next)
{ {
if cursor.first() == Some('\\') && !ignore_next { if cursor.first() == Some('\\') && !escape_next {
cursor.next(); // Consume backslash anjd always add next character cursor.next(); // Consume backslash and always add next character
ignore_next = true; escape_next = true;
} else { } else {
ignore_next = false; let c = &cursor.next().unwrap();
value += &cursor.next().unwrap().to_string(); if escape_next {
value += &escape_char(&c).to_string();
} else {
value += &c.to_string();
}
escape_next = false;
} }
} }
if cursor.first() == Some(*character) { if cursor.first() == Some(*character) {
@ -268,7 +273,7 @@ pub fn tokenize<T: Into<String>>(to_tokenize: T) -> Result<Vec<FullToken>, Error
return Err(Error::MissingQuotation(position)); return Err(Error::MissingQuotation(position));
} }
match character { match character {
'\'' => Token::StringLit(value), '\'' => Token::CharLit(value),
'\"' => Token::StringLit(value), '\"' => Token::StringLit(value),
_ => unsafe { unreachable_unchecked() }, _ => unsafe { unreachable_unchecked() },
} }
@ -356,6 +361,15 @@ pub fn tokenize<T: Into<String>>(to_tokenize: T) -> Result<Vec<FullToken>, Error
Ok(tokens) Ok(tokens)
} }
/// Translates the character that follows a backslash in a string or char
/// literal into the character it stands for.
///
/// `t`, `n` and `r` map to tab, line feed and carriage return respectively.
/// Any other character is returned unchanged.
fn escape_char(c: &char) -> char {
    // (escape letter, resulting character) pairs recognised by the lexer.
    const ESCAPES: [(char, char); 3] = [('t', '\t'), ('n', '\n'), ('r', '\r')];

    ESCAPES
        .iter()
        .find(|(letter, _)| letter == c)
        .map_or(*c, |&(_, replacement)| replacement)
}
#[derive(thiserror::Error, Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] #[derive(thiserror::Error, Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub enum Error { pub enum Error {
#[error("Invalid token '{}' ", .0)] #[error("Invalid token '{}' ", .0)]