Add parser, token stream, successfully parse let statement

This commit is contained in:
Sofia 2023-07-27 21:17:44 +03:00
parent cca69976dd
commit 6170eb0990
4 changed files with 155 additions and 23 deletions

View File

@ -7,7 +7,7 @@ pub static EASIEST: &str = include_str!("../easiest.reid");
static DECIMAL_NUMERICS: &[char] = &['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'];
pub fn tokenize<T: Into<String>>(to_tokenize: T) -> Result<Vec<Token>, String> {
pub fn tokenize<T: Into<String>>(to_tokenize: T) -> Result<Vec<FullToken>, String> {
let to_tokenize = to_tokenize.into();
let mut position = (0, 1);
let mut cursor = Cursor {
@ -17,7 +17,7 @@ pub fn tokenize<T: Into<String>>(to_tokenize: T) -> Result<Vec<Token>, String> {
let mut tokens = Vec::new();
while let Some(character) = &cursor.consume() {
while let Some(character) = &cursor.next() {
position.0 += 1;
if *character == '\n' {
position.1 += 1;
@ -32,7 +32,7 @@ pub fn tokenize<T: Into<String>>(to_tokenize: T) -> Result<Vec<Token>, String> {
// Comments
'/' if peek == Some(&'/') => {
while !matches!(&cursor.peek(), Some('\n')) {
cursor.consume();
cursor.next();
}
continue;
}
@ -44,13 +44,13 @@ pub fn tokenize<T: Into<String>>(to_tokenize: T) -> Result<Vec<Token>, String> {
break;
}
value += &c.to_string();
cursor.consume();
cursor.next();
}
// Check for keywords
let variant = match value.as_str() {
"let" => TokenVariant::LetKeyword,
_ => TokenVariant::Identifier(value),
"let" => Token::LetKeyword,
_ => Token::Identifier(value),
};
variant
}
@ -62,13 +62,13 @@ pub fn tokenize<T: Into<String>>(to_tokenize: T) -> Result<Vec<Token>, String> {
break;
}
value += &c.to_string();
cursor.consume();
cursor.next();
}
TokenVariant::DecimalValue(value)
Token::DecimalValue(value)
}
// Single character tokens
'=' => TokenVariant::Equals,
';' => TokenVariant::Semicolon,
'=' => Token::Equals,
';' => Token::Semicolon,
// Invalid token
_ => Err(format!(
"Unknown token '{}' at {}, {}",
@ -76,37 +76,40 @@ pub fn tokenize<T: Into<String>>(to_tokenize: T) -> Result<Vec<Token>, String> {
))?,
};
tokens.push(Token { variant, position });
tokens.push(FullToken {
token: variant,
position,
});
}
position.0 += 1;
tokens.push(Token {
variant: TokenVariant::Eof,
tokens.push(FullToken {
token: Token::Eof,
position,
});
Ok(tokens)
}
pub struct Token {
variant: TokenVariant,
pub struct FullToken {
pub token: Token,
position: Position,
}
impl Debug for Token {
impl Debug for FullToken {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_fmt(format_args!(
"{:?} (Ln {}, Col {})",
self.variant, self.position.1, self.position.0
self.token, self.position.1, self.position.0
))
}
}
pub type Position = (u32, u32);
#[derive(Debug)]
pub enum TokenVariant {
#[derive(Debug, Eq, PartialEq, Clone)]
pub enum Token {
LetKeyword,
Semicolon,
Equals,
@ -122,7 +125,7 @@ pub struct Cursor<'a> {
}
impl<'a> Cursor<'a> {
fn consume(&mut self) -> Option<char> {
fn next(&mut self) -> Option<char> {
let next = self.char_stream.next();
self.position.0 += 1;
if let Some('\n') = next {

View File

@ -1,9 +1,13 @@
use crate::lexer::EASIEST;
use crate::{lexer::EASIEST, parser::LetStatement, token_stream::TokenStream};
mod lexer;
mod parser;
mod token_stream;
fn main() {
let token_stream = lexer::tokenize(EASIEST).unwrap();
let tokens = lexer::tokenize(EASIEST).unwrap();
let mut token_stream = TokenStream::from(&tokens);
dbg!(&token_stream);
dbg!(token_stream.parse::<LetStatement>().ok());
dbg!(token_stream.parse::<LetStatement>().ok());
}

47
src/parser.rs Normal file
View File

@ -0,0 +1,47 @@
use crate::{lexer::Token, token_stream::TokenStream};
/// Anything that can be constructed by consuming tokens from a
/// [`TokenStream`]. Parsing either succeeds with a value or fails with `()`.
pub trait Parseable: Sized {
    /// Attempts to parse `Self` from the front of `stream`.
    fn parse(stream: TokenStream) -> Result<Self, ()>;
}
/// An expression on the right-hand side of an assignment: either a variable
/// reference or an `i32` literal.
///
/// Derives `Eq, PartialEq, Clone` to match the lexer's `Token` enum, so
/// expressions can be compared and duplicated in tests and later passes.
///
/// NOTE(review): `ContantI32` looks like a typo for `ConstantI32`; renaming
/// would break existing call sites, so the name is kept as-is here.
#[derive(Debug, Eq, PartialEq, Clone)]
pub enum Expression {
    /// A reference to a variable by its identifier.
    VariableName(String),
    /// A decimal literal, already parsed into an `i32`.
    ContantI32(i32),
}
impl Parseable for Expression {
    /// Parses a single-token expression: an identifier becomes a variable
    /// reference, a decimal literal becomes an `i32` constant.
    fn parse(mut stream: TokenStream) -> Result<Expression, ()> {
        match stream.next() {
            Some(Token::Identifier(v)) => Ok(Expression::VariableName(v)),
            // A literal outside the i32 range is a parse error, not a crash:
            // the previous `.unwrap()` would panic here on e.g. "9999999999".
            Some(Token::DecimalValue(v)) => {
                v.parse().map(Expression::ContantI32).map_err(|_| ())
            }
            // Any other token, or end of input, is not an expression.
            _ => Err(()),
        }
    }
}
/// A parsed `let <name> = <expr>;` statement.
/// Field `.0` is the variable name, `.1` the assigned expression.
#[derive(Debug)]
pub struct LetStatement(String, Expression);
impl Parseable for LetStatement {
    /// Parses the full statement form `let <identifier> = <expression>;`,
    /// failing if any piece is missing or out of order.
    fn parse(mut stream: TokenStream) -> Result<LetStatement, ()> {
        stream.expect(Token::LetKeyword)?;
        match stream.next() {
            Some(Token::Identifier(name)) => {
                stream.expect(Token::Equals)?;
                let value = stream.parse()?;
                stream.expect(Token::Semicolon)?;
                Ok(LetStatement(name, value))
            }
            // The token after `let` must be an identifier.
            _ => Err(()),
        }
    }
}

78
src/token_stream.rs Normal file
View File

@ -0,0 +1,78 @@
use crate::{
lexer::{FullToken, Token},
parser::Parseable,
};
/// A cursor over a slice of lexed tokens, with support for speculative
/// (backtracking) parses via [`TokenStream::parse`].
pub struct TokenStream<'a, 'b> {
    /// When this stream is a child created by `parse`, points at the parent's
    /// bookkeeping slot; the child's final position is written there on drop.
    ref_position: Option<&'b mut usize>,
    /// The token buffer being parsed, borrowed from the lexer's output.
    tokens: &'a [FullToken],
    /// Index of the next token to consume.
    pub position: usize,
}
impl<'a, 'b> TokenStream<'a, 'b> {
    /// Creates a top-level stream over the given tokens, starting at index 0.
    pub fn from(tokens: &'a [FullToken]) -> Self {
        TokenStream {
            ref_position: None,
            tokens,
            position: 0,
        }
    }

    /// Consumes the next token if it equals `token`; otherwise leaves the
    /// position untouched and returns `Err(())`.
    pub fn expect(&mut self, token: Token) -> Result<(), ()> {
        if self.peek() == Some(&token) {
            self.position += 1;
            Ok(())
        } else {
            Err(())
        }
    }

    /// Returns the next token (cloned) and advances, or `None` past the end.
    pub fn next(&mut self) -> Option<Token> {
        // `get` bounds-checks correctly; the previous comparison
        // `self.tokens.len() < self.position` was off by one and indexed
        // out of bounds (panic) when position == len.
        let value = self.tokens.get(self.position).map(|t| t.token.clone());
        self.position += 1;
        value
    }

    /// Returns the next token without advancing, or `None` past the end.
    pub fn peek(&mut self) -> Option<&Token> {
        // Same off-by-one fix as in `next`.
        self.tokens.get(self.position).map(|t| &t.token)
    }

    /// Parses a `T` on a speculative child stream: on success the child's
    /// final position is committed to `self`; on failure `self` is unchanged,
    /// so the caller can try an alternative parse from the same point.
    pub fn parse<T: Parseable>(&mut self) -> Result<T, ()> {
        let mut ref_pos = self.position;
        let child = TokenStream {
            ref_position: Some(&mut ref_pos),
            tokens: self.tokens,
            position: self.position,
        };
        // The child writes its final position into `ref_pos` when dropped
        // (see the `Drop` impl), which happens inside/after `T::parse`.
        let result = T::parse(child);
        if result.is_ok() {
            self.position = ref_pos.max(self.position);
        }
        result
    }
}
impl Drop for TokenStream<'_, '_> {
    /// Propagates a child stream's final position back to its parent through
    /// the shared `ref_position` slot; a no-op for top-level streams.
    fn drop(&mut self) {
        let final_position = self.position;
        if let Some(parent_slot) = self.ref_position.as_deref_mut() {
            *parent_slot = final_position;
        }
    }
}