From 6170eb0990f3a6b97dfe616c518cbc3533a80224 Mon Sep 17 00:00:00 2001
From: sofia
Date: Thu, 27 Jul 2023 21:17:44 +0300
Subject: [PATCH] Add parser, token stream, successfully parse let statement

---
 src/lexer.rs        | 43 +++++++++++++------------
 src/main.rs         | 10 ++++--
 src/parser.rs       | 47 +++++++++++++++++++++++++++
 src/token_stream.rs | 78 +++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 155 insertions(+), 23 deletions(-)
 create mode 100644 src/parser.rs
 create mode 100644 src/token_stream.rs

diff --git a/src/lexer.rs b/src/lexer.rs
index 3ecfd2b..1af2c52 100644
--- a/src/lexer.rs
+++ b/src/lexer.rs
@@ -7,7 +7,7 @@ pub static EASIEST: &str = include_str!("../easiest.reid");
 
 static DECIMAL_NUMERICS: &[char] = &['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'];
 
-pub fn tokenize<T: Into<String>>(to_tokenize: T) -> Result<Vec<Token>, String> {
+pub fn tokenize<T: Into<String>>(to_tokenize: T) -> Result<Vec<FullToken>, String> {
     let to_tokenize = to_tokenize.into();
     let mut position = (0, 1);
     let mut cursor = Cursor {
@@ -17,7 +17,7 @@ pub fn tokenize<T: Into<String>>(to_tokenize: T) -> Result<Vec<Token>, String>
 
     let mut tokens = Vec::new();
 
-    while let Some(character) = &cursor.consume() {
+    while let Some(character) = &cursor.next() {
         position.0 += 1;
         if *character == '\n' {
             position.1 += 1;
@@ -32,7 +32,7 @@ pub fn tokenize<T: Into<String>>(to_tokenize: T) -> Result<Vec<Token>, String>
             // Comments
             '/' if peek == Some(&'/') => {
                 while !matches!(&cursor.peek(), Some('\n')) {
-                    cursor.consume();
+                    cursor.next();
                 }
                 continue;
             }
@@ -44,13 +44,13 @@ pub fn tokenize<T: Into<String>>(to_tokenize: T) -> Result<Vec<Token>, String>
                         break;
                     }
                     value += &c.to_string();
-                    cursor.consume();
+                    cursor.next();
                 }
 
                 // Check for keywords
                 let variant = match value.as_str() {
-                    "let" => TokenVariant::LetKeyword,
-                    _ => TokenVariant::Identifier(value),
+                    "let" => Token::LetKeyword,
+                    _ => Token::Identifier(value),
                 };
                 variant
             }
@@ -62,13 +62,13 @@ pub fn tokenize<T: Into<String>>(to_tokenize: T) -> Result<Vec<Token>, String>
                         break;
                     }
                     value += &c.to_string();
-                    cursor.consume();
+                    cursor.next();
                 }
-                TokenVariant::DecimalValue(value)
+                Token::DecimalValue(value)
             }
             // Single character tokens
-            '=' => TokenVariant::Equals,
-            ';' => TokenVariant::Semicolon,
+            '=' => Token::Equals,
+            ';' => Token::Semicolon,
             // Invalid token
             _ => Err(format!(
                 "Unknown token '{}' at {}, {}",
@@ -76,37 +76,40 @@ pub fn tokenize<T: Into<String>>(to_tokenize: T) -> Result<Vec<Token>, String>
             ))?,
         };
 
-        tokens.push(Token { variant, position });
+        tokens.push(FullToken {
+            token: variant,
+            position,
+        });
     }
 
     position.0 += 1;
-    tokens.push(Token {
-        variant: TokenVariant::Eof,
+    tokens.push(FullToken {
+        token: Token::Eof,
         position,
     });
 
     Ok(tokens)
 }
 
-pub struct Token {
-    variant: TokenVariant,
+pub struct FullToken {
+    pub token: Token,
     position: Position,
 }
 
-impl Debug for Token {
+impl Debug for FullToken {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         f.write_fmt(format_args!(
             "{:?} (Ln {}, Col {})",
-            self.variant, self.position.1, self.position.0
+            self.token, self.position.1, self.position.0
         ))
     }
 }
 
 pub type Position = (u32, u32);
 
-#[derive(Debug)]
-pub enum TokenVariant {
+#[derive(Debug, Eq, PartialEq, Clone)]
+pub enum Token {
     LetKeyword,
     Semicolon,
     Equals,
@@ -122,7 +125,7 @@ pub struct Cursor<'a> {
 }
 
 impl<'a> Cursor<'a> {
-    fn consume(&mut self) -> Option<char> {
+    fn next(&mut self) -> Option<char> {
         let next = self.char_stream.next();
         self.position.0 += 1;
         if let Some('\n') = next {
diff --git a/src/main.rs b/src/main.rs
index 23b42f3..e122f59 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,9 +1,13 @@
-use crate::lexer::EASIEST;
+use crate::{lexer::EASIEST, parser::LetStatement, token_stream::TokenStream};
 
 mod lexer;
+mod parser;
+mod token_stream;
 
 fn main() {
-    let token_stream = lexer::tokenize(EASIEST).unwrap();
+    let tokens = lexer::tokenize(EASIEST).unwrap();
+    let mut token_stream = TokenStream::from(&tokens);
 
-    dbg!(&token_stream);
+    dbg!(token_stream.parse::<LetStatement>().ok());
+    dbg!(token_stream.parse::<LetStatement>().ok());
 }
diff --git a/src/parser.rs b/src/parser.rs
new file mode 100644
index 0000000..a261b75
--- /dev/null
+++ b/src/parser.rs
@@ -0,0 +1,47 @@
+use crate::{lexer::Token, token_stream::TokenStream};
+
+pub trait Parseable
+where
+    Self: std::marker::Sized,
+{
+    fn parse(stream: TokenStream) -> Result<Self, ()>;
+}
+
+#[derive(Debug)]
+pub enum Expression {
+    VariableName(String),
+    ContantI32(i32),
+}
+
+impl Parseable for Expression {
+    fn parse(mut stream: TokenStream) -> Result<Self, ()> {
+        if let Some(token) = stream.next() {
+            Ok(match &token {
+                Token::Identifier(v) => Expression::VariableName(v.clone()),
+                Token::DecimalValue(v) => Expression::ContantI32(v.parse().unwrap()),
+                _ => Err(())?,
+            })
+        } else {
+            Err(())
+        }
+    }
+}
+
+#[derive(Debug)]
+pub struct LetStatement(String, Expression);
+
+impl Parseable for LetStatement {
+    fn parse(mut stream: TokenStream) -> Result<Self, ()> {
+        stream.expect(Token::LetKeyword)?;
+
+        if let Some(Token::Identifier(variable)) = stream.next() {
+            stream.expect(Token::Equals)?;
+
+            let expression = stream.parse()?;
+            stream.expect(Token::Semicolon)?;
+            Ok(LetStatement(variable, expression))
+        } else {
+            Err(())
+        }
+    }
+}
diff --git a/src/token_stream.rs b/src/token_stream.rs
new file mode 100644
index 0000000..dab5a91
--- /dev/null
+++ b/src/token_stream.rs
@@ -0,0 +1,78 @@
+use crate::{
+    lexer::{FullToken, Token},
+    parser::Parseable,
+};
+
+pub struct TokenStream<'a, 'b> {
+    ref_position: Option<&'b mut usize>,
+    tokens: &'a [FullToken],
+    pub position: usize,
+}
+
+impl<'a, 'b> TokenStream<'a, 'b> {
+    pub fn from(tokens: &'a [FullToken]) -> Self {
+        TokenStream {
+            ref_position: None,
+            tokens,
+            position: 0,
+        }
+    }
+
+    pub fn expect(&mut self, token: Token) -> Result<(), ()> {
+        if let Some(peeked) = self.peek() {
+            if token == *peeked {
+                self.position += 1;
+                Ok(())
+            } else {
+                Err(())
+            }
+        } else {
+            Err(())
+        }
+    }
+
+    pub fn next(&mut self) -> Option<Token> {
+        let value = if self.tokens.len() <= self.position {
+            None
+        } else {
+            Some(self.tokens[self.position].token.clone())
+        };
+        self.position += 1;
+        value
+    }
+
+    pub fn peek(&mut self) -> Option<&Token> {
+        if self.tokens.len() <= self.position {
+            None
+        } else {
+            Some(&self.tokens[self.position].token)
+        }
+    }
+
+    pub fn parse<T: Parseable>(&mut self) -> Result<T, ()> {
+        let mut ref_pos = self.position;
+
+        let position = self.position;
+        let clone = TokenStream {
+            ref_position: Some(&mut ref_pos),
+            tokens: self.tokens,
+            position,
+        };
+
+        match T::parse(clone) {
+            Ok(res) => {
+                self.position = ref_pos.max(self.position);
+                Ok(res)
+            }
+            Err(e) => Err(e),
+        }
+    }
+}
+
+impl Drop for TokenStream<'_, '_> {
+    fn drop(&mut self) {
+        if let Some(ref_pos) = &mut self.ref_position {
+            **ref_pos = self.position;
+        }
+    }
+}
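
A quick sketch of how the pieces added in this patch fit together, assuming the
easiest.reid fixture holds two statements along the lines of `let x = 5; let y = x;`
(the fixture's contents are not part of this patch, so the sample input and the
`demo` wrapper below are illustrative, not committed code):

    use crate::{lexer, parser::LetStatement, token_stream::TokenStream};

    fn demo() {
        // tokenize() produces FullTokens (a Token plus its source position),
        // terminated by a single Token::Eof.
        let tokens = lexer::tokenize("let x = 5;\nlet y = x;").unwrap();

        // TokenStream borrows the token slice and tracks a cursor into it.
        let mut stream = TokenStream::from(&tokens);

        // parse() hands a child stream to LetStatement::parse. When the child
        // is dropped, its final cursor is written back through ref_position;
        // the parent adopts that position only on Ok, so a failed attempt
        // leaves the parent cursor where it was and another rule can be tried.
        while let Ok(statement) = stream.parse::<LetStatement>() {
            println!("parsed: {:?}", statement); // LetStatement(name, expression)
        }
    }

The Drop-based write-back is the design choice worth noting: the child stream is a
cheap view rather than a copy of the parent, and commit-versus-rollback reduces to
whether the parent reads the position the child reported.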