From 2dd3a5904b3132c2b6414aaa3f37f4a836fcada7 Mon Sep 17 00:00:00 2001
From: sofia
Date: Tue, 29 Jul 2025 16:37:58 +0300
Subject: [PATCH] Add whitespace to lexer

---
 reid/src/ast/lexer.rs        | 13 +++++++-
 reid/src/ast/token_stream.rs | 61 +++++++++++++++++++++---------------
 2 files changed, 48 insertions(+), 26 deletions(-)

diff --git a/reid/src/ast/lexer.rs b/reid/src/ast/lexer.rs
index 984825f..9d14c22 100644
--- a/reid/src/ast/lexer.rs
+++ b/reid/src/ast/lexer.rs
@@ -114,6 +114,7 @@ pub enum Token {
     Unknown(char),
+    Whitespace(String),
     Eof,
 }
@@ -192,6 +193,7 @@ impl ToString for Token {
             Token::Eof => String::new(),
             Token::Slash => String::from('/'),
             Token::Percent => String::from('%'),
+            Token::Whitespace(val) => val.clone(),
             Token::Unknown(val) => val.to_string(),
         }
     }
@@ -293,7 +295,16 @@ pub fn tokenize<T: Into<String>>(to_tokenize: T) -> Result<Vec<FullToken>, Error> {
         let variant = match character {
             // Whitespace
-            w if w.is_whitespace() => continue,
+            w if w.is_whitespace() => {
+                let mut whitespace = String::from(*w);
+                while let Some(w) = cursor.first() {
+                    if !w.is_whitespace() {
+                        break;
+                    }
+                    whitespace.push(cursor.next().unwrap());
+                }
+                Token::Whitespace(whitespace)
+            }
             // Comments
             '/' if cursor.first() == Some('/') => {
                 while !matches!(cursor.first(), Some('\n') | None) {
diff --git a/reid/src/ast/token_stream.rs b/reid/src/ast/token_stream.rs
index 2cc9d9d..8c810c8 100644
--- a/reid/src/ast/token_stream.rs
+++ b/reid/src/ast/token_stream.rs
@@ -53,9 +53,9 @@ impl<'a, 'b> TokenStream<'a, 'b> {
     }
 
     pub fn expect(&mut self, token: Token) -> Result<(), Error> {
-        if let Some(peeked) = self.peek() {
-            if token == peeked {
-                self.position += 1;
+        if let (pos, Some(peeked)) = self.next_token(self.position) {
+            if token == peeked.token {
+                self.position = pos + 1;
                 Ok(())
             } else {
                 Err(self.expecting_err(token)?)
@@ -66,37 +66,25 @@ impl<'a, 'b> TokenStream<'a, 'b> {
     }
 
     pub fn next(&mut self) -> Option<Token> {
-        let value = if self.tokens.len() < self.position {
-            None
-        } else {
-            Some(self.tokens[self.position].token.clone())
-        };
-        self.position += 1;
-        value
+        let (position, token) = self.next_token(self.position);
+        self.position = position + 1;
+        token.map(|t| t.token.clone())
     }
 
     pub fn previous(&mut self) -> Option<Token> {
-        if (self.position as i32 - 1) < 0 {
-            None
-        } else {
-            Some(self.tokens[self.position - 1].token.clone())
-        }
+        let (_, token) = self.previous_token(self.position);
+        token.map(|t| t.token.clone())
    }
 
     pub fn peek(&mut self) -> Option<Token> {
-        if self.tokens.len() < self.position {
-            None
-        } else {
-            Some(self.tokens[self.position].token.clone())
-        }
+        let (_, token) = self.next_token(self.position);
+        token.map(|t| t.token.clone())
     }
 
     pub fn peek2(&mut self) -> Option<Token> {
-        if self.tokens.len() < (self.position + 1) {
-            None
-        } else {
-            Some(self.tokens[self.position + 1].token.clone())
-        }
+        let (pos2, _) = self.next_token(self.position);
+        let (_, token) = self.next_token(pos2 + 1);
+        token.map(|t| t.token.clone())
     }
 
     /// Parse the next value of trait Parse. If the parse succeeded, the related
@@ -188,6 +176,29 @@ impl<'a, 'b> TokenStream<'a, 'b> {
             end: self.position - 1,
         })
     }
+
+    fn previous_token(&self, mut from: usize) -> (usize, Option<&'a FullToken>) {
+        from -= 1;
+        while let Some(token) = self.tokens.get(from) {
+            if let Token::Whitespace(_) = token.token {
+                from -= 1;
+            } else {
+                break;
+            }
+        }
+        (from, self.tokens.get(from))
+    }
+
+    fn next_token(&self, mut from: usize) -> (usize, Option<&'a FullToken>) {
+        while let Some(token) = self.tokens.get(from) {
+            if let Token::Whitespace(_) = token.token {
+                from += 1;
+            } else {
+                break;
+            }
+        }
+        (from, self.tokens.get(from))
+    }
 }
 
 impl Drop for TokenStream<'_, '_> {
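
For illustration, a minimal standalone sketch of the idea behind the new whitespace branch in tokenize: rather than skipping whitespace, the lexer gathers a run of consecutive whitespace characters into a single Token::Whitespace value. The Peekable<Chars> cursor and the lex_whitespace helper below are assumptions made for the sketch; the real lexer drives its own cursor type via first() and next().

use std::iter::Peekable;
use std::str::Chars;

// Hypothetical stand-in for the lexer's cursor: a peekable character iterator.
fn lex_whitespace(first: char, cursor: &mut Peekable<Chars<'_>>) -> String {
    // Start the run with the whitespace character that triggered the branch.
    let mut whitespace = String::from(first);
    // Keep consuming while the next character is still whitespace.
    while let Some(c) = cursor.peek() {
        if !c.is_whitespace() {
            break;
        }
        whitespace.push(cursor.next().unwrap());
    }
    whitespace
}

fn main() {
    let mut cursor = "  \t\nlet x".chars().peekable();
    let first = cursor.next().unwrap();
    let run = lex_whitespace(first, &mut cursor);
    assert_eq!(run, "  \t\n");
    assert_eq!(cursor.collect::<String>(), "let x");
    println!("collected whitespace run: {run:?}");
}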
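
A second sketch shows why the new TokenStream helpers exist: with whitespace now kept in the token buffer, every parser-facing lookup first steps over Whitespace entries, so expect, next, peek and peek2 behave as they did before the change. The simplified Token enum and the next_significant name are assumptions for this sketch; the patch's next_token does the same walk over FullToken values.

#[derive(Debug, PartialEq)]
enum Token {
    Identifier(String),
    Whitespace(String),
    Eof,
}

/// Starting at `from`, step past any Whitespace tokens and return the index
/// and value of the first significant token, if any.
fn next_significant(tokens: &[Token], mut from: usize) -> (usize, Option<&Token>) {
    while let Some(token) = tokens.get(from) {
        if matches!(token, Token::Whitespace(_)) {
            from += 1;
        } else {
            break;
        }
    }
    (from, tokens.get(from))
}

fn main() {
    let tokens = vec![
        Token::Whitespace("  ".into()),
        Token::Identifier("let".into()),
        Token::Whitespace("\n".into()),
        Token::Identifier("x".into()),
        Token::Eof,
    ];

    // Peeking from index 0 skips the leading whitespace and lands on `let`.
    let (pos, token) = next_significant(&tokens, 0);
    assert_eq!((pos, token), (1, Some(&Token::Identifier("let".into()))));

    // A second lookup from `pos + 1` skips the newline and finds `x`.
    let (pos2, token2) = next_significant(&tokens, pos + 1);
    assert_eq!((pos2, token2), (3, Some(&Token::Identifier("x".into()))));
}

Keeping whitespace as real tokens (with ToString returning the original text for them) presumably lets the token stream reproduce the source verbatim, e.g. for formatting or error reporting, while the parser itself stays whitespace-agnostic thanks to the skipping helpers.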