commit 044d2b9d25e6b584899c67893078e49a40e33f0f Author: Sofia Date: Sat Mar 14 15:55:00 2026 +0200 Add very simple parsing diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..2991a95 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,65 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "ferrite-lua" +version = "0.1.0" +dependencies = [ + "thiserror", +] + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "syn" +version = "2.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "thiserror" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "unicode-ident" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..486c860 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "ferrite-lua" +version = "0.1.0" +edition = "2024" + +[dependencies] +## Make it easier to generate errors +thiserror = "1.0.44" diff --git a/examples/test.lua b/examples/test.lua new file mode 100644 index 0000000..027df47 --- /dev/null +++ b/examples/test.lua @@ -0,0 +1,7 @@ +function max () + local m = a + -- if b > a then + -- m = b + -- end + return m +end \ No newline at end of file diff --git a/src/ast.rs b/src/ast.rs new file mode 100644 index 0000000..3d46fa8 --- /dev/null +++ b/src/ast.rs @@ -0,0 +1,237 @@ +use std::{fmt::Debug, hash::Hash, ops::Add, path::PathBuf}; + +use crate::token_stream::{ + Parse, TokenRange, TokenStream, TokenStreamError, + lexer::{Keyword, Position, Token}, +}; + +#[derive(Debug, Clone)] +pub struct Node { + pub kind: T, + pub meta: Metadata, +} + +impl PartialEq for Node { + fn eq(&self, other: &Self) -> bool { + self.kind == other.kind + } +} + +impl Eq for Node {} + +impl Hash for Node { + fn hash(&self, state: &mut H) { + self.kind.hash(state); + } +} + +impl Node { + pub fn empty(kind: T) -> Node { + Node { + kind, + meta: Metadata::empty(), + } + } + + pub fn with(&self, other: OtherT) -> Node { + Node { + kind: other, + meta: self.meta.clone(), + } + } +} + +#[derive(Clone, PartialEq, Hash, Eq)] +pub struct Metadata { + pub documentation: Option, + pub token_range: TokenRange, + pub position: Position, + pub file_path: PathBuf, +} + +impl Metadata { + pub fn empty() -> Metadata { + Metadata { + documentation: None, + token_range: Default::default(), + position: Position(0, 0), + file_path: PathBuf::new(), + } + } +} + +impl Add for Metadata { + type Output = Metadata; + + fn add(self, rhs: Metadata) -> Self::Output { + Metadata { + documentation: self.documentation, + token_range: TokenRange { + start: self.token_range.start, + end: rhs.token_range.end, + }, + position: self.position, + file_path: self.file_path, + } + } +} + +impl Debug for Metadata { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{:?}", self.token_range) + } +} + +impl Parse for Node { + fn parse(mut stream: TokenStream) -> Result { + let position = stream + .get_position() + .ok_or(stream.expecting_err(std::any::type_name::()))?; + let documentation = stream.find_documentation("/").into_iter().last(); + Ok(Node { + kind: stream.parse()?, + meta: Metadata { + documentation, + token_range: stream.get_range(), + position, + file_path: stream.file_path.clone(), + }, + }) + } +} + +impl Metadata { + fn pre( + stream: &mut TokenStream, + expecting: &str, + ) -> Result<(Option, Position), TokenStreamError> { + Ok(( + stream.find_documentation("/").into_iter().last(), + stream + .get_position() + .ok_or(stream.expecting_err(expecting))?, + )) + } + + fn produce( + stream: &mut TokenStream, + (documentation, position): (Option, Position), + ) -> Metadata { + Metadata { + documentation: documentation, + token_range: stream.get_range(), + position, + file_path: stream.file_path.clone(), + } + } +} + +#[derive(Debug, Clone)] +pub struct Function { + pub name: Option>, + pub params: Vec>, + pub block: Block, + pub meta: Metadata, +} + +impl Parse for Function { + fn parse(mut stream: TokenStream) -> Result { + let pre = Metadata::pre(&mut stream, "function")?; + + stream.expect(Token::Keyword(Keyword::Function))?; + let name = stream.parse::>().ok(); + stream.expect(Token::Symbol('('))?; + + let params = Vec::new(); + + stream.expect(Token::Symbol(')'))?; + + let block = stream.parse()?; + + stream.expect(Token::Keyword(Keyword::End))?; + + Ok(Function { + name, + params, + block, + meta: Metadata::produce(&mut stream, pre), + }) + } +} + +impl Parse for String { + fn parse(mut stream: TokenStream) -> Result { + if let Some(Token::Word(text)) = stream.next() { + Ok(text) + } else { + Err(stream.expected_err("identifier")) + } + } +} + +#[derive(Debug, Clone)] +pub struct Block { + pub statements: Vec>, + pub meta: Metadata, +} + +impl Parse for Block { + fn parse(mut stream: TokenStream) -> Result { + let pre = Metadata::pre(&mut stream, "block")?; + let mut statements = Vec::new(); + + while stream.peek() != Some(Token::Keyword(Keyword::End)) { + statements.push(stream.parse()?); + } + + Ok(Block { + statements, + meta: Metadata::produce(&mut stream, pre), + }) + } +} + +#[derive(Debug, Clone)] +pub enum Statement { + Assignment(Option, Node, Node), + Return(Node), +} + +impl Parse for Statement { + fn parse(mut stream: TokenStream) -> Result { + let peeked = stream.peek(); + if peeked == Some(Token::Keyword(Keyword::Return)) { + stream.next(); + Ok(Statement::Return(stream.parse()?)) + } else if peeked == Some(Token::Keyword(Keyword::Local)) { + stream.next(); + let name = stream.parse()?; + stream.expect(Token::Symbol('='))?; + let expr = stream.parse()?; + Ok(Statement::Assignment( + Some(DefinitionKind::Local), + name, + expr, + )) + } else { + Err(stream.expecting_err("statement")) + } + } +} + +#[derive(Debug, Clone)] +pub enum DefinitionKind { + Local, + Global, +} + +#[derive(Debug, Clone)] +pub enum Expression { + ValueRef(String), +} + +impl Parse for Expression { + fn parse(mut stream: TokenStream) -> Result { + Ok(Expression::ValueRef(stream.parse()?)) + } +} diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..ef76162 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,29 @@ +use std::path::PathBuf; + +use crate::{ + ast::Function, + token_stream::{ + TokenStream, + lexer::{Token, tokenize}, + }, +}; + +mod ast; +mod token_stream; + +static TEST: &str = include_str!("../examples/test.lua"); + +fn main() { + let file_path = PathBuf::from("../examples/test.lua"); + let tokens = tokenize(TEST).unwrap(); + let mut stream = TokenStream::from(&file_path, &tokens); + + let mut functions = Vec::new(); + while stream.peek() != Some(Token::Eof) { + functions.push(stream.parse::().unwrap()); + } + + dbg!(functions); + + println!("Hello, world!"); +} diff --git a/src/token_stream/lexer.rs b/src/token_stream/lexer.rs new file mode 100644 index 0000000..dae9cc0 --- /dev/null +++ b/src/token_stream/lexer.rs @@ -0,0 +1,357 @@ +use std::{fmt::Debug, ops::AddAssign, str::Chars}; + +static BINARY_NUMERICS: &[char] = &['0', '1']; +static OCTAL_NUMERICS: &[char] = &['0', '1', '2', '3', '4', '5', '6', '7']; +static DECIMAL_NUMERICS: &[char] = &['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']; +static HEXADECIMAL_NUMERICS: &[char] = &[ + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', +]; + +#[derive(Eq, PartialEq, Clone, PartialOrd, Ord, Hash, Debug)] +pub enum Keyword { + Function, + End, + Local, + Return, +} + +impl Keyword { + pub fn parse(from: &str) -> Option { + Some(match from { + "function" => Keyword::Function, + "end" => Keyword::End, + "local" => Keyword::Local, + "return" => Keyword::Return, + _ => None?, + }) + } +} + +impl ToString for Keyword { + fn to_string(&self) -> String { + match self { + Keyword::Function => "function", + Keyword::End => "end", + Keyword::Local => "local", + Keyword::Return => "return", + } + .to_string() + } +} + +#[derive(Eq, PartialEq, Clone, PartialOrd, Ord, Hash, Debug)] +pub enum Token { + /// Word-like-values + Word(String), + Keyword(Keyword), + /// Number in the decimal base + DecimalValue(String), + /// Integer number in the hexadecimal base + HexadecimalValue(String), + /// Integer number in the octal base + OctalValue(String), + /// Integer number in the binary base + BinaryValue(String), + /// Some string literal that was surrounded by "double-quotes". + StringLit(String), + + /// Special one-character symbol + Symbol(char), + + Whitespace(String), + Comment(String), + Eof, +} + +impl From for String { + fn from(value: Token) -> Self { + format!("{:?}", value) + } +} + +impl Token { + pub fn len(&self) -> usize { + self.to_string().len() + } +} + +impl ToString for Token { + fn to_string(&self) -> String { + match &self { + Token::Word(ident) => ident.clone(), + Token::DecimalValue(val) => val.to_string(), + Token::HexadecimalValue(val) => format!("0x{}", val), + Token::OctalValue(val) => format!("0o{}", val), + Token::BinaryValue(val) => format!("0b{}", val), + Token::StringLit(lit) => format!("\"{}\"", lit), + Token::Eof => String::new(), + Token::Whitespace(val) => val.clone(), + Token::Comment(val) => format!("--{}", val.clone()), + Token::Symbol(val) => val.to_string(), + Token::Keyword(keyword) => keyword.to_string(), + } + } +} + +/// A token with a position +#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct FullToken { + pub token: Token, + pub position: Position, +} + +impl Debug for FullToken { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_fmt(format_args!("{:?} {:?}", self.token, self.position,)) + } +} + +/// (Column, Line) +#[derive(Clone, Copy, Hash, PartialEq, Eq, Ord)] +pub struct Position(pub u32, pub u32); + +impl Position { + pub fn add(&self, num: u32) -> Position { + Position(self.0 + num, self.1) + } + + pub fn sub(&self, num: u32) -> Position { + Position(self.0 - num, self.1) + } +} + +impl PartialOrd for Position { + fn partial_cmp(&self, other: &Self) -> Option { + match self.1.partial_cmp(&other.1) { + Some(core::cmp::Ordering::Equal) => {} + ord => return ord, + } + self.0.partial_cmp(&other.0) + } +} + +impl Debug for Position { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_fmt(format_args!("Ln {}, Col {}", self.1, self.0 + 1)) + } +} + +pub struct Cursor<'a> { + pub position: Position, + pub char_stream: Chars<'a>, +} + +impl<'a> Cursor<'a> { + pub fn next(&mut self) -> Option { + let next = self.char_stream.next(); + if let Some('\n') = next { + self.position.1 += 1; + self.position.0 = 0; + } + self.position.0 += 1; + next + } + + fn first(&mut self) -> Option { + // `.next()` optimizes better than `.nth(0)` + self.char_stream.clone().next() + } + + #[allow(dead_code)] // Is this actually needed? + fn second(&mut self) -> Option { + // `.next()` optimizes better than `.nth(1)` + let mut stream = self.char_stream.clone(); + stream.next(); + stream.next() + } +} + +/// Take source text and produce a list of [`FullToken`]s from it, ie. +/// tokenizing it. +pub fn tokenize>(to_tokenize: T) -> Result, Error> { + let to_tokenize = to_tokenize.into(); + let mut cursor = Cursor { + char_stream: to_tokenize.chars(), + position: Position(0, 1), + }; + + let mut tokens = Vec::new(); + + while let Some(character) = &cursor.next() { + // Save "current" token first character position + let position = cursor.position.sub(1); + + let variant = match character { + // Whitespace + w if w.is_whitespace() => { + let mut whitespace = String::from(*w); + while let Some(w) = cursor.first() { + if !w.is_whitespace() { + break; + } + whitespace.push(cursor.next().unwrap()); + } + Token::Whitespace(whitespace) + } + // Comments + '-' if cursor.first() == Some('-') => { + cursor.next(); + + let mut comment = String::new(); + while !matches!(cursor.first(), Some('\n') | None) { + if let Some(c) = cursor.next() { + comment.push(c); + } + } + Token::Comment(comment) + } + '\"' => { + let mut value = String::new(); + let mut escape_next = false; + while cursor.first().is_some() + && (cursor.first() != Some(*character) || escape_next) + { + if cursor.first() == Some('\\') && !escape_next { + cursor.next(); // Consume backslash and always add next character + escape_next = true; + } else { + let c = &cursor.next().unwrap(); + if escape_next { + value += &escape_char(&c).to_string(); + } else { + value += &c.to_string(); + } + escape_next = false; + } + } + if cursor.first() == Some(*character) { + cursor.next(); + } else { + return Err(Error::MissingQuotation(position)); + } + match character { + '\"' => Token::StringLit(value), + _ => unreachable!(), + } + } + // "words" + c if c.is_alphabetic() => { + let mut value = character.to_string(); + while let Some(c) = cursor.first() { + if !(c.is_ascii_alphanumeric() || c == '_') { + break; + } + value += &c.to_string(); + cursor.next(); + } + + if let Some(keyword) = Keyword::parse(&value) { + Token::Keyword(keyword) + } else { + Token::Word(value) + } + } + // Decimals + c if DECIMAL_NUMERICS.contains(c) => { + let mut value = NumberType::Decimal(character.to_string()); + let mut numerics = DECIMAL_NUMERICS; + if let Some(second) = cursor.second() { + if cursor.first() == Some('x') + && HEXADECIMAL_NUMERICS + .contains(&second.to_lowercase().next().unwrap_or('.')) + { + cursor.next(); + value = NumberType::Hexadecimal(String::new()); + numerics = HEXADECIMAL_NUMERICS; + } else if cursor.first() == Some('o') + && OCTAL_NUMERICS.contains(&second.to_lowercase().next().unwrap_or('.')) + { + cursor.next(); + value = NumberType::Octal(String::new()); + numerics = OCTAL_NUMERICS; + } else if cursor.first() == Some('b') + && BINARY_NUMERICS.contains(&second.to_lowercase().next().unwrap_or('.')) + { + cursor.next(); + value = NumberType::Binary(String::new()); + numerics = BINARY_NUMERICS; + } + } + while let Some(c) = cursor.first() { + if !numerics.contains(&c.to_lowercase().next().unwrap_or('.')) { + break; + } + value += c; + cursor.next(); + } + match value { + NumberType::Decimal(dec) => Token::DecimalValue(dec), + NumberType::Hexadecimal(hex) => Token::HexadecimalValue(hex), + NumberType::Octal(oct) => Token::OctalValue(oct), + NumberType::Binary(bin) => Token::BinaryValue(bin), + } + } + // Some one-character token + value => Token::Symbol(*value), + }; + + tokens.push(FullToken { + token: variant, + position, + }); + } + + tokens.push(FullToken { + token: Token::Eof, + position: cursor.position, + }); + + Ok(tokens) +} + +fn escape_char(c: &char) -> char { + match c { + 't' => '\t', + 'n' => '\n', + 'r' => '\r', + '0' => '\0', + _ => *c, + } +} + +enum NumberType { + Decimal(String), + Hexadecimal(String), + Octal(String), + Binary(String), +} + +impl AddAssign for NumberType { + fn add_assign(&mut self, rhs: char) { + *self = match self { + NumberType::Decimal(val) => NumberType::Decimal(val.to_owned() + &rhs.to_string()), + NumberType::Hexadecimal(val) => { + NumberType::Hexadecimal(val.to_owned() + &rhs.to_string()) + } + NumberType::Octal(val) => NumberType::Octal(val.to_owned() + &rhs.to_string()), + NumberType::Binary(val) => NumberType::Binary(val.to_owned() + &rhs.to_string()), + }; + } +} + +#[derive(thiserror::Error, Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub enum Error { + #[error("Invalid token '{}' ", .0)] + InvalidToken(char, Position), + #[error("String literal is never finished!")] + MissingQuotation(Position), +} + +impl Error { + pub fn get_position(&self) -> &Position { + match self { + Error::InvalidToken(_, pos) => pos, + Error::MissingQuotation(pos) => pos, + } + } +} diff --git a/src/token_stream/mod.rs b/src/token_stream/mod.rs new file mode 100644 index 0000000..9732814 --- /dev/null +++ b/src/token_stream/mod.rs @@ -0,0 +1,459 @@ +//! Contains relevant code for parsing tokens received from +//! Lexing/Tokenizing-stage. + +use std::{cell::RefCell, path::PathBuf, rc::Rc}; + +pub mod lexer; + +use lexer::{FullToken, Token}; + +use crate::token_stream::lexer::Position; + +pub trait Parse +where + Self: std::marker::Sized, +{ + fn parse(stream: TokenStream) -> Result; +} + +/// Utility struct that is able to parse [`FullToken`]s while being +/// failure-resistance in that it can backtrack easily, and is able to keep +/// track of parsed Token-ranges easily. +pub struct TokenStream<'a, 'b> { + pub file_path: &'a PathBuf, + ref_position: Option<&'b mut usize>, + tokens: &'a [FullToken], + errors: Rc>>, + pub position: usize, +} + +impl<'a, 'b> TokenStream<'a, 'b> { + pub fn from(file_path: &'a PathBuf, tokens: &'a [FullToken]) -> Self { + TokenStream { + file_path, + ref_position: None, + tokens, + errors: Rc::new(RefCell::new(Vec::new())), + position: 0, + } + } + + pub fn get_position(&self) -> Option { + self.next_token(self.position).1.map(|v| v.position) + } + + /// Returns a parse-error with the given help-string + pub fn parse_err>(&mut self, error: T) -> TokenStreamError { + TokenStreamError::ParseError( + error.into(), + TokenRange { + start: self.position - 1, + end: self.position - 1, + }, + ) + } + + /// Returns expected-error for the next token in-line. Useful in conjunction + /// with [`TokenStream::peek`] + pub fn expected_err>(&mut self, expected: T) -> TokenStreamError { + let next_token = self.previous().unwrap_or(Token::Eof); + TokenStreamError::Expected( + expected.into(), + next_token, + TokenRange { + start: self.position - self.position.min(1), + end: self.position - self.position.min(1), + }, + ) + } + + /// Returns expected-error for the next token in-line. Useful in conjunction + /// with [`TokenStream::peek`] + pub fn expected_err_nonfatal>(&mut self, expected: T) { + let err = self.expected_err(expected); + self.errors.borrow_mut().push(err); + } + + /// Returns expected-error for the previous token that was already consumed. + /// Useful in conjunction with [`TokenStream::next`] + pub fn expecting_err>(&mut self, expected: T) -> TokenStreamError { + let next_token = self.peek().unwrap_or(Token::Eof); + let pos = self.next_token(self.position).0; + TokenStreamError::Expected( + expected.into(), + next_token, + TokenRange { + start: pos, + end: pos, + }, + ) + } + + /// Returns expected-error for the previous token that was already consumed. + /// Useful in conjunction with [`TokenStream::next`] + pub fn expecting_err_nonfatal>(&mut self, expected: T) { + let err = self.expecting_err(expected); + self.errors.borrow_mut().push(err); + } + + pub fn expect(&mut self, token: Token) -> Result<(), TokenStreamError> { + if let (pos, Some(peeked)) = self.next_token(self.position) { + if token == peeked.token { + self.position = pos + 1; + Ok(()) + } else { + Err(self.expecting_err(token)) + } + } else { + Err(self.expecting_err(token)) + } + } + + pub fn expect_word(&mut self, text: String) -> Result<(), TokenStreamError> { + if let Some(Token::Word(inner)) = self.next() { + if text == inner { + Ok(()) + } else { + Err(self.expected_err(text)) + } + } else { + Err(self.expected_err(text)) + } + } + + pub fn expect_symbol(&mut self, symbol: char) -> Result<(), TokenStreamError> { + if let Some(Token::Symbol(inner)) = self.next() { + if symbol == inner { + Ok(()) + } else { + Err(self.expected_err(symbol.to_string())) + } + } else { + Err(self.expected_err(symbol.to_string())) + } + } + + pub fn find_documentation>(&mut self, comment_prefix: T) -> Vec { + let mut from = self.position; + let prefix = comment_prefix.into(); + let mut curr_doc = None; + let mut documentation = Vec::new(); + while let Some(token) = self.tokens.get(from) { + if matches!(token.token, Token::Whitespace(_) | Token::Comment(_)) { + from += 1; + if let Token::Comment(doctext) = &token.token { + if doctext.starts_with(&prefix) { + curr_doc = Some( + match curr_doc { + Some(t) => t + " ", + None => String::new(), + } + &doctext.replacen("/", "", 1).trim(), + ); + } else if let Some(doctext) = curr_doc.take() { + // Doctext was cut by comments + documentation.push(doctext); + } + } + } else { + break; + } + } + if let Some(doctext) = curr_doc.take() { + // Doctext was cut by comments + documentation.push(doctext); + } + documentation + } + + pub fn expect_nonfatal(&mut self, token: Token) -> Result<(), ()> { + if let (pos, Some(peeked)) = self.next_token(self.position) { + if token == peeked.token { + self.position = pos + 1; + Ok(()) + } else { + self.expecting_err_nonfatal(token); + Err(()) + } + } else { + self.expecting_err_nonfatal(token); + Err(()) + } + } + + pub fn next(&mut self) -> Option { + let (position, token) = self.next_token(self.position); + self.position = position + 1; + token.map(|t| t.token.clone()) + } + + pub fn previous(&mut self) -> Option { + let (_, token) = self.previous_token(self.position); + token.map(|t| t.token.clone()) + } + + pub fn peek(&mut self) -> Option { + let (_, token) = self.next_token(self.position); + token.map(|t| t.token.clone()) + } + + pub fn peek2(&mut self) -> Option { + let (pos2, _) = self.next_token(self.position); + let (_, token) = self.next_token(pos2 + 1); + token.map(|t| t.token.clone()) + } + + pub fn peek3(&mut self) -> Option { + let (pos2, _) = self.next_token(self.position); + let (_, token) = self.next_token(pos2 + 1); + token.map(|t| t.token.clone()) + } + + /// Parse the next value of trait Parse. If the parse succeeded, the related + /// tokens are consumed, otherwise token stream does not advance. + /// + /// Parsetime-error is returned on failure. + pub fn parse(&mut self) -> Result { + let (res, pos) = self.parse_meta()?; + self.position = pos; + Ok(res) + } + + /// Parse the next item with Parse-trait (Same as [`TokenStream::parse`]) + /// without consuming the related tokens, essentially only peeking. + pub fn parse_peek(&mut self) -> Result { + self.parse_meta().map(|(res, _)| res) + } + + /// Parse the next item with Parse-trait, also mapping it with the given + /// function. The token-stream is only consumed, if the inner function + /// retuns an Ok. + #[allow(dead_code)] + pub fn parse_map( + &mut self, + inner: F, + ) -> Result + where + F: Fn(T) -> Result, + { + let (res, pos) = self.parse_meta::()?; + match inner(res) { + Ok(mapped) => { + self.position = pos; + Ok(mapped) + } + Err(e) => Err(e), + } + } + + /// Parses the item with Parse if the condition specified by the + /// lambda-function is passed. Errors returned from this should not be + /// passed to the end-user. + pub fn parse_if( + &mut self, + inner: F, + ) -> Result + where + F: Fn(&T) -> bool, + { + let (res, pos) = self.parse_meta::()?; + if inner(&res) { + self.position = pos; + Ok(res) + } else { + Err(TokenStreamError::IfFailed) + } + } + + /// Parse the next item with Parse-trait. If successful, returning the + /// parsed item and the new position of the TokenStream. Failing, returning + /// parse-error. + /// + /// Used for [`TokenStream::parse`] and [`TokenStream::parse_peek`] + fn parse_meta(&mut self) -> Result<(T, usize), TokenStreamError> { + let mut ref_pos = self.position; + + let position = self.position; + let clone = TokenStream { + file_path: self.file_path, + ref_position: Some(&mut ref_pos), + tokens: self.tokens, + errors: self.errors.clone(), + position, + }; + + match T::parse(clone) { + Ok(res) => { + let new_pos = ref_pos.max(self.position); + Ok((res, new_pos)) + } + Err(e) => Err(e), + } + } + + pub fn parse_with(&mut self, fun: T) -> Result + where + T: FnOnce(TokenStream) -> Result, + { + let mut ref_pos = self.position; + + let position = self.position; + let clone = TokenStream { + file_path: self.file_path, + ref_position: Some(&mut ref_pos), + tokens: self.tokens, + errors: self.errors.clone(), + position, + }; + + match fun(clone) { + Ok(res) => { + self.position = ref_pos.max(self.position); + Ok(res) + } + Err(e) => Err(e), + } + } + + pub fn get_range(&self) -> TokenRange { + self.ref_position + .as_ref() + .map(|ref_pos| TokenRange { + start: **ref_pos, + end: self.position, + }) + .unwrap_or(TokenRange { + start: 0, + end: self.position, + }) + } + + /// Gets range from the previous position to the current. Useful when using + /// with [`TokenStream::next`] + pub fn get_range_prev(&self) -> Option { + self.ref_position.as_ref().map(|ref_pos| TokenRange { + start: **ref_pos, + end: self.previous_token(self.position).0, + }) + } + + /// Gets range of the previous token only. + pub fn get_range_prev_curr(&self) -> Option { + Some(TokenRange { + start: self.previous_token(self.position).0, + end: self.previous_token(self.position).0, + }) + } + + fn previous_token(&self, mut from: usize) -> (usize, Option<&'a FullToken>) { + if from == 0 { + return (from, None); + } + from -= 1; + while let Some(token) = self.tokens.get(from) { + if matches!(token.token, Token::Whitespace(_) | Token::Comment(_)) { + from -= 1; + } else { + break; + } + } + (from, self.tokens.get(from)) + } + + fn next_token(&self, mut from: usize) -> (usize, Option<&'a FullToken>) { + while let Some(token) = self.tokens.get(from) { + if matches!(token.token, Token::Whitespace(_) | Token::Comment(_)) { + from += 1; + } else { + break; + } + } + (from, self.tokens.get(from)) + } + + pub fn errors(&self) -> Vec { + self.errors.borrow().clone().clone() + } + + pub fn next_is_whitespace(&self) -> bool { + if let Some(token) = self.tokens.get(self.position) { + if let Token::Whitespace(_) = token.token { + true + } else { + false + } + } else { + true + } + } +} + +impl Drop for TokenStream<'_, '_> { + fn drop(&mut self) { + if let Some(ref_pos) = &mut self.ref_position { + **ref_pos = self.position; + } + } +} + +/// Index-range that can be used with the original array of [`FullToken`]s to +/// retrieve the precise location of a failure. +#[derive(Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct TokenRange { + pub start: usize, + pub end: usize, +} + +impl std::fmt::Debug for TokenRange { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Tokens[{} - {}]", self.start, self.end) + } +} + +impl std::ops::Add for TokenRange { + type Output = TokenRange; + + fn add(self, rhs: Self) -> Self::Output { + TokenRange { + start: self.start.min(rhs.start).min(rhs.end), + end: self.end.max(rhs.end).max(rhs.start), + } + } +} + +impl std::iter::Sum for TokenRange { + fn sum>(mut iter: I) -> Self { + let mut start = iter.next().unwrap_or(Default::default()); + for item in iter { + start = start + item; + } + start + } +} + +#[derive(thiserror::Error, Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub enum TokenStreamError { + #[error("Expected {} got \"{:?}\"", .0, .1)] + Expected(String, Token, TokenRange), + #[error("Error Parsing: {0}")] + ParseError(String, TokenRange), + #[error("Source file contains no tokens")] + FileEmpty, + /// Only use this error in situations where the error never ends up for the end-user! + #[error("Undefined error, should only be used in situations where the error is not emitted!")] + Undefined, + /// Condition failed for the parse-if + #[error("Condition failed for parse-if. Should never be returned to end-user.")] + IfFailed, +} + +impl TokenStreamError { + pub fn get_range(&self) -> Option<&TokenRange> { + match self { + TokenStreamError::Expected(_, _, pos) => Some(pos), + TokenStreamError::FileEmpty => None, + TokenStreamError::Undefined => None, + TokenStreamError::IfFailed => None, + TokenStreamError::ParseError(_, range) => Some(range), + } + } +}