Add very simple parsing

This commit is contained in:
Sofia 2026-03-14 15:55:00 +02:00
commit 044d2b9d25
8 changed files with 1163 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/target

65
Cargo.lock generated Normal file
View File

@ -0,0 +1,65 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 4
[[package]]
name = "ferrite-lua"
version = "0.1.0"
dependencies = [
"thiserror",
]
[[package]]
name = "proc-macro2"
version = "1.0.106"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.45"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924"
dependencies = [
"proc-macro2",
]
[[package]]
name = "syn"
version = "2.0.117"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "thiserror"
version = "1.0.69"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.69"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "unicode-ident"
version = "1.0.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"

8
Cargo.toml Normal file
View File

@ -0,0 +1,8 @@
[package]
name = "ferrite-lua"
version = "0.1.0"
edition = "2024"
[dependencies]
# Derive-macro support for ergonomic error enums (std::error::Error impls).
thiserror = "1.0.44"

7
examples/test.lua Normal file
View File

@ -0,0 +1,7 @@
-- Example input for the parser (embedded by src/main.rs via include_str!).
-- NOTE: 'a' (and 'b' in the commented-out code) are free/global names here.
function max ()
	local m = a
	-- if b > a then
	-- m = b
	-- end
	return m
end

237
src/ast.rs Normal file
View File

@ -0,0 +1,237 @@
use std::{fmt::Debug, hash::Hash, ops::Add, path::PathBuf};
use crate::token_stream::{
Parse, TokenRange, TokenStream, TokenStreamError,
lexer::{Keyword, Position, Token},
};
/// An AST node of kind `T` together with its source [`Metadata`].
#[derive(Debug, Clone)]
pub struct Node<T: Clone + Debug> {
    pub kind: T,
    pub meta: Metadata,
}

// Equality and hashing intentionally ignore `meta`, so nodes compare by
// their syntactic content only, regardless of where they appeared in the
// source file.
impl<T: Clone + Debug + PartialEq> PartialEq for Node<T> {
    fn eq(&self, other: &Self) -> bool {
        self.kind == other.kind
    }
}

impl<T: Clone + Debug + PartialEq> Eq for Node<T> {}

impl<T: Clone + Debug + PartialEq + Hash> Hash for Node<T> {
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        self.kind.hash(state);
    }
}

impl<T: Clone + Debug> Node<T> {
    /// Wraps `kind` with empty placeholder metadata.
    pub fn empty(kind: T) -> Node<T> {
        Node {
            kind,
            meta: Metadata::empty(),
        }
    }
    /// Creates a node of another kind that reuses this node's metadata.
    pub fn with<OtherT: Clone + Debug>(&self, other: OtherT) -> Node<OtherT> {
        Node {
            kind: other,
            meta: self.meta.clone(),
        }
    }
}
/// Source information attached to every parsed [`Node`].
#[derive(Clone, PartialEq, Hash, Eq)]
pub struct Metadata {
    /// Joined documentation-comment text found directly before the item.
    pub documentation: Option<String>,
    /// Range of token indices the item was parsed from.
    pub token_range: TokenRange,
    /// Source position of the item's first token.
    pub position: Position,
    /// File the item was parsed from.
    pub file_path: PathBuf,
}

impl Metadata {
    /// Placeholder metadata for synthesized nodes (see [`Node::empty`]).
    pub fn empty() -> Metadata {
        Metadata {
            documentation: None,
            token_range: Default::default(),
            position: Position(0, 0),
            file_path: PathBuf::new(),
        }
    }
}
// Combining two metadata spans keeps the left-hand side's documentation,
// position and file path, and extends the token range to the right-hand
// side's end.
// NOTE(review): `rhs.documentation` is silently discarded — confirm intended.
impl Add<Metadata> for Metadata {
    type Output = Metadata;
    fn add(self, rhs: Metadata) -> Self::Output {
        Metadata {
            documentation: self.documentation,
            token_range: TokenRange {
                start: self.token_range.start,
                end: rhs.token_range.end,
            },
            position: self.position,
            file_path: self.file_path,
        }
    }
}

impl Debug for Metadata {
    // Keep debug output compact: only the token range is printed.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{:?}", self.token_range)
    }
}
impl<T: Parse + Clone + Debug> Parse for Node<T> {
fn parse(mut stream: TokenStream) -> Result<Self, TokenStreamError> {
let position = stream
.get_position()
.ok_or(stream.expecting_err(std::any::type_name::<T>()))?;
let documentation = stream.find_documentation("/").into_iter().last();
Ok(Node {
kind: stream.parse()?,
meta: Metadata {
documentation,
token_range: stream.get_range(),
position,
file_path: stream.file_path.clone(),
},
})
}
}
impl Metadata {
    /// Collects the documentation comment and the source position for the
    /// item that is about to be parsed; errors when the stream is exhausted.
    fn pre(
        stream: &mut TokenStream,
        expecting: &str,
    ) -> Result<(Option<String>, Position), TokenStreamError> {
        let documentation = stream.find_documentation("/").into_iter().last();
        let position = match stream.get_position() {
            Some(position) => position,
            None => return Err(stream.expecting_err(expecting)),
        };
        Ok((documentation, position))
    }

    /// Combines the values gathered by [`Metadata::pre`] with the token
    /// range covered by the finished parse.
    fn produce(
        stream: &mut TokenStream,
        (documentation, position): (Option<String>, Position),
    ) -> Metadata {
        let token_range = stream.get_range();
        let file_path = stream.file_path.clone();
        Metadata {
            documentation,
            token_range,
            position,
            file_path,
        }
    }
}
/// A parsed `function … end` definition.
#[derive(Debug, Clone)]
pub struct Function {
    /// Function name; `None` for anonymous functions.
    pub name: Option<Node<String>>,
    /// Parameter names, in declaration order.
    pub params: Vec<Node<String>>,
    pub block: Block,
    pub meta: Metadata,
}

impl Parse for Function {
    /// Parses `function [name] ( [param {, param}] ) <block> end`.
    fn parse(mut stream: TokenStream) -> Result<Self, TokenStreamError> {
        let pre = Metadata::pre(&mut stream, "function")?;
        stream.expect(Token::Keyword(Keyword::Function))?;
        let name = stream.parse::<Node<String>>().ok();
        stream.expect(Token::Symbol('('))?;
        // Parse a (possibly empty) comma-separated parameter list.
        // Previously parameters were never parsed at all, so any function
        // with parameters failed at the `)` expectation.
        let mut params = Vec::new();
        while stream.peek() != Some(Token::Symbol(')')) {
            params.push(stream.parse::<Node<String>>()?);
            // `expect` does not advance on mismatch, so a missing comma
            // simply ends the list (no trailing comma supported).
            if stream.expect(Token::Symbol(',')).is_err() {
                break;
            }
        }
        stream.expect(Token::Symbol(')'))?;
        let block = stream.parse()?;
        stream.expect(Token::Keyword(Keyword::End))?;
        Ok(Function {
            name,
            params,
            block,
            meta: Metadata::produce(&mut stream, pre),
        })
    }
}
impl Parse for String {
    /// Parses a single identifier (a [`Token::Word`]) into its text.
    fn parse(mut stream: TokenStream) -> Result<Self, TokenStreamError> {
        match stream.next() {
            Some(Token::Word(text)) => Ok(text),
            _ => Err(stream.expected_err("identifier")),
        }
    }
}
/// A sequence of statements, e.g. a function body.
#[derive(Debug, Clone)]
pub struct Block {
    pub statements: Vec<Node<Statement>>,
    pub meta: Metadata,
}

impl Parse for Block {
    /// Parses statements until the `end` keyword, which is left in the
    /// stream for the caller to consume. If the input runs out before
    /// `end`, the failing statement parse propagates the error.
    fn parse(mut stream: TokenStream) -> Result<Self, TokenStreamError> {
        let pre = Metadata::pre(&mut stream, "block")?;
        let mut statements = Vec::new();
        while stream.peek() != Some(Token::Keyword(Keyword::End)) {
            statements.push(stream.parse()?);
        }
        Ok(Block {
            statements,
            meta: Metadata::produce(&mut stream, pre),
        })
    }
}
/// A single statement inside a [`Block`].
#[derive(Debug, Clone)]
pub enum Statement {
    /// `[local] <name> = <expression>`; the kind is `None` for plain
    /// (non-`local`) assignments.
    Assignment(Option<DefinitionKind>, Node<String>, Node<Expression>),
    /// `return <expression>`
    Return(Node<Expression>),
}

impl Parse for Statement {
    /// Parses one statement: `return <expr>`, `local <name> = <expr>`, or a
    /// plain assignment `<name> = <expr>`.
    fn parse(mut stream: TokenStream) -> Result<Self, TokenStreamError> {
        let peeked = stream.peek();
        if peeked == Some(Token::Keyword(Keyword::Return)) {
            stream.next();
            Ok(Statement::Return(stream.parse()?))
        } else if peeked == Some(Token::Keyword(Keyword::Local)) {
            stream.next();
            let name = stream.parse()?;
            stream.expect(Token::Symbol('='))?;
            let expr = stream.parse()?;
            Ok(Statement::Assignment(
                Some(DefinitionKind::Local),
                name,
                expr,
            ))
        } else if matches!(peeked, Some(Token::Word(_))) {
            // Assignment without a declaration keyword. Previously any plain
            // `name = expr` was rejected even though the AST models it as
            // `Assignment(None, ..)`.
            let name = stream.parse()?;
            stream.expect(Token::Symbol('='))?;
            let expr = stream.parse()?;
            Ok(Statement::Assignment(None, name, expr))
        } else {
            Err(stream.expecting_err("statement"))
        }
    }
}
/// How a name was introduced by an assignment.
#[derive(Debug, Clone)]
pub enum DefinitionKind {
    /// Declared with the `local` keyword.
    Local,
    /// Assigned without a declaration keyword.
    Global,
}

/// An expression. Currently only bare name references are supported.
#[derive(Debug, Clone)]
pub enum Expression {
    ValueRef(String),
}

impl Parse for Expression {
    /// Parses an expression; for now this is always a single identifier.
    fn parse(mut stream: TokenStream) -> Result<Self, TokenStreamError> {
        Ok(Expression::ValueRef(stream.parse()?))
    }
}

29
src/main.rs Normal file
View File

@ -0,0 +1,29 @@
use std::path::PathBuf;
use crate::{
ast::Function,
token_stream::{
TokenStream,
lexer::{Token, tokenize},
},
};
mod ast;
mod token_stream;
/// Example source embedded at compile time (path relative to this file).
static TEST: &str = include_str!("../examples/test.lua");

fn main() {
    // NOTE(review): this runtime path is resolved relative to the working
    // directory, unlike the include_str! path above — confirm it points at
    // the right file when used for error reporting.
    let file_path = PathBuf::from("../examples/test.lua");
    let tokens = tokenize(TEST).unwrap();
    let mut stream = TokenStream::from(&file_path, &tokens);
    let mut functions = Vec::new();
    // Parse top-level functions until the explicit end-of-file token.
    while stream.peek() != Some(Token::Eof) {
        functions.push(stream.parse::<Function>().unwrap());
    }
    dbg!(functions);
    println!("Hello, world!");
}

357
src/token_stream/lexer.rs Normal file
View File

@ -0,0 +1,357 @@
use std::{fmt::Debug, ops::AddAssign, str::Chars};
// Digit sets for each supported numeric-literal base. Membership is checked
// against the lowercased character, so hex digits may appear in either case.
static BINARY_NUMERICS: &[char] = &['0', '1'];
static OCTAL_NUMERICS: &[char] = &['0', '1', '2', '3', '4', '5', '6', '7'];
static DECIMAL_NUMERICS: &[char] = &['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'];
static HEXADECIMAL_NUMERICS: &[char] = &[
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f',
];
/// Reserved words recognized by the lexer.
#[derive(Eq, PartialEq, Clone, PartialOrd, Ord, Hash, Debug)]
pub enum Keyword {
    Function,
    End,
    Local,
    Return,
}

impl Keyword {
    /// Returns the keyword matching `from`, or `None` when the word is a
    /// regular identifier.
    pub fn parse(from: &str) -> Option<Keyword> {
        match from {
            "function" => Some(Keyword::Function),
            "end" => Some(Keyword::End),
            "local" => Some(Keyword::Local),
            "return" => Some(Keyword::Return),
            _ => None,
        }
    }
}

// Display (rather than a direct ToString impl, which clippy flags) writes
// the keyword exactly as it appears in source; `to_string()` still works
// through the blanket `impl<T: Display> ToString for T`.
impl std::fmt::Display for Keyword {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let text = match self {
            Keyword::Function => "function",
            Keyword::End => "end",
            Keyword::Local => "local",
            Keyword::Return => "return",
        };
        f.write_str(text)
    }
}
/// A lexical token produced by [`tokenize`].
#[derive(Eq, PartialEq, Clone, PartialOrd, Ord, Hash, Debug)]
pub enum Token {
    /// Word-like-values (identifiers)
    Word(String),
    /// A reserved word, see [`Keyword`]
    Keyword(Keyword),
    /// Number in the decimal base
    DecimalValue(String),
    /// Integer number in the hexadecimal base (stored without the `0x` prefix)
    HexadecimalValue(String),
    /// Integer number in the octal base (stored without the `0o` prefix)
    OctalValue(String),
    /// Integer number in the binary base (stored without the `0b` prefix)
    BinaryValue(String),
    /// Some string literal that was surrounded by "double-quotes".
    StringLit(String),
    /// Special one-character symbol
    Symbol(char),
    /// A run of consecutive whitespace characters
    Whitespace(String),
    /// A `--` comment; text stored without the leading dashes
    Comment(String),
    /// Marks the end of the token list
    Eof,
}
impl From<Token> for String {
    /// Debug-style rendering used for error messages. Note this is the
    /// `Debug` form (e.g. `Word("x")`), not the source text.
    fn from(value: Token) -> Self {
        format!("{:?}", value)
    }
}

impl Token {
    /// Length in bytes of the token's source-text form.
    // NOTE(review): allocates a fresh String on every call via `to_string`.
    pub fn len(&self) -> usize {
        self.to_string().len()
    }
}
// Display (rather than a direct ToString impl, which clippy flags as
// `to_string_trait_impl`) reconstructs the token's source text; the
// blanket `impl<T: Display> ToString for T` keeps `to_string()` working
// for all existing callers (e.g. `Token::len`).
impl std::fmt::Display for Token {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match &self {
            Token::Word(ident) => f.write_str(ident),
            Token::DecimalValue(val) => f.write_str(val),
            Token::HexadecimalValue(val) => write!(f, "0x{}", val),
            Token::OctalValue(val) => write!(f, "0o{}", val),
            Token::BinaryValue(val) => write!(f, "\"{}\"", val).and(Ok(())), // placeholder — see below
            Token::StringLit(lit) => write!(f, "\"{}\"", lit),
            // Eof has no source representation.
            Token::Eof => Ok(()),
            Token::Whitespace(val) => f.write_str(val),
            Token::Comment(val) => write!(f, "--{}", val),
            Token::Symbol(val) => write!(f, "{}", val),
            // Uses `to_string` so this impl does not depend on Keyword
            // implementing Display.
            Token::Keyword(keyword) => f.write_str(&keyword.to_string()),
        }
    }
}
/// A token with a position
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct FullToken {
    pub token: Token,
    /// Source position of the token's first character.
    pub position: Position,
}

impl Debug for FullToken {
    // Compact single-line form: `<token debug> Ln x, Col y`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_fmt(format_args!("{:?} {:?}", self.token, self.position,))
    }
}
/// (Column, Line)
/// Lines start at 1; columns are tracked 0-based internally and shown
/// 1-based by the Debug impl.
#[derive(Clone, Copy, Hash, PartialEq, Eq)]
pub struct Position(pub u32, pub u32);

impl Position {
    /// Returns a copy moved `num` columns to the right.
    pub fn add(&self, num: u32) -> Position {
        Position(self.0 + num, self.1)
    }
    /// Returns a copy moved `num` columns to the left, saturating at 0
    /// instead of underflowing.
    pub fn sub(&self, num: u32) -> Position {
        Position(self.0.saturating_sub(num), self.1)
    }
}

// Ordering: by line first, then by column. The previous version derived
// `Ord` (which compared column first, because of field order) while
// hand-writing `PartialOrd` (line first) — the two disagreed, violating
// the Ord/PartialOrd consistency contract. Both are now manual and agree.
impl Ord for Position {
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        self.1.cmp(&other.1).then_with(|| self.0.cmp(&other.0))
    }
}

impl PartialOrd for Position {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}

impl Debug for Position {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_fmt(format_args!("Ln {}, Col {}", self.1, self.0 + 1))
    }
}
/// Character iterator over the source text that tracks line/column position.
pub struct Cursor<'a> {
    /// Position just past the most recently consumed character.
    pub position: Position,
    pub char_stream: Chars<'a>,
}

impl<'a> Cursor<'a> {
    /// Consumes and returns the next character, updating `position`.
    /// A newline bumps the line and resets the column; the unconditional
    /// increment then counts the newline itself as column 1.
    pub fn next(&mut self) -> Option<char> {
        let next = self.char_stream.next();
        if let Some('\n') = next {
            self.position.1 += 1;
            self.position.0 = 0;
        }
        self.position.0 += 1;
        next
    }
    /// Peeks the next character without consuming anything.
    fn first(&mut self) -> Option<char> {
        // `.next()` optimizes better than `.nth(0)`
        self.char_stream.clone().next()
    }
    /// Peeks the second-next character without consuming anything.
    #[allow(dead_code)] // Is this actually needed?
    fn second(&mut self) -> Option<char> {
        // `.next()` optimizes better than `.nth(1)`
        let mut stream = self.char_stream.clone();
        stream.next();
        stream.next()
    }
}
/// Take source text and produce a list of [`FullToken`]s from it, ie.
/// tokenizing it.
///
/// Whitespace and comments are preserved as tokens of their own so later
/// stages can recover exact source ranges and documentation comments.
/// The returned list always ends with a [`Token::Eof`].
pub fn tokenize<T: Into<String>>(to_tokenize: T) -> Result<Vec<FullToken>, Error> {
    let to_tokenize = to_tokenize.into();
    // Lines are 1-based; the column is bumped by Cursor::next as characters
    // are consumed.
    let mut cursor = Cursor {
        char_stream: to_tokenize.chars(),
        position: Position(0, 1),
    };
    let mut tokens = Vec::new();
    while let Some(character) = &cursor.next() {
        // Save "current" token first character position
        let position = cursor.position.sub(1);
        let variant = match character {
            // Whitespace, collected into a single run
            w if w.is_whitespace() => {
                let mut whitespace = String::from(*w);
                while let Some(w) = cursor.first() {
                    if !w.is_whitespace() {
                        break;
                    }
                    whitespace.push(cursor.next().unwrap());
                }
                Token::Whitespace(whitespace)
            }
            // Comments: `--` up to (but not including) the end of the line
            '-' if cursor.first() == Some('-') => {
                cursor.next();
                let mut comment = String::new();
                while !matches!(cursor.first(), Some('\n') | None) {
                    if let Some(c) = cursor.next() {
                        comment.push(c);
                    }
                }
                Token::Comment(comment)
            }
            // String literals with backslash escapes; an escaped quote does
            // not terminate the literal
            '\"' => {
                let mut value = String::new();
                let mut escape_next = false;
                while cursor.first().is_some()
                    && (cursor.first() != Some(*character) || escape_next)
                {
                    if cursor.first() == Some('\\') && !escape_next {
                        cursor.next(); // Consume backslash and always add next character
                        escape_next = true;
                    } else {
                        let c = &cursor.next().unwrap();
                        if escape_next {
                            value += &escape_char(&c).to_string();
                        } else {
                            value += &c.to_string();
                        }
                        escape_next = false;
                    }
                }
                // The loop stops either at the closing quote or at
                // end-of-input; the latter is an unterminated literal.
                if cursor.first() == Some(*character) {
                    cursor.next();
                } else {
                    return Err(Error::MissingQuotation(position));
                }
                match character {
                    '\"' => Token::StringLit(value),
                    _ => unreachable!(),
                }
            }
            // "words": identifiers, promoted to Keyword when they match one
            c if c.is_alphabetic() => {
                let mut value = character.to_string();
                while let Some(c) = cursor.first() {
                    if !(c.is_ascii_alphanumeric() || c == '_') {
                        break;
                    }
                    value += &c.to_string();
                    cursor.next();
                }
                if let Some(keyword) = Keyword::parse(&value) {
                    Token::Keyword(keyword)
                } else {
                    Token::Word(value)
                }
            }
            // Numeric literals. An `x`/`o`/`b` right after the first digit
            // switches base and discards that first digit.
            // NOTE(review): the base prefix is accepted after ANY leading
            // digit (e.g. `1x2F` lexes as hexadecimal "2F"), not just `0` —
            // confirm this is intended.
            c if DECIMAL_NUMERICS.contains(c) => {
                let mut value = NumberType::Decimal(character.to_string());
                let mut numerics = DECIMAL_NUMERICS;
                if let Some(second) = cursor.second() {
                    if cursor.first() == Some('x')
                        && HEXADECIMAL_NUMERICS
                            .contains(&second.to_lowercase().next().unwrap_or('.'))
                    {
                        cursor.next();
                        value = NumberType::Hexadecimal(String::new());
                        numerics = HEXADECIMAL_NUMERICS;
                    } else if cursor.first() == Some('o')
                        && OCTAL_NUMERICS.contains(&second.to_lowercase().next().unwrap_or('.'))
                    {
                        cursor.next();
                        value = NumberType::Octal(String::new());
                        numerics = OCTAL_NUMERICS;
                    } else if cursor.first() == Some('b')
                        && BINARY_NUMERICS.contains(&second.to_lowercase().next().unwrap_or('.'))
                    {
                        cursor.next();
                        value = NumberType::Binary(String::new());
                        numerics = BINARY_NUMERICS;
                    }
                }
                // `unwrap_or('.')` yields a char that belongs to no digit
                // table, i.e. "stop" for empty lowercase expansions.
                while let Some(c) = cursor.first() {
                    if !numerics.contains(&c.to_lowercase().next().unwrap_or('.')) {
                        break;
                    }
                    value += c;
                    cursor.next();
                }
                match value {
                    NumberType::Decimal(dec) => Token::DecimalValue(dec),
                    NumberType::Hexadecimal(hex) => Token::HexadecimalValue(hex),
                    NumberType::Octal(oct) => Token::OctalValue(oct),
                    NumberType::Binary(bin) => Token::BinaryValue(bin),
                }
            }
            // Some one-character token
            value => Token::Symbol(*value),
        };
        tokens.push(FullToken {
            token: variant,
            position,
        });
    }
    // Terminate with an explicit Eof so the parser can detect the end.
    tokens.push(FullToken {
        token: Token::Eof,
        position: cursor.position,
    });
    Ok(tokens)
}
/// Maps the character following a backslash in a string literal to the
/// control character it stands for; unrecognized escapes map to themselves.
fn escape_char(c: &char) -> char {
    match *c {
        't' => '\t',
        'n' => '\n',
        'r' => '\r',
        '0' => '\0',
        other => other,
    }
}
/// Intermediate accumulator for a numeric literal while its base is known
/// but its digits are still being consumed.
enum NumberType {
    Decimal(String),
    Hexadecimal(String),
    Octal(String),
    Binary(String),
}

impl AddAssign<char> for NumberType {
    /// Appends one digit to the literal, whatever its base.
    fn add_assign(&mut self, rhs: char) {
        match self {
            NumberType::Decimal(digits)
            | NumberType::Hexadecimal(digits)
            | NumberType::Octal(digits)
            | NumberType::Binary(digits) => digits.push(rhs),
        }
    }
}
/// Errors that can occur while tokenizing.
#[derive(thiserror::Error, Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub enum Error {
    /// A character that could not start any token.
    // NOTE(review): never constructed by `tokenize` at the moment — unknown
    // characters currently become `Token::Symbol` instead.
    #[error("Invalid token '{}' ", .0)]
    InvalidToken(char, Position),
    /// A string literal reached end-of-input before its closing quote.
    #[error("String literal is never finished!")]
    MissingQuotation(Position),
}

impl Error {
    /// Source position where the error occurred.
    pub fn get_position(&self) -> &Position {
        match self {
            Error::InvalidToken(_, pos) => pos,
            Error::MissingQuotation(pos) => pos,
        }
    }
}

459
src/token_stream/mod.rs Normal file
View File

@ -0,0 +1,459 @@
//! Contains relevant code for parsing tokens received from
//! Lexing/Tokenizing-stage.
use std::{cell::RefCell, path::PathBuf, rc::Rc};
pub mod lexer;
use lexer::{FullToken, Token};
use crate::token_stream::lexer::Position;
/// Types that can be constructed by consuming tokens from a [`TokenStream`].
pub trait Parse
where
    Self: std::marker::Sized,
{
    /// Attempts to parse `Self` from the stream, returning a stream error
    /// on failure.
    fn parse(stream: TokenStream) -> Result<Self, TokenStreamError>;
}
/// Utility struct that is able to parse [`FullToken`]s while being
/// failure-resistant: it can backtrack easily, and keeps track of parsed
/// Token-ranges.
pub struct TokenStream<'a, 'b> {
    /// Path of the source file the tokens were lexed from.
    pub file_path: &'a PathBuf,
    /// Set on child streams; on drop, the child writes its progress back to
    /// the parent's position through this reference.
    ref_position: Option<&'b mut usize>,
    /// Full token list, including whitespace and comment tokens.
    tokens: &'a [FullToken],
    /// Non-fatal errors collected so far, shared with child streams.
    errors: Rc<RefCell<Vec<TokenStreamError>>>,
    /// Index of the next token to consume.
    pub position: usize,
}
impl<'a, 'b> TokenStream<'a, 'b> {
    /// Creates a stream over `tokens`, positioned at the first token.
    pub fn from(file_path: &'a PathBuf, tokens: &'a [FullToken]) -> Self {
        TokenStream {
            file_path,
            ref_position: None,
            tokens,
            errors: Rc::new(RefCell::new(Vec::new())),
            position: 0,
        }
    }

    /// Source position of the next significant token (whitespace/comments
    /// skipped), or `None` when the stream is exhausted.
    pub fn get_position(&self) -> Option<Position> {
        self.next_token(self.position).1.map(|v| v.position)
    }

    /// Returns a parse-error with the given help-string
    pub fn parse_err<T: Into<String>>(&mut self, error: T) -> TokenStreamError {
        // saturating_sub: avoid index underflow when nothing has been
        // consumed yet (position == 0).
        let pos = self.position.saturating_sub(1);
        TokenStreamError::ParseError(
            error.into(),
            TokenRange {
                start: pos,
                end: pos,
            },
        )
    }

    /// Returns expected-error for the previous token that was already
    /// consumed. Useful in conjunction with [`TokenStream::next`].
    /// (The doc comments on this method and `expecting_err` were previously
    /// swapped relative to their implementations.)
    pub fn expected_err<T: Into<String>>(&mut self, expected: T) -> TokenStreamError {
        let next_token = self.previous().unwrap_or(Token::Eof);
        TokenStreamError::Expected(
            expected.into(),
            next_token,
            TokenRange {
                start: self.position - self.position.min(1),
                end: self.position - self.position.min(1),
            },
        )
    }

    /// Non-fatal variant of [`TokenStream::expected_err`]: records the
    /// error and lets parsing continue.
    pub fn expected_err_nonfatal<T: Into<String>>(&mut self, expected: T) {
        let err = self.expected_err(expected);
        self.errors.borrow_mut().push(err);
    }

    /// Returns expected-error for the next token in-line. Useful in
    /// conjunction with [`TokenStream::peek`]
    pub fn expecting_err<T: Into<String>>(&mut self, expected: T) -> TokenStreamError {
        let next_token = self.peek().unwrap_or(Token::Eof);
        let pos = self.next_token(self.position).0;
        TokenStreamError::Expected(
            expected.into(),
            next_token,
            TokenRange {
                start: pos,
                end: pos,
            },
        )
    }

    /// Non-fatal variant of [`TokenStream::expecting_err`]: records the
    /// error and lets parsing continue.
    pub fn expecting_err_nonfatal<T: Into<String>>(&mut self, expected: T) {
        let err = self.expecting_err(expected);
        self.errors.borrow_mut().push(err);
    }

    /// Consumes the next significant token if it equals `token`; on
    /// mismatch returns an error WITHOUT advancing the stream.
    pub fn expect(&mut self, token: Token) -> Result<(), TokenStreamError> {
        if let (pos, Some(peeked)) = self.next_token(self.position) {
            if token == peeked.token {
                self.position = pos + 1;
                Ok(())
            } else {
                Err(self.expecting_err(token))
            }
        } else {
            Err(self.expecting_err(token))
        }
    }

    /// Consumes the next token and checks that it is the word `text`.
    pub fn expect_word(&mut self, text: String) -> Result<(), TokenStreamError> {
        if let Some(Token::Word(inner)) = self.next() {
            if text == inner {
                Ok(())
            } else {
                Err(self.expected_err(text))
            }
        } else {
            Err(self.expected_err(text))
        }
    }

    /// Consumes the next token and checks that it is the symbol `symbol`.
    pub fn expect_symbol(&mut self, symbol: char) -> Result<(), TokenStreamError> {
        if let Some(Token::Symbol(inner)) = self.next() {
            if symbol == inner {
                Ok(())
            } else {
                Err(self.expected_err(symbol.to_string()))
            }
        } else {
            Err(self.expected_err(symbol.to_string()))
        }
    }

    /// Collects documentation comments (comments starting with
    /// `comment_prefix`) directly ahead of the cursor, without consuming
    /// any tokens. Consecutive doc-comments are joined with spaces; a
    /// non-doc comment splits them into separate entries.
    pub fn find_documentation<T: Into<String>>(&mut self, comment_prefix: T) -> Vec<String> {
        let mut from = self.position;
        let prefix = comment_prefix.into();
        let mut curr_doc = None;
        let mut documentation = Vec::new();
        while let Some(token) = self.tokens.get(from) {
            if matches!(token.token, Token::Whitespace(_) | Token::Comment(_)) {
                from += 1;
                if let Token::Comment(doctext) = &token.token {
                    if doctext.starts_with(&prefix) {
                        curr_doc = Some(
                            match curr_doc {
                                Some(t) => t + " ",
                                None => String::new(),
                            } + &doctext.replacen("/", "", 1).trim(),
                        );
                    } else if let Some(doctext) = curr_doc.take() {
                        // Doctext was cut by comments
                        documentation.push(doctext);
                    }
                }
            } else {
                break;
            }
        }
        if let Some(doctext) = curr_doc.take() {
            // Doctext was cut by comments
            documentation.push(doctext);
        }
        documentation
    }

    /// Like [`TokenStream::expect`], but records the failure as a non-fatal
    /// error instead of returning it.
    pub fn expect_nonfatal(&mut self, token: Token) -> Result<(), ()> {
        if let (pos, Some(peeked)) = self.next_token(self.position) {
            if token == peeked.token {
                self.position = pos + 1;
                Ok(())
            } else {
                self.expecting_err_nonfatal(token);
                Err(())
            }
        } else {
            self.expecting_err_nonfatal(token);
            Err(())
        }
    }

    /// Consumes and returns the next significant token.
    pub fn next(&mut self) -> Option<Token> {
        let (position, token) = self.next_token(self.position);
        self.position = position + 1;
        token.map(|t| t.token.clone())
    }

    /// Returns the most recently consumed significant token, without moving.
    pub fn previous(&mut self) -> Option<Token> {
        let (_, token) = self.previous_token(self.position);
        token.map(|t| t.token.clone())
    }

    /// Returns the next significant token without consuming it.
    pub fn peek(&mut self) -> Option<Token> {
        let (_, token) = self.next_token(self.position);
        token.map(|t| t.token.clone())
    }

    /// Returns the second-next significant token without consuming anything.
    pub fn peek2(&mut self) -> Option<Token> {
        let (pos2, _) = self.next_token(self.position);
        let (_, token) = self.next_token(pos2 + 1);
        token.map(|t| t.token.clone())
    }

    /// Returns the third-next significant token without consuming anything.
    pub fn peek3(&mut self) -> Option<Token> {
        // Fixed: this was previously byte-identical to `peek2` and never
        // advanced over a third token.
        let (pos2, _) = self.next_token(self.position);
        let (pos3, _) = self.next_token(pos2 + 1);
        let (_, token) = self.next_token(pos3 + 1);
        token.map(|t| t.token.clone())
    }

    /// Parse the next value of trait Parse. If the parse succeeded, the related
    /// tokens are consumed, otherwise token stream does not advance.
    ///
    /// Parsetime-error is returned on failure.
    pub fn parse<T: Parse + std::fmt::Debug>(&mut self) -> Result<T, TokenStreamError> {
        let (res, pos) = self.parse_meta()?;
        self.position = pos;
        Ok(res)
    }

    /// Parse the next item with Parse-trait (Same as [`TokenStream::parse`])
    /// without consuming the related tokens, essentially only peeking.
    pub fn parse_peek<T: Parse + std::fmt::Debug>(&mut self) -> Result<T, TokenStreamError> {
        self.parse_meta().map(|(res, _)| res)
    }

    /// Parse the next item with Parse-trait, also mapping it with the given
    /// function. The token-stream is only consumed, if the inner function
    /// retuns an Ok.
    #[allow(dead_code)]
    pub fn parse_map<T: Parse + std::fmt::Debug, F, O>(
        &mut self,
        inner: F,
    ) -> Result<O, TokenStreamError>
    where
        F: Fn(T) -> Result<O, TokenStreamError>,
    {
        let (res, pos) = self.parse_meta::<T>()?;
        match inner(res) {
            Ok(mapped) => {
                self.position = pos;
                Ok(mapped)
            }
            Err(e) => Err(e),
        }
    }

    /// Parses the item with Parse if the condition specified by the
    /// lambda-function is passed. Errors returned from this should not be
    /// passed to the end-user.
    pub fn parse_if<T: Parse + std::fmt::Debug, F>(
        &mut self,
        inner: F,
    ) -> Result<T, TokenStreamError>
    where
        F: Fn(&T) -> bool,
    {
        let (res, pos) = self.parse_meta::<T>()?;
        if inner(&res) {
            self.position = pos;
            Ok(res)
        } else {
            Err(TokenStreamError::IfFailed)
        }
    }

    /// Parse the next item with Parse-trait. If successful, returning the
    /// parsed item and the new position of the TokenStream. Failing, returning
    /// parse-error.
    ///
    /// Used for [`TokenStream::parse`] and [`TokenStream::parse_peek`]
    fn parse_meta<T: Parse + std::fmt::Debug>(&mut self) -> Result<(T, usize), TokenStreamError> {
        let mut ref_pos = self.position;
        let position = self.position;
        // Child stream that reports its progress back through `ref_pos`
        // when dropped (see the Drop impl).
        let clone = TokenStream {
            file_path: self.file_path,
            ref_position: Some(&mut ref_pos),
            tokens: self.tokens,
            errors: self.errors.clone(),
            position,
        };
        match T::parse(clone) {
            Ok(res) => {
                let new_pos = ref_pos.max(self.position);
                Ok((res, new_pos))
            }
            Err(e) => Err(e),
        }
    }

    /// Runs `fun` on a child stream; on success the parent advances to the
    /// child's final position, on failure it stays where it was.
    pub fn parse_with<T, U>(&mut self, fun: T) -> Result<U, TokenStreamError>
    where
        T: FnOnce(TokenStream) -> Result<U, TokenStreamError>,
    {
        let mut ref_pos = self.position;
        let position = self.position;
        let clone = TokenStream {
            file_path: self.file_path,
            ref_position: Some(&mut ref_pos),
            tokens: self.tokens,
            errors: self.errors.clone(),
            position,
        };
        match fun(clone) {
            Ok(res) => {
                self.position = ref_pos.max(self.position);
                Ok(res)
            }
            Err(e) => Err(e),
        }
    }

    /// Token range from this (child) stream's starting point up to its
    /// current position; from 0 for a root stream.
    pub fn get_range(&self) -> TokenRange {
        self.ref_position
            .as_ref()
            .map(|ref_pos| TokenRange {
                start: **ref_pos,
                end: self.position,
            })
            .unwrap_or(TokenRange {
                start: 0,
                end: self.position,
            })
    }

    /// Gets range from the previous position to the current. Useful when using
    /// with [`TokenStream::next`]
    pub fn get_range_prev(&self) -> Option<TokenRange> {
        self.ref_position.as_ref().map(|ref_pos| TokenRange {
            start: **ref_pos,
            end: self.previous_token(self.position).0,
        })
    }

    /// Gets range of the previous token only.
    pub fn get_range_prev_curr(&self) -> Option<TokenRange> {
        let (prev, _) = self.previous_token(self.position);
        Some(TokenRange {
            start: prev,
            end: prev,
        })
    }

    /// Finds the nearest significant token strictly before `from`, returning
    /// its index and the token (or `None` when no such token exists).
    fn previous_token(&self, mut from: usize) -> (usize, Option<&'a FullToken>) {
        // Walk backwards over whitespace/comments. Guarding each decrement
        // with `from > 0` fixes an underflow when the file begins with
        // whitespace or a comment.
        while from > 0 {
            from -= 1;
            match self.tokens.get(from) {
                Some(token) if matches!(token.token, Token::Whitespace(_) | Token::Comment(_)) => {}
                other => return (from, other),
            }
        }
        (from, None)
    }

    /// Finds the next significant token at or after `from`, returning its
    /// index and the token (or `None` at end of input).
    fn next_token(&self, mut from: usize) -> (usize, Option<&'a FullToken>) {
        while let Some(token) = self.tokens.get(from) {
            if matches!(token.token, Token::Whitespace(_) | Token::Comment(_)) {
                from += 1;
            } else {
                break;
            }
        }
        (from, self.tokens.get(from))
    }

    /// Returns a snapshot of the non-fatal errors collected so far.
    pub fn errors(&self) -> Vec<TokenStreamError> {
        // Single clone is enough; the previous `.clone().clone()` copied
        // the vector twice.
        self.errors.borrow().clone()
    }

    /// True when the token exactly at the cursor is whitespace, or when the
    /// stream is exhausted.
    pub fn next_is_whitespace(&self) -> bool {
        self.tokens
            .get(self.position)
            .map(|token| matches!(token.token, Token::Whitespace(_)))
            .unwrap_or(true)
    }
}
// Child streams created by `parse_meta`/`parse_with` report how far they
// advanced back to their parent through `ref_position` when they go out of
// scope, letting the parent commit or discard the progress.
impl Drop for TokenStream<'_, '_> {
    fn drop(&mut self) {
        if let Some(ref_pos) = &mut self.ref_position {
            **ref_pos = self.position;
        }
    }
}
/// Index-range that can be used with the original array of [`FullToken`]s to
/// retrieve the precise location of a failure.
#[derive(Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct TokenRange {
    /// Index of the first token in the range.
    pub start: usize,
    /// Index of the last token in the range.
    pub end: usize,
}

impl std::fmt::Debug for TokenRange {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "Tokens[{} - {}]", self.start, self.end)
    }
}
impl std::ops::Add for TokenRange {
    type Output = TokenRange;

    /// Produces a range spanning both operands. Note the original
    /// asymmetry is preserved: `self.end` does not influence the start and
    /// `self.start` does not influence the end.
    fn add(self, rhs: Self) -> Self::Output {
        let lowest = self.start.min(rhs.start.min(rhs.end));
        let highest = self.end.max(rhs.start.max(rhs.end));
        TokenRange {
            start: lowest,
            end: highest,
        }
    }
}
impl std::iter::Sum for TokenRange {
    /// Folds ranges together with `+`, seeding with the first element (or
    /// the default range when the iterator is empty).
    fn sum<I: Iterator<Item = Self>>(mut iter: I) -> Self {
        let seed = iter.next().unwrap_or_default();
        iter.fold(seed, |acc, item| acc + item)
    }
}
/// Errors produced while consuming a [`TokenStream`].
#[derive(thiserror::Error, Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub enum TokenStreamError {
    /// Expected one thing, found another (the `Debug` form of the token).
    #[error("Expected {} got \"{:?}\"", .0, .1)]
    Expected(String, Token, TokenRange),
    /// General parse failure with a help-string.
    #[error("Error Parsing: {0}")]
    ParseError(String, TokenRange),
    #[error("Source file contains no tokens")]
    FileEmpty,
    /// Only use this error in situations where the error never ends up for the end-user!
    #[error("Undefined error, should only be used in situations where the error is not emitted!")]
    Undefined,
    /// Condition failed for the parse-if
    #[error("Condition failed for parse-if. Should never be returned to end-user.")]
    IfFailed,
}
impl TokenStreamError {
pub fn get_range(&self) -> Option<&TokenRange> {
match self {
TokenStreamError::Expected(_, _, pos) => Some(pos),
TokenStreamError::FileEmpty => None,
TokenStreamError::Undefined => None,
TokenStreamError::IfFailed => None,
TokenStreamError::ParseError(_, range) => Some(range),
}
}
}