Add very simple parsing
This commit is contained in:
commit
044d2b9d25
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
/target
|
||||
65
Cargo.lock
generated
Normal file
65
Cargo.lock
generated
Normal file
@ -0,0 +1,65 @@
|
||||
# This file is automatically @generated by Cargo.
|
||||
# It is not intended for manual editing.
|
||||
version = 4
|
||||
|
||||
[[package]]
|
||||
name = "ferrite-lua"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"thiserror",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.106"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
|
||||
dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.45"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.117"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thiserror"
|
||||
version = "1.0.69"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52"
|
||||
dependencies = [
|
||||
"thiserror-impl",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thiserror-impl"
|
||||
version = "1.0.69"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.24"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
|
||||
8
Cargo.toml
Normal file
8
Cargo.toml
Normal file
@ -0,0 +1,8 @@
|
||||
[package]
|
||||
name = "ferrite-lua"
|
||||
version = "0.1.0"
|
||||
edition = "2024"
|
||||
|
||||
[dependencies]
|
||||
## Make it easier to generate errors
|
||||
thiserror = "1.0.44"
|
||||
7
examples/test.lua
Normal file
7
examples/test.lua
Normal file
@ -0,0 +1,7 @@
|
||||
function max ()
|
||||
local m = a
|
||||
-- if b > a then
|
||||
-- m = b
|
||||
-- end
|
||||
return m
|
||||
end
|
||||
237
src/ast.rs
Normal file
237
src/ast.rs
Normal file
@ -0,0 +1,237 @@
|
||||
use std::{fmt::Debug, hash::Hash, ops::Add, path::PathBuf};
|
||||
|
||||
use crate::token_stream::{
|
||||
Parse, TokenRange, TokenStream, TokenStreamError,
|
||||
lexer::{Keyword, Position, Token},
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Node<T: Clone + Debug> {
|
||||
pub kind: T,
|
||||
pub meta: Metadata,
|
||||
}
|
||||
|
||||
impl<T: Clone + Debug + PartialEq> PartialEq for Node<T> {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.kind == other.kind
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Clone + Debug + PartialEq> Eq for Node<T> {}
|
||||
|
||||
impl<T: Clone + Debug + PartialEq + Hash> Hash for Node<T> {
|
||||
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
|
||||
self.kind.hash(state);
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Clone + Debug> Node<T> {
|
||||
pub fn empty(kind: T) -> Node<T> {
|
||||
Node {
|
||||
kind,
|
||||
meta: Metadata::empty(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn with<OtherT: Clone + Debug>(&self, other: OtherT) -> Node<OtherT> {
|
||||
Node {
|
||||
kind: other,
|
||||
meta: self.meta.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, PartialEq, Hash, Eq)]
|
||||
pub struct Metadata {
|
||||
pub documentation: Option<String>,
|
||||
pub token_range: TokenRange,
|
||||
pub position: Position,
|
||||
pub file_path: PathBuf,
|
||||
}
|
||||
|
||||
impl Metadata {
|
||||
pub fn empty() -> Metadata {
|
||||
Metadata {
|
||||
documentation: None,
|
||||
token_range: Default::default(),
|
||||
position: Position(0, 0),
|
||||
file_path: PathBuf::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Add<Metadata> for Metadata {
|
||||
type Output = Metadata;
|
||||
|
||||
fn add(self, rhs: Metadata) -> Self::Output {
|
||||
Metadata {
|
||||
documentation: self.documentation,
|
||||
token_range: TokenRange {
|
||||
start: self.token_range.start,
|
||||
end: rhs.token_range.end,
|
||||
},
|
||||
position: self.position,
|
||||
file_path: self.file_path,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Debug for Metadata {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{:?}", self.token_range)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Parse + Clone + Debug> Parse for Node<T> {
|
||||
fn parse(mut stream: TokenStream) -> Result<Self, TokenStreamError> {
|
||||
let position = stream
|
||||
.get_position()
|
||||
.ok_or(stream.expecting_err(std::any::type_name::<T>()))?;
|
||||
let documentation = stream.find_documentation("/").into_iter().last();
|
||||
Ok(Node {
|
||||
kind: stream.parse()?,
|
||||
meta: Metadata {
|
||||
documentation,
|
||||
token_range: stream.get_range(),
|
||||
position,
|
||||
file_path: stream.file_path.clone(),
|
||||
},
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl Metadata {
|
||||
fn pre(
|
||||
stream: &mut TokenStream,
|
||||
expecting: &str,
|
||||
) -> Result<(Option<String>, Position), TokenStreamError> {
|
||||
Ok((
|
||||
stream.find_documentation("/").into_iter().last(),
|
||||
stream
|
||||
.get_position()
|
||||
.ok_or(stream.expecting_err(expecting))?,
|
||||
))
|
||||
}
|
||||
|
||||
fn produce(
|
||||
stream: &mut TokenStream,
|
||||
(documentation, position): (Option<String>, Position),
|
||||
) -> Metadata {
|
||||
Metadata {
|
||||
documentation: documentation,
|
||||
token_range: stream.get_range(),
|
||||
position,
|
||||
file_path: stream.file_path.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Function {
|
||||
pub name: Option<Node<String>>,
|
||||
pub params: Vec<Node<String>>,
|
||||
pub block: Block,
|
||||
pub meta: Metadata,
|
||||
}
|
||||
|
||||
impl Parse for Function {
|
||||
fn parse(mut stream: TokenStream) -> Result<Self, TokenStreamError> {
|
||||
let pre = Metadata::pre(&mut stream, "function")?;
|
||||
|
||||
stream.expect(Token::Keyword(Keyword::Function))?;
|
||||
let name = stream.parse::<Node<String>>().ok();
|
||||
stream.expect(Token::Symbol('('))?;
|
||||
|
||||
let params = Vec::new();
|
||||
|
||||
stream.expect(Token::Symbol(')'))?;
|
||||
|
||||
let block = stream.parse()?;
|
||||
|
||||
stream.expect(Token::Keyword(Keyword::End))?;
|
||||
|
||||
Ok(Function {
|
||||
name,
|
||||
params,
|
||||
block,
|
||||
meta: Metadata::produce(&mut stream, pre),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl Parse for String {
|
||||
fn parse(mut stream: TokenStream) -> Result<Self, TokenStreamError> {
|
||||
if let Some(Token::Word(text)) = stream.next() {
|
||||
Ok(text)
|
||||
} else {
|
||||
Err(stream.expected_err("identifier"))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Block {
|
||||
pub statements: Vec<Node<Statement>>,
|
||||
pub meta: Metadata,
|
||||
}
|
||||
|
||||
impl Parse for Block {
|
||||
fn parse(mut stream: TokenStream) -> Result<Self, TokenStreamError> {
|
||||
let pre = Metadata::pre(&mut stream, "block")?;
|
||||
let mut statements = Vec::new();
|
||||
|
||||
while stream.peek() != Some(Token::Keyword(Keyword::End)) {
|
||||
statements.push(stream.parse()?);
|
||||
}
|
||||
|
||||
Ok(Block {
|
||||
statements,
|
||||
meta: Metadata::produce(&mut stream, pre),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum Statement {
|
||||
Assignment(Option<DefinitionKind>, Node<String>, Node<Expression>),
|
||||
Return(Node<Expression>),
|
||||
}
|
||||
|
||||
impl Parse for Statement {
|
||||
fn parse(mut stream: TokenStream) -> Result<Self, TokenStreamError> {
|
||||
let peeked = stream.peek();
|
||||
if peeked == Some(Token::Keyword(Keyword::Return)) {
|
||||
stream.next();
|
||||
Ok(Statement::Return(stream.parse()?))
|
||||
} else if peeked == Some(Token::Keyword(Keyword::Local)) {
|
||||
stream.next();
|
||||
let name = stream.parse()?;
|
||||
stream.expect(Token::Symbol('='))?;
|
||||
let expr = stream.parse()?;
|
||||
Ok(Statement::Assignment(
|
||||
Some(DefinitionKind::Local),
|
||||
name,
|
||||
expr,
|
||||
))
|
||||
} else {
|
||||
Err(stream.expecting_err("statement"))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum DefinitionKind {
|
||||
Local,
|
||||
Global,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum Expression {
|
||||
ValueRef(String),
|
||||
}
|
||||
|
||||
impl Parse for Expression {
|
||||
fn parse(mut stream: TokenStream) -> Result<Self, TokenStreamError> {
|
||||
Ok(Expression::ValueRef(stream.parse()?))
|
||||
}
|
||||
}
|
||||
29
src/main.rs
Normal file
29
src/main.rs
Normal file
@ -0,0 +1,29 @@
|
||||
use std::path::PathBuf;
|
||||
|
||||
use crate::{
|
||||
ast::Function,
|
||||
token_stream::{
|
||||
TokenStream,
|
||||
lexer::{Token, tokenize},
|
||||
},
|
||||
};
|
||||
|
||||
mod ast;
|
||||
mod token_stream;
|
||||
|
||||
static TEST: &str = include_str!("../examples/test.lua");
|
||||
|
||||
fn main() {
|
||||
let file_path = PathBuf::from("../examples/test.lua");
|
||||
let tokens = tokenize(TEST).unwrap();
|
||||
let mut stream = TokenStream::from(&file_path, &tokens);
|
||||
|
||||
let mut functions = Vec::new();
|
||||
while stream.peek() != Some(Token::Eof) {
|
||||
functions.push(stream.parse::<Function>().unwrap());
|
||||
}
|
||||
|
||||
dbg!(functions);
|
||||
|
||||
println!("Hello, world!");
|
||||
}
|
||||
357
src/token_stream/lexer.rs
Normal file
357
src/token_stream/lexer.rs
Normal file
@ -0,0 +1,357 @@
|
||||
use std::{fmt::Debug, ops::AddAssign, str::Chars};
|
||||
|
||||
static BINARY_NUMERICS: &[char] = &['0', '1'];
|
||||
static OCTAL_NUMERICS: &[char] = &['0', '1', '2', '3', '4', '5', '6', '7'];
|
||||
static DECIMAL_NUMERICS: &[char] = &['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'];
|
||||
static HEXADECIMAL_NUMERICS: &[char] = &[
|
||||
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f',
|
||||
];
|
||||
|
||||
/// Reserved words of the language.
#[derive(Eq, PartialEq, Clone, PartialOrd, Ord, Hash, Debug)]
pub enum Keyword {
    Function,
    End,
    Local,
    Return,
}

impl Keyword {
    /// Interprets `from` as a keyword, returning `None` when the text is not
    /// a reserved word.
    pub fn parse(from: &str) -> Option<Keyword> {
        // Direct match instead of the previous `_ => None?` inside a
        // `Some(match …)` — same behavior, clearer intent.
        match from {
            "function" => Some(Keyword::Function),
            "end" => Some(Keyword::End),
            "local" => Some(Keyword::Local),
            "return" => Some(Keyword::Return),
            _ => None,
        }
    }
}

// Implementing `Display` (instead of a manual `ToString`, which clippy
// flags) keeps `.to_string()` available through the blanket impl while also
// making the type usable in `format!`/`write!`.
impl std::fmt::Display for Keyword {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(match self {
            Keyword::Function => "function",
            Keyword::End => "end",
            Keyword::Local => "local",
            Keyword::Return => "return",
        })
    }
}

#[derive(Eq, PartialEq, Clone, PartialOrd, Ord, Hash, Debug)]
pub enum Token {
    /// Word-like-values
    Word(String),
    Keyword(Keyword),
    /// Number in the decimal base
    DecimalValue(String),
    /// Integer number in the hexadecimal base
    HexadecimalValue(String),
    /// Integer number in the octal base
    OctalValue(String),
    /// Integer number in the binary base
    BinaryValue(String),
    /// Some string literal that was surrounded by "double-quotes".
    StringLit(String),

    /// Special one-character symbol
    Symbol(char),

    Whitespace(String),
    Comment(String),
    Eof,
}

impl From<Token> for String {
    /// Debug-style rendering used in error messages (e.g. `Symbol('+')`).
    fn from(value: Token) -> Self {
        format!("{:?}", value)
    }
}

impl Token {
    /// Length in bytes of the token's source representation.
    pub fn len(&self) -> usize {
        self.to_string().len()
    }

    /// True when the source representation is empty (only [`Token::Eof`]).
    /// Added alongside `len` per clippy's `len_without_is_empty`.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }
}

/// Renders the token back as it would appear in source text.
impl std::fmt::Display for Token {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Token::Word(ident) => f.write_str(ident),
            Token::DecimalValue(val) => f.write_str(val),
            Token::HexadecimalValue(val) => write!(f, "0x{}", val),
            Token::OctalValue(val) => write!(f, "0o{}", val),
            Token::BinaryValue(val) => write!(f, "0b{}", val),
            Token::StringLit(lit) => write!(f, "\"{}\"", lit),
            Token::Eof => Ok(()),
            Token::Whitespace(val) => f.write_str(val),
            Token::Comment(val) => write!(f, "--{}", val),
            Token::Symbol(val) => write!(f, "{}", val),
            Token::Keyword(keyword) => write!(f, "{}", keyword),
        }
    }
}
|
||||
|
||||
/// A token with a position
|
||||
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub struct FullToken {
|
||||
pub token: Token,
|
||||
pub position: Position,
|
||||
}
|
||||
|
||||
impl Debug for FullToken {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.write_fmt(format_args!("{:?} {:?}", self.token, self.position,))
|
||||
}
|
||||
}
|
||||
|
||||
/// (Column, Line) — column is stored 0-based, line 1-based.
#[derive(Clone, Copy, Hash, PartialEq, Eq)]
pub struct Position(pub u32, pub u32);

impl Position {
    /// Returns a position `num` columns to the right on the same line.
    pub fn add(&self, num: u32) -> Position {
        Position(self.0 + num, self.1)
    }

    /// Returns a position `num` columns to the left on the same line,
    /// clamping at column 0 instead of panicking on underflow.
    pub fn sub(&self, num: u32) -> Position {
        Position(self.0.saturating_sub(num), self.1)
    }
}

// BUG FIX: `Ord` used to be *derived*, which compares the column (field 0)
// before the line (field 1), while the hand-written `PartialOrd` compared
// line first — the two traits must agree. Both now order by line, then
// column, and `partial_cmp` simply delegates to `cmp`.
impl Ord for Position {
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        self.1.cmp(&other.1).then(self.0.cmp(&other.0))
    }
}

impl PartialOrd for Position {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}

impl Debug for Position {
    /// Human-readable form; the column is shown 1-based.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "Ln {}, Col {}", self.1, self.0 + 1)
    }
}
|
||||
|
||||
pub struct Cursor<'a> {
|
||||
pub position: Position,
|
||||
pub char_stream: Chars<'a>,
|
||||
}
|
||||
|
||||
impl<'a> Cursor<'a> {
|
||||
pub fn next(&mut self) -> Option<char> {
|
||||
let next = self.char_stream.next();
|
||||
if let Some('\n') = next {
|
||||
self.position.1 += 1;
|
||||
self.position.0 = 0;
|
||||
}
|
||||
self.position.0 += 1;
|
||||
next
|
||||
}
|
||||
|
||||
fn first(&mut self) -> Option<char> {
|
||||
// `.next()` optimizes better than `.nth(0)`
|
||||
self.char_stream.clone().next()
|
||||
}
|
||||
|
||||
#[allow(dead_code)] // Is this actually needed?
|
||||
fn second(&mut self) -> Option<char> {
|
||||
// `.next()` optimizes better than `.nth(1)`
|
||||
let mut stream = self.char_stream.clone();
|
||||
stream.next();
|
||||
stream.next()
|
||||
}
|
||||
}
|
||||
|
||||
/// Take source text and produce a list of [`FullToken`]s from it, ie.
|
||||
/// tokenizing it.
|
||||
pub fn tokenize<T: Into<String>>(to_tokenize: T) -> Result<Vec<FullToken>, Error> {
|
||||
let to_tokenize = to_tokenize.into();
|
||||
let mut cursor = Cursor {
|
||||
char_stream: to_tokenize.chars(),
|
||||
position: Position(0, 1),
|
||||
};
|
||||
|
||||
let mut tokens = Vec::new();
|
||||
|
||||
while let Some(character) = &cursor.next() {
|
||||
// Save "current" token first character position
|
||||
let position = cursor.position.sub(1);
|
||||
|
||||
let variant = match character {
|
||||
// Whitespace
|
||||
w if w.is_whitespace() => {
|
||||
let mut whitespace = String::from(*w);
|
||||
while let Some(w) = cursor.first() {
|
||||
if !w.is_whitespace() {
|
||||
break;
|
||||
}
|
||||
whitespace.push(cursor.next().unwrap());
|
||||
}
|
||||
Token::Whitespace(whitespace)
|
||||
}
|
||||
// Comments
|
||||
'-' if cursor.first() == Some('-') => {
|
||||
cursor.next();
|
||||
|
||||
let mut comment = String::new();
|
||||
while !matches!(cursor.first(), Some('\n') | None) {
|
||||
if let Some(c) = cursor.next() {
|
||||
comment.push(c);
|
||||
}
|
||||
}
|
||||
Token::Comment(comment)
|
||||
}
|
||||
'\"' => {
|
||||
let mut value = String::new();
|
||||
let mut escape_next = false;
|
||||
while cursor.first().is_some()
|
||||
&& (cursor.first() != Some(*character) || escape_next)
|
||||
{
|
||||
if cursor.first() == Some('\\') && !escape_next {
|
||||
cursor.next(); // Consume backslash and always add next character
|
||||
escape_next = true;
|
||||
} else {
|
||||
let c = &cursor.next().unwrap();
|
||||
if escape_next {
|
||||
value += &escape_char(&c).to_string();
|
||||
} else {
|
||||
value += &c.to_string();
|
||||
}
|
||||
escape_next = false;
|
||||
}
|
||||
}
|
||||
if cursor.first() == Some(*character) {
|
||||
cursor.next();
|
||||
} else {
|
||||
return Err(Error::MissingQuotation(position));
|
||||
}
|
||||
match character {
|
||||
'\"' => Token::StringLit(value),
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
// "words"
|
||||
c if c.is_alphabetic() => {
|
||||
let mut value = character.to_string();
|
||||
while let Some(c) = cursor.first() {
|
||||
if !(c.is_ascii_alphanumeric() || c == '_') {
|
||||
break;
|
||||
}
|
||||
value += &c.to_string();
|
||||
cursor.next();
|
||||
}
|
||||
|
||||
if let Some(keyword) = Keyword::parse(&value) {
|
||||
Token::Keyword(keyword)
|
||||
} else {
|
||||
Token::Word(value)
|
||||
}
|
||||
}
|
||||
// Decimals
|
||||
c if DECIMAL_NUMERICS.contains(c) => {
|
||||
let mut value = NumberType::Decimal(character.to_string());
|
||||
let mut numerics = DECIMAL_NUMERICS;
|
||||
if let Some(second) = cursor.second() {
|
||||
if cursor.first() == Some('x')
|
||||
&& HEXADECIMAL_NUMERICS
|
||||
.contains(&second.to_lowercase().next().unwrap_or('.'))
|
||||
{
|
||||
cursor.next();
|
||||
value = NumberType::Hexadecimal(String::new());
|
||||
numerics = HEXADECIMAL_NUMERICS;
|
||||
} else if cursor.first() == Some('o')
|
||||
&& OCTAL_NUMERICS.contains(&second.to_lowercase().next().unwrap_or('.'))
|
||||
{
|
||||
cursor.next();
|
||||
value = NumberType::Octal(String::new());
|
||||
numerics = OCTAL_NUMERICS;
|
||||
} else if cursor.first() == Some('b')
|
||||
&& BINARY_NUMERICS.contains(&second.to_lowercase().next().unwrap_or('.'))
|
||||
{
|
||||
cursor.next();
|
||||
value = NumberType::Binary(String::new());
|
||||
numerics = BINARY_NUMERICS;
|
||||
}
|
||||
}
|
||||
while let Some(c) = cursor.first() {
|
||||
if !numerics.contains(&c.to_lowercase().next().unwrap_or('.')) {
|
||||
break;
|
||||
}
|
||||
value += c;
|
||||
cursor.next();
|
||||
}
|
||||
match value {
|
||||
NumberType::Decimal(dec) => Token::DecimalValue(dec),
|
||||
NumberType::Hexadecimal(hex) => Token::HexadecimalValue(hex),
|
||||
NumberType::Octal(oct) => Token::OctalValue(oct),
|
||||
NumberType::Binary(bin) => Token::BinaryValue(bin),
|
||||
}
|
||||
}
|
||||
// Some one-character token
|
||||
value => Token::Symbol(*value),
|
||||
};
|
||||
|
||||
tokens.push(FullToken {
|
||||
token: variant,
|
||||
position,
|
||||
});
|
||||
}
|
||||
|
||||
tokens.push(FullToken {
|
||||
token: Token::Eof,
|
||||
position: cursor.position,
|
||||
});
|
||||
|
||||
Ok(tokens)
|
||||
}
|
||||
|
||||
/// Maps the character following a backslash to the character the escape
/// sequence denotes; unknown escapes map to themselves.
fn escape_char(c: &char) -> char {
    match *c {
        't' => '\t',
        'n' => '\n',
        'r' => '\r',
        '0' => '\0',
        other => other,
    }
}
|
||||
|
||||
/// Intermediate representation of a numeric literal while it is being
/// lexed, tagged by the base it is written in.
enum NumberType {
    Decimal(String),
    Hexadecimal(String),
    Octal(String),
    Binary(String),
}

impl AddAssign<char> for NumberType {
    /// Appends one digit to the literal.
    ///
    /// Pushes in place; the previous version cloned the whole string,
    /// concatenated, and reassigned `*self` on every single digit.
    fn add_assign(&mut self, rhs: char) {
        match self {
            NumberType::Decimal(val)
            | NumberType::Hexadecimal(val)
            | NumberType::Octal(val)
            | NumberType::Binary(val) => val.push(rhs),
        }
    }
}
|
||||
|
||||
#[derive(thiserror::Error, Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub enum Error {
|
||||
#[error("Invalid token '{}' ", .0)]
|
||||
InvalidToken(char, Position),
|
||||
#[error("String literal is never finished!")]
|
||||
MissingQuotation(Position),
|
||||
}
|
||||
|
||||
impl Error {
|
||||
pub fn get_position(&self) -> &Position {
|
||||
match self {
|
||||
Error::InvalidToken(_, pos) => pos,
|
||||
Error::MissingQuotation(pos) => pos,
|
||||
}
|
||||
}
|
||||
}
|
||||
459
src/token_stream/mod.rs
Normal file
459
src/token_stream/mod.rs
Normal file
@ -0,0 +1,459 @@
|
||||
//! Contains relevant code for parsing tokens received from
|
||||
//! Lexing/Tokenizing-stage.
|
||||
|
||||
use std::{cell::RefCell, path::PathBuf, rc::Rc};
|
||||
|
||||
pub mod lexer;
|
||||
|
||||
use lexer::{FullToken, Token};
|
||||
|
||||
use crate::token_stream::lexer::Position;
|
||||
|
||||
pub trait Parse
|
||||
where
|
||||
Self: std::marker::Sized,
|
||||
{
|
||||
fn parse(stream: TokenStream) -> Result<Self, TokenStreamError>;
|
||||
}
|
||||
|
||||
/// Utility struct that is able to parse [`FullToken`]s while being
|
||||
/// failure-resistance in that it can backtrack easily, and is able to keep
|
||||
/// track of parsed Token-ranges easily.
|
||||
pub struct TokenStream<'a, 'b> {
|
||||
pub file_path: &'a PathBuf,
|
||||
ref_position: Option<&'b mut usize>,
|
||||
tokens: &'a [FullToken],
|
||||
errors: Rc<RefCell<Vec<TokenStreamError>>>,
|
||||
pub position: usize,
|
||||
}
|
||||
|
||||
impl<'a, 'b> TokenStream<'a, 'b> {
|
||||
pub fn from(file_path: &'a PathBuf, tokens: &'a [FullToken]) -> Self {
|
||||
TokenStream {
|
||||
file_path,
|
||||
ref_position: None,
|
||||
tokens,
|
||||
errors: Rc::new(RefCell::new(Vec::new())),
|
||||
position: 0,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_position(&self) -> Option<Position> {
|
||||
self.next_token(self.position).1.map(|v| v.position)
|
||||
}
|
||||
|
||||
/// Returns a parse-error with the given help-string
|
||||
pub fn parse_err<T: Into<String>>(&mut self, error: T) -> TokenStreamError {
|
||||
TokenStreamError::ParseError(
|
||||
error.into(),
|
||||
TokenRange {
|
||||
start: self.position - 1,
|
||||
end: self.position - 1,
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
/// Returns expected-error for the next token in-line. Useful in conjunction
|
||||
/// with [`TokenStream::peek`]
|
||||
pub fn expected_err<T: Into<String>>(&mut self, expected: T) -> TokenStreamError {
|
||||
let next_token = self.previous().unwrap_or(Token::Eof);
|
||||
TokenStreamError::Expected(
|
||||
expected.into(),
|
||||
next_token,
|
||||
TokenRange {
|
||||
start: self.position - self.position.min(1),
|
||||
end: self.position - self.position.min(1),
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
/// Returns expected-error for the next token in-line. Useful in conjunction
|
||||
/// with [`TokenStream::peek`]
|
||||
pub fn expected_err_nonfatal<T: Into<String>>(&mut self, expected: T) {
|
||||
let err = self.expected_err(expected);
|
||||
self.errors.borrow_mut().push(err);
|
||||
}
|
||||
|
||||
/// Returns expected-error for the previous token that was already consumed.
|
||||
/// Useful in conjunction with [`TokenStream::next`]
|
||||
pub fn expecting_err<T: Into<String>>(&mut self, expected: T) -> TokenStreamError {
|
||||
let next_token = self.peek().unwrap_or(Token::Eof);
|
||||
let pos = self.next_token(self.position).0;
|
||||
TokenStreamError::Expected(
|
||||
expected.into(),
|
||||
next_token,
|
||||
TokenRange {
|
||||
start: pos,
|
||||
end: pos,
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
/// Returns expected-error for the previous token that was already consumed.
|
||||
/// Useful in conjunction with [`TokenStream::next`]
|
||||
pub fn expecting_err_nonfatal<T: Into<String>>(&mut self, expected: T) {
|
||||
let err = self.expecting_err(expected);
|
||||
self.errors.borrow_mut().push(err);
|
||||
}
|
||||
|
||||
pub fn expect(&mut self, token: Token) -> Result<(), TokenStreamError> {
|
||||
if let (pos, Some(peeked)) = self.next_token(self.position) {
|
||||
if token == peeked.token {
|
||||
self.position = pos + 1;
|
||||
Ok(())
|
||||
} else {
|
||||
Err(self.expecting_err(token))
|
||||
}
|
||||
} else {
|
||||
Err(self.expecting_err(token))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn expect_word(&mut self, text: String) -> Result<(), TokenStreamError> {
|
||||
if let Some(Token::Word(inner)) = self.next() {
|
||||
if text == inner {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(self.expected_err(text))
|
||||
}
|
||||
} else {
|
||||
Err(self.expected_err(text))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn expect_symbol(&mut self, symbol: char) -> Result<(), TokenStreamError> {
|
||||
if let Some(Token::Symbol(inner)) = self.next() {
|
||||
if symbol == inner {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(self.expected_err(symbol.to_string()))
|
||||
}
|
||||
} else {
|
||||
Err(self.expected_err(symbol.to_string()))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn find_documentation<T: Into<String>>(&mut self, comment_prefix: T) -> Vec<String> {
|
||||
let mut from = self.position;
|
||||
let prefix = comment_prefix.into();
|
||||
let mut curr_doc = None;
|
||||
let mut documentation = Vec::new();
|
||||
while let Some(token) = self.tokens.get(from) {
|
||||
if matches!(token.token, Token::Whitespace(_) | Token::Comment(_)) {
|
||||
from += 1;
|
||||
if let Token::Comment(doctext) = &token.token {
|
||||
if doctext.starts_with(&prefix) {
|
||||
curr_doc = Some(
|
||||
match curr_doc {
|
||||
Some(t) => t + " ",
|
||||
None => String::new(),
|
||||
} + &doctext.replacen("/", "", 1).trim(),
|
||||
);
|
||||
} else if let Some(doctext) = curr_doc.take() {
|
||||
// Doctext was cut by comments
|
||||
documentation.push(doctext);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if let Some(doctext) = curr_doc.take() {
|
||||
// Doctext was cut by comments
|
||||
documentation.push(doctext);
|
||||
}
|
||||
documentation
|
||||
}
|
||||
|
||||
pub fn expect_nonfatal(&mut self, token: Token) -> Result<(), ()> {
|
||||
if let (pos, Some(peeked)) = self.next_token(self.position) {
|
||||
if token == peeked.token {
|
||||
self.position = pos + 1;
|
||||
Ok(())
|
||||
} else {
|
||||
self.expecting_err_nonfatal(token);
|
||||
Err(())
|
||||
}
|
||||
} else {
|
||||
self.expecting_err_nonfatal(token);
|
||||
Err(())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn next(&mut self) -> Option<Token> {
|
||||
let (position, token) = self.next_token(self.position);
|
||||
self.position = position + 1;
|
||||
token.map(|t| t.token.clone())
|
||||
}
|
||||
|
||||
pub fn previous(&mut self) -> Option<Token> {
|
||||
let (_, token) = self.previous_token(self.position);
|
||||
token.map(|t| t.token.clone())
|
||||
}
|
||||
|
||||
pub fn peek(&mut self) -> Option<Token> {
|
||||
let (_, token) = self.next_token(self.position);
|
||||
token.map(|t| t.token.clone())
|
||||
}
|
||||
|
||||
pub fn peek2(&mut self) -> Option<Token> {
|
||||
let (pos2, _) = self.next_token(self.position);
|
||||
let (_, token) = self.next_token(pos2 + 1);
|
||||
token.map(|t| t.token.clone())
|
||||
}
|
||||
|
||||
/// Peeks the third-next meaningful (non-whitespace, non-comment) token
/// without consuming anything.
pub fn peek3(&mut self) -> Option<Token> {
    // BUG FIX: the body was a copy of `peek2` and only looked two tokens
    // ahead; a third `next_token` hop was missing.
    let (pos1, _) = self.next_token(self.position);
    let (pos2, _) = self.next_token(pos1 + 1);
    let (_, token) = self.next_token(pos2 + 1);
    token.map(|t| t.token.clone())
}
|
||||
|
||||
/// Parse the next value of trait Parse. If the parse succeeded, the related
|
||||
/// tokens are consumed, otherwise token stream does not advance.
|
||||
///
|
||||
/// Parsetime-error is returned on failure.
|
||||
pub fn parse<T: Parse + std::fmt::Debug>(&mut self) -> Result<T, TokenStreamError> {
|
||||
let (res, pos) = self.parse_meta()?;
|
||||
self.position = pos;
|
||||
Ok(res)
|
||||
}
|
||||
|
||||
/// Parse the next item with Parse-trait (Same as [`TokenStream::parse`])
|
||||
/// without consuming the related tokens, essentially only peeking.
|
||||
pub fn parse_peek<T: Parse + std::fmt::Debug>(&mut self) -> Result<T, TokenStreamError> {
|
||||
self.parse_meta().map(|(res, _)| res)
|
||||
}
|
||||
|
||||
/// Parse the next item with Parse-trait, also mapping it with the given
|
||||
/// function. The token-stream is only consumed, if the inner function
|
||||
/// retuns an Ok.
|
||||
#[allow(dead_code)]
|
||||
pub fn parse_map<T: Parse + std::fmt::Debug, F, O>(
|
||||
&mut self,
|
||||
inner: F,
|
||||
) -> Result<O, TokenStreamError>
|
||||
where
|
||||
F: Fn(T) -> Result<O, TokenStreamError>,
|
||||
{
|
||||
let (res, pos) = self.parse_meta::<T>()?;
|
||||
match inner(res) {
|
||||
Ok(mapped) => {
|
||||
self.position = pos;
|
||||
Ok(mapped)
|
||||
}
|
||||
Err(e) => Err(e),
|
||||
}
|
||||
}
|
||||
|
||||
/// Parses the item with Parse if the condition specified by the
|
||||
/// lambda-function is passed. Errors returned from this should not be
|
||||
/// passed to the end-user.
|
||||
pub fn parse_if<T: Parse + std::fmt::Debug, F>(
|
||||
&mut self,
|
||||
inner: F,
|
||||
) -> Result<T, TokenStreamError>
|
||||
where
|
||||
F: Fn(&T) -> bool,
|
||||
{
|
||||
let (res, pos) = self.parse_meta::<T>()?;
|
||||
if inner(&res) {
|
||||
self.position = pos;
|
||||
Ok(res)
|
||||
} else {
|
||||
Err(TokenStreamError::IfFailed)
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse the next item with Parse-trait. If successful, returning the
|
||||
/// parsed item and the new position of the TokenStream. Failing, returning
|
||||
/// parse-error.
|
||||
///
|
||||
/// Used for [`TokenStream::parse`] and [`TokenStream::parse_peek`]
|
||||
fn parse_meta<T: Parse + std::fmt::Debug>(&mut self) -> Result<(T, usize), TokenStreamError> {
|
||||
let mut ref_pos = self.position;
|
||||
|
||||
let position = self.position;
|
||||
let clone = TokenStream {
|
||||
file_path: self.file_path,
|
||||
ref_position: Some(&mut ref_pos),
|
||||
tokens: self.tokens,
|
||||
errors: self.errors.clone(),
|
||||
position,
|
||||
};
|
||||
|
||||
match T::parse(clone) {
|
||||
Ok(res) => {
|
||||
let new_pos = ref_pos.max(self.position);
|
||||
Ok((res, new_pos))
|
||||
}
|
||||
Err(e) => Err(e),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse_with<T, U>(&mut self, fun: T) -> Result<U, TokenStreamError>
|
||||
where
|
||||
T: FnOnce(TokenStream) -> Result<U, TokenStreamError>,
|
||||
{
|
||||
let mut ref_pos = self.position;
|
||||
|
||||
let position = self.position;
|
||||
let clone = TokenStream {
|
||||
file_path: self.file_path,
|
||||
ref_position: Some(&mut ref_pos),
|
||||
tokens: self.tokens,
|
||||
errors: self.errors.clone(),
|
||||
position,
|
||||
};
|
||||
|
||||
match fun(clone) {
|
||||
Ok(res) => {
|
||||
self.position = ref_pos.max(self.position);
|
||||
Ok(res)
|
||||
}
|
||||
Err(e) => Err(e),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_range(&self) -> TokenRange {
|
||||
self.ref_position
|
||||
.as_ref()
|
||||
.map(|ref_pos| TokenRange {
|
||||
start: **ref_pos,
|
||||
end: self.position,
|
||||
})
|
||||
.unwrap_or(TokenRange {
|
||||
start: 0,
|
||||
end: self.position,
|
||||
})
|
||||
}
|
||||
|
||||
/// Gets range from the previous position to the current. Useful when using
|
||||
/// with [`TokenStream::next`]
|
||||
pub fn get_range_prev(&self) -> Option<TokenRange> {
|
||||
self.ref_position.as_ref().map(|ref_pos| TokenRange {
|
||||
start: **ref_pos,
|
||||
end: self.previous_token(self.position).0,
|
||||
})
|
||||
}
|
||||
|
||||
/// Gets range of the previous token only.
|
||||
pub fn get_range_prev_curr(&self) -> Option<TokenRange> {
|
||||
Some(TokenRange {
|
||||
start: self.previous_token(self.position).0,
|
||||
end: self.previous_token(self.position).0,
|
||||
})
|
||||
}
|
||||
|
||||
fn previous_token(&self, mut from: usize) -> (usize, Option<&'a FullToken>) {
|
||||
if from == 0 {
|
||||
return (from, None);
|
||||
}
|
||||
from -= 1;
|
||||
while let Some(token) = self.tokens.get(from) {
|
||||
if matches!(token.token, Token::Whitespace(_) | Token::Comment(_)) {
|
||||
from -= 1;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
(from, self.tokens.get(from))
|
||||
}
|
||||
|
||||
fn next_token(&self, mut from: usize) -> (usize, Option<&'a FullToken>) {
|
||||
while let Some(token) = self.tokens.get(from) {
|
||||
if matches!(token.token, Token::Whitespace(_) | Token::Comment(_)) {
|
||||
from += 1;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
(from, self.tokens.get(from))
|
||||
}
|
||||
|
||||
/// Returns a snapshot of the non-fatal errors collected so far.
pub fn errors(&self) -> Vec<TokenStreamError> {
    // A single clone suffices; the previous `.clone().clone()` copied the
    // vector twice.
    self.errors.borrow().clone()
}
|
||||
|
||||
pub fn next_is_whitespace(&self) -> bool {
|
||||
if let Some(token) = self.tokens.get(self.position) {
|
||||
if let Token::Whitespace(_) = token.token {
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
} else {
|
||||
true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for TokenStream<'_, '_> {
|
||||
fn drop(&mut self) {
|
||||
if let Some(ref_pos) = &mut self.ref_position {
|
||||
**ref_pos = self.position;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Index-range that can be used with the original array of [`FullToken`]s to
/// retrieve the precise location of a failure.
#[derive(Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct TokenRange {
    pub start: usize,
    pub end: usize,
}

impl std::fmt::Debug for TokenRange {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "Tokens[{} - {}]", self.start, self.end)
    }
}

impl std::ops::Add for TokenRange {
    type Output = TokenRange;

    /// Produces the smallest range covering both operands; inverted ranges
    /// (`start > end`) are folded into the hull as well.
    fn add(self, rhs: Self) -> Self::Output {
        TokenRange {
            start: self.start.min(rhs.start).min(rhs.end),
            end: self.end.max(rhs.end).max(rhs.start),
        }
    }
}

impl std::iter::Sum for TokenRange {
    /// Folds an iterator of ranges into their covering hull; an empty
    /// iterator yields the default (empty) range.
    fn sum<I: Iterator<Item = Self>>(mut iter: I) -> Self {
        // `unwrap_or_default` replaces the eager `unwrap_or(Default::default())`
        // and `fold` replaces the manual accumulate loop.
        let first = iter.next().unwrap_or_default();
        iter.fold(first, |acc, item| acc + item)
    }
}
|
||||
|
||||
#[derive(thiserror::Error, Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub enum TokenStreamError {
|
||||
#[error("Expected {} got \"{:?}\"", .0, .1)]
|
||||
Expected(String, Token, TokenRange),
|
||||
#[error("Error Parsing: {0}")]
|
||||
ParseError(String, TokenRange),
|
||||
#[error("Source file contains no tokens")]
|
||||
FileEmpty,
|
||||
/// Only use this error in situations where the error never ends up for the end-user!
|
||||
#[error("Undefined error, should only be used in situations where the error is not emitted!")]
|
||||
Undefined,
|
||||
/// Condition failed for the parse-if
|
||||
#[error("Condition failed for parse-if. Should never be returned to end-user.")]
|
||||
IfFailed,
|
||||
}
|
||||
|
||||
impl TokenStreamError {
|
||||
pub fn get_range(&self) -> Option<&TokenRange> {
|
||||
match self {
|
||||
TokenStreamError::Expected(_, _, pos) => Some(pos),
|
||||
TokenStreamError::FileEmpty => None,
|
||||
TokenStreamError::Undefined => None,
|
||||
TokenStreamError::IfFailed => None,
|
||||
TokenStreamError::ParseError(_, range) => Some(range),
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue
Block a user