Compare commits

...

23 Commits

Author SHA1 Message Date
Sofia 47b9d7e044 Disable warnings for lacking ; for if-statements 2023-12-13 00:33:13 +02:00
Sofia 5084f21ff9 Add parsing for if-statements 2023-08-03 21:24:57 +03:00
Sofia 4c8417cbee Add return types, function args 2023-08-03 20:30:00 +03:00
Sofia 420fd7b74b Implement block-level-expressions 2023-08-03 20:10:12 +03:00
Sofia 413cd87a02 Add soft/hard returns 2023-08-03 20:08:20 +03:00
Sofia f22505be91 Add function calls and mult 2023-08-03 00:53:39 +03:00
Sofia 4e89cd7355 Add more error raporting 2023-08-02 20:22:10 +03:00
Sofia 9710406747 Fix error 2023-08-02 20:17:59 +03:00
Sofia 91a6485d5a Add more error handling 2023-08-02 20:17:06 +03:00
Sofia 9b7a31c988 Add errors for lexer and parser 2023-08-02 19:58:10 +03:00
Sofia c7f11e5091 Add errors, move compiling to examples 2023-08-02 19:17:57 +03:00
Sofia e9aab4f43e Use print to string instead of dump 2023-08-02 18:48:56 +03:00
Sofia 2ced45eb40 Refactor 2023-08-02 18:43:47 +03:00
Sofia 08909d76ee Fix warnings 2023-08-02 18:38:38 +03:00
Sofia 9019e1e5a7 Split codegen properly 2023-08-02 18:37:31 +03:00
Sofia 817b7c2096 Add Scope-struct 2023-08-02 18:29:20 +03:00
Sofia 612b4d63a8 Reorganize code 2023-08-02 18:19:30 +03:00
Sofia 297ff832b5 Remove comment 2023-08-02 17:03:21 +03:00
Sofia a196857dd6 Add simple codegen from actual AST 2023-08-02 17:03:06 +03:00
Sofia a9d5302774 Add blockexpr, possible paren to exp 2023-08-02 15:54:16 +03:00
Sofia 9d7a63f25b Make return special for block 2023-08-02 15:34:20 +03:00
Sofia dca604a038 Add preliminary codegen, update parsing to require functions 2023-08-02 15:31:33 +03:00
Sofia 17ab6a0cea Update gitignore 2023-08-02 01:03:19 +03:00
19 changed files with 1149 additions and 329 deletions

2
.gitignore vendored
View File

@ -1 +1,3 @@
/target
/.vscode
.env

View File

@ -1,5 +0,0 @@
{
"rust-analyzer.linkedProjects": [
"./Cargo.toml"
]
}

56
Cargo.lock generated
View File

@ -48,6 +48,24 @@ version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
[[package]]
name = "proc-macro2"
version = "1.0.66"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "50f3b39ccfb720540debaa0164757101c08ecb8d326b15358ce76a62c7e85965"
dependencies = [
"proc-macro2",
]
[[package]]
name = "regex"
version = "1.9.1"
@ -82,6 +100,7 @@ name = "reid"
version = "0.1.0"
dependencies = [
"llvm-sys",
"thiserror",
]
[[package]]
@ -89,3 +108,40 @@ name = "semver"
version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b0293b4b29daaf487284529cc2f5675b8e57c61f70167ba415a463651fd6a918"
[[package]]
name = "syn"
version = "2.0.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "04361975b3f5e348b2189d8dc55bc942f278b2d482a6a0365de5bdd62d351567"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "thiserror"
version = "1.0.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "611040a08a0439f8248d1990b111c95baa9c704c805fa1f62104b39655fd7f90"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "090198534930841fab3a5d1bb637cde49e339654e606195f8d9c76eeb081dc96"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "unicode-ident"
version = "1.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c"

View File

@ -6,5 +6,7 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
## LLVM Bindings
llvm-sys = "160"
## Make it easier to generate errors
thiserror = "1.0.44"

11
examples/easiest.rs Normal file
View File

@ -0,0 +1,11 @@
use reid::compile;

/// Source of the "easiest" example, embedded at compile time.
pub static EASIEST: &str = include_str!("./reid/easiest.reid");

/// Compile the embedded example and print the resulting LLVM IR text,
/// aborting with the compiler's error message on failure.
fn main() {
    match compile(EASIEST) {
        Ok(text) => println!("{}", text),
        Err(e) => panic!("{}", e),
    }
}

View File

@ -0,0 +1,20 @@
// Hello, comment here!

import std::print;

fn main() {
    // Block expressions yield their trailing expression (soft return).
    let hello = 32 + {
        2 + 3
    };
    // NOTE(review): fibonacci is declared below with one parameter but is
    // called here with none — confirm whether arity is checked anywhere yet.
    let beep = hello + fibonacci();

    return beep;
}

// Fibonacci
fn fibonacci(value: i32) -> i32 {
    if value < 3 {
        return 1;
    }
    return fibonacci(value - 1) + fibonacci(value - 2);
}

12
examples/reid/easy.reid Normal file
View File

@ -0,0 +1,12 @@
// Arithmetic, function calls and imports!

import std::print;

fn main() {
    // NOTE(review): `test` and `simpleAdd` are never used afterwards.
    let test = 5;
    let simpleAdd = 2 + 2;
    let arithmetic = 3 + 2 * 5 + 1 * 2;
    let multiplier = 5 * 2;
    return arithmetic + multiplier * arithmetic;
}

View File

@ -1,4 +0,0 @@
// Hello, comment here!
let hello = 32;
let beep =
hello ;

View File

@ -1,12 +0,0 @@
// Arithmetic, function calls and imports!
import std::print;
let test = 5;
let simpleAdd = 2 + 2;
let arithmetic = 3 + 2 * 5 + 1 * 2;
let multiplier = 5 * 2;
let result = arithmetic + multiplier * arithmetic;
print(result);
function(one, two);

388
src/ast.rs Normal file
View File

@ -0,0 +1,388 @@
use crate::{
lexer::Token,
token_stream::{Error, TokenStream},
};
/// Anything that can be parsed out of a [`TokenStream`].
pub trait Parse
where
    Self: std::marker::Sized,
{
    /// Attempt to parse `Self` from the stream, consuming tokens on success.
    fn parse(stream: TokenStream) -> Result<Self, Error>;
}
/// A type name in source code. Only `i32` exists so far.
#[derive(Debug, Clone)]
pub enum Type {
    I32,
}

impl Parse for Type {
    /// Parse a single type identifier.
    fn parse(mut stream: TokenStream) -> Result<Self, Error> {
        if let Some(Token::Identifier(ident)) = stream.next() {
            Ok(match &*ident {
                "i32" => Type::I32,
                // BUGFIX: this previously panicked with a placeholder message
                // ("asd"); an unknown type name is a normal parse error in
                // user input, not an internal bug.
                _ => Err(stream.expected_err("type identifier")?)?,
            })
        } else {
            Err(stream.expected_err("type identifier")?)
        }
    }
}
#[derive(Debug, Clone)]
pub enum Literal {
    /// 32-bit signed integer literal.
    I32(i32),
}

/// Any value-producing piece of syntax.
#[derive(Debug, Clone)]
pub enum Expression {
    VariableName(String),
    Literal(Literal),
    /// `lhs <op> rhs`
    Binop(BinaryOperator, Box<Expression>, Box<Expression>),
    /// `name(arg, ...)`
    FunctionCall(Box<FunctionCallExpression>),
    /// `{ ... }` used as a value.
    BlockExpr(Box<Block>),
    /// `if <cond> { ... }` used as a value.
    IfExpr(Box<IfExpression>),
}

impl Parse for Expression {
    fn parse(mut stream: TokenStream) -> Result<Expression, Error> {
        // Parse one operand, then fold any following binary operators onto it.
        let lhs = parse_primary_expression(&mut stream)?;
        parse_binop_rhs(&mut stream, lhs, None)
    }
}

/// Parse a single operand: function call, block, if-expression, variable,
/// integer literal, or a parenthesized sub-expression.
///
/// NOTE(review): the leading `stream.parse()` attempts are speculative — this
/// assumes a failed `parse` leaves the stream position untouched; confirm
/// that TokenStream rewinds on error.
fn parse_primary_expression(stream: &mut TokenStream) -> Result<Expression, Error> {
    if let Ok(exp) = stream.parse() {
        Ok(Expression::FunctionCall(Box::new(exp)))
    } else if let Ok(block) = stream.parse() {
        Ok(Expression::BlockExpr(Box::new(block)))
    } else if let Ok(ifexpr) = stream.parse() {
        Ok(Expression::IfExpr(Box::new(ifexpr)))
    } else if let Some(token) = stream.next() {
        Ok(match &token {
            Token::Identifier(v) => Expression::VariableName(v.clone()),
            // NOTE(review): a literal overflowing i32 panics via `unwrap` here.
            Token::DecimalValue(v) => Expression::Literal(Literal::I32(v.parse().unwrap())),
            Token::ParenOpen => {
                let exp = stream.parse()?;
                stream.expect(Token::ParenClose)?;
                exp
            }
            _ => Err(stream.expected_err("identifier, constant or parentheses")?)?,
        })
    } else {
        Err(stream.expected_err("expression")?)?
    }
}

/// This algorithm seems somewhat like magic to me. I understand it if I read
/// carefully, but it is difficult to read every single time.
///
/// Reference for how the algorithm is formed:
/// https://llvm.org/docs/tutorial/MyFirstLanguageFrontend/LangImpl02.html#binary-expression-parsing
fn parse_binop_rhs(
    stream: &mut TokenStream,
    mut lhs: Expression,
    mut operator: Option<BinaryOperator>,
) -> Result<Expression, Error> {
    // Minimum precedence an operator must have to be folded at this level.
    let expr_prec = if let Some(op) = operator {
        op.get_precedence() + 1
    } else {
        0
    };
    // Either reuse the operator handed down by the caller, or read the next one.
    while let Some(op) = operator.take().as_ref().or(stream.parse().as_ref().ok()) {
        let curr_token_prec = op.get_precedence();
        if curr_token_prec < expr_prec {
            break; // Just return lhs
        } else {
            let mut rhs = parse_primary_expression(stream)?;
            if let Ok(next_op) = stream.parse::<BinaryOperator>() {
                let next_prec = next_op.get_precedence();
                if curr_token_prec < next_prec {
                    // Operator on the right of rhs has more precedence, turn
                    // rhs into lhs for new binop
                    rhs = parse_binop_rhs(stream, rhs, Some(next_op))?;
                } else {
                    // Same or lower precedence: stash it for the next turn of
                    // the loop so it folds at this level.
                    let _ = operator.insert(next_op);
                }
            }
            lhs = Expression::Binop(*op, Box::new(lhs), Box::new(rhs));
        }
    }
    Ok(lhs)
}
/// Binary operators; relative binding strength comes from `get_precedence`.
#[derive(Debug, Clone, Copy)]
pub enum BinaryOperator {
    Add,
    Minus,
    Mult,
    And,
    LessThan,
}

impl Parse for BinaryOperator {
    fn parse(mut stream: TokenStream) -> Result<Self, Error> {
        Ok(match (stream.next(), stream.peek()) {
            // `&&` arrives from the lexer as two consecutive `&` (Et) tokens.
            (Some(Token::Et), Some(Token::Et)) => {
                stream.next();
                BinaryOperator::And
            }
            (Some(Token::LessThan), _) => BinaryOperator::LessThan,
            (Some(Token::Plus), _) => BinaryOperator::Add,
            (Some(Token::Minus), _) => BinaryOperator::Minus,
            (Some(Token::Times), _) => BinaryOperator::Mult,
            (_, _) => Err(stream.expected_err("expected operator")?)?,
        })
    }
}

impl BinaryOperator {
    /// Precedence for the binop-folding parser; a higher value binds tighter.
    pub fn get_precedence(&self) -> i8 {
        use BinaryOperator::*;
        match &self {
            Add => 10,
            Minus => 10,
            Mult => 20,
            And => 100,
            LessThan => 100,
        }
    }
}
/// Call site: function name plus argument expressions.
#[derive(Debug, Clone)]
pub struct FunctionCallExpression(pub String, pub Vec<Expression>);

impl Parse for FunctionCallExpression {
    fn parse(mut stream: TokenStream) -> Result<Self, Error> {
        if let Some(Token::Identifier(name)) = stream.next() {
            stream.expect(Token::ParenOpen)?;
            let mut args = Vec::new();
            // First argument is optional; further arguments are comma-separated.
            if let Ok(exp) = stream.parse() {
                args.push(exp);
                while stream.expect(Token::Comma).is_ok() {
                    args.push(stream.parse()?);
                }
            }
            stream.expect(Token::ParenClose)?;
            Ok(FunctionCallExpression(name, args))
        } else {
            Err(stream.expected_err("identifier")?)
        }
    }
}

/// `if <condition> <block>` — there is no else-branch yet.
#[derive(Debug, Clone)]
pub struct IfExpression(Expression, pub Block);

impl Parse for IfExpression {
    fn parse(mut stream: TokenStream) -> Result<Self, Error> {
        stream.expect(Token::If)?;
        Ok(IfExpression(stream.parse()?, stream.parse()?))
    }
}

/// `let <name> = <expression>;`
#[derive(Debug, Clone)]
pub struct LetStatement(pub String, pub Expression);

impl Parse for LetStatement {
    fn parse(mut stream: TokenStream) -> Result<LetStatement, Error> {
        stream.expect(Token::LetKeyword)?;
        if let Some(Token::Identifier(variable)) = stream.next() {
            stream.expect(Token::Equals)?;
            let expression = stream.parse()?;
            stream.expect(Token::Semi)?;
            Ok(LetStatement(variable, expression))
        } else {
            Err(stream.expected_err("identifier")?)
        }
    }
}

/// `import seg::seg::...;` — stored as the list of path segments.
#[derive(Debug, Clone)]
pub struct ImportStatement(Vec<String>);

impl Parse for ImportStatement {
    fn parse(mut stream: TokenStream) -> Result<Self, Error> {
        stream.expect(Token::ImportKeyword)?;
        let mut import_list = Vec::new();
        if let Some(Token::Identifier(name)) = stream.next() {
            import_list.push(name);
            // `::` arrives from the lexer as two separate `:` tokens.
            // NOTE(review): if only a single `:` follows, the first expect has
            // already consumed it when the second fails — confirm the stream
            // rewinds, or that the resulting parse error is acceptable.
            while stream.expect(Token::Colon).is_ok() && stream.expect(Token::Colon).is_ok() {
                if let Some(Token::Identifier(name)) = stream.next() {
                    import_list.push(name);
                } else {
                    Err(stream.expected_err("identifier")?)?
                }
            }
        } else {
            Err(stream.expected_err("identifier")?)?
        }
        stream.expect(Token::Semi)?;
        Ok(ImportStatement(import_list))
    }
}

/// `fn <signature> <block>`
#[derive(Debug)]
pub struct FunctionDefinition(pub FunctionSignature, pub Block);

impl Parse for FunctionDefinition {
    fn parse(mut stream: TokenStream) -> Result<Self, Error> {
        stream.expect(Token::FnKeyword)?;
        Ok(FunctionDefinition(stream.parse()?, stream.parse()?))
    }
}
/// Declared shape of a function: `name(arg: type, ...) [-> type]`.
#[derive(Debug, Clone)]
pub struct FunctionSignature {
    pub name: String,
    pub args: Vec<(String, Type)>,
    pub return_type: Option<Type>,
}

impl Parse for FunctionSignature {
    fn parse(mut stream: TokenStream) -> Result<Self, Error> {
        if let Some(Token::Identifier(name)) = stream.next() {
            stream.expect(Token::ParenOpen)?;
            let mut args = Vec::new();
            while let Some(Token::Identifier(arg_name)) = stream.peek() {
                stream.next(); // Eat the argument name
                stream.expect(Token::Colon)?;
                args.push((arg_name, stream.parse()?));
                // BUGFIX: arguments are comma-separated; previously the comma
                // was never consumed, so `fn f(a: i32, b: i32)` stopped at `,`
                // and then failed expecting `)`. The comma stays optional, so
                // everything that parsed before still parses.
                let _ = stream.expect(Token::Comma);
            }
            stream.expect(Token::ParenClose)?;
            // Optional `-> type` return annotation.
            let mut return_type = None;
            if stream.expect(Token::Arrow).is_ok() {
                return_type = Some(stream.parse()?);
            }
            Ok(FunctionSignature {
                name,
                args,
                return_type,
            })
        } else {
            Err(stream.expected_err("identifier")?)?
        }
    }
}
/// How a block's trailing expression leaves the block.
#[derive(Debug, Clone, Copy)]
pub enum ReturnType {
    /// Trailing expression without `;` — the block evaluates to it.
    Soft,
    /// Explicit `return <expr>;`.
    Hard,
}

/// `{ <statements> [<return expression>] }`
#[derive(Debug, Clone)]
pub struct Block(
    pub Vec<BlockLevelStatement>,
    pub Option<(ReturnType, Expression)>,
);

impl Parse for Block {
    fn parse(mut stream: TokenStream) -> Result<Self, Error> {
        let mut statements = Vec::new();
        let mut return_stmt = None;
        stream.expect(Token::BraceOpen)?;
        while !matches!(stream.peek(), Some(Token::BraceClose)) {
            // A soft return followed by more statements was really just an
            // expression statement missing its `;` — demote it and warn.
            if let Some((r_type, e)) = return_stmt.take() {
                // Special list of expressions that are simply not warned about,
                // if semicolon is missing.
                if !matches!(&e, &Expression::IfExpr(_)) {
                    // NOTE(review): dbg!/println! is debugging residue on
                    // stdout/stderr — consider proper diagnostics.
                    dbg!(r_type, &e);
                    println!("Oh no, does this statement lack ;");
                }
                statements.push(BlockLevelStatement::Expression(e));
            }
            let statement = stream.parse()?;
            if let BlockLevelStatement::Return((r_type, e)) = &statement {
                match r_type {
                    ReturnType::Hard => {
                        return_stmt = Some((*r_type, e.clone()));
                        break; // Return has to be the last statement
                               // TODO: Make a mechanism that "can" parse even after this
                    }
                    ReturnType::Soft => {
                        return_stmt = Some((*r_type, e.clone()));
                        continue; // In theory possible to have lines after a soft return
                    }
                };
            }
            statements.push(statement);
        }
        stream.expect(Token::BraceClose)?;
        Ok(Block(statements, return_stmt))
    }
}

/// Statements allowed inside a block body.
#[derive(Debug, Clone)]
pub enum BlockLevelStatement {
    Let(LetStatement),
    Import(ImportStatement),
    Expression(Expression),
    Return((ReturnType, Expression)),
}

impl Parse for BlockLevelStatement {
    fn parse(mut stream: TokenStream) -> Result<Self, Error> {
        use BlockLevelStatement as Stmt;
        Ok(match stream.peek() {
            Some(Token::LetKeyword) => Stmt::Let(stream.parse()?),
            Some(Token::ImportKeyword) => Stmt::Import(stream.parse()?),
            Some(Token::ReturnKeyword) => {
                stream.next();
                let exp = stream.parse()?;
                stream.expect(Token::Semi)?;
                Stmt::Return((ReturnType::Hard, exp))
            }
            _ => {
                // Bare expression: with `;` it is a statement, without it a
                // soft (implicit) return of the enclosing block.
                if let Ok(e) = stream.parse() {
                    if stream.expect(Token::Semi).is_ok() {
                        Stmt::Expression(e)
                    } else {
                        Stmt::Return((ReturnType::Soft, e))
                    }
                } else {
                    Err(stream.expected_err("expression")?)?
                }
            }
        })
    }
}

/// Statements allowed at file scope: imports and function definitions only.
#[derive(Debug)]
pub enum TopLevelStatement {
    Import(ImportStatement),
    FunctionDefinition(FunctionDefinition),
}

impl Parse for TopLevelStatement {
    fn parse(mut stream: TokenStream) -> Result<Self, Error> {
        use TopLevelStatement as Stmt;
        Ok(match stream.peek() {
            Some(Token::ImportKeyword) => Stmt::Import(stream.parse()?),
            Some(Token::FnKeyword) => Stmt::FunctionDefinition(stream.parse()?),
            _ => Err(stream.expected_err("import or fn")?)?,
        })
    }
}

222
src/codegen.rs Normal file
View File

@ -0,0 +1,222 @@
use std::collections::{hash_map, HashMap};
use crate::{
ast::{
BinaryOperator, Block, BlockLevelStatement, Expression, FunctionCallExpression,
FunctionDefinition, FunctionSignature, ReturnType, TopLevelStatement,
},
llvm_ir::{self, IRBlock, IRFunction, IRModule, IRValue, IRValueType},
};
#[derive(Clone)]
pub struct ScopeData {
named_vars: HashMap<String, IRValue>,
defined_functions: HashMap<String, (FunctionSignature, Option<IRFunction>)>,
}
impl ScopeData {
pub fn inner<'a, 'b>(&self, block: &'b mut IRBlock<'a>) -> Scope<'a, 'b> {
Scope {
block,
data: self.clone(),
}
}
pub fn var(&self, name: &String) -> Option<&IRValue> {
self.named_vars.get(name)
}
pub fn set_var(&mut self, name: &str, val: IRValue) -> Result<(), Error> {
if let hash_map::Entry::Vacant(e) = self.named_vars.entry(name.to_owned()) {
e.insert(val);
Ok(())
} else {
Err(Error::VariableAlreadyDefined(name.to_owned()))
}
}
pub fn function(
&mut self,
name: &String,
) -> Option<&mut (FunctionSignature, Option<IRFunction>)> {
self.defined_functions.get_mut(name)
}
pub fn set_function_signature(
&mut self,
name: &str,
sig: FunctionSignature,
ir: IRFunction,
) -> Result<(), Error> {
if let hash_map::Entry::Vacant(e) = self.defined_functions.entry(name.to_owned()) {
e.insert((sig, Some(ir)));
Ok(())
} else {
Err(Error::VariableAlreadyDefined(name.to_owned()))
}
}
}
pub struct Scope<'a, 'b> {
pub block: &'b mut IRBlock<'a>,
pub data: ScopeData,
}
impl<'a, 'b> Scope<'a, 'b> {
pub fn inner<'c>(&'c mut self) -> Scope<'a, 'c> {
Scope {
block: self.block,
data: self.data.clone(),
}
}
}
/// Two-pass top-level codegen: first declare every function signature (so
/// calls may reference functions defined later in the file), then generate
/// each body.
pub fn codegen_from_statements(statements: Vec<TopLevelStatement>) -> Result<IRModule, Error> {
    let mut module = IRModule::new("testmod");
    let mut scope = ScopeData {
        defined_functions: HashMap::new(),
        named_vars: HashMap::new(),
    };

    // Pass 1: declare signatures.
    for statement in &statements {
        match statement {
            TopLevelStatement::FunctionDefinition(FunctionDefinition(sig, _)) => {
                let function = module.create_func(&sig.name, IRValueType::I32);
                // (was `&sig.name.clone()` — cloning only to immediately
                // borrow is redundant; `&sig.name` coerces to `&str` directly)
                scope.set_function_signature(&sig.name, sig.clone(), function)?;
            }
            TopLevelStatement::Import(_) => {}
        }
    }

    // Pass 2: generate bodies.
    for statement in &statements {
        statement.codegen(&mut module, &mut scope)?;
    }
    Ok(module)
}
impl TopLevelStatement {
    /// Generate IR for one top-level statement into `module`. Function
    /// signatures must already be declared in `root_data`
    /// (see `codegen_from_statements`).
    pub fn codegen(&self, module: &mut IRModule, root_data: &mut ScopeData) -> Result<(), Error> {
        match self {
            TopLevelStatement::FunctionDefinition(FunctionDefinition(sig, block)) => {
                if let Some((_, ir)) = root_data.function(&sig.name) {
                    // Taking the IRFunction marks it as defined; a second
                    // definition finds None and errors below.
                    if let Some(ir_function) = ir.take() {
                        let mut ir_block = module.create_block();
                        let mut scope = root_data.inner(&mut ir_block);
                        let (_, value) = match block.codegen(&mut scope)? {
                            Some(v) => v,
                            None => panic!("Void-return type function not yet implemented!"),
                        };
                        ir_function.add_definition(value, ir_block);
                    } else {
                        Err(Error::FunctionAlreadyDefined(sig.name.clone()))?
                    }
                } else {
                    panic!("Function was not declared before it's definition")
                }
            }
            TopLevelStatement::Import(_) => {}
        }
        Ok(())
    }
}

impl Block {
    /// Generate IR for the block's statements; returns the value of its
    /// trailing (soft/hard) return expression, if any.
    pub fn codegen(&self, scope: &mut Scope) -> Result<Option<(ReturnType, IRValue)>, Error> {
        for statement in &self.0 {
            statement.codegen(scope)?;
        }
        let value = if let Some((rt, exp)) = &self.1 {
            Some((*rt, exp.codegen(scope)?))
        } else {
            None
        };
        Ok(value)
    }
}

impl BlockLevelStatement {
    /// Generate IR for a single statement inside a block.
    pub fn codegen(&self, scope: &mut Scope) -> Result<(), Error> {
        match self {
            BlockLevelStatement::Let(let_statement) => {
                let val = let_statement.1.codegen(scope)?;
                scope.data.set_var(&let_statement.0, val)?;
                Ok(())
            }
            // The parser lifts returns into Block.1; one should never remain
            // in the statement list.
            BlockLevelStatement::Return(_) => panic!("Should never happen"),
            BlockLevelStatement::Import(_) => Ok(()), // TODO: To implement
            BlockLevelStatement::Expression(e) => {
                let _value = e.codegen(scope)?;
                Ok(())
            }
        }
    }
}

impl Expression {
    /// Generate IR computing this expression's value.
    pub fn codegen(&self, scope: &mut Scope) -> Result<IRValue, Error> {
        use Expression::*;
        match self {
            Binop(op, lhs, rhs) => match op {
                BinaryOperator::Add => {
                    let lhs = lhs.codegen(scope)?;
                    let rhs = rhs.codegen(scope)?;
                    Ok(scope.block.add(lhs, rhs)?)
                }
                BinaryOperator::Mult => {
                    let lhs = lhs.codegen(scope)?;
                    let rhs = rhs.codegen(scope)?;
                    Ok(scope.block.mul(lhs, rhs)?)
                }
                _ => panic!("Other binary operators not supported yet!"),
            },
            BlockExpr(block) => {
                // Inner scope: bindings made inside the block don't escape.
                let mut inner = scope.inner();
                Ok(match block.codegen(&mut inner)? {
                    Some((r_type, value)) => match r_type {
                        ReturnType::Soft => value,
                        ReturnType::Hard => {
                            panic!("Hard returns in inner blocks not supported yet")
                        }
                    },
                    None => panic!("Void-return type block not yet implemented!"),
                })
            }
            FunctionCall(fc) => {
                // NOTE: call arguments are currently ignored — only the callee
                // signature is used, and a zero-argument call is emitted.
                let FunctionCallExpression(name, _) = &**fc;
                if let Some((sig, _)) = scope.data.function(name) {
                    Ok(scope.block.function_call(sig)?)
                } else {
                    Err(Error::UndefinedFunction(name.clone()))?
                }
            }
            VariableName(name) => scope
                .data
                .var(name)
                .cloned()
                .ok_or(Error::UndefinedVariable(name.clone())),
            Literal(lit) => Ok(scope.block.get_const(lit)),
            IfExpr(_) => panic!("if expressions not yet supported"),
        }
    }
}
/// Errors produced during IR generation.
#[derive(thiserror::Error, Debug)]
pub enum Error {
    #[error("Variable '{0}' already defined")]
    VariableAlreadyDefined(String),
    #[error("Variable '{0}' not yet defined")]
    UndefinedVariable(String),
    #[error("Function '{0}' not defined")]
    UndefinedFunction(String),
    #[error("Function '{0}' already defined")]
    FunctionAlreadyDefined(String),
    /// Errors bubbled up from the LLVM wrapper layer.
    #[error(transparent)]
    Deeper(#[from] llvm_ir::Error),
}

View File

@ -1,99 +1,7 @@
use std::{fmt::Debug, iter::Peekable, str::Chars};
use std::{fmt::Debug, str::Chars};
static DECIMAL_NUMERICS: &[char] = &['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'];
pub fn tokenize<T: Into<String>>(to_tokenize: T) -> Result<Vec<FullToken>, String> {
let to_tokenize = to_tokenize.into();
let mut position = (0, 1);
let mut cursor = Cursor {
char_stream: to_tokenize.chars().peekable(),
position,
};
let mut tokens = Vec::new();
while let Some(character) = &cursor.next() {
position.0 += 1;
if *character == '\n' {
position.1 += 1;
position.0 = 0;
}
let peek = cursor.peek();
let variant = match character {
// Whitespace
w if w.is_whitespace() => continue,
// Comments
'/' if peek == Some(&'/') => {
while !matches!(&cursor.peek(), Some('\n')) {
cursor.next();
}
continue;
}
// "words"
c if c.is_alphabetic() => {
let mut value = character.to_string();
while let Some(c) = &cursor.peek() {
if !c.is_ascii_alphanumeric() {
break;
}
value += &c.to_string();
cursor.next();
}
// Check for keywords
let variant = match value.as_str() {
"let" => Token::LetKeyword,
"import" => Token::ImportKeyword,
_ => Token::Identifier(value),
};
variant
}
// Decimals
c if DECIMAL_NUMERICS.contains(c) => {
let mut value = character.to_string();
while let Some(c) = &cursor.peek() {
if !DECIMAL_NUMERICS.contains(c) {
break;
}
value += &c.to_string();
cursor.next();
}
Token::DecimalValue(value)
}
// Single character tokens
'=' => Token::Equals,
';' => Token::Semicolon,
':' => Token::Colon,
'+' => Token::Plus,
'*' => Token::Times,
'(' => Token::ParenOpen,
')' => Token::ParenClose,
',' => Token::Comma,
// Invalid token
_ => Err(format!(
"Unknown token '{}' at {}, {}",
character, position.0, position.1
))?,
};
tokens.push(FullToken {
token: variant,
position,
});
}
position.0 += 1;
tokens.push(FullToken {
token: Token::Eof,
position,
});
Ok(tokens)
}
#[derive(Debug, Eq, PartialEq, Clone)]
pub enum Token {
// Values
@ -102,17 +10,49 @@ pub enum Token {
DecimalValue(String),
// Keywords
/// `let`
LetKeyword,
/// `import`
ImportKeyword,
/// `return`
ReturnKeyword,
/// `fn`
FnKeyword,
/// `->`
Arrow,
/// `if`
If,
// Symbols
Semicolon,
/// `;`
Semi,
/// `=`
Equals,
/// `:`
Colon,
/// `+`
Plus,
/// `*`
Times,
ParenOpen, // (
ParenClose, // )
/// `-`
Minus,
/// `>`
GreaterThan,
/// `<`
LessThan,
/// `&`
Et,
/// `(`
ParenOpen,
/// `)`
ParenClose,
/// `{`
BraceOpen,
/// `}`
BraceClose,
/// `,`
Comma,
Eof,
@ -122,15 +62,23 @@ impl Token {
pub fn get_token_prec(&self) -> i8 {
match &self {
Token::Plus => 10,
Token::Minus => 10,
Token::Times => 20,
_ => -1,
}
}
}
impl From<Token> for String {
fn from(value: Token) -> Self {
format!("{:?}", value)
}
}
#[derive(Clone)]
pub struct FullToken {
pub token: Token,
position: Position,
pub position: Position,
}
impl Debug for FullToken {
@ -146,21 +94,127 @@ pub type Position = (u32, u32);
pub struct Cursor<'a> {
pub position: Position,
char_stream: Peekable<Chars<'a>>,
char_stream: Chars<'a>,
}
impl<'a> Cursor<'a> {
fn next(&mut self) -> Option<char> {
let next = self.char_stream.next();
self.position.0 += 1;
if let Some('\n') = next {
self.position.1 += 1;
self.position.0 = 0;
}
self.position.0 += 1;
next
}
fn peek(&mut self) -> Option<&char> {
self.char_stream.peek()
fn first(&mut self) -> Option<char> {
// `.next()` optimizes better than `.nth(0)`
self.char_stream.clone().next()
}
#[allow(dead_code)] // Is this actually needed?
fn second(&mut self) -> Option<char> {
// `.next()` optimizes better than `.nth(1)`
let mut stream = self.char_stream.clone();
stream.next();
stream.next()
}
}
/// Split source text into a flat list of position-tagged tokens, always
/// terminated by a single `Eof` token. Whitespace and `//`-comments are
/// skipped.
pub fn tokenize<T: Into<String>>(to_tokenize: T) -> Result<Vec<FullToken>, Error> {
    let to_tokenize = to_tokenize.into();
    let mut cursor = Cursor {
        char_stream: to_tokenize.chars(),
        // (column, line) — see the InvalidToken error formatting below.
        position: (0, 1),
    };
    let mut tokens = Vec::new();
    while let Some(character) = &cursor.next() {
        let variant = match character {
            // Whitespace
            w if w.is_whitespace() => continue,
            // Comments
            '/' if cursor.first() == Some('/') => {
                while !matches!(cursor.first(), Some('\n') | None) {
                    cursor.next();
                }
                continue;
            }
            // "words"
            c if c.is_alphabetic() => {
                let mut value = character.to_string();
                while let Some(c) = cursor.first() {
                    if !c.is_ascii_alphanumeric() {
                        break;
                    }
                    value += &c.to_string();
                    cursor.next();
                }
                // Check for keywords
                let variant = match value.as_str() {
                    "let" => Token::LetKeyword,
                    "import" => Token::ImportKeyword,
                    "return" => Token::ReturnKeyword,
                    "fn" => Token::FnKeyword,
                    "if" => Token::If,
                    _ => Token::Identifier(value),
                };
                variant
            }
            // Decimals
            c if DECIMAL_NUMERICS.contains(c) => {
                let mut value = character.to_string();
                while let Some(c) = cursor.first() {
                    if !DECIMAL_NUMERICS.contains(&c) {
                        break;
                    }
                    value += &c.to_string();
                    cursor.next();
                }
                Token::DecimalValue(value)
            }
            // `->` must be matched before the single-character `-` below.
            '-' if cursor.first() == Some('>') => {
                cursor.next(); // Eat `>`
                Token::Arrow
            }
            // Single character tokens
            '=' => Token::Equals,
            ';' => Token::Semi,
            ':' => Token::Colon,
            '+' => Token::Plus,
            '*' => Token::Times,
            '-' => Token::Minus,
            '>' => Token::GreaterThan,
            '<' => Token::LessThan,
            '&' => Token::Et,
            '(' => Token::ParenOpen,
            ')' => Token::ParenClose,
            '{' => Token::BraceOpen,
            '}' => Token::BraceClose,
            ',' => Token::Comma,
            // Invalid token
            _ => Err(Error::InvalidToken(*character, cursor.position))?,
        };
        tokens.push(FullToken {
            token: variant,
            position: cursor.position,
        });
    }
    tokens.push(FullToken {
        token: Token::Eof,
        position: cursor.position,
    });
    Ok(tokens)
}

/// Lexing errors.
#[derive(thiserror::Error, Debug)]
pub enum Error {
    // Position is (column, line), hence the swapped indices in the format.
    #[error("Invalid token '{}' at Ln {}, Col {}", .0, (.1).1, (.1).0)]
    InvalidToken(char, Position),
}

46
src/lib.rs Normal file
View File

@ -0,0 +1,46 @@
use crate::{
ast::TopLevelStatement, codegen::codegen_from_statements, lexer::Token,
token_stream::TokenStream,
};
mod ast;
mod codegen;
mod lexer;
mod llvm_ir;
mod token_stream;
// TODO:
// 1. Make it so that TopLevelStatement can only be import or function def
// 2. Make BlockLevelStatement, that has everything TopLevelStatement has now
// 3. Make it so all codegen is done with a Block-struct, that represents a
// single proper block
/// Top-level error type: any stage of compilation (lexing, parsing, codegen)
/// can fail, and each stage's error converts into this transparently.
#[derive(thiserror::Error, Debug)]
pub enum ReidError {
    #[error(transparent)]
    LexerError(#[from] lexer::Error),
    #[error(transparent)]
    ParserError(#[from] token_stream::Error),
    #[error(transparent)]
    CodegenError(#[from] codegen::Error),
}
/// Compile Reid `source` into the textual LLVM IR of the resulting module.
///
/// # Errors
/// Returns a [`ReidError`] wrapping whichever lexer, parser or codegen error
/// stopped compilation.
pub fn compile(source: &str) -> Result<String, ReidError> {
    let tokens = lexer::tokenize(source)?;
    let mut token_stream = TokenStream::from(&tokens);

    // Parse top-level statements until the Eof token.
    // (dbg! debugging residue removed — it spammed stderr from library code.)
    let mut statements = Vec::new();
    while !matches!(token_stream.peek().unwrap_or(Token::Eof), Token::Eof) {
        let statement = token_stream.parse::<TopLevelStatement>()?;
        statements.push(statement);
    }

    let mut module = codegen_from_statements(statements)?;
    // LLVM's module printout is ASCII; non-UTF-8 output would be a bug.
    let text = module
        .print_to_string()
        .expect("LLVM module printout was not valid UTF-8");
    Ok(text.to_owned())
}

203
src/llvm_ir.rs Normal file
View File

@ -0,0 +1,203 @@
use std::ffi::{CStr, CString};
use std::mem;
use llvm_sys::{core::*, prelude::*, LLVMBuilder, LLVMContext, LLVMModule};
use crate::ast::{FunctionSignature, Literal};
/// Produce a `*const c_char` for a string literal by appending the NUL
/// terminator at compile time.
macro_rules! cstr {
    ($string:expr) => {
        core::ffi::CStr::from_bytes_with_nul_unchecked(concat!($string, "\0").as_bytes()).as_ptr()
    };
}

/// A typed LLVM value: our own type tag plus the raw `LLVMValueRef`.
#[derive(Clone, Debug)]
#[must_use = "value contains raw pointer and must be inserted somewhere"]
pub struct IRValue(IRValueType, LLVMValueRef);

/// The value types this IR layer knows how to emit.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum IRValueType {
    I32,
}

impl IRValueType {
    /// Map to the corresponding LLVM type in this module's context.
    unsafe fn get_llvm_type(&self, module: &mut IRModule) -> LLVMTypeRef {
        match *self {
            Self::I32 => LLVMInt32TypeInContext(module.context),
        }
    }
}

/// Convert a Rust string into an owned, NUL-terminated CString.
/// NOTE(review): `from_vec_with_nul_unchecked` means an interior `'\0'` in
/// the input would yield an invalid CString — confirm inputs (identifiers,
/// module names) can never contain NUL.
fn into_cstring<T: Into<String>>(value: T) -> CString {
    let string = value.into();
    unsafe { CString::from_vec_with_nul_unchecked((string + "\0").into_bytes()) }
}
/// Owner of the LLVM context/module/builder triple; all generated IR lives
/// inside this, and Drop releases the three in the right order.
pub struct IRModule {
    context: *mut LLVMContext,
    module: *mut LLVMModule,
    builder: *mut LLVMBuilder,
}

impl IRModule {
    pub fn new<T: Into<String>>(name: T) -> IRModule {
        unsafe {
            // Set up a context, module and builder in that context.
            let context = LLVMContextCreate();
            let module = LLVMModuleCreateWithNameInContext(into_cstring(name).as_ptr(), context);
            let builder = LLVMCreateBuilderInContext(context);
            IRModule {
                context,
                module,
                builder,
            }
        }
    }

    /// Create a new basic block (always named "entry") and position the
    /// builder at its end.
    pub fn create_block(&mut self) -> IRBlock {
        IRBlock::create("entry", self)
    }

    /// Declare a function in the module.
    /// NOTE(review): the function type is always built with zero parameters
    /// (`argts` is empty) regardless of the Reid signature — confirm this is
    /// intentional until argument codegen exists.
    pub fn create_func<T: Into<String>>(
        &mut self,
        name: T,
        return_type: IRValueType,
    ) -> IRFunction {
        unsafe {
            let mut argts = [];
            let func_type = LLVMFunctionType(
                return_type.get_llvm_type(self),
                argts.as_mut_ptr(),
                argts.len() as u32,
                0,
            );
            let anon_func = LLVMAddFunction(self.module, into_cstring(name).as_ptr(), func_type);
            IRFunction {
                value: IRValue(return_type, anon_func),
            }
        }
    }

    /// Render the module as LLVM IR text.
    /// NOTE(review): the buffer from LLVMPrintModuleToString is never
    /// disposed, so this leaks — confirm/fix with LLVMDisposeMessage.
    pub fn print_to_string(&mut self) -> Result<&str, std::str::Utf8Error> {
        unsafe { CStr::from_ptr(LLVMPrintModuleToString(self.module)).to_str() }
    }
}

impl Drop for IRModule {
    fn drop(&mut self) {
        // Clean up. Values created in the context mostly get cleaned up there.
        unsafe {
            LLVMDisposeBuilder(self.builder);
            LLVMDisposeModule(self.module);
            LLVMContextDispose(self.context);
        }
    }
}
/// Handle to a declared LLVM function; wraps the declared return type and
/// the LLVM function value.
#[derive(Clone, Debug)]
pub struct IRFunction {
    value: IRValue,
}

impl IRFunction {
    /// Attach `block` as the function's body and emit `ret` of the value.
    /// NOTE(review): assumes the module's builder is still positioned at the
    /// end of `block` — confirm no other block was created in between.
    pub fn add_definition(self, ret: IRValue, block: IRBlock) {
        unsafe {
            LLVMAppendExistingBasicBlock(self.value.1, block.blockref);
            LLVMBuildRet(block.module.builder, ret.1);
        }
    }
}
pub struct IRBlock<'a> {
module: &'a mut IRModule,
blockref: LLVMBasicBlockRef,
}
impl<'a> IRBlock<'a> {
fn create<T: Into<String>>(name: T, codegen: &'a mut IRModule) -> IRBlock<'a> {
unsafe {
let blockref =
LLVMCreateBasicBlockInContext(codegen.context, into_cstring(name).as_ptr());
LLVMPositionBuilderAtEnd(codegen.builder, blockref);
IRBlock {
module: codegen,
blockref,
}
}
}
pub fn get_const(&mut self, literal_type: &Literal) -> IRValue {
unsafe {
match *literal_type {
Literal::I32(v) => IRValue(
IRValueType::I32,
LLVMConstInt(
LLVMInt32TypeInContext(self.module.context),
mem::transmute(v as i64),
1,
),
),
}
}
}
pub fn add(&mut self, lhs: IRValue, rhs: IRValue) -> Result<IRValue, Error> {
unsafe {
if lhs.0 == rhs.0 {
Ok(IRValue(
lhs.0,
LLVMBuildAdd(self.module.builder, lhs.1, rhs.1, cstr!("tmpadd")),
))
} else {
Err(Error::TypeMismatch(lhs.0, rhs.0))
}
}
}
pub fn mul(&mut self, lhs: IRValue, rhs: IRValue) -> Result<IRValue, Error> {
unsafe {
if lhs.0 == rhs.0 {
Ok(IRValue(
lhs.0,
LLVMBuildMul(self.module.builder, lhs.1, rhs.1, cstr!("tmpadd")),
))
} else {
Err(Error::TypeMismatch(lhs.0, rhs.0))
}
}
}
pub fn function_call(&mut self, callee: &FunctionSignature) -> Result<IRValue, Error> {
unsafe {
let function = LLVMGetNamedFunction(
self.module.module,
into_cstring(callee.name.clone()).as_ptr(),
);
let ret_t = LLVMInt32TypeInContext(self.module.context);
let mut argts = [];
let mut args = [];
let fun_t = LLVMFunctionType(ret_t, argts.as_mut_ptr(), argts.len() as u32, 0);
let call = LLVMBuildCall2(
self.module.builder,
fun_t,
function,
args.as_mut_ptr(),
args.len() as u32,
into_cstring(&callee.name).as_ptr(),
);
Ok(IRValue(IRValueType::I32, call))
}
}
}
/// Errors from IR construction.
#[derive(Debug, thiserror::Error)]
pub enum Error {
    /// Binary operation attempted on operands of differing types.
    #[error("Type Mismatch: {0:?} {1:?}")]
    TypeMismatch(IRValueType, IRValueType),
}

View File

@ -1,24 +0,0 @@
use crate::{lexer::Token, parser::TopLevelStatement, token_stream::TokenStream};
pub static EASIEST: &str = include_str!("../reid/easiest.reid");
pub static EASY: &str = include_str!("../reid/easy.reid");
pub static MEDIUM: &str = include_str!("../reid/medium.reid");
pub static HARD: &str = include_str!("../reid/hard.reid");
mod lexer;
mod parser;
mod token_stream;
fn main() {
let tokens = lexer::tokenize(EASY).unwrap();
dbg!(&tokens);
let mut token_stream = TokenStream::from(&tokens);
while let Ok(statement) = token_stream.parse::<TopLevelStatement>() {
dbg!(&statement);
}
dbg!(token_stream.expect(Token::Eof).ok());
}

View File

@ -1,176 +0,0 @@
use crate::{lexer::Token, token_stream::TokenStream};
pub trait Parse
where
Self: std::marker::Sized,
{
fn parse(stream: TokenStream) -> Result<Self, ()>;
}
#[derive(Debug, Clone)]
pub enum Expression {
VariableName(String),
ContantI32(i32),
BinopAdd(Box<Expression>, Box<Expression>),
BinopMult(Box<Expression>, Box<Expression>),
FunctionCall(Box<FunctionCallExpression>),
}
impl Parse for Expression {
fn parse(mut stream: TokenStream) -> Result<Expression, ()> {
let lhs = parse_primary_expression(&mut stream)?;
parse_binop_rhs(&mut stream, lhs, 0)
}
}
/// Parses a primary (non-binop) expression: a function call, a variable
/// reference, or an i32 literal.
///
/// Fix: the original used `v.parse().unwrap()`, which panics on a decimal
/// literal that does not fit in i32; such literals now produce a parse error
/// instead of crashing the compiler.
fn parse_primary_expression(stream: &mut TokenStream) -> Result<Expression, ()> {
    if let Ok(exp) = stream.parse() {
        // A call also starts with an identifier, so it must be tried first.
        Ok(Expression::FunctionCall(Box::new(exp)))
    } else if let Some(token) = stream.next() {
        Ok(match &token {
            Token::Identifier(v) => Expression::VariableName(v.clone()),
            // Out-of-range literals become Err instead of a panic.
            Token::DecimalValue(v) => Expression::ContantI32(v.parse().map_err(|_| ())?),
            _ => Err(())?, // TODO: Add error reporting!
        })
    } else {
        Err(()) // TODO: Add error reporting!
    }
}
/// Parses the operator-and-right-operand tail of a binary expression using
/// operator-precedence climbing. `lhs` is the already-parsed left operand;
/// `expr_prec` is the minimum operator precedence this call may consume —
/// lower-precedence operators are left for the caller.
///
/// This algorithm seems somewhat like magic to me. I understand it if I read
/// carefully, but it is difficult to read every single time.
///
/// Reference for how the algorithm is formed:
/// https://llvm.org/docs/tutorial/MyFirstLanguageFrontend/LangImpl02.html#binary-expression-parsing
fn parse_binop_rhs(
    stream: &mut TokenStream,
    mut lhs: Expression,
    expr_prec: i8,
) -> Result<Expression, ()> {
    while let Some(token) = stream.peek() {
        let curr_token_prec = token.get_token_prec();
        // NOTE(review): presumably non-operator tokens get a precedence below
        // any expr_prec so the loop exits on them — confirm get_token_prec.
        if curr_token_prec < expr_prec {
            break; // Just return lhs
        } else {
            // token has to be an operator
            stream.next(); // Eat token
            let mut rhs = parse_primary_expression(stream)?;
            if let Some(next_op) = stream.peek() {
                let next_prec = next_op.get_token_prec();
                if curr_token_prec < next_prec {
                    // Operator on the right of rhs has more precedence, turn
                    // rhs into lhs for new binop
                    rhs = parse_binop_rhs(stream, rhs, curr_token_prec + 1)?;
                }
            }
            // Fold lhs and rhs into one node and keep looping on the tail.
            lhs = match &token {
                Token::Plus => Expression::BinopAdd(Box::new(lhs), Box::new(rhs)),
                Token::Times => Expression::BinopMult(Box::new(lhs), Box::new(rhs)),
                _ => Err(())?, // TODO: Add error reporting!
            };
        }
    }
    Ok(lhs)
}
/// A function call: callee name and the argument expressions, in order.
#[derive(Debug, Clone)]
pub struct FunctionCallExpression(String, Vec<Expression>);
impl Parse for FunctionCallExpression {
    /// Parses `name ( arg , arg , … )`; the argument list may be empty.
    fn parse(mut stream: TokenStream) -> Result<Self, ()> {
        // Guard clause: a call must begin with the callee's identifier.
        let callee = match stream.next() {
            Some(Token::Identifier(ident)) => ident,
            _ => return Err(()), // TODO: Add error reporting!
        };
        stream.expect(Token::ParenOpen)?;
        let mut arguments = Vec::new();
        // The first argument is optional; further ones are comma-separated.
        if let Ok(first) = stream.parse() {
            arguments.push(first);
            while stream.expect(Token::Comma).is_ok() {
                arguments.push(stream.parse()?);
            }
        }
        stream.expect(Token::ParenClose)?;
        Ok(FunctionCallExpression(callee, arguments))
    }
}
/// A statement allowed at the top level of a source file.
#[derive(Debug)]
pub enum TopLevelStatement {
    /// `let name = expr;`
    Let(LetStatement),
    /// `import path::to::item;`
    Import(ImportStatement),
    /// A bare expression terminated by `;`.
    TLExpression(Expression),
}
impl Parse for TopLevelStatement {
    /// Dispatches on the next token: `let` and `import` introduce their
    /// statements; anything else is parsed as an expression followed by `;`.
    fn parse(mut stream: TokenStream) -> Result<Self, ()> {
        match stream.peek() {
            Some(Token::LetKeyword) => Ok(TopLevelStatement::Let(stream.parse()?)),
            Some(Token::ImportKeyword) => Ok(TopLevelStatement::Import(stream.parse()?)),
            _ => match stream.parse() {
                Ok(expression) => {
                    stream.expect(Token::Semicolon)?;
                    Ok(TopLevelStatement::TLExpression(expression))
                }
                Err(_) => Err(()), // TODO: Add error reporting!
            },
        }
    }
}
/// `let <name> = <expression>;` — variable name and its initializer.
#[derive(Debug)]
pub struct LetStatement(String, Expression);
impl Parse for LetStatement {
    /// Parses `let <identifier> = <expression> ;`.
    fn parse(mut stream: TokenStream) -> Result<LetStatement, ()> {
        stream.expect(Token::LetKeyword)?;
        match stream.next() {
            Some(Token::Identifier(name)) => {
                stream.expect(Token::Equals)?;
                let value = stream.parse()?;
                stream.expect(Token::Semicolon)?;
                Ok(LetStatement(name, value))
            }
            _ => Err(()), // TODO: Add error reporting!
        }
    }
}
/// `import a::b::c;` — the `::`-separated path segments, in order.
#[derive(Debug)]
pub struct ImportStatement(Vec<String>);
impl Parse for ImportStatement {
    /// Parses `import path::to::item ;`.
    ///
    /// Fix: the original loop condition was
    /// `expect(Colon).is_ok() && expect(Colon).is_ok()`, which consumed a
    /// lone `:` and swallowed the error — e.g. `import a:;` silently parsed
    /// as `import a;`. A `:` must now be followed by a second `:`, otherwise
    /// parsing fails.
    fn parse(mut stream: TokenStream) -> Result<Self, ()> {
        stream.expect(Token::ImportKeyword)?;
        let mut import_list = Vec::new();
        if let Some(Token::Identifier(name)) = stream.next() {
            import_list.push(name);
            while stream.expect(Token::Colon).is_ok() {
                // A single colon is a syntax error, not the end of the path.
                stream.expect(Token::Colon)?;
                if let Some(Token::Identifier(name)) = stream.next() {
                    import_list.push(name);
                } else {
                    Err(())? // TODO: Add error reporting!
                }
            }
        } else {
            Err(())? // TODO: Add error reporting!
        }
        stream.expect(Token::Semicolon)?;
        Ok(ImportStatement(import_list))
    }
}

View File

@ -1,6 +1,6 @@
use crate::{
lexer::{FullToken, Token},
parser::Parse,
ast::Parse,
lexer::{FullToken, Position, Token},
};
pub struct TokenStream<'a, 'b> {
@ -18,16 +18,24 @@ impl<'a, 'b> TokenStream<'a, 'b> {
}
}
pub fn expect(&mut self, token: Token) -> Result<(), ()> {
/// Builds an `Error::Expected` describing what was expected versus the token
/// actually found at the current position. The outer `Result` is `Err` only
/// when the source contains no tokens at all (no position to report).
pub fn expected_err<T: Into<String>>(&mut self, expected: T) -> Result<Error, Error> {
    Ok(Error::Expected(
        expected.into(),
        // Past the end of input there is no token; report Eof instead.
        self.peek().unwrap_or(Token::Eof),
        self.get_next_position()?,
    ))
}
/// Consumes the next token if it equals `token`; otherwise leaves the
/// stream position unchanged and returns an `Error::Expected` describing
/// the mismatch (or end of input).
///
/// Fix: the diff extraction left both the removed `Err(())` lines and their
/// replacements in this span; this is the reconstructed post-change body.
pub fn expect(&mut self, token: Token) -> Result<(), Error> {
    if let Some(peeked) = self.peek() {
        if token == peeked {
            self.position += 1;
            Ok(())
        } else {
            Err(self.expected_err(token)?)
        }
    } else {
        Err(self.expected_err(token)?)
    }
}
@ -49,7 +57,7 @@ impl<'a, 'b> TokenStream<'a, 'b> {
}
}
pub fn parse<T: Parse>(&mut self) -> Result<T, ()> {
pub fn parse<T: Parse>(&mut self) -> Result<T, Error> {
let mut ref_pos = self.position;
let position = self.position;
@ -67,6 +75,15 @@ impl<'a, 'b> TokenStream<'a, 'b> {
Err(e) => Err(e),
}
}
/// Returns the source position of the token at the current stream position,
/// clamped to the last token once the stream is exhausted, or
/// `Error::FileEmpty` when there are no tokens at all.
fn get_next_position(&self) -> Result<Position, Error> {
    if self.tokens.is_empty() {
        Err(Error::FileEmpty)
    } else {
        // Clamp so a sensible location can still be reported at/past Eof.
        let token_idx = self.position.min(self.tokens.len() - 1);
        Ok(self.tokens[token_idx].position)
    }
}
}
impl Drop for TokenStream<'_, '_> {
@ -76,3 +93,11 @@ impl Drop for TokenStream<'_, '_> {
}
}
}
/// Errors produced while consuming the token stream.
#[derive(thiserror::Error, Debug)]
pub enum Error {
    /// What was expected, the token actually found, and where.
    /// NOTE(review): the format prints (.2).1 as Ln and (.2).0 as Col, which
    /// assumes Position is (column, line) — confirm against the lexer.
    #[error("Expected {} at Ln {}, Col {}, got {:?}", .0, (.2).1, (.2).0, .1)]
    Expected(String, Token, Position),
    /// The source file produced no tokens at all.
    #[error("Source file contains no tokens")]
    FileEmpty,
}