Compare commits

..

No commits in common. "main" and "42e74f49f8aad5e222991cd33df13c905109dc81" have entirely different histories.

19 changed files with 323 additions and 1295 deletions

2
.gitignore vendored
View File

@ -1,3 +1 @@
/target
/.vscode
.env

140
Cargo.lock generated
View File

@ -2,146 +2,6 @@
# It is not intended for manual editing.
version = 3
[[package]]
name = "aho-corasick"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43f6cb1bf222025340178f382c426f13757b2960e89779dfcb319c32542a5a41"
dependencies = [
"memchr",
]
[[package]]
name = "cc"
version = "1.0.79"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f"
[[package]]
name = "lazy_static"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "libc"
version = "0.2.147"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3"
[[package]]
name = "llvm-sys"
version = "160.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0bf51981ac0622b10fe4790763e3de1f3d68a0ee4222e03accaaab6731bd508d"
dependencies = [
"cc",
"lazy_static",
"libc",
"regex",
"semver",
]
[[package]]
name = "memchr"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
[[package]]
name = "proc-macro2"
version = "1.0.66"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "50f3b39ccfb720540debaa0164757101c08ecb8d326b15358ce76a62c7e85965"
dependencies = [
"proc-macro2",
]
[[package]]
name = "regex"
version = "1.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b2eae68fc220f7cf2532e4494aded17545fce192d59cd996e0fe7887f4ceb575"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b7b6d6190b7594385f61bd3911cd1be99dfddcfc365a4160cc2ab5bff4aed294"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5ea92a5b6195c6ef2a0295ea818b312502c6fc94dde986c5553242e18fd4ce2"
[[package]]
name = "reid"
version = "0.1.0"
dependencies = [
"llvm-sys",
"thiserror",
]
[[package]]
name = "semver"
version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b0293b4b29daaf487284529cc2f5675b8e57c61f70167ba415a463651fd6a918"
[[package]]
name = "syn"
version = "2.0.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "04361975b3f5e348b2189d8dc55bc942f278b2d482a6a0365de5bdd62d351567"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "thiserror"
version = "1.0.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "611040a08a0439f8248d1990b111c95baa9c704c805fa1f62104b39655fd7f90"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "090198534930841fab3a5d1bb637cde49e339654e606195f8d9c76eeb081dc96"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "unicode-ident"
version = "1.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c"

View File

@ -6,7 +6,3 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
## LLVM Bindings
llvm-sys = "160"
## Make it easier to generate errors
thiserror = "1.0.44"

View File

@ -1,61 +0,0 @@
# Reid-LLVM
Attempt at re-creating Reid, this time using LLVM.
## Various notes in order to get this working properly
This is what worked for me; it may well not work for you, depending on the
versions of the various libraries you have installed.
### Compiling LLVM 16.0.0
#### Context
Context for my computer: I am on Arch Linux. Below are some libraries and the
versions I had installed at the time of compiling. I'm not sure which of them
are relevant, if any, but saving them here still feels like a good idea for
the future:
- `cmake 3.27.0-1`
- `lib32-llvm-libs 15.0.7-1`
- `llvm 15.0.7-3`
- `llvm-libs 15.0.7-3`
- `gcc 13.1.1-2`
- `gcc-libs 13.1.1-2`
- `lib32-gcc-libs 13.1.1-2`
- `lld 15.0.7-2`
- `lldb 15.0.7-3`
- `clang 15.0.7-9`
- `make 4.4.1-2`
- `automake 1.16.5-2`
#### Commands
```sh
wget https://github.com/llvm/llvm-project/releases/download/llvmorg-16.0.0/llvm-16.0.0.src.tar.xz
wget https://github.com/llvm/llvm-project/releases/download/llvmorg-16.0.0/cmake-16.0.0.src.tar.xz
tar xvf llvm-16.0.0.src.tar.xz
tar xvf cmake-16.0.0.src.tar.xz
mv cmake-16.0.0.src cmake
cd llvm-16.0.0.src
cmake -B build -DCMAKE_INSTALL_PREFIX=$HOME/llvm-16 -DCMAKE_BUILD_TYPE=MinSizeRel -DLLVM_ENABLE_ASSERTIONS=ON -DLLVM_INCLUDE_TESTS=OFF
make -j8
```
*Also note:* Building LLVM with `Ninja` was not successful for me, but this
method was. Ninja may work for you; to try it, add `-G Ninja` to the
`cmake` command, and run `ninja install` instead of `make`.
### Building this crate itself
Assuming `llvm-16.0.0.src` from the previous step was at
`/path/llvm-16.0.0.src`, building this crate can be done via the following command:
```sh
LLVM_SYS_160_PREFIX=/path/llvm-16.0.0.src/build cargo build
```
## In conclusion
Good luck! It took me a good 10 hours to figure this out for myself. I sure
hope these instructions help both me and someone else in the future!

4
easiest.reid Normal file
View File

@ -0,0 +1,4 @@
// Hello, comment here!
// Smallest sample: two `let` bindings and nothing else.
let hello = 32;
// The binding below is split across two lines with a space before `;`
// (presumably to exercise whitespace handling in the lexer - confirm).
let beep =
hello ;

12
easy.reid Normal file
View File

@ -0,0 +1,12 @@
// Arithmetic, function calls and imports!
import std::print;
let test = 5;
let simpleAdd = 2 + 2;
// Mixes `+` and `*` so that operator precedence matters.
let arithmetic = 3 + 2 * 5 + 1 * 2;
let multiplier = 5 * 2;
let result = arithmetic + multiplier * arithmetic;
print(result);
// NOTE(review): `function`, `one` and `two` are not defined anywhere in this
// file; this line only exercises call syntax with several arguments.
function(one, two);

View File

@ -1,11 +0,0 @@
use reid::compile;
pub static EASIEST: &str = include_str!("./reid/easiest.reid");
/// Compiles the bundled `EASIEST` sample and prints the produced text.
///
/// Panics with the error's `Display` output if compilation fails.
fn main() {
    match compile(EASIEST) {
        Ok(output) => println!("{}", output),
        Err(error) => panic!("{}", error),
    }
}

View File

@ -1,20 +0,0 @@
// Hello, comment here!
import std::print;
// Entry point: uses a block expression as an operand and calls `fibonacci`.
fn main() {
let hello = 32 + {
2 + 3
};
// NOTE(review): `fibonacci` is called without an argument here although its
// signature below declares `value: i32` - confirm whether this is intended.
let beep = hello + fibonacci();
return beep;
}
// Fibonacci
// Recursive definition: returns 1 for any `value` below 3.
fn fibonacci(value: i32) -> i32 {
if value < 3 {
return 1;
}
return fibonacci(value - 1) + fibonacci(value - 2);
}

View File

@ -1,12 +0,0 @@
// Arithmetic, function calls and imports!
import std::print;
// Entry point: a handful of bindings followed by a hard `return`.
fn main() {
let test = 5;
let simpleAdd = 2 + 2;
// Mixes `+` and `*` so that operator precedence matters.
let arithmetic = 3 + 2 * 5 + 1 * 2;
let multiplier = 5 * 2;
return arithmetic + multiplier * arithmetic;
}

View File

@ -1,388 +0,0 @@
use crate::{
lexer::Token,
token_stream::{Error, TokenStream},
};
/// Conversion from a token stream into an AST node.
///
/// Implementors receive the `TokenStream` by value and return either the
/// parsed node or a token-stream `Error`.
pub trait Parse
where
Self: std::marker::Sized,
{
/// Parses one `Self` from `stream`.
fn parse(stream: TokenStream) -> Result<Self, Error>;
}
/// Type names that can appear in source code.
#[derive(Debug, Clone)]
pub enum Type {
/// The type written as `i32`.
I32,
}
impl Parse for Type {
    /// Parses a type name.
    ///
    /// Only the identifier `i32` is recognised. Any other token, including an
    /// identifier that names no known type, yields the stream's
    /// "type identifier" expectation error instead of panicking.
    fn parse(mut stream: TokenStream) -> Result<Self, Error> {
        match stream.next() {
            Some(Token::Identifier(ident)) if ident == "i32" => Ok(Type::I32),
            // Unknown identifiers used to hit `panic!("asd")`; report them the
            // same way as any other unexpected token.
            _ => Err(stream.expected_err("type identifier")?),
        }
    }
}
/// Literal values that can appear in an expression.
#[derive(Debug, Clone)]
pub enum Literal {
/// A 32-bit signed integer literal.
I32(i32),
}
/// An expression node of the AST.
#[derive(Debug, Clone)]
pub enum Expression {
/// A bare identifier.
VariableName(String),
/// A literal value.
Literal(Literal),
/// A binary operation: operator, left operand, right operand.
Binop(BinaryOperator, Box<Expression>, Box<Expression>),
/// A call such as `name(arg, ...)`.
FunctionCall(Box<FunctionCallExpression>),
/// A `{ ... }` block used in expression position.
BlockExpr(Box<Block>),
/// An `if <condition> { ... }` expression.
IfExpr(Box<IfExpression>),
}
impl Parse for Expression {
    /// Parses a full expression: one primary expression followed by any
    /// binary-operator tail that `parse_binop_rhs` accepts.
    fn parse(mut stream: TokenStream) -> Result<Expression, Error> {
        let primary = parse_primary_expression(&mut stream)?;
        parse_binop_rhs(&mut stream, primary, None)
    }
}
/// Parses one operand of an expression.
///
/// Alternatives are tried in this order: function call, block expression,
/// `if` expression, and finally a single token that is an identifier, a
/// decimal literal or a parenthesised expression.
///
/// # Errors
///
/// Returns the stream's expectation error when no alternative matches, when
/// a parenthesised expression is malformed, or when a decimal literal does
/// not fit in an `i32`.
fn parse_primary_expression(stream: &mut TokenStream) -> Result<Expression, Error> {
    if let Ok(exp) = stream.parse() {
        Ok(Expression::FunctionCall(Box::new(exp)))
    } else if let Ok(block) = stream.parse() {
        Ok(Expression::BlockExpr(Box::new(block)))
    } else if let Ok(ifexpr) = stream.parse() {
        Ok(Expression::IfExpr(Box::new(ifexpr)))
    } else if let Some(token) = stream.next() {
        Ok(match &token {
            Token::Identifier(v) => Expression::VariableName(v.clone()),
            // The lexer only guarantees that `v` consists of digits, so it may
            // still overflow an `i32`. That is user input, not a bug: report
            // it as a parse error instead of unwrapping.
            Token::DecimalValue(v) => match v.parse::<i32>() {
                Ok(value) => Expression::Literal(Literal::I32(value)),
                Err(_) => Err(stream.expected_err("32-bit integer literal")?)?,
            },
            Token::ParenOpen => {
                let exp = stream.parse()?;
                stream.expect(Token::ParenClose)?;
                exp
            }
            _ => Err(stream.expected_err("identifier, constant or parentheses")?)?,
        })
    } else {
        Err(stream.expected_err("expression")?)?
    }
}
/// Folds a run of `<operator> <primary expression>` pairs onto `lhs`,
/// producing nested `Expression::Binop` nodes.
///
/// `operator` is an operator that the caller has already taken from the
/// stream; pass `None` when the next operator (if any) is still in the
/// stream. Returns the combined expression, or the first error raised while
/// parsing an operand.
///
/// NOTE(review): when `operator` is `Some(op)`, `expr_prec` becomes
/// `op.get_precedence() + 1`, and the first loop iteration takes that same
/// `op`, whose precedence is below `expr_prec`. The loop then breaks at once
/// and `lhs` is returned with `op` dropped - confirm this is intended.
///
/// Reference for how the algorithm is formed:
/// https://llvm.org/docs/tutorial/MyFirstLanguageFrontend/LangImpl02.html#binary-expression-parsing
fn parse_binop_rhs(
stream: &mut TokenStream,
mut lhs: Expression,
mut operator: Option<BinaryOperator>,
) -> Result<Expression, Error> {
// Minimum precedence an operator needs in order to be folded by this call.
let expr_prec = if let Some(op) = operator {
op.get_precedence() + 1
} else {
0
};
// Use the pending operator if there is one, otherwise read one from the
// stream. NOTE(review): `Option::or` evaluates its argument eagerly, so
// `stream.parse()` is attempted even when a pending operator exists.
while let Some(op) = operator.take().as_ref().or(stream.parse().as_ref().ok()) {
let curr_token_prec = op.get_precedence();
if curr_token_prec < expr_prec {
break; // Just return lhs
} else {
let mut rhs = parse_primary_expression(stream)?;
// Look one operator ahead to decide which side `rhs` binds to.
if let Ok(next_op) = stream.parse::<BinaryOperator>() {
let next_prec = next_op.get_precedence();
if curr_token_prec < next_prec {
// Operator on the right of rhs has more precedence, turn
// rhs into lhs for new binop
rhs = parse_binop_rhs(stream, rhs, Some(next_op))?;
} else {
// Same or lower precedence: keep `next_op` pending for the next
// iteration of this loop.
let _ = operator.insert(next_op);
}
}
lhs = Expression::Binop(*op, Box::new(lhs), Box::new(rhs));
}
}
Ok(lhs)
}
/// Binary operators recognised by the parser.
#[derive(Debug, Clone, Copy)]
pub enum BinaryOperator {
/// `+`
Add,
/// `-`
Minus,
/// `*`
Mult,
/// `&&`
And,
/// `<`
LessThan,
}
impl Parse for BinaryOperator {
    /// Parses one binary operator.
    ///
    /// `&&` is built from two consecutive `Et` tokens; a single `&` is not an
    /// operator and produces the expectation error.
    fn parse(mut stream: TokenStream) -> Result<Self, Error> {
        let current = stream.next();
        let upcoming = stream.peek();
        let operator = match (current, upcoming) {
            (Some(Token::Et), Some(Token::Et)) => {
                // Consume the second `&` that was only peeked so far.
                stream.next();
                BinaryOperator::And
            }
            (Some(Token::LessThan), _) => BinaryOperator::LessThan,
            (Some(Token::Plus), _) => BinaryOperator::Add,
            (Some(Token::Minus), _) => BinaryOperator::Minus,
            (Some(Token::Times), _) => BinaryOperator::Mult,
            _ => return Err(stream.expected_err("expected operator")?),
        };
        Ok(operator)
    }
}
impl BinaryOperator {
pub fn get_precedence(&self) -> i8 {
use BinaryOperator::*;
match &self {
Add => 10,
Minus => 10,
Mult => 20,
And => 100,
LessThan => 100,
}
}
}
/// A function call: the callee name and its argument expressions.
#[derive(Debug, Clone)]
pub struct FunctionCallExpression(pub String, pub Vec<Expression>);

impl Parse for FunctionCallExpression {
    /// Parses `name ( [expr {, expr}] )`.
    fn parse(mut stream: TokenStream) -> Result<Self, Error> {
        let name = match stream.next() {
            Some(Token::Identifier(name)) => name,
            _ => return Err(stream.expected_err("identifier")?),
        };
        stream.expect(Token::ParenOpen)?;

        let mut args = Vec::new();
        // The first argument is optional; every later one must follow a comma.
        if let Ok(first) = stream.parse() {
            args.push(first);
            while stream.expect(Token::Comma).is_ok() {
                args.push(stream.parse()?);
            }
        }

        stream.expect(Token::ParenClose)?;
        Ok(FunctionCallExpression(name, args))
    }
}
/// An `if` expression: the condition followed by the block to run.
#[derive(Debug, Clone)]
pub struct IfExpression(Expression, pub Block);

impl Parse for IfExpression {
    /// Parses `if <expression> <block>`.
    fn parse(mut stream: TokenStream) -> Result<Self, Error> {
        stream.expect(Token::If)?;
        let condition = stream.parse()?;
        let body = stream.parse()?;
        Ok(IfExpression(condition, body))
    }
}
/// A `let` binding: the variable name and the expression bound to it.
#[derive(Debug, Clone)]
pub struct LetStatement(pub String, pub Expression);

impl Parse for LetStatement {
    /// Parses `let <identifier> = <expression> ;`.
    fn parse(mut stream: TokenStream) -> Result<LetStatement, Error> {
        stream.expect(Token::LetKeyword)?;
        match stream.next() {
            Some(Token::Identifier(variable)) => {
                stream.expect(Token::Equals)?;
                let expression = stream.parse()?;
                stream.expect(Token::Semi)?;
                Ok(LetStatement(variable, expression))
            }
            _ => Err(stream.expected_err("identifier")?),
        }
    }
}
/// An `import` statement, stored as the list of path segments.
#[derive(Debug, Clone)]
pub struct ImportStatement(Vec<String>);

impl Parse for ImportStatement {
    /// Parses `import <identifier> {:: <identifier>} ;`.
    fn parse(mut stream: TokenStream) -> Result<Self, Error> {
        stream.expect(Token::ImportKeyword)?;

        let mut segments = match stream.next() {
            Some(Token::Identifier(first)) => vec![first],
            _ => return Err(stream.expected_err("identifier")?),
        };

        // `::` reaches the parser as two separate `Colon` tokens.
        while stream.expect(Token::Colon).is_ok() && stream.expect(Token::Colon).is_ok() {
            match stream.next() {
                Some(Token::Identifier(segment)) => segments.push(segment),
                _ => return Err(stream.expected_err("identifier")?),
            }
        }

        stream.expect(Token::Semi)?;
        Ok(ImportStatement(segments))
    }
}
/// A function definition: its signature and its body block.
#[derive(Debug)]
pub struct FunctionDefinition(pub FunctionSignature, pub Block);

impl Parse for FunctionDefinition {
    /// Parses `fn <signature> <block>`.
    fn parse(mut stream: TokenStream) -> Result<Self, Error> {
        stream.expect(Token::FnKeyword)?;
        let signature = stream.parse()?;
        let body = stream.parse()?;
        Ok(FunctionDefinition(signature, body))
    }
}
/// The name, parameters and optional return type of a function.
#[derive(Debug, Clone)]
pub struct FunctionSignature {
    /// Function name.
    pub name: String,
    /// Parameters as `(name, type)` pairs, in declaration order.
    pub args: Vec<(String, Type)>,
    /// Declared return type, if an `->` clause was present.
    pub return_type: Option<Type>,
}

impl Parse for FunctionSignature {
    /// Parses `<identifier> ( {<identifier> : <type>} ) [-> <type>]`.
    ///
    /// No separator token is consumed between parameters.
    fn parse(mut stream: TokenStream) -> Result<Self, Error> {
        let name = match stream.next() {
            Some(Token::Identifier(name)) => name,
            _ => return Err(stream.expected_err("identifier")?),
        };
        stream.expect(Token::ParenOpen)?;

        let mut args = Vec::new();
        while let Some(Token::Identifier(arg_name)) = stream.peek() {
            // The name was only peeked; consume it before the `: type` part.
            stream.next();
            stream.expect(Token::Colon)?;
            let arg_type = stream.parse()?;
            args.push((arg_name, arg_type));
        }
        stream.expect(Token::ParenClose)?;

        let return_type = if stream.expect(Token::Arrow).is_ok() {
            Some(stream.parse()?)
        } else {
            None
        };

        Ok(FunctionSignature {
            name,
            args,
            return_type,
        })
    }
}
/// How a block produced its resulting value.
#[derive(Debug, Clone, Copy)]
pub enum ReturnType {
/// A trailing expression without a terminating `;`.
Soft,
/// An explicit `return <expression>;` statement.
Hard,
}
/// A `{ ... }` block: its statements plus an optional resulting value.
#[derive(Debug, Clone)]
pub struct Block(
pub Vec<BlockLevelStatement>,
pub Option<(ReturnType, Expression)>,
);
impl Parse for Block {
/// Parses `{ <statement>* }`.
///
/// A hard return ends statement parsing immediately. A soft return is held
/// back as the block's value; if another statement follows, it is demoted
/// to a plain expression statement.
fn parse(mut stream: TokenStream) -> Result<Self, Error> {
let mut statements = Vec::new();
let mut return_stmt = None;
stream.expect(Token::BraceOpen)?;
while !matches!(stream.peek(), Some(Token::BraceClose)) {
// A soft return seen on the previous iteration turned out not to be the
// last statement, so store it as an ordinary expression statement.
if let Some((r_type, e)) = return_stmt.take() {
// Special list of expressions that are simply not warned about,
// if semicolon is missing.
if !matches!(&e, &Expression::IfExpr(_)) {
// Debug output only; parsing continues regardless.
dbg!(r_type, &e);
println!("Oh no, does this statement lack ;");
}
statements.push(BlockLevelStatement::Expression(e));
}
let statement = stream.parse()?;
if let BlockLevelStatement::Return((r_type, e)) = &statement {
match r_type {
ReturnType::Hard => {
return_stmt = Some((*r_type, e.clone()));
break; // Return has to be the last statement
// TODO: Make a mechanism that "can" parse even after this
}
ReturnType::Soft => {
return_stmt = Some((*r_type, e.clone()));
continue; // In theory possible to have lines after a soft return
}
};
}
statements.push(statement);
}
// After a hard return the very next token must be the closing brace.
stream.expect(Token::BraceClose)?;
Ok(Block(statements, return_stmt))
}
}
/// A statement that may appear inside a block.
#[derive(Debug, Clone)]
pub enum BlockLevelStatement {
    /// `let <name> = <expression>;`
    Let(LetStatement),
    /// `import <path>;`
    Import(ImportStatement),
    /// An expression terminated by `;`.
    Expression(Expression),
    /// A value leaving the block, together with how it was written.
    Return((ReturnType, Expression)),
}

impl Parse for BlockLevelStatement {
    /// Parses one statement, choosing the form from the next token.
    ///
    /// An expression without a trailing `;` becomes a soft return.
    fn parse(mut stream: TokenStream) -> Result<Self, Error> {
        match stream.peek() {
            Some(Token::LetKeyword) => Ok(Self::Let(stream.parse()?)),
            Some(Token::ImportKeyword) => Ok(Self::Import(stream.parse()?)),
            Some(Token::ReturnKeyword) => {
                // Skip the `return` keyword itself.
                stream.next();
                let value = stream.parse()?;
                stream.expect(Token::Semi)?;
                Ok(Self::Return((ReturnType::Hard, value)))
            }
            _ => {
                let expression = match stream.parse() {
                    Ok(expression) => expression,
                    Err(_) => return Err(stream.expected_err("expression")?),
                };
                if stream.expect(Token::Semi).is_ok() {
                    Ok(Self::Expression(expression))
                } else {
                    Ok(Self::Return((ReturnType::Soft, expression)))
                }
            }
        }
    }
}
/// A statement that may appear at the top level of a source file.
#[derive(Debug)]
pub enum TopLevelStatement {
    /// `import <path>;`
    Import(ImportStatement),
    /// `fn <signature> <block>`
    FunctionDefinition(FunctionDefinition),
}

impl Parse for TopLevelStatement {
    /// Parses an import or a function definition, chosen by the next token.
    fn parse(mut stream: TokenStream) -> Result<Self, Error> {
        match stream.peek() {
            Some(Token::ImportKeyword) => Ok(Self::Import(stream.parse()?)),
            Some(Token::FnKeyword) => Ok(Self::FunctionDefinition(stream.parse()?)),
            _ => Err(stream.expected_err("import or fn")?),
        }
    }
}

View File

@ -1,222 +0,0 @@
use std::collections::{hash_map, HashMap};
use crate::{
ast::{
BinaryOperator, Block, BlockLevelStatement, Expression, FunctionCallExpression,
FunctionDefinition, FunctionSignature, ReturnType, TopLevelStatement,
},
llvm_ir::{self, IRBlock, IRFunction, IRModule, IRValue, IRValueType},
};
#[derive(Clone)]
pub struct ScopeData {
named_vars: HashMap<String, IRValue>,
defined_functions: HashMap<String, (FunctionSignature, Option<IRFunction>)>,
}
impl ScopeData {
pub fn inner<'a, 'b>(&self, block: &'b mut IRBlock<'a>) -> Scope<'a, 'b> {
Scope {
block,
data: self.clone(),
}
}
pub fn var(&self, name: &String) -> Option<&IRValue> {
self.named_vars.get(name)
}
pub fn set_var(&mut self, name: &str, val: IRValue) -> Result<(), Error> {
if let hash_map::Entry::Vacant(e) = self.named_vars.entry(name.to_owned()) {
e.insert(val);
Ok(())
} else {
Err(Error::VariableAlreadyDefined(name.to_owned()))
}
}
pub fn function(
&mut self,
name: &String,
) -> Option<&mut (FunctionSignature, Option<IRFunction>)> {
self.defined_functions.get_mut(name)
}
pub fn set_function_signature(
&mut self,
name: &str,
sig: FunctionSignature,
ir: IRFunction,
) -> Result<(), Error> {
if let hash_map::Entry::Vacant(e) = self.defined_functions.entry(name.to_owned()) {
e.insert((sig, Some(ir)));
Ok(())
} else {
Err(Error::VariableAlreadyDefined(name.to_owned()))
}
}
}
/// A code-generation scope: the IR block being written to plus the names
/// visible in it.
pub struct Scope<'a, 'b> {
/// Block that generated instructions are added to.
pub block: &'b mut IRBlock<'a>,
/// Names visible in this scope.
pub data: ScopeData,
}
impl<'a, 'b> Scope<'a, 'b> {
/// Creates a nested scope that writes to the same block and starts from a
/// clone of this scope's data, so names added to the nested scope do not
/// appear in this one.
pub fn inner<'c>(&'c mut self) -> Scope<'a, 'c> {
Scope {
block: self.block,
data: self.data.clone(),
}
}
}
/// Generates an `IRModule` named `testmod` from the top-level statements.
///
/// Runs in two passes: every function is declared first (with an `I32`
/// return type), then every statement is generated, so a function may be
/// referenced before its definition appears.
pub fn codegen_from_statements(statements: Vec<TopLevelStatement>) -> Result<IRModule, Error> {
    let mut module = IRModule::new("testmod");
    let mut root = ScopeData {
        named_vars: HashMap::new(),
        defined_functions: HashMap::new(),
    };

    // Pass 1: declarations. Imports need no declaration.
    for statement in &statements {
        if let TopLevelStatement::FunctionDefinition(FunctionDefinition(sig, _)) = statement {
            let declared = module.create_func(&sig.name, IRValueType::I32);
            root.set_function_signature(&sig.name, sig.clone(), declared)?;
        }
    }

    // Pass 2: bodies.
    for statement in &statements {
        statement.codegen(&mut module, &mut root)?;
    }

    Ok(module)
}
impl TopLevelStatement {
/// Generates code for one top-level statement.
///
/// Function definitions get a fresh block, have their body generated into
/// it, and are attached to the function declared earlier under the same
/// name. Imports generate nothing.
///
/// # Errors
///
/// Returns `Error::FunctionAlreadyDefined` when the function's IR handle
/// has already been consumed, and forwards errors from the body.
///
/// # Panics
///
/// Panics if the function was never declared in `root_data`, or if its body
/// yields no value.
pub fn codegen(&self, module: &mut IRModule, root_data: &mut ScopeData) -> Result<(), Error> {
match self {
TopLevelStatement::FunctionDefinition(FunctionDefinition(sig, block)) => {
if let Some((_, ir)) = root_data.function(&sig.name) {
// `take` leaves `None` behind, so a second definition of the same name
// ends up in the error branch below.
if let Some(ir_function) = ir.take() {
let mut ir_block = module.create_block();
let mut scope = root_data.inner(&mut ir_block);
let (_, value) = match block.codegen(&mut scope)? {
Some(v) => v,
None => panic!("Void-return type function not yet implemented!"),
};
ir_function.add_definition(value, ir_block);
} else {
Err(Error::FunctionAlreadyDefined(sig.name.clone()))?
}
} else {
panic!("Function was not declared before it's definition")
}
}
TopLevelStatement::Import(_) => {}
}
Ok(())
}
}
impl Block {
pub fn codegen(&self, scope: &mut Scope) -> Result<Option<(ReturnType, IRValue)>, Error> {
for statement in &self.0 {
statement.codegen(scope)?;
}
let value = if let Some((rt, exp)) = &self.1 {
Some((*rt, exp.codegen(scope)?))
} else {
None
};
Ok(value)
}
}
impl BlockLevelStatement {
    /// Generates code for one statement inside a block.
    ///
    /// # Panics
    ///
    /// Panics on `Return`. The `Block` parser in this crate stores return
    /// values in the block's second field and does not push them into the
    /// statement list.
    pub fn codegen(&self, scope: &mut Scope) -> Result<(), Error> {
        match self {
            BlockLevelStatement::Let(binding) => {
                let value = binding.1.codegen(scope)?;
                scope.data.set_var(&binding.0, value)
            }
            // The value of an expression statement is discarded.
            BlockLevelStatement::Expression(expression) => expression.codegen(scope).map(|_| ()),
            BlockLevelStatement::Import(_) => Ok(()), // TODO: To implement
            BlockLevelStatement::Return(_) => panic!("Should never happen"),
        }
    }
}
impl Expression {
/// Generates code for the expression and returns the resulting IR value.
///
/// # Errors
///
/// Returns `Error::UndefinedVariable` / `Error::UndefinedFunction` for
/// unknown names and forwards errors raised by the IR block.
///
/// # Panics
///
/// Panics for binary operators other than `Add` and `Mult`, for `if`
/// expressions, for inner blocks that end in a hard return, and for inner
/// blocks without a resulting value.
pub fn codegen(&self, scope: &mut Scope) -> Result<IRValue, Error> {
use Expression::*;
match self {
// The operator is matched before the operands are generated, so an
// unsupported operator panics without generating either side.
Binop(op, lhs, rhs) => match op {
BinaryOperator::Add => {
let lhs = lhs.codegen(scope)?;
let rhs = rhs.codegen(scope)?;
Ok(scope.block.add(lhs, rhs)?)
}
BinaryOperator::Mult => {
let lhs = lhs.codegen(scope)?;
let rhs = rhs.codegen(scope)?;
Ok(scope.block.mul(lhs, rhs)?)
}
_ => panic!("Other binary operators not supported yet!"),
},
BlockExpr(block) => {
// Generate the block in a nested scope so that its bindings stay local.
let mut inner = scope.inner();
Ok(match block.codegen(&mut inner)? {
Some((r_type, value)) => match r_type {
ReturnType::Soft => value,
ReturnType::Hard => {
panic!("Hard returns in inner blocks not supported yet")
}
},
None => panic!("Void-return type block not yet implemented!"),
})
}
FunctionCall(fc) => {
// Only the callee name is used; the argument list is ignored here.
let FunctionCallExpression(name, _) = &**fc;
if let Some((sig, _)) = scope.data.function(name) {
Ok(scope.block.function_call(sig)?)
} else {
Err(Error::UndefinedFunction(name.clone()))?
}
}
VariableName(name) => scope
.data
.var(name)
.cloned()
.ok_or(Error::UndefinedVariable(name.clone())),
Literal(lit) => Ok(scope.block.get_const(lit)),
IfExpr(_) => panic!("if expressions not yet supported"),
}
}
}
/// Errors raised during code generation.
#[derive(thiserror::Error, Debug)]
pub enum Error {
/// A variable was bound twice in the same scope data.
#[error("Variable '{0}' already defined")]
VariableAlreadyDefined(String),
/// A variable was read without being bound.
#[error("Variable '{0}' not yet defined")]
UndefinedVariable(String),
/// A call named a function that is not registered.
#[error("Function '{0}' not defined")]
UndefinedFunction(String),
/// A function with this name has already been defined.
#[error("Function '{0}' already defined")]
FunctionAlreadyDefined(String),
/// An error from the IR layer, displayed as-is.
#[error(transparent)]
Deeper(#[from] llvm_ir::Error),
}

View File

@ -1,7 +1,99 @@
use std::{fmt::Debug, str::Chars};
use std::{fmt::Debug, iter::Peekable, str::Chars};
static DECIMAL_NUMERICS: &[char] = &['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'];
/// Splits source text into tokens, always ending with a `Token::Eof`.
///
/// Whitespace and `//` line comments are skipped. A word is an alphabetic
/// character followed by ASCII alphanumerics, and becomes a keyword token
/// when it matches one. A run of decimal digits becomes a
/// `Token::DecimalValue`.
///
/// # Errors
///
/// Returns a message naming the offending character and position when a
/// character does not start any known token.
pub fn tokenize<T: Into<String>>(to_tokenize: T) -> Result<Vec<FullToken>, String> {
    let to_tokenize = to_tokenize.into();
    let mut position = (0, 1);
    let mut cursor = Cursor {
        char_stream: to_tokenize.chars().peekable(),
        position,
    };

    let mut tokens = Vec::new();

    while let Some(character) = &cursor.next() {
        position.0 += 1;
        if *character == '\n' {
            position.1 += 1;
            position.0 = 0;
        }

        let peek = cursor.peek();

        let variant = match character {
            // Whitespace
            w if w.is_whitespace() => continue,
            // Comments
            '/' if peek == Some(&'/') => {
                // Stop at the newline *or* at end of input. Without the `None`
                // case a comment on the last line (no trailing newline) would
                // loop forever, because `peek` keeps returning `None`.
                while !matches!(&cursor.peek(), Some('\n') | None) {
                    cursor.next();
                }
                continue;
            }
            // "words"
            c if c.is_alphabetic() => {
                let mut value = character.to_string();
                while let Some(c) = &cursor.peek() {
                    if !c.is_ascii_alphanumeric() {
                        break;
                    }
                    value += &c.to_string();
                    cursor.next();
                }

                // Check for keywords
                let variant = match value.as_str() {
                    "let" => Token::LetKeyword,
                    "import" => Token::ImportKeyword,
                    _ => Token::Identifier(value),
                };
                variant
            }
            // Decimals
            c if DECIMAL_NUMERICS.contains(c) => {
                let mut value = character.to_string();
                while let Some(c) = &cursor.peek() {
                    if !DECIMAL_NUMERICS.contains(c) {
                        break;
                    }
                    value += &c.to_string();
                    cursor.next();
                }
                Token::DecimalValue(value)
            }
            // Single character tokens
            '=' => Token::Equals,
            ';' => Token::Semicolon,
            ':' => Token::Colon,
            '+' => Token::Plus,
            '*' => Token::Times,
            '(' => Token::ParenOpen,
            ')' => Token::ParenClose,
            ',' => Token::Comma,
            // Invalid token
            _ => Err(format!(
                "Unknown token '{}' at {}, {}",
                character, position.0, position.1
            ))?,
        };

        tokens.push(FullToken {
            token: variant,
            position,
        });
    }

    position.0 += 1;

    tokens.push(FullToken {
        token: Token::Eof,
        position,
    });

    Ok(tokens)
}
#[derive(Debug, Eq, PartialEq, Clone)]
pub enum Token {
// Values
@ -10,49 +102,17 @@ pub enum Token {
DecimalValue(String),
// Keywords
/// `let`
LetKeyword,
/// `import`
ImportKeyword,
/// `return`
ReturnKeyword,
/// `fn`
FnKeyword,
/// `->`
Arrow,
/// `if`
If,
// Symbols
/// `;`
Semi,
/// `=`
Semicolon,
Equals,
/// `:`
Colon,
/// `+`
Plus,
/// `*`
Times,
/// `-`
Minus,
/// `>`
GreaterThan,
/// `<`
LessThan,
/// `&`
Et,
/// `(`
ParenOpen,
/// `)`
ParenClose,
/// `{`
BraceOpen,
/// `}`
BraceClose,
/// `,`
ParenOpen, // (
ParenClose, // )
Comma,
Eof,
@ -62,23 +122,15 @@ impl Token {
pub fn get_token_prec(&self) -> i8 {
match &self {
Token::Plus => 10,
Token::Minus => 10,
Token::Times => 20,
_ => -1,
}
}
}
/// Converts a token into its `Debug` representation.
impl From<Token> for String {
fn from(value: Token) -> Self {
format!("{:?}", value)
}
}
#[derive(Clone)]
pub struct FullToken {
pub token: Token,
pub position: Position,
position: Position,
}
impl Debug for FullToken {
@ -94,127 +146,21 @@ pub type Position = (u32, u32);
pub struct Cursor<'a> {
pub position: Position,
char_stream: Chars<'a>,
char_stream: Peekable<Chars<'a>>,
}
impl<'a> Cursor<'a> {
fn next(&mut self) -> Option<char> {
let next = self.char_stream.next();
self.position.0 += 1;
if let Some('\n') = next {
self.position.1 += 1;
self.position.0 = 0;
}
self.position.0 += 1;
next
}
fn first(&mut self) -> Option<char> {
// `.next()` optimizes better than `.nth(0)`
self.char_stream.clone().next()
}
#[allow(dead_code)] // Is this actually needed?
fn second(&mut self) -> Option<char> {
// `.next()` optimizes better than `.nth(1)`
let mut stream = self.char_stream.clone();
stream.next();
stream.next()
fn peek(&mut self) -> Option<&char> {
self.char_stream.peek()
}
}
pub fn tokenize<T: Into<String>>(to_tokenize: T) -> Result<Vec<FullToken>, Error> {
let to_tokenize = to_tokenize.into();
let mut cursor = Cursor {
char_stream: to_tokenize.chars(),
position: (0, 1),
};
let mut tokens = Vec::new();
while let Some(character) = &cursor.next() {
let variant = match character {
// Whitespace
w if w.is_whitespace() => continue,
// Comments
'/' if cursor.first() == Some('/') => {
while !matches!(cursor.first(), Some('\n') | None) {
cursor.next();
}
continue;
}
// "words"
c if c.is_alphabetic() => {
let mut value = character.to_string();
while let Some(c) = cursor.first() {
if !c.is_ascii_alphanumeric() {
break;
}
value += &c.to_string();
cursor.next();
}
// Check for keywords
let variant = match value.as_str() {
"let" => Token::LetKeyword,
"import" => Token::ImportKeyword,
"return" => Token::ReturnKeyword,
"fn" => Token::FnKeyword,
"if" => Token::If,
_ => Token::Identifier(value),
};
variant
}
// Decimals
c if DECIMAL_NUMERICS.contains(c) => {
let mut value = character.to_string();
while let Some(c) = cursor.first() {
if !DECIMAL_NUMERICS.contains(&c) {
break;
}
value += &c.to_string();
cursor.next();
}
Token::DecimalValue(value)
}
'-' if cursor.first() == Some('>') => {
cursor.next(); // Eat `>`
Token::Arrow
}
// Single character tokens
'=' => Token::Equals,
';' => Token::Semi,
':' => Token::Colon,
'+' => Token::Plus,
'*' => Token::Times,
'-' => Token::Minus,
'>' => Token::GreaterThan,
'<' => Token::LessThan,
'&' => Token::Et,
'(' => Token::ParenOpen,
')' => Token::ParenClose,
'{' => Token::BraceOpen,
'}' => Token::BraceClose,
',' => Token::Comma,
// Invalid token
_ => Err(Error::InvalidToken(*character, cursor.position))?,
};
tokens.push(FullToken {
token: variant,
position: cursor.position,
});
}
tokens.push(FullToken {
token: Token::Eof,
position: cursor.position,
});
Ok(tokens)
}
/// Errors raised while tokenizing.
#[derive(thiserror::Error, Debug)]
pub enum Error {
/// A character that starts no known token, with the position it was
/// reported at. The message prints the second tuple element as the line
/// and the first as the column.
#[error("Invalid token '{}' at Ln {}, Col {}", .0, (.1).1, (.1).0)]
InvalidToken(char, Position),
}

View File

@ -1,46 +0,0 @@
use crate::{
ast::TopLevelStatement, codegen::codegen_from_statements, lexer::Token,
token_stream::TokenStream,
};
mod ast;
mod codegen;
mod lexer;
mod llvm_ir;
mod token_stream;
// TODO:
// 1. Make it so that TopLevelStatement can only be import or function def
// 2. Make BlockLevelStatement, that has everything TopLevelStatement has now
// 3. Make it so all codegen is done with a Block-struct, that represents a
// single proper block
/// Any error that `compile` can return, one variant per compiler stage.
/// Every variant displays as the error it wraps.
#[derive(thiserror::Error, Debug)]
pub enum ReidError {
/// Tokenizing failed.
#[error(transparent)]
LexerError(#[from] lexer::Error),
/// Parsing failed.
#[error(transparent)]
ParserError(#[from] token_stream::Error),
/// Code generation failed.
#[error(transparent)]
CodegenError(#[from] codegen::Error),
}
/// Compiles Reid source text and returns the module's printed form.
///
/// Stages: tokenize, parse top-level statements until `Eof` (or the end of
/// the stream), then generate the module. Tokens and parsed statements are
/// dumped with `dbg!` along the way.
///
/// # Errors
///
/// Returns the first lexer, parser or codegen error encountered.
///
/// # Panics
///
/// Panics if the printed module is not valid UTF-8.
pub fn compile(source: &str) -> Result<String, ReidError> {
    let tokens = lexer::tokenize(source)?;
    dbg!(&tokens);

    let mut token_stream = TokenStream::from(&tokens);
    let mut statements = Vec::new();
    loop {
        // A missing token is treated the same as an explicit `Eof`.
        let finished = match token_stream.peek() {
            None | Some(Token::Eof) => true,
            Some(_) => false,
        };
        if finished {
            break;
        }

        let statement = token_stream.parse::<TopLevelStatement>()?;
        dbg!(&statement);
        statements.push(statement);
    }

    let mut module = codegen_from_statements(statements)?;
    let printed = module.print_to_string().unwrap();
    Ok(printed.to_owned())
}

View File

@ -1,203 +0,0 @@
use std::ffi::{CStr, CString};
use std::mem;
use llvm_sys::{core::*, prelude::*, LLVMBuilder, LLVMContext, LLVMModule};
use crate::ast::{FunctionSignature, Literal};
/// Expands a string literal into a pointer to a NUL-terminated C string.
///
/// The terminator is appended at compile time with `concat!`. The expansion
/// calls the unsafe `CStr::from_bytes_with_nul_unchecked`, so it must be used
/// inside an `unsafe` context, and the literal must not contain a NUL byte.
macro_rules! cstr {
($string:expr) => {
core::ffi::CStr::from_bytes_with_nul_unchecked(concat!($string, "\0").as_bytes()).as_ptr()
};
}
/// An LLVM value reference paired with the type it was created with.
#[derive(Clone, Debug)]
#[must_use = "value contains raw pointer and must be inserted somewhere"]
pub struct IRValue(IRValueType, LLVMValueRef);
/// Value types known to the IR layer.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum IRValueType {
/// 32-bit integer.
I32,
}
impl IRValueType {
/// Returns the LLVM type for this value type in `module`'s context.
///
/// Unsafe because it calls into the LLVM C API with `module.context`.
unsafe fn get_llvm_type(&self, module: &mut IRModule) -> LLVMTypeRef {
match *self {
Self::I32 => LLVMInt32TypeInContext(module.context),
}
}
}
/// Converts `value` into a NUL-terminated `CString`.
///
/// A C string ends at its first NUL byte, so anything from an interior NUL
/// onwards is dropped. This keeps the conversion infallible without the
/// unchecked constructor, whose safety contract forbids interior NULs.
fn into_cstring<T: Into<String>>(value: T) -> CString {
    let mut bytes = value.into().into_bytes();
    if let Some(first_nul) = bytes.iter().position(|&byte| byte == 0) {
        bytes.truncate(first_nul);
    }
    CString::new(bytes).expect("interior NUL bytes were truncated above")
}
/// Owns an LLVM context together with one module and one builder created in
/// it. All three are disposed when the value is dropped.
pub struct IRModule {
context: *mut LLVMContext,
module: *mut LLVMModule,
builder: *mut LLVMBuilder,
}
impl IRModule {
/// Creates a fresh context, a module called `name` inside it, and a
/// builder for that context.
pub fn new<T: Into<String>>(name: T) -> IRModule {
unsafe {
// Set up a context, module and builder in that context.
let context = LLVMContextCreate();
let module = LLVMModuleCreateWithNameInContext(into_cstring(name).as_ptr(), context);
let builder = LLVMCreateBuilderInContext(context);
IRModule {
context,
module,
builder,
}
}
}
/// Creates a basic block named `entry` and positions the builder at its
/// end (see `IRBlock::create`).
pub fn create_block(&mut self) -> IRBlock {
IRBlock::create("entry", self)
}
/// Adds a function called `name` to the module. It takes no parameters and
/// returns `return_type`.
pub fn create_func<T: Into<String>>(
&mut self,
name: T,
return_type: IRValueType,
) -> IRFunction {
unsafe {
// Empty parameter list: the function type is built with zero arguments.
let mut argts = [];
let func_type = LLVMFunctionType(
return_type.get_llvm_type(self),
argts.as_mut_ptr(),
argts.len() as u32,
0,
);
let anon_func = LLVMAddFunction(self.module, into_cstring(name).as_ptr(), func_type);
IRFunction {
value: IRValue(return_type, anon_func),
}
}
}
/// Prints the module in LLVM's textual form.
///
/// # Errors
///
/// Returns `Utf8Error` if the printed text is not valid UTF-8.
///
/// NOTE(review): the buffer returned by `LLVMPrintModuleToString` is never
/// released here; the LLVM-C API appears to expect `LLVMDisposeMessage` for
/// it - confirm whether this leaks.
pub fn print_to_string(&mut self) -> Result<&str, std::str::Utf8Error> {
unsafe { CStr::from_ptr(LLVMPrintModuleToString(self.module)).to_str() }
}
}
/// Releases the LLVM objects owned by the module.
impl Drop for IRModule {
fn drop(&mut self) {
// Clean up. Values created in the context mostly get cleaned up there.
// Disposal order: builder, then module, then the context they live in.
unsafe {
LLVMDisposeBuilder(self.builder);
LLVMDisposeModule(self.module);
LLVMContextDispose(self.context);
}
}
}
/// Handle to a function that has been added to an `IRModule`.
#[derive(Clone, Debug)]
pub struct IRFunction {
value: IRValue,
}
impl IRFunction {
/// Attaches `block` to this function and builds a `ret` of `ret` with the
/// module's builder.
///
/// NOTE(review): the `ret` is emitted wherever the builder is currently
/// positioned; this presumably relies on the builder still being at the end
/// of `block` - confirm.
pub fn add_definition(self, ret: IRValue, block: IRBlock) {
unsafe {
LLVMAppendExistingBasicBlock(self.value.1, block.blockref);
LLVMBuildRet(block.module.builder, ret.1);
}
}
}
/// A basic block plus exclusive access to the module it was created in.
pub struct IRBlock<'a> {
module: &'a mut IRModule,
blockref: LLVMBasicBlockRef,
}
impl<'a> IRBlock<'a> {
fn create<T: Into<String>>(name: T, codegen: &'a mut IRModule) -> IRBlock<'a> {
unsafe {
let blockref =
LLVMCreateBasicBlockInContext(codegen.context, into_cstring(name).as_ptr());
LLVMPositionBuilderAtEnd(codegen.builder, blockref);
IRBlock {
module: codegen,
blockref,
}
}
}
pub fn get_const(&mut self, literal_type: &Literal) -> IRValue {
unsafe {
match *literal_type {
Literal::I32(v) => IRValue(
IRValueType::I32,
LLVMConstInt(
LLVMInt32TypeInContext(self.module.context),
mem::transmute(v as i64),
1,
),
),
}
}
}
pub fn add(&mut self, lhs: IRValue, rhs: IRValue) -> Result<IRValue, Error> {
unsafe {
if lhs.0 == rhs.0 {
Ok(IRValue(
lhs.0,
LLVMBuildAdd(self.module.builder, lhs.1, rhs.1, cstr!("tmpadd")),
))
} else {
Err(Error::TypeMismatch(lhs.0, rhs.0))
}
}
}
pub fn mul(&mut self, lhs: IRValue, rhs: IRValue) -> Result<IRValue, Error> {
unsafe {
if lhs.0 == rhs.0 {
Ok(IRValue(
lhs.0,
LLVMBuildMul(self.module.builder, lhs.1, rhs.1, cstr!("tmpadd")),
))
} else {
Err(Error::TypeMismatch(lhs.0, rhs.0))
}
}
}
pub fn function_call(&mut self, callee: &FunctionSignature) -> Result<IRValue, Error> {
unsafe {
let function = LLVMGetNamedFunction(
self.module.module,
into_cstring(callee.name.clone()).as_ptr(),
);
let ret_t = LLVMInt32TypeInContext(self.module.context);
let mut argts = [];
let mut args = [];
let fun_t = LLVMFunctionType(ret_t, argts.as_mut_ptr(), argts.len() as u32, 0);
let call = LLVMBuildCall2(
self.module.builder,
fun_t,
function,
args.as_mut_ptr(),
args.len() as u32,
into_cstring(&callee.name).as_ptr(),
);
Ok(IRValue(IRValueType::I32, call))
}
}
}
/// Errors that can occur while emitting LLVM IR.
#[derive(Debug, thiserror::Error)]
pub enum Error {
/// The two operands of a binary operation have different value types.
/// The fields are the left-hand and right-hand types, in that order.
#[error("Type Mismatch: {0:?} {1:?}")]
TypeMismatch(IRValueType, IRValueType),
}

24
src/main.rs Normal file
View File

@ -0,0 +1,24 @@
use crate::{lexer::Token, parser::TopLevelStatement, token_stream::TokenStream};
// Sample `.reid` programs, embedded into the binary at compile time from the
// directory above `src/`.
/// Contents of `easiest.reid`.
pub static EASIEST: &str = include_str!("../easiest.reid");
/// Contents of `easy.reid`.
pub static EASY: &str = include_str!("../easy.reid");
/// Contents of `medium.reid`.
pub static MEDIUM: &str = include_str!("../medium.reid");
/// Contents of `hard.reid`.
pub static HARD: &str = include_str!("../hard.reid");
mod lexer;
mod parser;
mod token_stream;
/// Debugging entry point: tokenizes the `EASY` sample, then parses and dumps
/// top-level statements one by one.
///
/// Panics if tokenizing fails (`unwrap`).
fn main() {
let tokens = lexer::tokenize(EASY).unwrap();
dbg!(&tokens);
let mut token_stream = TokenStream::from(&tokens);
// Parsing stops silently at the first statement that fails to parse; the
// error itself is discarded.
while let Ok(statement) = token_stream.parse::<TopLevelStatement>() {
dbg!(&statement);
}
// Prints `Some(())` when the next token is `Eof`, i.e. the whole input was
// consumed, and `None` otherwise.
dbg!(token_stream.expect(Token::Eof).ok());
}

176
src/parser.rs Normal file
View File

@ -0,0 +1,176 @@
use crate::{lexer::Token, token_stream::TokenStream};
/// A syntax element that can be read from a [`TokenStream`].
pub trait Parse: Sized {
    /// Attempts to read one `Self` from `stream`.
    ///
    /// The stream is taken by value. Failure is reported as `Err(())` with no
    /// further detail.
    fn parse(stream: TokenStream) -> Result<Self, ()>;
}
/// An expression in the syntax tree.
#[derive(Debug, Clone)]
pub enum Expression {
    /// A reference to a variable by name.
    VariableName(String),
    /// A 32-bit integer literal.
    // NOTE(review): "Contant" looks like a typo for "Constant". Renaming it
    // would touch every user of the variant, so it is left as is.
    ContantI32(i32),
    /// Addition of two sub-expressions (left, right).
    BinopAdd(Box<Expression>, Box<Expression>),
    /// Multiplication of two sub-expressions (left, right).
    BinopMult(Box<Expression>, Box<Expression>),
    /// A function call.
    FunctionCall(Box<FunctionCallExpression>),
}

impl Parse for Expression {
    /// Parses a primary expression, then folds any binary operators that
    /// follow it, starting from precedence 0.
    fn parse(mut stream: TokenStream) -> Result<Expression, ()> {
        parse_primary_expression(&mut stream)
            .and_then(|first_operand| parse_binop_rhs(&mut stream, first_operand, 0))
    }
}
/// Parses a single operand: a function call, a variable name or an integer
/// literal.
///
/// A function call is attempted first. If that fails, the next token is
/// consumed and interpreted as an identifier or a decimal literal.
///
/// # Errors
///
/// Returns `Err(())` when the stream is exhausted, when the next token is
/// neither an identifier nor a decimal literal, or when a decimal literal does
/// not fit in an `i32`. (The last case used to panic through `unwrap()`.)
fn parse_primary_expression(stream: &mut TokenStream) -> Result<Expression, ()> {
    if let Ok(exp) = stream.parse() {
        return Ok(Expression::FunctionCall(Box::new(exp)));
    }
    match stream.next() {
        Some(Token::Identifier(v)) => Ok(Expression::VariableName(v)),
        // The literal text comes straight from user source, so an
        // out-of-range value must be a parse error rather than a panic.
        Some(Token::DecimalValue(v)) => v
            .parse::<i32>()
            .map(Expression::ContantI32)
            .map_err(|_| ()), // TODO: Add error reporting!
        _ => Err(()), // TODO: Add error reporting!
    }
}
/// Operator-precedence parser for the right-hand side of binary expressions.
///
/// Given an already parsed left operand `lhs`, repeatedly consumes
/// `operator operand` pairs for as long as the upcoming token's precedence is
/// at least `expr_prec`, folding each pair into `lhs`. When the operator that
/// follows the freshly parsed right operand has a higher precedence than the
/// current one, the function recurses so that the right operand absorbs that
/// operator first (this is what makes `a + b * c` group as `a + (b * c)`).
///
/// Returns the combined expression, or `Err(())` when an operand cannot be
/// parsed or the consumed token is not a supported operator.
///
/// Reference for how the algorithm is formed:
/// https://llvm.org/docs/tutorial/MyFirstLanguageFrontend/LangImpl02.html#binary-expression-parsing
fn parse_binop_rhs(
stream: &mut TokenStream,
mut lhs: Expression,
expr_prec: i8,
) -> Result<Expression, ()> {
while let Some(token) = stream.peek() {
// NOTE(review): this presumably yields a negative precedence for tokens
// that are not binary operators, which is what ends the loop on `;` or `)`
// — confirm in the lexer.
let curr_token_prec = token.get_token_prec();
if curr_token_prec < expr_prec {
break; // Just return lhs
} else {
// token has to be an operator
stream.next(); // Eat token
let mut rhs = parse_primary_expression(stream)?;
if let Some(next_op) = stream.peek() {
let next_prec = next_op.get_token_prec();
if curr_token_prec < next_prec {
// Operator on the right of rhs has more precedence, turn
// rhs into lhs for new binop
rhs = parse_binop_rhs(stream, rhs, curr_token_prec + 1)?;
}
}
// Combine using the operator peeked at the top of this iteration. Any
// token that passed the precedence check but is not `+` or `*` is
// rejected here, after it and its operand were already consumed.
lhs = match &token {
Token::Plus => Expression::BinopAdd(Box::new(lhs), Box::new(rhs)),
Token::Times => Expression::BinopMult(Box::new(lhs), Box::new(rhs)),
_ => Err(())?, // TODO: Add error reporting!
};
}
}
Ok(lhs)
}
/// A function call: the callee's name followed by its argument expressions.
#[derive(Debug, Clone)]
pub struct FunctionCallExpression(String, Vec<Expression>);

impl Parse for FunctionCallExpression {
    /// Parses `name ( [expr { , expr }] )`.
    ///
    /// Fails with `Err(())` when the first token is not an identifier, when a
    /// parenthesis is missing, or when a comma is not followed by an
    /// expression.
    fn parse(mut stream: TokenStream) -> Result<Self, ()> {
        let callee = match stream.next() {
            Some(Token::Identifier(callee)) => callee,
            _ => return Err(()), // TODO: Add error reporting!
        };
        stream.expect(Token::ParenOpen)?;

        let mut arguments = Vec::new();
        // The argument list may be empty, so a failure to parse the first
        // argument is not an error.
        if let Ok(first) = stream.parse() {
            arguments.push(first);
            while stream.expect(Token::Comma).is_ok() {
                let following = stream.parse()?;
                arguments.push(following);
            }
        }

        stream.expect(Token::ParenClose)?;
        Ok(FunctionCallExpression(callee, arguments))
    }
}
/// A statement that may appear at the top level of a source file.
#[derive(Debug)]
pub enum TopLevelStatement {
    /// A `let` binding.
    Let(LetStatement),
    /// An import.
    Import(ImportStatement),
    /// A bare expression terminated by a semicolon.
    TLExpression(Expression),
}

impl Parse for TopLevelStatement {
    /// Picks the statement kind from the next token: the `let` keyword, the
    /// `import` keyword, or otherwise an expression followed by `;`.
    fn parse(mut stream: TokenStream) -> Result<Self, ()> {
        let upcoming = stream.peek();
        match upcoming {
            Some(Token::LetKeyword) => stream.parse().map(TopLevelStatement::Let),
            Some(Token::ImportKeyword) => stream.parse().map(TopLevelStatement::Import),
            _ => {
                let expression = stream.parse()?; // TODO: Add error reporting!
                stream.expect(Token::Semicolon)?;
                Ok(TopLevelStatement::TLExpression(expression))
            }
        }
    }
}
/// A `let` binding: the variable name and the expression assigned to it.
#[derive(Debug)]
pub struct LetStatement(String, Expression);

impl Parse for LetStatement {
    /// Parses `let <identifier> = <expression> ;`.
    fn parse(mut stream: TokenStream) -> Result<LetStatement, ()> {
        stream.expect(Token::LetKeyword)?;
        let variable = match stream.next() {
            Some(Token::Identifier(variable)) => variable,
            _ => return Err(()), // TODO: Add error reporting!
        };
        stream.expect(Token::Equals)?;
        let value = stream.parse()?;
        stream.expect(Token::Semicolon)?;
        Ok(LetStatement(variable, value))
    }
}
/// An import: the path segments that were separated by `::` in the source.
#[derive(Debug)]
pub struct ImportStatement(Vec<String>);

impl Parse for ImportStatement {
    /// Parses `import <identifier> { :: <identifier> } ;`.
    ///
    /// Fails with `Err(())` when a segment is not an identifier or the
    /// terminating semicolon is missing.
    fn parse(mut stream: TokenStream) -> Result<Self, ()> {
        stream.expect(Token::ImportKeyword)?;

        let mut import_list = Vec::new();
        loop {
            match stream.next() {
                Some(Token::Identifier(segment)) => import_list.push(segment),
                _ => return Err(()), // TODO: Add error reporting!
            }
            // A path separator is two consecutive colon tokens. The second
            // colon is only looked for once the first one has been consumed.
            let separator_follows =
                stream.expect(Token::Colon).is_ok() && stream.expect(Token::Colon).is_ok();
            if !separator_follows {
                break;
            }
        }

        stream.expect(Token::Semicolon)?;
        Ok(ImportStatement(import_list))
    }
}

View File

@ -1,6 +1,6 @@
use crate::{
ast::Parse,
lexer::{FullToken, Position, Token},
lexer::{FullToken, Token},
parser::Parse,
};
pub struct TokenStream<'a, 'b> {
@ -18,24 +18,16 @@ impl<'a, 'b> TokenStream<'a, 'b> {
}
}
pub fn expected_err<T: Into<String>>(&mut self, expected: T) -> Result<Error, Error> {
Ok(Error::Expected(
expected.into(),
self.peek().unwrap_or(Token::Eof),
self.get_next_position()?,
))
}
pub fn expect(&mut self, token: Token) -> Result<(), Error> {
pub fn expect(&mut self, token: Token) -> Result<(), ()> {
if let Some(peeked) = self.peek() {
if token == peeked {
self.position += 1;
Ok(())
} else {
Err(self.expected_err(token)?)
Err(())
}
} else {
Err(self.expected_err(token)?)
Err(())
}
}
@ -57,7 +49,7 @@ impl<'a, 'b> TokenStream<'a, 'b> {
}
}
pub fn parse<T: Parse>(&mut self) -> Result<T, Error> {
pub fn parse<T: Parse>(&mut self) -> Result<T, ()> {
let mut ref_pos = self.position;
let position = self.position;
@ -75,15 +67,6 @@ impl<'a, 'b> TokenStream<'a, 'b> {
Err(e) => Err(e),
}
}
fn get_next_position(&self) -> Result<Position, Error> {
if self.tokens.is_empty() {
Err(Error::FileEmpty)
} else {
let token_idx = self.position.min(self.tokens.len() - 1);
Ok(self.tokens[token_idx].position)
}
}
}
impl Drop for TokenStream<'_, '_> {
@ -93,11 +76,3 @@ impl Drop for TokenStream<'_, '_> {
}
}
}
#[derive(thiserror::Error, Debug)]
pub enum Error {
#[error("Expected {} at Ln {}, Col {}, got {:?}", .0, (.2).1, (.2).0, .1)]
Expected(String, Token, Position),
#[error("Source file contains no tokens")]
FileEmpty,
}