diff --git a/Cargo.toml b/Cargo.toml
index 4d8f5f3..e1b07fd 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,12 +1,4 @@
-[package]
-name = "reid"
-version = "0.1.0"
-edition = "2021"
-
-# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
-
-[dependencies]
-## LLVM Bindings
-llvm-sys = "160"
-## Make it easier to generate errors
-thiserror = "1.0.44"
\ No newline at end of file
+[workspace]
+members = [
+    "reid"
+]
\ No newline at end of file
diff --git a/reid/Cargo.toml b/reid/Cargo.toml
new file mode 100644
index 0000000..4d8f5f3
--- /dev/null
+++ b/reid/Cargo.toml
@@ -0,0 +1,12 @@
+[package]
+name = "reid"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+## LLVM Bindings
+llvm-sys = "160"
+## Make it easier to generate errors
+thiserror = "1.0.44"
\ No newline at end of file
diff --git a/examples/arithmetic.rs b/reid/examples/arithmetic.rs
similarity index 100%
rename from examples/arithmetic.rs
rename to reid/examples/arithmetic.rs
diff --git a/examples/reid/arithmetic.reid b/reid/examples/reid/arithmetic.reid
similarity index 100%
rename from examples/reid/arithmetic.reid
rename to reid/examples/reid/arithmetic.reid
diff --git a/examples/reid/fibonacci.reid b/reid/examples/reid/fibonacci.reid
similarity index 100%
rename from examples/reid/fibonacci.reid
rename to reid/examples/reid/fibonacci.reid
diff --git a/examples/reid/hard.reid b/reid/examples/reid/hard.reid
similarity index 100%
rename from examples/reid/hard.reid
rename to reid/examples/reid/hard.reid
diff --git a/examples/reid/medium.reid b/reid/examples/reid/medium.reid
similarity index 100%
rename from examples/reid/medium.reid
rename to reid/examples/reid/medium.reid
diff --git a/src/ast.rs b/reid/src/ast.rs
similarity index 100%
rename from src/ast.rs
rename to reid/src/ast.rs
diff --git a/src/codegen/llvm.rs b/reid/src/codegen/llvm.rs
similarity index 100%
rename from src/codegen/llvm.rs
rename to reid/src/codegen/llvm.rs
diff --git a/src/codegen/mod.rs b/reid/src/codegen/mod.rs
similarity index 100%
rename from src/codegen/mod.rs
rename to reid/src/codegen/mod.rs
diff --git a/src/lexer.rs b/reid/src/lexer.rs
similarity index 100%
rename from src/lexer.rs
rename to reid/src/lexer.rs
diff --git a/src/lib.rs b/reid/src/lib.rs
similarity index 100%
rename from src/lib.rs
rename to reid/src/lib.rs
diff --git a/reid/src/old_llvm/codegen.rs b/reid/src/old_llvm/codegen.rs
new file mode 100644
index 0000000..83b1e12
--- /dev/null
+++ b/reid/src/old_llvm/codegen.rs
@@ -0,0 +1,272 @@
+//! Lowers the parsed AST into LLVM IR through the wrappers in `llvm_ir`.
+
+use std::collections::{hash_map, HashMap};
+
+use crate::{
+    ast::{
+        BinaryOperator, Block, BlockLevelStatement, Expression, FunctionCallExpression,
+        FunctionDefinition, FunctionSignature, ReturnType, TopLevelStatement,
+    },
+    llvm_ir::{self, IRBlock, IRFunction, IRModule, IRValue, IRValueType},
+};
+
+/// Names visible at one point of code generation.
+#[derive(Clone)]
+pub struct ScopeData {
+    /// Values bound by `let`, keyed by variable name.
+    named_vars: HashMap<String, IRValue>,
+    /// Declared functions; the `IRFunction` is taken (left `None`) once its body is emitted.
+    defined_functions: HashMap<String, (FunctionSignature, Option<IRFunction>)>,
+}
+
+impl ScopeData {
+    /// Creates a [`Scope`] that emits into `block` and starts from a copy of these names.
+    pub fn inner<'a, 'b>(&self, block: &'b mut IRBlock<'a>) -> Scope<'a, 'b> {
+        Scope {
+            block,
+            data: self.clone(),
+        }
+    }
+
+    /// Looks up the value bound to variable `name`, if any.
+    pub fn var(&self, name: &str) -> Option<&IRValue> {
+        self.named_vars.get(name)
+    }
+
+    /// Binds `name` to `val`.
+    ///
+    /// # Errors
+    /// Returns [`Error::VariableAlreadyDefined`] if `name` is already bound.
+    pub fn set_var(&mut self, name: &str, val: IRValue) -> Result<(), Error> {
+        if let hash_map::Entry::Vacant(e) = self.named_vars.entry(name.to_owned()) {
+            e.insert(val);
+            Ok(())
+        } else {
+            Err(Error::VariableAlreadyDefined(name.to_owned()))
+        }
+    }
+
+    /// Looks up the signature and pending IR handle registered for function `name`.
+    pub fn function(
+        &mut self,
+        name: &str,
+    ) -> Option<&mut (FunctionSignature, Option<IRFunction>)> {
+        self.defined_functions.get_mut(name)
+    }
+
+    /// Registers a declared function together with its IR handle.
+    ///
+    /// # Errors
+    /// Returns [`Error::FunctionAlreadyDefined`] if `name` is already registered.
+    pub fn set_function_signature(
+        &mut self,
+        name: &str,
+        sig: FunctionSignature,
+        ir: IRFunction,
+    ) -> Result<(), Error> {
+        if let hash_map::Entry::Vacant(e) = self.defined_functions.entry(name.to_owned()) {
+            e.insert((sig, Some(ir)));
+            Ok(())
+        } else {
+            Err(Error::FunctionAlreadyDefined(name.to_owned()))
+        }
+    }
+}
+
+/// A [`ScopeData`] paired with the IR block that code in the scope is emitted into.
+pub struct Scope<'a, 'b> {
+    pub block: &'b mut IRBlock<'a>,
+    pub data: ScopeData,
+}
+
+impl<'a, 'b> Scope<'a, 'b> {
+    /// Creates a nested scope on the same block; names defined inside do not leak out.
+    pub fn inner<'c>(&'c mut self) -> Scope<'a, 'c> {
+        Scope {
+            block: self.block,
+            data: self.data.clone(),
+        }
+    }
+}
+
+/// Lowers a whole program into a new [`IRModule`] named `"testmod"`.
+///
+/// Functions are declared in a first pass (always with an `i32` return type) and all
+/// statements are generated in a second pass, so a call can target a later definition.
+///
+/// # Errors
+/// Returns the first [`Error`] raised while declaring or generating a statement.
+pub fn codegen_from_statements(statements: Vec<TopLevelStatement>) -> Result<IRModule, Error> {
+    let mut module = IRModule::new("testmod");
+
+    let mut scope = ScopeData {
+        defined_functions: HashMap::new(),
+        named_vars: HashMap::new(),
+    };
+
+    for statement in &statements {
+        match statement {
+            TopLevelStatement::FunctionDefinition(FunctionDefinition(sig, _)) => {
+                let function = module.create_func(&sig.name, IRValueType::I32);
+                scope.set_function_signature(&sig.name, sig.clone(), function)?;
+            }
+            TopLevelStatement::Import(_) => {}
+        }
+    }
+
+    for statement in &statements {
+        statement.codegen(&mut module, &mut scope)?;
+    }
+
+    Ok(module)
+}
+
+impl TopLevelStatement {
+    /// Generates IR for this statement; imports are currently ignored.
+    ///
+    /// # Errors
+    /// Returns [`Error::FunctionAlreadyDefined`] if the function's IR handle was already
+    /// consumed by an earlier definition, or any error raised while generating the body.
+    ///
+    /// # Panics
+    /// Panics if the function was never registered in `root_data` or its block yields no value.
+    pub fn codegen(&self, module: &mut IRModule, root_data: &mut ScopeData) -> Result<(), Error> {
+        match self {
+            TopLevelStatement::FunctionDefinition(FunctionDefinition(sig, block)) => {
+                if let Some((_, ir)) = root_data.function(&sig.name) {
+                    if let Some(ir_function) = ir.take() {
+                        let mut ir_block = module.create_block();
+                        let mut scope = root_data.inner(&mut ir_block);
+
+                        let (_, value) = match block.codegen(&mut scope)? {
+                            Some(v) => v,
+                            None => panic!("Void-return type function not yet implemented!"),
+                        };
+
+                        ir_function.add_definition(value, ir_block);
+                    } else {
+                        return Err(Error::FunctionAlreadyDefined(sig.name.clone()));
+                    }
+                } else {
+                    panic!("Function was not declared before it's definition")
+                }
+            }
+            TopLevelStatement::Import(_) => {}
+        }
+        Ok(())
+    }
+}
+
+impl Block {
+    /// Generates every statement, then the optional trailing return expression.
+    ///
+    /// Returns `Ok(None)` when the block has no return expression.
+    pub fn codegen(&self, scope: &mut Scope) -> Result<Option<(ReturnType, IRValue)>, Error> {
+        for statement in &self.0 {
+            statement.codegen(scope)?;
+        }
+
+        let value = if let Some((rt, exp)) = &self.1 {
+            Some((*rt, exp.codegen(scope)?))
+        } else {
+            None
+        };
+
+        Ok(value)
+    }
+}
+
+impl BlockLevelStatement {
+    /// Generates IR for one statement inside a block; imports are not implemented yet.
+    ///
+    /// # Panics
+    /// Panics if the statement is a `Return`.
+    pub fn codegen(&self, scope: &mut Scope) -> Result<(), Error> {
+        match self {
+            BlockLevelStatement::Let(let_statement) => {
+                let val = let_statement.1.codegen(scope)?;
+                scope.data.set_var(&let_statement.0, val)?;
+                Ok(())
+            }
+            BlockLevelStatement::Return(_) => panic!("Should never happen"),
+            BlockLevelStatement::Import(_) => Ok(()), // TODO: To implement
+            BlockLevelStatement::Expression(e) => {
+                let _value = e.codegen(scope)?;
+                Ok(())
+            }
+        }
+    }
+}
+
+impl Expression {
+    /// Generates IR for this expression and returns the resulting value.
+    ///
+    /// # Errors
+    /// Returns [`Error::UndefinedVariable`] or [`Error::UndefinedFunction`] for unknown names,
+    /// and forwards any type mismatch reported by `llvm_ir` as [`Error::Deeper`].
+    ///
+    /// # Panics
+    /// Panics on constructs that are not supported yet: binary operators other than `Add` and
+    /// `Mult`, hard returns or value-less inner blocks, and `if` expressions.
+    pub fn codegen(&self, scope: &mut Scope) -> Result<IRValue, Error> {
+        use Expression::*;
+        match self {
+            Binop(op, lhs, rhs) => match op {
+                BinaryOperator::Add => {
+                    let lhs = lhs.codegen(scope)?;
+                    let rhs = rhs.codegen(scope)?;
+                    Ok(scope.block.add(lhs, rhs)?)
+                }
+                BinaryOperator::Mult => {
+                    let lhs = lhs.codegen(scope)?;
+                    let rhs = rhs.codegen(scope)?;
+                    Ok(scope.block.mul(lhs, rhs)?)
+                }
+                _ => panic!("Other binary operators not supported yet!"),
+            },
+            BlockExpr(block) => {
+                let mut inner = scope.inner();
+
+                Ok(match block.codegen(&mut inner)? {
+                    Some((r_type, value)) => match r_type {
+                        ReturnType::Soft => value,
+                        ReturnType::Hard => {
+                            panic!("Hard returns in inner blocks not supported yet")
+                        }
+                    },
+                    None => panic!("Void-return type block not yet implemented!"),
+                })
+            }
+            FunctionCall(fc) => {
+                let FunctionCallExpression(name, _) = &**fc;
+                if let Some((sig, _)) = scope.data.function(name) {
+                    Ok(scope.block.function_call(sig)?)
+                } else {
+                    Err(Error::UndefinedFunction(name.clone()))
+                }
+            }
+            VariableName(name) => scope
+                .data
+                .var(name)
+                .cloned()
+                .ok_or_else(|| Error::UndefinedVariable(name.clone())),
+            Literal(lit) => Ok(scope.block.get_const(lit)),
+            IfExpr(_) => panic!("if expressions not yet supported"),
+        }
+    }
+}
+
+/// Errors produced while lowering the AST to LLVM IR.
+#[derive(thiserror::Error, Debug)]
+pub enum Error {
+    #[error("Variable '{0}' already defined")]
+    VariableAlreadyDefined(String),
+    #[error("Variable '{0}' not yet defined")]
+    UndefinedVariable(String),
+    #[error("Function '{0}' not defined")]
+    UndefinedFunction(String),
+    #[error("Function '{0}' already defined")]
+    FunctionAlreadyDefined(String),
+    #[error(transparent)]
+    Deeper(#[from] llvm_ir::Error),
+}
diff --git a/reid/src/old_llvm/llvm_ir.rs b/reid/src/old_llvm/llvm_ir.rs
new file mode 100644
index 0000000..d0c754f
--- /dev/null
+++ b/reid/src/old_llvm/llvm_ir.rs
@@ -0,0 +1,246 @@
+//! Thin wrappers around the LLVM-C API (`llvm_sys`) used by the code generator.
+
+use std::ffi::{CStr, CString};
+use std::mem;
+
+use llvm_sys::{core::*, prelude::*, LLVMBuilder, LLVMContext, LLVMModule};
+
+use crate::ast::{FunctionSignature, Literal};
+
+/// Expands a string literal to a NUL-terminated C string pointer; use inside `unsafe` only.
+macro_rules! cstr {
+    ($string:expr) => {
+        core::ffi::CStr::from_bytes_with_nul_unchecked(concat!($string, "\0").as_bytes()).as_ptr()
+    };
+}
+
+/// An LLVM value reference tagged with its [`IRValueType`].
+#[derive(Clone, Debug)]
+#[must_use = "value contains raw pointer and must be inserted somewhere"]
+pub struct IRValue(IRValueType, LLVMValueRef);
+
+/// Value types the code generator currently knows about.
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub enum IRValueType {
+    I32,
+}
+
+impl IRValueType {
+    /// Returns the LLVM type for `self` in the context owned by `module`.
+    ///
+    /// # Safety
+    /// `module.context` must still be a live LLVM context.
+    unsafe fn get_llvm_type(&self, module: &mut IRModule) -> LLVMTypeRef {
+        match *self {
+            Self::I32 => LLVMInt32TypeInContext(module.context),
+        }
+    }
+}
+
+/// Converts `value` into a `CString` by appending the terminating NUL.
+///
+/// The input is not checked for interior NUL bytes, so callers must not pass any.
+fn into_cstring<T: Into<String>>(value: T) -> CString {
+    let string = value.into();
+    unsafe { CString::from_vec_with_nul_unchecked((string + "\0").into_bytes()) }
+}
+
+/// Owns an LLVM context together with one module and one builder created in it.
+pub struct IRModule {
+    context: *mut LLVMContext,
+    module: *mut LLVMModule,
+    builder: *mut LLVMBuilder,
+}
+
+impl IRModule {
+    /// Creates a fresh context, a module called `name`, and a builder.
+    pub fn new<T: Into<String>>(name: T) -> IRModule {
+        unsafe {
+            // Set up a context, module and builder in that context.
+            let context = LLVMContextCreate();
+            let module = LLVMModuleCreateWithNameInContext(into_cstring(name).as_ptr(), context);
+            let builder = LLVMCreateBuilderInContext(context);
+
+            IRModule {
+                context,
+                module,
+                builder,
+            }
+        }
+    }
+
+    /// Creates a detached basic block named `"entry"` and positions the builder at its end.
+    pub fn create_block(&mut self) -> IRBlock {
+        IRBlock::create("entry", self)
+    }
+
+    /// Declares a zero-argument function `name` returning `return_type` in this module.
+    pub fn create_func<T: Into<String>>(
+        &mut self,
+        name: T,
+        return_type: IRValueType,
+    ) -> IRFunction {
+        unsafe {
+            let mut argts = [];
+            let func_type = LLVMFunctionType(
+                return_type.get_llvm_type(self),
+                argts.as_mut_ptr(),
+                argts.len() as u32,
+                0,
+            );
+
+            let anon_func = LLVMAddFunction(self.module, into_cstring(name).as_ptr(), func_type);
+            IRFunction {
+                value: IRValue(return_type, anon_func),
+            }
+        }
+    }
+
+    /// Renders the module as textual LLVM IR.
+    ///
+    /// NOTE(review): the LLVM-C documentation asks callers to release this string with
+    /// `LLVMDisposeMessage`; that never happens here, so each call appears to leak it.
+    ///
+    /// # Errors
+    /// Returns a `Utf8Error` if the printed module is not valid UTF-8.
+    pub fn print_to_string(&mut self) -> Result<&str, std::str::Utf8Error> {
+        unsafe { CStr::from_ptr(LLVMPrintModuleToString(self.module)).to_str() }
+    }
+}
+
+impl Drop for IRModule {
+    fn drop(&mut self) {
+        // Clean up. Values created in the context mostly get cleaned up there.
+        unsafe {
+            LLVMDisposeBuilder(self.builder);
+            LLVMDisposeModule(self.module);
+            LLVMContextDispose(self.context);
+        }
+    }
+}
+
+/// Handle to a function declared in an [`IRModule`].
+#[derive(Clone, Debug)]
+pub struct IRFunction {
+    value: IRValue,
+}
+
+impl IRFunction {
+    /// Appends `block` to this function and terminates it with `ret` as the return value.
+    ///
+    /// The builder was positioned at the end of `block` when the block was created.
+    pub fn add_definition(self, ret: IRValue, block: IRBlock) {
+        unsafe {
+            LLVMAppendExistingBasicBlock(self.value.1, block.blockref);
+            LLVMBuildRet(block.module.builder, ret.1);
+        }
+    }
+}
+
+/// A basic block under construction; borrows the module exclusively while it is alive.
+pub struct IRBlock<'a> {
+    module: &'a mut IRModule,
+    blockref: LLVMBasicBlockRef,
+}
+
+impl<'a> IRBlock<'a> {
+    /// Creates a basic block named `name` and positions the module's builder at its end.
+    fn create<T: Into<String>>(name: T, codegen: &'a mut IRModule) -> IRBlock<'a> {
+        unsafe {
+            let blockref =
+                LLVMCreateBasicBlockInContext(codegen.context, into_cstring(name).as_ptr());
+            LLVMPositionBuilderAtEnd(codegen.builder, blockref);
+            IRBlock {
+                module: codegen,
+                blockref,
+            }
+        }
+    }
+
+    /// Builds the LLVM constant for `literal_type`.
+    pub fn get_const(&mut self, literal_type: &Literal) -> IRValue {
+        unsafe {
+            match *literal_type {
+                Literal::I32(v) => IRValue(
+                    IRValueType::I32,
+                    LLVMConstInt(
+                        LLVMInt32TypeInContext(self.module.context),
+                        // Pass the bit pattern as u64; the trailing 1 is SignExtend.
+                        mem::transmute(v as i64),
+                        1,
+                    ),
+                ),
+            }
+        }
+    }
+
+    /// Emits an integer addition of `lhs` and `rhs`.
+    ///
+    /// # Errors
+    /// Returns [`Error::TypeMismatch`] if the operands have different types.
+    pub fn add(&mut self, lhs: IRValue, rhs: IRValue) -> Result<IRValue, Error> {
+        unsafe {
+            if lhs.0 == rhs.0 {
+                Ok(IRValue(
+                    lhs.0,
+                    LLVMBuildAdd(self.module.builder, lhs.1, rhs.1, cstr!("tmpadd")),
+                ))
+            } else {
+                Err(Error::TypeMismatch(lhs.0, rhs.0))
+            }
+        }
+    }
+
+    /// Emits an integer multiplication of `lhs` and `rhs`.
+    ///
+    /// # Errors
+    /// Returns [`Error::TypeMismatch`] if the operands have different types.
+    pub fn mul(&mut self, lhs: IRValue, rhs: IRValue) -> Result<IRValue, Error> {
+        unsafe {
+            if lhs.0 == rhs.0 {
+                Ok(IRValue(
+                    lhs.0,
+                    LLVMBuildMul(self.module.builder, lhs.1, rhs.1, cstr!("tmpmul")),
+                ))
+            } else {
+                Err(Error::TypeMismatch(lhs.0, rhs.0))
+            }
+        }
+    }
+
+    /// Emits a call to the function named by `callee`, passing no arguments.
+    ///
+    /// The call is always typed as `fn() -> i32`, regardless of what `callee` declares.
+    pub fn function_call(&mut self, callee: &FunctionSignature) -> Result<IRValue, Error> {
+        unsafe {
+            let function = LLVMGetNamedFunction(
+                self.module.module,
+                into_cstring(callee.name.clone()).as_ptr(),
+            );
+
+            let ret_t = LLVMInt32TypeInContext(self.module.context);
+            let mut argts = [];
+            let mut args = [];
+
+            let fun_t = LLVMFunctionType(ret_t, argts.as_mut_ptr(), argts.len() as u32, 0);
+
+            let call = LLVMBuildCall2(
+                self.module.builder,
+                fun_t,
+                function,
+                args.as_mut_ptr(),
+                args.len() as u32,
+                into_cstring(&callee.name).as_ptr(),
+            );
+
+            Ok(IRValue(IRValueType::I32, call))
+        }
+    }
+}
+
+/// Errors reported by the IR builder wrappers.
+#[derive(Debug, thiserror::Error)]
+pub enum Error {
+    #[error("Type Mismatch: {0:?} {1:?}")]
+    TypeMismatch(IRValueType, IRValueType),
+}
diff --git a/src/token_stream.rs b/reid/src/token_stream.rs
similarity index 100%
rename from src/token_stream.rs
rename to reid/src/token_stream.rs