From dd3e0618aec999dcbd37240c9107d1acdac35aa9 Mon Sep 17 00:00:00 2001 From: sofia Date: Wed, 17 Jul 2024 20:43:28 +0300 Subject: [PATCH] Add the very most basic codegen --- .gitignore | 2 + src/codegen.rs | 222 -------------------------------------------- src/codegen/llvm.rs | 86 +++++++++++++++++ src/codegen/mod.rs | 19 ++++ src/lib.rs | 13 ++- src/llvm_ir.rs | 203 ---------------------------------------- 6 files changed, 115 insertions(+), 430 deletions(-) delete mode 100644 src/codegen.rs create mode 100644 src/codegen/llvm.rs create mode 100644 src/codegen/mod.rs delete mode 100644 src/llvm_ir.rs diff --git a/.gitignore b/.gitignore index cbdf84f..a179d95 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ + +src/old_llvm /target /.vscode .env \ No newline at end of file diff --git a/src/codegen.rs b/src/codegen.rs deleted file mode 100644 index 83b1e12..0000000 --- a/src/codegen.rs +++ /dev/null @@ -1,222 +0,0 @@ -use std::collections::{hash_map, HashMap}; - -use crate::{ - ast::{ - BinaryOperator, Block, BlockLevelStatement, Expression, FunctionCallExpression, - FunctionDefinition, FunctionSignature, ReturnType, TopLevelStatement, - }, - llvm_ir::{self, IRBlock, IRFunction, IRModule, IRValue, IRValueType}, -}; - -#[derive(Clone)] -pub struct ScopeData { - named_vars: HashMap, - defined_functions: HashMap)>, -} - -impl ScopeData { - pub fn inner<'a, 'b>(&self, block: &'b mut IRBlock<'a>) -> Scope<'a, 'b> { - Scope { - block, - data: self.clone(), - } - } - - pub fn var(&self, name: &String) -> Option<&IRValue> { - self.named_vars.get(name) - } - - pub fn set_var(&mut self, name: &str, val: IRValue) -> Result<(), Error> { - if let hash_map::Entry::Vacant(e) = self.named_vars.entry(name.to_owned()) { - e.insert(val); - Ok(()) - } else { - Err(Error::VariableAlreadyDefined(name.to_owned())) - } - } - - pub fn function( - &mut self, - name: &String, - ) -> Option<&mut (FunctionSignature, Option)> { - self.defined_functions.get_mut(name) - } - - pub fn set_function_signature( - &mut self, - name: &str, - sig: FunctionSignature, - ir: IRFunction, - ) -> Result<(), Error> { - if let hash_map::Entry::Vacant(e) = self.defined_functions.entry(name.to_owned()) { - e.insert((sig, Some(ir))); - Ok(()) - } else { - Err(Error::VariableAlreadyDefined(name.to_owned())) - } - } -} - -pub struct Scope<'a, 'b> { - pub block: &'b mut IRBlock<'a>, - pub data: ScopeData, -} - -impl<'a, 'b> Scope<'a, 'b> { - pub fn inner<'c>(&'c mut self) -> Scope<'a, 'c> { - Scope { - block: self.block, - data: self.data.clone(), - } - } -} - -pub fn codegen_from_statements(statements: Vec) -> Result { - let mut module = IRModule::new("testmod"); - - let mut scope = ScopeData { - defined_functions: HashMap::new(), - named_vars: HashMap::new(), - }; - - for statement in &statements { - match statement { - TopLevelStatement::FunctionDefinition(FunctionDefinition(sig, _)) => { - let function = module.create_func(&sig.name, IRValueType::I32); - scope.set_function_signature(&sig.name.clone(), sig.clone(), function)?; - } - TopLevelStatement::Import(_) => {} - } - } - - for statement in &statements { - statement.codegen(&mut module, &mut scope)?; - } - - Ok(module) -} - -impl TopLevelStatement { - pub fn codegen(&self, module: &mut IRModule, root_data: &mut ScopeData) -> Result<(), Error> { - match self { - TopLevelStatement::FunctionDefinition(FunctionDefinition(sig, block)) => { - if let Some((_, ir)) = root_data.function(&sig.name) { - if let Some(ir_function) = ir.take() { - let mut ir_block = module.create_block(); - let mut scope = root_data.inner(&mut ir_block); - - let (_, value) = match block.codegen(&mut scope)? { - Some(v) => v, - None => panic!("Void-return type function not yet implemented!"), - }; - - ir_function.add_definition(value, ir_block); - } else { - Err(Error::FunctionAlreadyDefined(sig.name.clone()))? - } - } else { - panic!("Function was not declared before it's definition") - } - } - TopLevelStatement::Import(_) => {} - } - Ok(()) - } -} - -impl Block { - pub fn codegen(&self, scope: &mut Scope) -> Result, Error> { - for statement in &self.0 { - statement.codegen(scope)?; - } - - let value = if let Some((rt, exp)) = &self.1 { - Some((*rt, exp.codegen(scope)?)) - } else { - None - }; - - Ok(value) - } -} - -impl BlockLevelStatement { - pub fn codegen(&self, scope: &mut Scope) -> Result<(), Error> { - match self { - BlockLevelStatement::Let(let_statement) => { - let val = let_statement.1.codegen(scope)?; - scope.data.set_var(&let_statement.0, val)?; - Ok(()) - } - BlockLevelStatement::Return(_) => panic!("Should never happen"), - BlockLevelStatement::Import(_) => Ok(()), // TODO: To implement - BlockLevelStatement::Expression(e) => { - let _value = e.codegen(scope)?; - Ok(()) - } - } - } -} - -impl Expression { - pub fn codegen(&self, scope: &mut Scope) -> Result { - use Expression::*; - match self { - Binop(op, lhs, rhs) => match op { - BinaryOperator::Add => { - let lhs = lhs.codegen(scope)?; - let rhs = rhs.codegen(scope)?; - Ok(scope.block.add(lhs, rhs)?) - } - BinaryOperator::Mult => { - let lhs = lhs.codegen(scope)?; - let rhs = rhs.codegen(scope)?; - Ok(scope.block.mul(lhs, rhs)?) - } - _ => panic!("Other binary operators not supported yet!"), - }, - BlockExpr(block) => { - let mut inner = scope.inner(); - - Ok(match block.codegen(&mut inner)? { - Some((r_type, value)) => match r_type { - ReturnType::Soft => value, - ReturnType::Hard => { - panic!("Hard returns in inner blocks not supported yet") - } - }, - None => panic!("Void-return type block not yet implemented!"), - }) - } - FunctionCall(fc) => { - let FunctionCallExpression(name, _) = &**fc; - if let Some((sig, _)) = scope.data.function(name) { - Ok(scope.block.function_call(sig)?) - } else { - Err(Error::UndefinedFunction(name.clone()))? - } - } - VariableName(name) => scope - .data - .var(name) - .cloned() - .ok_or(Error::UndefinedVariable(name.clone())), - Literal(lit) => Ok(scope.block.get_const(lit)), - IfExpr(_) => panic!("if expressions not yet supported"), - } - } -} - -#[derive(thiserror::Error, Debug)] -pub enum Error { - #[error("Variable '{0}' already defined")] - VariableAlreadyDefined(String), - #[error("Variable '{0}' not yet defined")] - UndefinedVariable(String), - #[error("Function '{0}' not defined")] - UndefinedFunction(String), - #[error("Function '{0}' already defined")] - FunctionAlreadyDefined(String), - #[error(transparent)] - Deeper(#[from] llvm_ir::Error), -} diff --git a/src/codegen/llvm.rs b/src/codegen/llvm.rs new file mode 100644 index 0000000..fddc2ff --- /dev/null +++ b/src/codegen/llvm.rs @@ -0,0 +1,86 @@ +use std::ffi::{CStr, CString}; +use std::mem; + +use llvm_sys::{core::*, prelude::*, LLVMBuilder, LLVMContext, LLVMModule}; + +fn into_cstring>(value: T) -> CString { + let string = value.into(); + unsafe { CString::from_vec_with_nul_unchecked((string + "\0").into_bytes()) } +} + +pub struct IRContext { + context: *mut LLVMContext, + builder: *mut LLVMBuilder, +} + +impl IRContext { + pub fn new() -> IRContext { + unsafe { + // Set up a context, module and builder in that context. + let context = LLVMContextCreate(); + let builder = LLVMCreateBuilderInContext(context); + IRContext { context, builder } + } + } + + pub fn module<'a>(&'a mut self, name: String) -> IRModule<'a> { + unsafe { + let module = + LLVMModuleCreateWithNameInContext(into_cstring(name).as_ptr(), self.context); + + // TODO, fix later! + + let t = LLVMInt32TypeInContext(self.context); + + let mut argts = []; + let func_type = LLVMFunctionType(t, argts.as_mut_ptr(), argts.len() as u32, 0); + + let anon_func = LLVMAddFunction(module, into_cstring("test").as_ptr(), func_type); + + let blockref = + LLVMCreateBasicBlockInContext(self.context, into_cstring("otus").as_ptr()); + LLVMPositionBuilderAtEnd(self.builder, blockref); + + // What is the last 1 ? + let val = LLVMConstInt(t, mem::transmute(3 as i64), 1); + + LLVMAppendExistingBasicBlock(anon_func, blockref); + LLVMBuildRet(self.builder, val); + + IRModule { + context: self, + module, + } + } + } +} + +impl Drop for IRContext { + fn drop(&mut self) { + // Clean up. Values created in the context mostly get cleaned up there. + unsafe { + LLVMDisposeBuilder(self.builder); + LLVMContextDispose(self.context); + } + } +} + +pub struct IRModule<'a> { + context: &'a mut IRContext, + module: *mut LLVMModule, +} + +impl<'a> IRModule<'a> { + pub fn print_to_string(&mut self) -> Result<&str, std::str::Utf8Error> { + unsafe { CStr::from_ptr(LLVMPrintModuleToString(self.module)).to_str() } + } +} + +impl<'a> Drop for IRModule<'a> { + fn drop(&mut self) { + // Clean up. Values created in the context mostly get cleaned up there. + unsafe { + LLVMDisposeModule(self.module); + } + } +} diff --git a/src/codegen/mod.rs b/src/codegen/mod.rs new file mode 100644 index 0000000..15ddff9 --- /dev/null +++ b/src/codegen/mod.rs @@ -0,0 +1,19 @@ +mod llvm; + +use llvm::{IRContext, IRModule}; + +use crate::TopLevelStatement; + +pub fn form_context() -> IRContext { + IRContext::new() +} + +pub fn from_statements<'a>( + context: &'a mut IRContext, + statements: Vec, +) -> Result, Error> { + Ok(context.module("hello".to_owned())) +} + +#[derive(thiserror::Error, Debug)] +pub enum Error {} diff --git a/src/lib.rs b/src/lib.rs index 79147d9..e43c8be 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,9 +1,11 @@ +use codegen::{form_context, from_statements}; + use crate::{ast::TopLevelStatement, lexer::Token, token_stream::TokenStream}; mod ast; -// mod codegen; +mod codegen; mod lexer; -mod llvm_ir; +// mod llvm_ir; mod token_stream; // TODO: @@ -37,7 +39,8 @@ pub fn compile(source: &str) -> Result { statements.push(statement); } - // let mut module = codegen_from_statements(statements)?; - // let text = module.print_to_string().unwrap(); - Ok("text".to_owned()) + let mut context = form_context(); + let mut module = from_statements(&mut context, statements).unwrap(); + let text = module.print_to_string().unwrap(); + Ok(text.to_owned()) } diff --git a/src/llvm_ir.rs b/src/llvm_ir.rs deleted file mode 100644 index d0c754f..0000000 --- a/src/llvm_ir.rs +++ /dev/null @@ -1,203 +0,0 @@ -use std::ffi::{CStr, CString}; -use std::mem; - -use llvm_sys::{core::*, prelude::*, LLVMBuilder, LLVMContext, LLVMModule}; - -use crate::ast::{FunctionSignature, Literal}; - -macro_rules! cstr { - ($string:expr) => { - core::ffi::CStr::from_bytes_with_nul_unchecked(concat!($string, "\0").as_bytes()).as_ptr() - }; -} - -#[derive(Clone, Debug)] -#[must_use = "value contains raw pointer and must be inserted somewhere"] -pub struct IRValue(IRValueType, LLVMValueRef); - -#[derive(Clone, Debug, PartialEq, Eq)] -pub enum IRValueType { - I32, -} - -impl IRValueType { - unsafe fn get_llvm_type(&self, module: &mut IRModule) -> LLVMTypeRef { - match *self { - Self::I32 => LLVMInt32TypeInContext(module.context), - } - } -} - -fn into_cstring>(value: T) -> CString { - let string = value.into(); - unsafe { CString::from_vec_with_nul_unchecked((string + "\0").into_bytes()) } -} - -pub struct IRModule { - context: *mut LLVMContext, - module: *mut LLVMModule, - builder: *mut LLVMBuilder, -} - -impl IRModule { - pub fn new>(name: T) -> IRModule { - unsafe { - // Set up a context, module and builder in that context. - let context = LLVMContextCreate(); - let module = LLVMModuleCreateWithNameInContext(into_cstring(name).as_ptr(), context); - let builder = LLVMCreateBuilderInContext(context); - - IRModule { - context, - module, - builder, - } - } - } - - pub fn create_block(&mut self) -> IRBlock { - IRBlock::create("entry", self) - } - - pub fn create_func>( - &mut self, - name: T, - return_type: IRValueType, - ) -> IRFunction { - unsafe { - let mut argts = []; - let func_type = LLVMFunctionType( - return_type.get_llvm_type(self), - argts.as_mut_ptr(), - argts.len() as u32, - 0, - ); - - let anon_func = LLVMAddFunction(self.module, into_cstring(name).as_ptr(), func_type); - IRFunction { - value: IRValue(return_type, anon_func), - } - } - } - - pub fn print_to_string(&mut self) -> Result<&str, std::str::Utf8Error> { - unsafe { CStr::from_ptr(LLVMPrintModuleToString(self.module)).to_str() } - } -} - -impl Drop for IRModule { - fn drop(&mut self) { - // Clean up. Values created in the context mostly get cleaned up there. - unsafe { - LLVMDisposeBuilder(self.builder); - LLVMDisposeModule(self.module); - LLVMContextDispose(self.context); - } - } -} - -#[derive(Clone, Debug)] -pub struct IRFunction { - value: IRValue, -} - -impl IRFunction { - pub fn add_definition(self, ret: IRValue, block: IRBlock) { - unsafe { - LLVMAppendExistingBasicBlock(self.value.1, block.blockref); - LLVMBuildRet(block.module.builder, ret.1); - } - } -} - -pub struct IRBlock<'a> { - module: &'a mut IRModule, - blockref: LLVMBasicBlockRef, -} - -impl<'a> IRBlock<'a> { - fn create>(name: T, codegen: &'a mut IRModule) -> IRBlock<'a> { - unsafe { - let blockref = - LLVMCreateBasicBlockInContext(codegen.context, into_cstring(name).as_ptr()); - LLVMPositionBuilderAtEnd(codegen.builder, blockref); - IRBlock { - module: codegen, - blockref, - } - } - } - - pub fn get_const(&mut self, literal_type: &Literal) -> IRValue { - unsafe { - match *literal_type { - Literal::I32(v) => IRValue( - IRValueType::I32, - LLVMConstInt( - LLVMInt32TypeInContext(self.module.context), - mem::transmute(v as i64), - 1, - ), - ), - } - } - } - - pub fn add(&mut self, lhs: IRValue, rhs: IRValue) -> Result { - unsafe { - if lhs.0 == rhs.0 { - Ok(IRValue( - lhs.0, - LLVMBuildAdd(self.module.builder, lhs.1, rhs.1, cstr!("tmpadd")), - )) - } else { - Err(Error::TypeMismatch(lhs.0, rhs.0)) - } - } - } - - pub fn mul(&mut self, lhs: IRValue, rhs: IRValue) -> Result { - unsafe { - if lhs.0 == rhs.0 { - Ok(IRValue( - lhs.0, - LLVMBuildMul(self.module.builder, lhs.1, rhs.1, cstr!("tmpadd")), - )) - } else { - Err(Error::TypeMismatch(lhs.0, rhs.0)) - } - } - } - - pub fn function_call(&mut self, callee: &FunctionSignature) -> Result { - unsafe { - let function = LLVMGetNamedFunction( - self.module.module, - into_cstring(callee.name.clone()).as_ptr(), - ); - - let ret_t = LLVMInt32TypeInContext(self.module.context); - let mut argts = []; - let mut args = []; - - let fun_t = LLVMFunctionType(ret_t, argts.as_mut_ptr(), argts.len() as u32, 0); - - let call = LLVMBuildCall2( - self.module.builder, - fun_t, - function, - args.as_mut_ptr(), - args.len() as u32, - into_cstring(&callee.name).as_ptr(), - ); - - Ok(IRValue(IRValueType::I32, call)) - } - } -} - -#[derive(Debug, thiserror::Error)] -pub enum Error { - #[error("Type Mismatch: {0:?} {1:?}")] - TypeMismatch(IRValueType, IRValueType), -}