From a196857dd6671f5675083c2da34052159aa0d008 Mon Sep 17 00:00:00 2001 From: sofia Date: Wed, 2 Aug 2023 17:03:06 +0300 Subject: [PATCH] Add simple codegen from actual AST --- reid/easiest.reid | 10 +++- src/codegen.rs | 127 +++++++++++++++++++++++++++++++--------------- src/main.rs | 71 +++++++++++++++++++++++--- src/parser.rs | 8 +-- 4 files changed, 161 insertions(+), 55 deletions(-) diff --git a/reid/easiest.reid b/reid/easiest.reid index 62fd5b9..c3239cd 100644 --- a/reid/easiest.reid +++ b/reid/easiest.reid @@ -1,8 +1,14 @@ // Hello, comment here! +import std::print; + +fn somethingelse() { + return 12; +} + fn main() { - let hello = 32; + let hello = 32 + 2 + 4; let beep = - hello ; + hello + 100; return beep; } \ No newline at end of file diff --git a/src/codegen.rs b/src/codegen.rs index 232532f..873425d 100644 --- a/src/codegen.rs +++ b/src/codegen.rs @@ -1,3 +1,4 @@ +use std::ffi::CString; use std::mem; use llvm_sys::{core::*, prelude::*, LLVMBuilder, LLVMContext, LLVMModule}; @@ -10,37 +11,43 @@ macro_rules! cstr { }; } -#[derive(PartialEq, Eq)] +#[derive(Clone, Debug, PartialEq, Eq)] pub enum ValueType { I32, } impl ValueType { - unsafe fn get_llvm_type(&self, codegen: &mut CodeGenerator) -> LLVMTypeRef { + unsafe fn get_llvm_type(&self, codegen: &mut IRModule) -> LLVMTypeRef { match *self { Self::I32 => LLVMInt32TypeInContext(codegen.context), } } } +#[derive(Clone, Debug)] #[must_use = "value contains raw pointer and must be inserted somewhere"] pub struct Value(ValueType, LLVMValueRef); -pub struct CodeGenerator { +fn into_cstring>(value: T) -> CString { + let string = value.into(); + unsafe { CString::from_vec_with_nul_unchecked((string + "\0").into_bytes()) } +} + +pub struct IRModule { context: *mut LLVMContext, module: *mut LLVMModule, builder: *mut LLVMBuilder, } -impl CodeGenerator { - pub fn new() -> CodeGenerator { +impl IRModule { + pub fn new() -> IRModule { unsafe { // Set up a context, module and builder in that context. let context = LLVMContextCreate(); let module = LLVMModuleCreateWithNameInContext(cstr!("testmodule"), context); let builder = LLVMCreateBuilderInContext(context); - CodeGenerator { + IRModule { context, module, builder, @@ -48,13 +55,83 @@ impl CodeGenerator { } } + pub fn create_block(&mut self) -> IRBlock { + IRBlock::create("entry", self) + } + + pub fn create_func>(&mut self, name: T, return_type: ValueType) -> IRFunction { + unsafe { + let mut argts = []; + let func_type = LLVMFunctionType( + return_type.get_llvm_type(self), + argts.as_mut_ptr(), + argts.len() as u32, + 0, + ); + + let anon_func = LLVMAddFunction(self.module, into_cstring(name).as_ptr(), func_type); + IRFunction { + value: Value(return_type, anon_func), + } + + // // Create a basic block in the function and set our builder to generate + // // code in it. + // let bb = LLVMAppendBasicBlockInContext(self.context, anon_func, cstr!("entry")); + // LLVMPositionBuilderAtEnd(self.builder, bb); + + // // Emit a `ret i64` into the function to return the computed sum. + // LLVMBuildRet(self.builder, ret.1); + } + } +} + +impl Drop for IRModule { + fn drop(&mut self) { + // Clean up. Values created in the context mostly get cleaned up there. + unsafe { + LLVMDisposeBuilder(self.builder); + LLVMDumpModule(self.module); + LLVMDisposeModule(self.module); + LLVMContextDispose(self.context); + } + } +} + +pub struct IRFunction { + value: Value, +} + +impl IRFunction { + pub fn add_definition(self, ret: Value, block: IRBlock) { + unsafe { + LLVMAppendExistingBasicBlock(self.value.1, block.blockref); + LLVMBuildRet(block.codegen.builder, ret.1); + } + } +} + +pub struct IRBlock<'a> { + codegen: &'a mut IRModule, + blockref: LLVMBasicBlockRef, +} + +impl<'a> IRBlock<'a> { + fn create>(name: T, codegen: &'a mut IRModule) -> IRBlock<'a> { + unsafe { + let blockref = + LLVMCreateBasicBlockInContext(codegen.context, into_cstring(name).as_ptr()); + LLVMPositionBuilderAtEnd(codegen.builder, blockref); + IRBlock { codegen, blockref } + } + } + pub fn get_const(&mut self, literal_type: &Literal) -> Value { unsafe { match *literal_type { Literal::I32(v) => Value( ValueType::I32, LLVMConstInt( - LLVMInt32TypeInContext(self.context), + LLVMInt32TypeInContext(self.codegen.context), mem::transmute(v as i64), 1, ), @@ -68,45 +145,11 @@ impl CodeGenerator { if lhs.0 == rhs.0 { Ok(Value( lhs.0, - LLVMBuildAdd(self.builder, lhs.1, rhs.1, cstr!("tmpadd")), + LLVMBuildAdd(self.codegen.builder, lhs.1, rhs.1, cstr!("tmpadd")), )) } else { Err(()) } } } - - pub fn create_func(&mut self, ret: Value) { - unsafe { - let mut argts = []; - let func_type = LLVMFunctionType( - ret.0.get_llvm_type(self), - argts.as_mut_ptr(), - argts.len() as u32, - 0, - ); - - let anon_func = LLVMAddFunction(self.module, cstr!("_anon_func"), func_type); - - // Create a basic block in the function and set our builder to generate - // code in it. - let bb = LLVMAppendBasicBlockInContext(self.context, anon_func, cstr!("entry")); - LLVMPositionBuilderAtEnd(self.builder, bb); - - // Emit a `ret i64` into the function to return the computed sum. - LLVMBuildRet(self.builder, ret.1); - } - } -} - -impl Drop for CodeGenerator { - fn drop(&mut self) { - // Clean up. Values created in the context mostly get cleaned up there. - unsafe { - LLVMDisposeBuilder(self.builder); - LLVMDumpModule(self.module); - LLVMDisposeModule(self.module); - LLVMContextDispose(self.context); - } - } } diff --git a/src/main.rs b/src/main.rs index fbf164c..0d174f5 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,5 +1,13 @@ +use std::collections::HashMap; + +use codegen::IRBlock; +use parser::Expression; + use crate::{ - codegen::CodeGenerator, lexer::Token, parser::TopLevelStatement, token_stream::TokenStream, + codegen::{IRModule, Value}, + lexer::Token, + parser::{FunctionDefinition, Literal, TopLevelStatement}, + token_stream::TokenStream, }; pub static EASIEST: &str = include_str!("../reid/easiest.reid"); @@ -25,16 +33,65 @@ fn main() { let mut token_stream = TokenStream::from(&tokens); + let mut statements = Vec::new(); + while !matches!(token_stream.peek().unwrap_or(Token::Eof), Token::Eof) { let statement = token_stream.parse::().unwrap(); dbg!(&statement); + statements.push(statement); } - let mut c = CodeGenerator::new(); - let x = c.get_const(&parser::Literal::I32(3)); - let y = c.get_const(&parser::Literal::I32(4)); - let add = c.add(x, y).unwrap(); - c.create_func(add); + let mut c = IRModule::new(); + for statement in statements { + match statement { + TopLevelStatement::FunctionDefinition(FunctionDefinition(sig, block)) => { + let mut named_vars: HashMap = HashMap::new(); - // dbg!(token_stream.expect(Token::Eof).ok()); + let func = c.create_func(sig.name, codegen::ValueType::I32); + let mut c_block = c.create_block(); + + for stmt in block.0 { + match stmt { + parser::BlockLevelStatement::Let(let_statement) => { + let value = codegen_exp(&mut c_block, &named_vars, let_statement.1); + named_vars.insert(let_statement.0, value); + } + parser::BlockLevelStatement::Return(_) => panic!("Should never exist!"), + parser::BlockLevelStatement::Import(_) => {} + parser::BlockLevelStatement::Expression(_) => {} + } + } + + let value = if let Some(exp) = block.1 { + codegen_exp(&mut c_block, &named_vars, exp) + } else { + c_block.get_const(&Literal::I32(0)) + }; + func.add_definition(value, c_block); + } + TopLevelStatement::Import(_) => {} + } + } +} + +fn codegen_exp( + c_block: &mut IRBlock, + named_vars: &HashMap, + expression: Expression, +) -> Value { + use parser::Expression::*; + match expression { + Binop(op, lhs, rhs) => match op { + parser::BinaryOperator::Add => { + let lhs = codegen_exp(c_block, named_vars, *lhs); + let rhs = codegen_exp(c_block, named_vars, *rhs); + c_block.add(lhs, rhs).unwrap() + } + parser::BinaryOperator::Mult => panic!("Not implemented!"), + }, + BlockExpr(_) => panic!("Not implemented!"), + FunctionCall(_) => panic!("Not implemented!"), + VariableName(name) => named_vars.get(&name).cloned().unwrap(), + Literal(lit) => c_block.get_const(&lit), + } } diff --git a/src/parser.rs b/src/parser.rs index b5b3bab..4e99dd7 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -125,7 +125,7 @@ impl Parse for FunctionCallExpression { } #[derive(Debug, Clone)] -pub struct LetStatement(String, Expression); +pub struct LetStatement(pub String, pub Expression); impl Parse for LetStatement { fn parse(mut stream: TokenStream) -> Result { @@ -172,7 +172,7 @@ impl Parse for ImportStatement { } #[derive(Debug)] -pub struct FunctionDefinition(FunctionSignature, Block); +pub struct FunctionDefinition(pub FunctionSignature, pub Block); impl Parse for FunctionDefinition { fn parse(mut stream: TokenStream) -> Result { @@ -183,7 +183,7 @@ impl Parse for FunctionDefinition { #[derive(Debug)] pub struct FunctionSignature { - name: String, + pub name: String, } impl Parse for FunctionSignature { @@ -199,7 +199,7 @@ impl Parse for FunctionSignature { } #[derive(Debug, Clone)] -pub struct Block(Vec, Option); +pub struct Block(pub Vec, pub Option); impl Parse for Block { fn parse(mut stream: TokenStream) -> Result {