Add the very most basic codegen

This commit is contained in:
Sofia 2024-07-17 20:43:28 +03:00
parent 8176dc98a3
commit dd3e0618ae
6 changed files with 115 additions and 430 deletions

2
.gitignore vendored
View File

@ -1,3 +1,5 @@
src/old_llvm
/target /target
/.vscode /.vscode
.env .env

View File

@ -1,222 +0,0 @@
use std::collections::{hash_map, HashMap};
use crate::{
ast::{
BinaryOperator, Block, BlockLevelStatement, Expression, FunctionCallExpression,
FunctionDefinition, FunctionSignature, ReturnType, TopLevelStatement,
},
llvm_ir::{self, IRBlock, IRFunction, IRModule, IRValue, IRValueType},
};
/// Name-resolution state used during code generation: the variables and
/// functions that are visible by name. The whole structure is cloned whenever
/// a nested scope is opened.
#[derive(Clone)]
pub struct ScopeData {
// Values bound by name; entries are only ever added, never overwritten.
named_vars: HashMap<String, IRValue>,
// Declared functions by name: the parsed signature plus the IR function
// handle, which is taken out (left as `None`) once the body has been emitted.
defined_functions: HashMap<String, (FunctionSignature, Option<IRFunction>)>,
}
impl ScopeData {
pub fn inner<'a, 'b>(&self, block: &'b mut IRBlock<'a>) -> Scope<'a, 'b> {
Scope {
block,
data: self.clone(),
}
}
pub fn var(&self, name: &String) -> Option<&IRValue> {
self.named_vars.get(name)
}
pub fn set_var(&mut self, name: &str, val: IRValue) -> Result<(), Error> {
if let hash_map::Entry::Vacant(e) = self.named_vars.entry(name.to_owned()) {
e.insert(val);
Ok(())
} else {
Err(Error::VariableAlreadyDefined(name.to_owned()))
}
}
pub fn function(
&mut self,
name: &String,
) -> Option<&mut (FunctionSignature, Option<IRFunction>)> {
self.defined_functions.get_mut(name)
}
pub fn set_function_signature(
&mut self,
name: &str,
sig: FunctionSignature,
ir: IRFunction,
) -> Result<(), Error> {
if let hash_map::Entry::Vacant(e) = self.defined_functions.entry(name.to_owned()) {
e.insert((sig, Some(ir)));
Ok(())
} else {
Err(Error::VariableAlreadyDefined(name.to_owned()))
}
}
}
/// A code-generation scope: the IR block that instructions are emitted into,
/// together with the names visible while emitting.
pub struct Scope<'a, 'b> {
// Block that expression codegen appends instructions to.
pub block: &'b mut IRBlock<'a>,
// Variables and functions visible in this scope (an owned copy).
pub data: ScopeData,
}
impl<'a, 'b> Scope<'a, 'b> {
    /// Opens a nested scope that emits into the same block but works on its
    /// own copy of the name tables, so bindings made inside do not leak out.
    pub fn inner<'c>(&'c mut self) -> Scope<'a, 'c> {
        let data = self.data.clone();
        // Reborrow the block for the (shorter) lifetime of the nested scope.
        let block = &mut *self.block;
        Scope { block, data }
    }
}
/// Lowers a parsed program into an IR module.
///
/// Works in two passes so that functions can be called before their
/// definition appears in the source: first every function is declared, then
/// every top-level statement is lowered.
///
/// # Errors
/// Propagates any [`Error`] raised while declaring functions or lowering
/// statements.
pub fn codegen_from_statements(statements: Vec<TopLevelStatement>) -> Result<IRModule, Error> {
    let mut module = IRModule::new("testmod");
    let mut scope = ScopeData {
        defined_functions: HashMap::new(),
        named_vars: HashMap::new(),
    };

    // Pass 1: declare every function (all of them return i32 for now).
    for statement in &statements {
        match statement {
            TopLevelStatement::FunctionDefinition(FunctionDefinition(sig, _)) => {
                let function = module.create_func(&sig.name, IRValueType::I32);
                // Borrow the name directly; cloning it just to take a reference is wasted work.
                scope.set_function_signature(&sig.name, sig.clone(), function)?;
            }
            TopLevelStatement::Import(_) => {}
        }
    }

    // Pass 2: emit the bodies.
    for statement in &statements {
        statement.codegen(&mut module, &mut scope)?;
    }

    Ok(module)
}
impl TopLevelStatement {
pub fn codegen(&self, module: &mut IRModule, root_data: &mut ScopeData) -> Result<(), Error> {
match self {
TopLevelStatement::FunctionDefinition(FunctionDefinition(sig, block)) => {
if let Some((_, ir)) = root_data.function(&sig.name) {
if let Some(ir_function) = ir.take() {
let mut ir_block = module.create_block();
let mut scope = root_data.inner(&mut ir_block);
let (_, value) = match block.codegen(&mut scope)? {
Some(v) => v,
None => panic!("Void-return type function not yet implemented!"),
};
ir_function.add_definition(value, ir_block);
} else {
Err(Error::FunctionAlreadyDefined(sig.name.clone()))?
}
} else {
panic!("Function was not declared before it's definition")
}
}
TopLevelStatement::Import(_) => {}
}
Ok(())
}
}
impl Block {
    /// Lowers every statement of the block in order, then the optional
    /// trailing return expression.
    ///
    /// Returns the kind of return together with its value, or `None` when the
    /// block has no return expression.
    ///
    /// # Errors
    /// Stops at and returns the first error produced by a statement or by the
    /// return expression.
    pub fn codegen(&self, scope: &mut Scope) -> Result<Option<(ReturnType, IRValue)>, Error> {
        self.0
            .iter()
            .try_for_each(|statement| statement.codegen(scope))?;

        self.1
            .as_ref()
            .map(|(return_type, expression)| {
                expression.codegen(scope).map(|value| (*return_type, value))
            })
            .transpose()
    }
}
impl BlockLevelStatement {
    /// Lowers a single statement inside a block.
    ///
    /// `let` evaluates its expression and binds the result in the scope; a
    /// bare expression is evaluated and its value discarded; imports do
    /// nothing yet.
    ///
    /// # Errors
    /// Propagates errors from lowering the contained expression and from
    /// binding a variable name that is already taken.
    ///
    /// # Panics
    /// Panics on a `Return` statement.
    pub fn codegen(&self, scope: &mut Scope) -> Result<(), Error> {
        match self {
            BlockLevelStatement::Let(binding) => {
                let value = binding.1.codegen(scope)?;
                scope.data.set_var(&binding.0, value)?;
            }
            BlockLevelStatement::Return(_) => panic!("Should never happen"),
            // TODO: To implement
            BlockLevelStatement::Import(_) => {}
            BlockLevelStatement::Expression(expression) => {
                let _ = expression.codegen(scope)?;
            }
        }
        Ok(())
    }
}
impl Expression {
pub fn codegen(&self, scope: &mut Scope) -> Result<IRValue, Error> {
use Expression::*;
match self {
Binop(op, lhs, rhs) => match op {
BinaryOperator::Add => {
let lhs = lhs.codegen(scope)?;
let rhs = rhs.codegen(scope)?;
Ok(scope.block.add(lhs, rhs)?)
}
BinaryOperator::Mult => {
let lhs = lhs.codegen(scope)?;
let rhs = rhs.codegen(scope)?;
Ok(scope.block.mul(lhs, rhs)?)
}
_ => panic!("Other binary operators not supported yet!"),
},
BlockExpr(block) => {
let mut inner = scope.inner();
Ok(match block.codegen(&mut inner)? {
Some((r_type, value)) => match r_type {
ReturnType::Soft => value,
ReturnType::Hard => {
panic!("Hard returns in inner blocks not supported yet")
}
},
None => panic!("Void-return type block not yet implemented!"),
})
}
FunctionCall(fc) => {
let FunctionCallExpression(name, _) = &**fc;
if let Some((sig, _)) = scope.data.function(name) {
Ok(scope.block.function_call(sig)?)
} else {
Err(Error::UndefinedFunction(name.clone()))?
}
}
VariableName(name) => scope
.data
.var(name)
.cloned()
.ok_or(Error::UndefinedVariable(name.clone())),
Literal(lit) => Ok(scope.block.get_const(lit)),
IfExpr(_) => panic!("if expressions not yet supported"),
}
}
}
/// Errors that code generation can report.
#[derive(thiserror::Error, Debug)]
pub enum Error {
/// A name was bound a second time; carries the offending name.
#[error("Variable '{0}' already defined")]
VariableAlreadyDefined(String),
/// A variable was referenced that is not bound in the current scope.
#[error("Variable '{0}' not yet defined")]
UndefinedVariable(String),
/// A call targets a function that has not been declared.
#[error("Function '{0}' not defined")]
UndefinedFunction(String),
/// A function body was supplied for a function that already has one.
#[error("Function '{0}' already defined")]
FunctionAlreadyDefined(String),
/// An error raised by the IR layer, passed through unchanged.
#[error(transparent)]
Deeper(#[from] llvm_ir::Error),
}

86
src/codegen/llvm.rs Normal file
View File

@ -0,0 +1,86 @@
use std::ffi::{CStr, CString};
use std::mem;
use llvm_sys::{core::*, prelude::*, LLVMBuilder, LLVMContext, LLVMModule};
/// Converts any string-like value into a NUL-terminated [`CString`] suitable
/// for passing to C APIs.
///
/// If the input contains an interior NUL byte, everything from the first NUL
/// onwards is dropped. That is exactly what a C consumer would have read from
/// the unterminated original, but it keeps the `CString` invariant intact
/// instead of relying on an unchecked constructor.
fn into_cstring<T: Into<String>>(value: T) -> CString {
    let mut bytes = value.into().into_bytes();
    if let Some(first_nul) = bytes.iter().position(|&byte| byte == 0) {
        bytes.truncate(first_nul);
    }
    CString::new(bytes).expect("interior NUL bytes were truncated above")
}
/// Owns an LLVM context and an instruction builder created in that context.
/// Both handles are released when the value is dropped.
pub struct IRContext {
// Raw handle returned by `LLVMContextCreate`.
context: *mut LLVMContext,
// Raw handle returned by `LLVMCreateBuilderInContext` for `context`.
builder: *mut LLVMBuilder,
}
impl IRContext {
    /// Creates a fresh LLVM context together with an instruction builder that
    /// lives in it.
    pub fn new() -> IRContext {
        unsafe {
            let context = LLVMContextCreate();
            IRContext {
                context,
                builder: LLVMCreateBuilderInContext(context),
            }
        }
    }

    /// Creates a module called `name` in this context.
    ///
    /// For now the module is pre-populated with a placeholder function `test`
    /// whose single block `otus` returns the constant 3. The returned module
    /// borrows the context exclusively for as long as it lives.
    pub fn module<'a>(&'a mut self, name: String) -> IRModule<'a> {
        unsafe {
            let module_name = into_cstring(name);
            let module = LLVMModuleCreateWithNameInContext(module_name.as_ptr(), self.context);

            // TODO, fix later!
            // Placeholder: `i32 test()` with no parameters.
            let i32_type = LLVMInt32TypeInContext(self.context);
            let mut param_types = [];
            let signature = LLVMFunctionType(
                i32_type,
                param_types.as_mut_ptr(),
                param_types.len() as u32,
                0,
            );
            let function_name = into_cstring("test");
            let function = LLVMAddFunction(module, function_name.as_ptr(), signature);

            // The builder is pointed at the block before the block is attached
            // to the function; the return is emitted once it is attached.
            let block_name = into_cstring("otus");
            let entry = LLVMCreateBasicBlockInContext(self.context, block_name.as_ptr());
            LLVMPositionBuilderAtEnd(self.builder, entry);

            // The trailing `1` is LLVMConstInt's `SignExtend` flag.
            let return_value = LLVMConstInt(i32_type, mem::transmute(3 as i64), 1);
            LLVMAppendExistingBasicBlock(function, entry);
            LLVMBuildRet(self.builder, return_value);

            IRModule {
                context: self,
                module,
            }
        }
    }
}
/// Releases the LLVM handles owned by the context.
impl Drop for IRContext {
fn drop(&mut self) {
// Clean up. Values created in the context mostly get cleaned up there.
// The builder is disposed first, then the context it was created in.
unsafe {
LLVMDisposeBuilder(self.builder);
LLVMContextDispose(self.context);
}
}
}
/// An LLVM module created inside an [`IRContext`]. The module handle is
/// disposed when the value is dropped.
pub struct IRModule<'a> {
// Exclusive borrow of the context the module was created in, held for the
// module's whole lifetime.
context: &'a mut IRContext,
// Raw handle returned by `LLVMModuleCreateWithNameInContext`.
module: *mut LLVMModule,
}
impl<'a> IRModule<'a> {
/// Renders the module as textual LLVM IR.
///
/// # Errors
/// Returns `Utf8Error` when the text produced by LLVM is not valid UTF-8.
///
/// NOTE(review): the buffer returned by `LLVMPrintModuleToString` is never
/// handed to `LLVMDisposeMessage` here, so every call appears to leak it —
/// confirm, and consider returning an owned `String` instead.
pub fn print_to_string(&mut self) -> Result<&str, std::str::Utf8Error> {
unsafe { CStr::from_ptr(LLVMPrintModuleToString(self.module)).to_str() }
}
}
/// Disposes the underlying LLVM module; the borrowed context is left alone
/// and cleaned up by its own `Drop`.
impl<'a> Drop for IRModule<'a> {
fn drop(&mut self) {
// Clean up. Values created in the context mostly get cleaned up there.
unsafe {
LLVMDisposeModule(self.module);
}
}
}

19
src/codegen/mod.rs Normal file
View File

@ -0,0 +1,19 @@
mod llvm;
use llvm::{IRContext, IRModule};
use crate::TopLevelStatement;
/// Creates the [`IRContext`] that modules are later built in; a thin wrapper
/// around `IRContext::new`.
pub fn form_context() -> IRContext {
IRContext::new()
}
/// Builds an IR module inside `context` for the given program.
///
/// Currently a stub: `statements` is not inspected at all, and the result is
/// always the module named "hello" that `IRContext::module` produces. The
/// function never returns `Err` at the moment.
pub fn from_statements<'a>(
context: &'a mut IRContext,
statements: Vec<TopLevelStatement>,
) -> Result<IRModule<'a>, Error> {
Ok(context.module("hello".to_owned()))
}
/// Errors that code generation can report. There are no variants yet, so a
/// value of this type cannot be constructed.
#[derive(thiserror::Error, Debug)]
pub enum Error {}

View File

@ -1,9 +1,11 @@
use codegen::{form_context, from_statements};
use crate::{ast::TopLevelStatement, lexer::Token, token_stream::TokenStream}; use crate::{ast::TopLevelStatement, lexer::Token, token_stream::TokenStream};
mod ast; mod ast;
// mod codegen; mod codegen;
mod lexer; mod lexer;
mod llvm_ir; // mod llvm_ir;
mod token_stream; mod token_stream;
// TODO: // TODO:
@ -37,7 +39,8 @@ pub fn compile(source: &str) -> Result<String, ReidError> {
statements.push(statement); statements.push(statement);
} }
// let mut module = codegen_from_statements(statements)?; let mut context = form_context();
// let text = module.print_to_string().unwrap(); let mut module = from_statements(&mut context, statements).unwrap();
Ok("text".to_owned()) let text = module.print_to_string().unwrap();
Ok(text.to_owned())
} }

View File

@ -1,203 +0,0 @@
use std::ffi::{CStr, CString};
use std::mem;
use llvm_sys::{core::*, prelude::*, LLVMBuilder, LLVMContext, LLVMModule};
use crate::ast::{FunctionSignature, Literal};
/// Expands a string literal into a pointer to a static, NUL-terminated C
/// string. The expansion calls an `unsafe` function, so the macro must be
/// used inside an `unsafe` context, and the literal must not contain NUL
/// bytes of its own.
macro_rules! cstr {
    ($string:expr) => {{
        // The terminator is appended at compile time.
        let bytes: &'static [u8] = concat!($string, "\0").as_bytes();
        core::ffi::CStr::from_bytes_with_nul_unchecked(bytes).as_ptr()
    }};
}
/// An LLVM value handle tagged with the IR type it was created with:
/// field 0 is the type tag, field 1 the raw LLVM value reference.
#[derive(Clone, Debug)]
#[must_use = "value contains raw pointer and must be inserted somewhere"]
pub struct IRValue(IRValueType, LLVMValueRef);
/// The value types the IR layer knows about.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum IRValueType {
/// Mapped to LLVM's 32-bit integer type.
I32,
}
impl IRValueType {
/// Returns the LLVM type corresponding to this value type, created in the
/// context of `module`.
///
/// # Safety
/// NOTE(review): presumably requires `module.context` to be a live LLVM
/// context handle — confirm against the callers.
unsafe fn get_llvm_type(&self, module: &mut IRModule) -> LLVMTypeRef {
match *self {
Self::I32 => LLVMInt32TypeInContext(module.context),
}
}
}
/// Converts any string-like value into a NUL-terminated [`CString`] suitable
/// for passing to C APIs.
///
/// If the input contains an interior NUL byte, everything from the first NUL
/// onwards is dropped. That is exactly what a C consumer would have read from
/// the unterminated original, but it keeps the `CString` invariant intact
/// instead of relying on an unchecked constructor.
fn into_cstring<T: Into<String>>(value: T) -> CString {
    let mut bytes = value.into().into_bytes();
    if let Some(first_nul) = bytes.iter().position(|&byte| byte == 0) {
        bytes.truncate(first_nul);
    }
    CString::new(bytes).expect("interior NUL bytes were truncated above")
}
/// Bundles the three LLVM handles needed to emit code: a context, a module
/// created in that context, and an instruction builder. All three are
/// disposed when the value is dropped.
pub struct IRModule {
// Raw handle returned by `LLVMContextCreate`.
context: *mut LLVMContext,
// Raw handle of the module created in `context`.
module: *mut LLVMModule,
// Raw handle of the builder created in `context`.
builder: *mut LLVMBuilder,
}
impl IRModule {
    /// Creates a new LLVM context, a module called `name` inside it, and an
    /// instruction builder for that context.
    pub fn new<T: Into<String>>(name: T) -> IRModule {
        unsafe {
            let context = LLVMContextCreate();
            let module_name = into_cstring(name);
            let module = LLVMModuleCreateWithNameInContext(module_name.as_ptr(), context);
            let builder = LLVMCreateBuilderInContext(context);
            IRModule {
                context,
                module,
                builder,
            }
        }
    }

    /// Creates a basic block named "entry" and positions the builder at its end.
    pub fn create_block(&mut self) -> IRBlock {
        IRBlock::create("entry", self)
    }

    /// Declares a parameterless function called `name` that returns
    /// `return_type`, and adds it to the module.
    pub fn create_func<T: Into<String>>(
        &mut self,
        name: T,
        return_type: IRValueType,
    ) -> IRFunction {
        let function_name = into_cstring(name);
        unsafe {
            let llvm_return_type = return_type.get_llvm_type(self);
            let mut param_types = [];
            let signature = LLVMFunctionType(
                llvm_return_type,
                param_types.as_mut_ptr(),
                param_types.len() as u32,
                0,
            );
            let handle = LLVMAddFunction(self.module, function_name.as_ptr(), signature);
            IRFunction {
                value: IRValue(return_type, handle),
            }
        }
    }

    /// Renders the module as textual LLVM IR.
    ///
    /// # Errors
    /// Returns `Utf8Error` when the text produced by LLVM is not valid UTF-8.
    pub fn print_to_string(&mut self) -> Result<&str, std::str::Utf8Error> {
        unsafe {
            let printed = LLVMPrintModuleToString(self.module);
            CStr::from_ptr(printed).to_str()
        }
    }
}
/// Releases the LLVM handles owned by the module wrapper.
impl Drop for IRModule {
fn drop(&mut self) {
// Clean up. Values created in the context mostly get cleaned up there.
// Disposal order: builder, then module, then the context both live in.
unsafe {
LLVMDisposeBuilder(self.builder);
LLVMDisposeModule(self.module);
LLVMContextDispose(self.context);
}
}
}
/// A function that has been added to a module but may not have a body yet.
#[derive(Clone, Debug)]
pub struct IRFunction {
// The function's return type paired with its LLVM function handle.
value: IRValue,
}
impl IRFunction {
pub fn add_definition(self, ret: IRValue, block: IRBlock) {
unsafe {
LLVMAppendExistingBasicBlock(self.value.1, block.blockref);
LLVMBuildRet(block.module.builder, ret.1);
}
}
}
/// A basic block under construction. Holds the module exclusively so that
/// instructions can be emitted through the module's builder.
pub struct IRBlock<'a> {
// The module (context, builder) this block emits through.
module: &'a mut IRModule,
// Raw LLVM handle of the basic block.
blockref: LLVMBasicBlockRef,
}
impl<'a> IRBlock<'a> {
fn create<T: Into<String>>(name: T, codegen: &'a mut IRModule) -> IRBlock<'a> {
unsafe {
let blockref =
LLVMCreateBasicBlockInContext(codegen.context, into_cstring(name).as_ptr());
LLVMPositionBuilderAtEnd(codegen.builder, blockref);
IRBlock {
module: codegen,
blockref,
}
}
}
pub fn get_const(&mut self, literal_type: &Literal) -> IRValue {
unsafe {
match *literal_type {
Literal::I32(v) => IRValue(
IRValueType::I32,
LLVMConstInt(
LLVMInt32TypeInContext(self.module.context),
mem::transmute(v as i64),
1,
),
),
}
}
}
pub fn add(&mut self, lhs: IRValue, rhs: IRValue) -> Result<IRValue, Error> {
unsafe {
if lhs.0 == rhs.0 {
Ok(IRValue(
lhs.0,
LLVMBuildAdd(self.module.builder, lhs.1, rhs.1, cstr!("tmpadd")),
))
} else {
Err(Error::TypeMismatch(lhs.0, rhs.0))
}
}
}
pub fn mul(&mut self, lhs: IRValue, rhs: IRValue) -> Result<IRValue, Error> {
unsafe {
if lhs.0 == rhs.0 {
Ok(IRValue(
lhs.0,
LLVMBuildMul(self.module.builder, lhs.1, rhs.1, cstr!("tmpadd")),
))
} else {
Err(Error::TypeMismatch(lhs.0, rhs.0))
}
}
}
pub fn function_call(&mut self, callee: &FunctionSignature) -> Result<IRValue, Error> {
unsafe {
let function = LLVMGetNamedFunction(
self.module.module,
into_cstring(callee.name.clone()).as_ptr(),
);
let ret_t = LLVMInt32TypeInContext(self.module.context);
let mut argts = [];
let mut args = [];
let fun_t = LLVMFunctionType(ret_t, argts.as_mut_ptr(), argts.len() as u32, 0);
let call = LLVMBuildCall2(
self.module.builder,
fun_t,
function,
args.as_mut_ptr(),
args.len() as u32,
into_cstring(&callee.name).as_ptr(),
);
Ok(IRValue(IRValueType::I32, call))
}
}
}
/// Errors reported by the IR-building layer.
#[derive(Debug, thiserror::Error)]
pub enum Error {
/// The two operands of a binary operation have different types
/// (left-hand type, right-hand type).
#[error("Type Mismatch: {0:?} {1:?}")]
TypeMismatch(IRValueType, IRValueType),
}