Add the very most basic codegen
This commit is contained in:
parent
8176dc98a3
commit
dd3e0618ae
2
.gitignore
vendored
2
.gitignore
vendored
@ -1,3 +1,5 @@
|
||||
|
||||
src/old_llvm
|
||||
/target
|
||||
/.vscode
|
||||
.env
|
222
src/codegen.rs
222
src/codegen.rs
@ -1,222 +0,0 @@
|
||||
use std::collections::{hash_map, HashMap};
|
||||
|
||||
use crate::{
|
||||
ast::{
|
||||
BinaryOperator, Block, BlockLevelStatement, Expression, FunctionCallExpression,
|
||||
FunctionDefinition, FunctionSignature, ReturnType, TopLevelStatement,
|
||||
},
|
||||
llvm_ir::{self, IRBlock, IRFunction, IRModule, IRValue, IRValueType},
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ScopeData {
|
||||
named_vars: HashMap<String, IRValue>,
|
||||
defined_functions: HashMap<String, (FunctionSignature, Option<IRFunction>)>,
|
||||
}
|
||||
|
||||
impl ScopeData {
|
||||
pub fn inner<'a, 'b>(&self, block: &'b mut IRBlock<'a>) -> Scope<'a, 'b> {
|
||||
Scope {
|
||||
block,
|
||||
data: self.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn var(&self, name: &String) -> Option<&IRValue> {
|
||||
self.named_vars.get(name)
|
||||
}
|
||||
|
||||
pub fn set_var(&mut self, name: &str, val: IRValue) -> Result<(), Error> {
|
||||
if let hash_map::Entry::Vacant(e) = self.named_vars.entry(name.to_owned()) {
|
||||
e.insert(val);
|
||||
Ok(())
|
||||
} else {
|
||||
Err(Error::VariableAlreadyDefined(name.to_owned()))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn function(
|
||||
&mut self,
|
||||
name: &String,
|
||||
) -> Option<&mut (FunctionSignature, Option<IRFunction>)> {
|
||||
self.defined_functions.get_mut(name)
|
||||
}
|
||||
|
||||
pub fn set_function_signature(
|
||||
&mut self,
|
||||
name: &str,
|
||||
sig: FunctionSignature,
|
||||
ir: IRFunction,
|
||||
) -> Result<(), Error> {
|
||||
if let hash_map::Entry::Vacant(e) = self.defined_functions.entry(name.to_owned()) {
|
||||
e.insert((sig, Some(ir)));
|
||||
Ok(())
|
||||
} else {
|
||||
Err(Error::VariableAlreadyDefined(name.to_owned()))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Scope<'a, 'b> {
|
||||
pub block: &'b mut IRBlock<'a>,
|
||||
pub data: ScopeData,
|
||||
}
|
||||
|
||||
impl<'a, 'b> Scope<'a, 'b> {
|
||||
pub fn inner<'c>(&'c mut self) -> Scope<'a, 'c> {
|
||||
Scope {
|
||||
block: self.block,
|
||||
data: self.data.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn codegen_from_statements(statements: Vec<TopLevelStatement>) -> Result<IRModule, Error> {
|
||||
let mut module = IRModule::new("testmod");
|
||||
|
||||
let mut scope = ScopeData {
|
||||
defined_functions: HashMap::new(),
|
||||
named_vars: HashMap::new(),
|
||||
};
|
||||
|
||||
for statement in &statements {
|
||||
match statement {
|
||||
TopLevelStatement::FunctionDefinition(FunctionDefinition(sig, _)) => {
|
||||
let function = module.create_func(&sig.name, IRValueType::I32);
|
||||
scope.set_function_signature(&sig.name.clone(), sig.clone(), function)?;
|
||||
}
|
||||
TopLevelStatement::Import(_) => {}
|
||||
}
|
||||
}
|
||||
|
||||
for statement in &statements {
|
||||
statement.codegen(&mut module, &mut scope)?;
|
||||
}
|
||||
|
||||
Ok(module)
|
||||
}
|
||||
|
||||
impl TopLevelStatement {
|
||||
pub fn codegen(&self, module: &mut IRModule, root_data: &mut ScopeData) -> Result<(), Error> {
|
||||
match self {
|
||||
TopLevelStatement::FunctionDefinition(FunctionDefinition(sig, block)) => {
|
||||
if let Some((_, ir)) = root_data.function(&sig.name) {
|
||||
if let Some(ir_function) = ir.take() {
|
||||
let mut ir_block = module.create_block();
|
||||
let mut scope = root_data.inner(&mut ir_block);
|
||||
|
||||
let (_, value) = match block.codegen(&mut scope)? {
|
||||
Some(v) => v,
|
||||
None => panic!("Void-return type function not yet implemented!"),
|
||||
};
|
||||
|
||||
ir_function.add_definition(value, ir_block);
|
||||
} else {
|
||||
Err(Error::FunctionAlreadyDefined(sig.name.clone()))?
|
||||
}
|
||||
} else {
|
||||
panic!("Function was not declared before it's definition")
|
||||
}
|
||||
}
|
||||
TopLevelStatement::Import(_) => {}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl Block {
|
||||
pub fn codegen(&self, scope: &mut Scope) -> Result<Option<(ReturnType, IRValue)>, Error> {
|
||||
for statement in &self.0 {
|
||||
statement.codegen(scope)?;
|
||||
}
|
||||
|
||||
let value = if let Some((rt, exp)) = &self.1 {
|
||||
Some((*rt, exp.codegen(scope)?))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
Ok(value)
|
||||
}
|
||||
}
|
||||
|
||||
impl BlockLevelStatement {
|
||||
pub fn codegen(&self, scope: &mut Scope) -> Result<(), Error> {
|
||||
match self {
|
||||
BlockLevelStatement::Let(let_statement) => {
|
||||
let val = let_statement.1.codegen(scope)?;
|
||||
scope.data.set_var(&let_statement.0, val)?;
|
||||
Ok(())
|
||||
}
|
||||
BlockLevelStatement::Return(_) => panic!("Should never happen"),
|
||||
BlockLevelStatement::Import(_) => Ok(()), // TODO: To implement
|
||||
BlockLevelStatement::Expression(e) => {
|
||||
let _value = e.codegen(scope)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Expression {
|
||||
pub fn codegen(&self, scope: &mut Scope) -> Result<IRValue, Error> {
|
||||
use Expression::*;
|
||||
match self {
|
||||
Binop(op, lhs, rhs) => match op {
|
||||
BinaryOperator::Add => {
|
||||
let lhs = lhs.codegen(scope)?;
|
||||
let rhs = rhs.codegen(scope)?;
|
||||
Ok(scope.block.add(lhs, rhs)?)
|
||||
}
|
||||
BinaryOperator::Mult => {
|
||||
let lhs = lhs.codegen(scope)?;
|
||||
let rhs = rhs.codegen(scope)?;
|
||||
Ok(scope.block.mul(lhs, rhs)?)
|
||||
}
|
||||
_ => panic!("Other binary operators not supported yet!"),
|
||||
},
|
||||
BlockExpr(block) => {
|
||||
let mut inner = scope.inner();
|
||||
|
||||
Ok(match block.codegen(&mut inner)? {
|
||||
Some((r_type, value)) => match r_type {
|
||||
ReturnType::Soft => value,
|
||||
ReturnType::Hard => {
|
||||
panic!("Hard returns in inner blocks not supported yet")
|
||||
}
|
||||
},
|
||||
None => panic!("Void-return type block not yet implemented!"),
|
||||
})
|
||||
}
|
||||
FunctionCall(fc) => {
|
||||
let FunctionCallExpression(name, _) = &**fc;
|
||||
if let Some((sig, _)) = scope.data.function(name) {
|
||||
Ok(scope.block.function_call(sig)?)
|
||||
} else {
|
||||
Err(Error::UndefinedFunction(name.clone()))?
|
||||
}
|
||||
}
|
||||
VariableName(name) => scope
|
||||
.data
|
||||
.var(name)
|
||||
.cloned()
|
||||
.ok_or(Error::UndefinedVariable(name.clone())),
|
||||
Literal(lit) => Ok(scope.block.get_const(lit)),
|
||||
IfExpr(_) => panic!("if expressions not yet supported"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(thiserror::Error, Debug)]
|
||||
pub enum Error {
|
||||
#[error("Variable '{0}' already defined")]
|
||||
VariableAlreadyDefined(String),
|
||||
#[error("Variable '{0}' not yet defined")]
|
||||
UndefinedVariable(String),
|
||||
#[error("Function '{0}' not defined")]
|
||||
UndefinedFunction(String),
|
||||
#[error("Function '{0}' already defined")]
|
||||
FunctionAlreadyDefined(String),
|
||||
#[error(transparent)]
|
||||
Deeper(#[from] llvm_ir::Error),
|
||||
}
|
86
src/codegen/llvm.rs
Normal file
86
src/codegen/llvm.rs
Normal file
@ -0,0 +1,86 @@
|
||||
use std::ffi::{CStr, CString};
|
||||
use std::mem;
|
||||
|
||||
use llvm_sys::{core::*, prelude::*, LLVMBuilder, LLVMContext, LLVMModule};
|
||||
|
||||
/// Converts `value` into a NUL-terminated [`CString`] for passing to C APIs.
///
/// If the text contains an interior NUL byte, everything from the first NUL
/// onwards is dropped. That is the portion a C reader would have seen anyway,
/// and it keeps the `CString` invariant (no interior NUL) intact instead of
/// relying on an unchecked constructor.
fn into_cstring<T: Into<String>>(value: T) -> CString {
    let mut bytes = value.into().into_bytes();
    if let Some(first_nul) = bytes.iter().position(|&byte| byte == 0) {
        bytes.truncate(first_nul);
    }
    CString::new(bytes).expect("interior NUL bytes were truncated above")
}
|
||||
|
||||
pub struct IRContext {
|
||||
context: *mut LLVMContext,
|
||||
builder: *mut LLVMBuilder,
|
||||
}
|
||||
|
||||
impl IRContext {
|
||||
pub fn new() -> IRContext {
|
||||
unsafe {
|
||||
// Set up a context, module and builder in that context.
|
||||
let context = LLVMContextCreate();
|
||||
let builder = LLVMCreateBuilderInContext(context);
|
||||
IRContext { context, builder }
|
||||
}
|
||||
}
|
||||
|
||||
pub fn module<'a>(&'a mut self, name: String) -> IRModule<'a> {
|
||||
unsafe {
|
||||
let module =
|
||||
LLVMModuleCreateWithNameInContext(into_cstring(name).as_ptr(), self.context);
|
||||
|
||||
// TODO, fix later!
|
||||
|
||||
let t = LLVMInt32TypeInContext(self.context);
|
||||
|
||||
let mut argts = [];
|
||||
let func_type = LLVMFunctionType(t, argts.as_mut_ptr(), argts.len() as u32, 0);
|
||||
|
||||
let anon_func = LLVMAddFunction(module, into_cstring("test").as_ptr(), func_type);
|
||||
|
||||
let blockref =
|
||||
LLVMCreateBasicBlockInContext(self.context, into_cstring("otus").as_ptr());
|
||||
LLVMPositionBuilderAtEnd(self.builder, blockref);
|
||||
|
||||
// What is the last 1 ?
|
||||
let val = LLVMConstInt(t, mem::transmute(3 as i64), 1);
|
||||
|
||||
LLVMAppendExistingBasicBlock(anon_func, blockref);
|
||||
LLVMBuildRet(self.builder, val);
|
||||
|
||||
IRModule {
|
||||
context: self,
|
||||
module,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for IRContext {
|
||||
fn drop(&mut self) {
|
||||
// Clean up. Values created in the context mostly get cleaned up there.
|
||||
unsafe {
|
||||
LLVMDisposeBuilder(self.builder);
|
||||
LLVMContextDispose(self.context);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct IRModule<'a> {
|
||||
context: &'a mut IRContext,
|
||||
module: *mut LLVMModule,
|
||||
}
|
||||
|
||||
impl<'a> IRModule<'a> {
|
||||
pub fn print_to_string(&mut self) -> Result<&str, std::str::Utf8Error> {
|
||||
unsafe { CStr::from_ptr(LLVMPrintModuleToString(self.module)).to_str() }
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Drop for IRModule<'a> {
|
||||
fn drop(&mut self) {
|
||||
// Clean up. Values created in the context mostly get cleaned up there.
|
||||
unsafe {
|
||||
LLVMDisposeModule(self.module);
|
||||
}
|
||||
}
|
||||
}
|
19
src/codegen/mod.rs
Normal file
19
src/codegen/mod.rs
Normal file
@ -0,0 +1,19 @@
|
||||
mod llvm;
|
||||
|
||||
use llvm::{IRContext, IRModule};
|
||||
|
||||
use crate::TopLevelStatement;
|
||||
|
||||
pub fn form_context() -> IRContext {
|
||||
IRContext::new()
|
||||
}
|
||||
|
||||
pub fn from_statements<'a>(
|
||||
context: &'a mut IRContext,
|
||||
statements: Vec<TopLevelStatement>,
|
||||
) -> Result<IRModule<'a>, Error> {
|
||||
Ok(context.module("hello".to_owned()))
|
||||
}
|
||||
|
||||
#[derive(thiserror::Error, Debug)]
|
||||
pub enum Error {}
|
13
src/lib.rs
13
src/lib.rs
@ -1,9 +1,11 @@
|
||||
use codegen::{form_context, from_statements};
|
||||
|
||||
use crate::{ast::TopLevelStatement, lexer::Token, token_stream::TokenStream};
|
||||
|
||||
mod ast;
|
||||
// mod codegen;
|
||||
mod codegen;
|
||||
mod lexer;
|
||||
mod llvm_ir;
|
||||
// mod llvm_ir;
|
||||
mod token_stream;
|
||||
|
||||
// TODO:
|
||||
@ -37,7 +39,8 @@ pub fn compile(source: &str) -> Result<String, ReidError> {
|
||||
statements.push(statement);
|
||||
}
|
||||
|
||||
// let mut module = codegen_from_statements(statements)?;
|
||||
// let text = module.print_to_string().unwrap();
|
||||
Ok("text".to_owned())
|
||||
let mut context = form_context();
|
||||
let mut module = from_statements(&mut context, statements).unwrap();
|
||||
let text = module.print_to_string().unwrap();
|
||||
Ok(text.to_owned())
|
||||
}
|
||||
|
203
src/llvm_ir.rs
203
src/llvm_ir.rs
@ -1,203 +0,0 @@
|
||||
use std::ffi::{CStr, CString};
|
||||
use std::mem;
|
||||
|
||||
use llvm_sys::{core::*, prelude::*, LLVMBuilder, LLVMContext, LLVMModule};
|
||||
|
||||
use crate::ast::{FunctionSignature, Literal};
|
||||
|
||||
/// Expands a string literal into a pointer to the same text with a trailing
/// NUL byte appended at compile time.
///
/// The expansion calls an `unsafe` function, so the macro must be invoked
/// inside an `unsafe` context. The literal itself must not contain a NUL
/// byte, because the unchecked constructor does not verify that.
macro_rules! cstr {
    ($text:expr) => {
        core::ffi::CStr::from_bytes_with_nul_unchecked(concat!($text, "\0").as_bytes()).as_ptr()
    };
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
#[must_use = "value contains raw pointer and must be inserted somewhere"]
|
||||
pub struct IRValue(IRValueType, LLVMValueRef);
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub enum IRValueType {
|
||||
I32,
|
||||
}
|
||||
|
||||
impl IRValueType {
|
||||
unsafe fn get_llvm_type(&self, module: &mut IRModule) -> LLVMTypeRef {
|
||||
match *self {
|
||||
Self::I32 => LLVMInt32TypeInContext(module.context),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts `value` into a NUL-terminated [`CString`] for passing to C APIs.
///
/// If the text contains an interior NUL byte, everything from the first NUL
/// onwards is dropped. That is the portion a C reader would have seen anyway,
/// and it keeps the `CString` invariant (no interior NUL) intact instead of
/// relying on an unchecked constructor.
fn into_cstring<T: Into<String>>(value: T) -> CString {
    let mut bytes = value.into().into_bytes();
    if let Some(first_nul) = bytes.iter().position(|&byte| byte == 0) {
        bytes.truncate(first_nul);
    }
    CString::new(bytes).expect("interior NUL bytes were truncated above")
}
|
||||
|
||||
pub struct IRModule {
|
||||
context: *mut LLVMContext,
|
||||
module: *mut LLVMModule,
|
||||
builder: *mut LLVMBuilder,
|
||||
}
|
||||
|
||||
impl IRModule {
|
||||
pub fn new<T: Into<String>>(name: T) -> IRModule {
|
||||
unsafe {
|
||||
// Set up a context, module and builder in that context.
|
||||
let context = LLVMContextCreate();
|
||||
let module = LLVMModuleCreateWithNameInContext(into_cstring(name).as_ptr(), context);
|
||||
let builder = LLVMCreateBuilderInContext(context);
|
||||
|
||||
IRModule {
|
||||
context,
|
||||
module,
|
||||
builder,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn create_block(&mut self) -> IRBlock {
|
||||
IRBlock::create("entry", self)
|
||||
}
|
||||
|
||||
pub fn create_func<T: Into<String>>(
|
||||
&mut self,
|
||||
name: T,
|
||||
return_type: IRValueType,
|
||||
) -> IRFunction {
|
||||
unsafe {
|
||||
let mut argts = [];
|
||||
let func_type = LLVMFunctionType(
|
||||
return_type.get_llvm_type(self),
|
||||
argts.as_mut_ptr(),
|
||||
argts.len() as u32,
|
||||
0,
|
||||
);
|
||||
|
||||
let anon_func = LLVMAddFunction(self.module, into_cstring(name).as_ptr(), func_type);
|
||||
IRFunction {
|
||||
value: IRValue(return_type, anon_func),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn print_to_string(&mut self) -> Result<&str, std::str::Utf8Error> {
|
||||
unsafe { CStr::from_ptr(LLVMPrintModuleToString(self.module)).to_str() }
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for IRModule {
|
||||
fn drop(&mut self) {
|
||||
// Clean up. Values created in the context mostly get cleaned up there.
|
||||
unsafe {
|
||||
LLVMDisposeBuilder(self.builder);
|
||||
LLVMDisposeModule(self.module);
|
||||
LLVMContextDispose(self.context);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct IRFunction {
|
||||
value: IRValue,
|
||||
}
|
||||
|
||||
impl IRFunction {
|
||||
pub fn add_definition(self, ret: IRValue, block: IRBlock) {
|
||||
unsafe {
|
||||
LLVMAppendExistingBasicBlock(self.value.1, block.blockref);
|
||||
LLVMBuildRet(block.module.builder, ret.1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct IRBlock<'a> {
|
||||
module: &'a mut IRModule,
|
||||
blockref: LLVMBasicBlockRef,
|
||||
}
|
||||
|
||||
impl<'a> IRBlock<'a> {
|
||||
fn create<T: Into<String>>(name: T, codegen: &'a mut IRModule) -> IRBlock<'a> {
|
||||
unsafe {
|
||||
let blockref =
|
||||
LLVMCreateBasicBlockInContext(codegen.context, into_cstring(name).as_ptr());
|
||||
LLVMPositionBuilderAtEnd(codegen.builder, blockref);
|
||||
IRBlock {
|
||||
module: codegen,
|
||||
blockref,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_const(&mut self, literal_type: &Literal) -> IRValue {
|
||||
unsafe {
|
||||
match *literal_type {
|
||||
Literal::I32(v) => IRValue(
|
||||
IRValueType::I32,
|
||||
LLVMConstInt(
|
||||
LLVMInt32TypeInContext(self.module.context),
|
||||
mem::transmute(v as i64),
|
||||
1,
|
||||
),
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add(&mut self, lhs: IRValue, rhs: IRValue) -> Result<IRValue, Error> {
|
||||
unsafe {
|
||||
if lhs.0 == rhs.0 {
|
||||
Ok(IRValue(
|
||||
lhs.0,
|
||||
LLVMBuildAdd(self.module.builder, lhs.1, rhs.1, cstr!("tmpadd")),
|
||||
))
|
||||
} else {
|
||||
Err(Error::TypeMismatch(lhs.0, rhs.0))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn mul(&mut self, lhs: IRValue, rhs: IRValue) -> Result<IRValue, Error> {
|
||||
unsafe {
|
||||
if lhs.0 == rhs.0 {
|
||||
Ok(IRValue(
|
||||
lhs.0,
|
||||
LLVMBuildMul(self.module.builder, lhs.1, rhs.1, cstr!("tmpadd")),
|
||||
))
|
||||
} else {
|
||||
Err(Error::TypeMismatch(lhs.0, rhs.0))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn function_call(&mut self, callee: &FunctionSignature) -> Result<IRValue, Error> {
|
||||
unsafe {
|
||||
let function = LLVMGetNamedFunction(
|
||||
self.module.module,
|
||||
into_cstring(callee.name.clone()).as_ptr(),
|
||||
);
|
||||
|
||||
let ret_t = LLVMInt32TypeInContext(self.module.context);
|
||||
let mut argts = [];
|
||||
let mut args = [];
|
||||
|
||||
let fun_t = LLVMFunctionType(ret_t, argts.as_mut_ptr(), argts.len() as u32, 0);
|
||||
|
||||
let call = LLVMBuildCall2(
|
||||
self.module.builder,
|
||||
fun_t,
|
||||
function,
|
||||
args.as_mut_ptr(),
|
||||
args.len() as u32,
|
||||
into_cstring(&callee.name).as_ptr(),
|
||||
);
|
||||
|
||||
Ok(IRValue(IRValueType::I32, call))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum Error {
|
||||
#[error("Type Mismatch: {0:?} {1:?}")]
|
||||
TypeMismatch(IRValueType, IRValueType),
|
||||
}
|
Loading…
Reference in New Issue
Block a user