Compare commits

...

12 Commits

Author SHA1 Message Date
05c585d47c Add sub and fibonacci calculation 2025-06-29 19:11:56 +03:00
7b93ab5d2e Fix from_cstring from taking ownership 2025-06-29 18:19:59 +03:00
814b816450 Add phi 2025-06-29 01:18:17 +03:00
5b23d7d4d5 rework 2025-06-28 21:58:35 +03:00
740aee1382 Add SIGN to IntegerType 2025-06-28 18:56:24 +03:00
02d8b37424 Move context.rs to lib.rs 2025-06-28 18:49:54 +03:00
85564c74c9 Rework api 2025-06-28 00:14:56 +03:00
54f25481f4 Misc changes 2025-06-27 20:37:31 +03:00
922afaa672 Change clang-command to ld, add util.rs 2025-06-25 21:38:30 +03:00
d0aa3e1410 Add the ability to call functions 2025-06-24 23:30:29 +03:00
5f93b7c9c2 Add rudamentary LLVM lib stuff, make a fully compiling executable 2025-06-24 23:10:44 +03:00
6d3d0fd03e Move reid to subproject 2025-06-24 17:17:15 +03:00
24 changed files with 1375 additions and 15 deletions

4
.gitignore vendored
View File

@ -2,4 +2,6 @@
src/old_llvm
/target
/.vscode
.env
.env
hello.*
main

10
Cargo.lock generated
View File

@ -1,6 +1,6 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
version = 4
[[package]]
name = "aho-corasick"
@ -103,6 +103,14 @@ dependencies = [
"thiserror",
]
[[package]]
name = "reid-lib"
version = "0.1.0"
dependencies = [
"llvm-sys",
"thiserror",
]
[[package]]
name = "semver"
version = "1.0.18"

View File

@ -1,12 +1,5 @@
[package]
name = "reid"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
## LLVM Bindings
llvm-sys = "160"
## Make it easier to generate errors
thiserror = "1.0.44"
[workspace]
members = [
"reid",
"reid-llvm-lib"
]

46
libtest.sh Executable file
View File

@ -0,0 +1,46 @@
#!/bin/sh
# Runs the `libtest` example, which produces hello.o and hello.asm, then links
# hello.o against the C runtime with ld and executes the resulting binary.
#
# Do note this file is extremely simple, for my own personal convenience

# Load and export every variable defined in .env, if the file exists.
# (`export .env` is not a valid identifier and aborts POSIX shells.)
# NOTE(review): assumes .env uses plain KEY=value shell syntax - confirm.
if [ -f .env ]; then
    set -a
    . ./.env
    set +a
fi

cargo run --example libtest && \
# clang hello.o -o main && \
ld -dynamic-linker /lib64/ld-linux-x86-64.so.2 \
-o main /usr/lib/crt1.o hello.o -lc && \
./main ; echo "Return value: ""$?"
## Command from: clang -v hello.o -o test
## Original command:
# ld --hash-style=gnu \
# --build-id \
# --eh-frame-hdr \
# -m elf_x86_64 \
# -pie \
# -dynamic-linker /lib64/ld-linux-x86-64.so.2 \
# -o test \
# /usr/bin/../lib64/gcc/x86_64-pc-linux-gnu/15.1.1/../../../../lib64/Scrt1.o \
# /usr/bin/../lib64/gcc/x86_64-pc-linux-gnu/15.1.1/../../../../lib64/crti.o \
# /usr/bin/../lib64/gcc/x86_64-pc-linux-gnu/15.1.1/crtbeginS.o \
# -L/usr/bin/../lib64/gcc/x86_64-pc-linux-gnu/15.1.1 \
# -L/usr/bin/../lib64/gcc/x86_64-pc-linux-gnu/15.1.1/../../../../lib64 \
# -L/lib/../lib64 \
# -L/usr/lib/../lib64 \
# -L/lib \
# -L/usr/lib \
# hello.o \
# -lgcc \
# --as-needed \
# -lgcc_s \
# --no-as-needed \
# -lc \
# -lgcc \
# --as-needed \
# -lgcc_s \
# --no-as-needed \
# /usr/bin/../lib64/gcc/x86_64-pc-linux-gnu/15.1.1/crtendS.o \
# /usr/bin/../lib64/gcc/x86_64-pc-linux-gnu/15.1.1/../../../../lib64/crtn.o \
# && \

7
main.cpp Normal file
View File

@ -0,0 +1,7 @@
#include <iostream>

// Entry point provided, with C linkage, by a separately compiled object file.
extern "C" int mainfunc();

// Calls `mainfunc` and prints the value it returns.
int main() {
    std::cout << "Return value of test: " << mainfunc() << std::endl;
    return 0;
}

12
reid-llvm-lib/Cargo.toml Normal file
View File

@ -0,0 +1,12 @@
[package]
name = "reid-lib"
version = "0.1.0"
edition = "2024"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
## LLVM Bindings
llvm-sys = "160"
## Make it easier to generate errors
thiserror = "1.0.44"

View File

@ -0,0 +1,101 @@
use reid_lib::{
Context, IntPredicate,
types::{BasicType, IntegerType, IntegerValue, Value},
};
/// Example driver: builds a module with a recursive `fibonacci(i32) -> i32`
/// function and a `main` that returns `fibonacci(8)`, then prints whatever
/// `Module::print_to_string` returns.
pub fn main() {
// Notes from inkwell:
// - Creating new values should probably just be functions in the context
// - Creating functions should probably be functions from module
// - Builder could well be its own struct
// - Although, I do like the fact where blocks move the builder by itself..
let context = Context::new();
let module = context.module("testmodule");
let int_32 = context.type_i32();
// fibonacci: one i32 parameter, i32 return value.
let fibonacci = module.add_function(int_32.function_type(vec![&int_32]), "fibonacci");
let f_main = fibonacci.block("main");
let param = fibonacci.get_param::<IntegerValue>(0).unwrap();
// Base case: for n < 3 (unsigned compare) branch to `done`, else to `recurse`.
let cmp = f_main
.integer_compare(&param, &int_32.from_unsigned(3), &IntPredicate::ULT, "cmp")
.unwrap();
let (done, recurse) = f_main.conditional_br(&cmp, "done", "recurse").unwrap();
done.ret(&int_32.from_unsigned(1)).unwrap();
// Recursive case: fibonacci(n - 1) + fibonacci(n - 2).
let minus_one = recurse
.sub(&param, &int_32.from_unsigned(1), "minus_one")
.unwrap();
let minus_two = recurse
.sub(&param, &int_32.from_unsigned(2), "minus_two")
.unwrap();
let one = recurse
.call(&fibonacci, vec![Value::Integer(minus_one)], "call_one")
.unwrap();
let two = recurse
.call(&fibonacci, vec![Value::Integer(minus_two)], "call_two")
.unwrap();
let add = recurse.add(&one, &two, "add").unwrap();
recurse.ret(&add).unwrap();
// main: no parameters, returns fibonacci(8).
let main_f = module.add_function(int_32.function_type(Vec::new()), "main");
let main_b = main_f.block("main");
let call = main_b
.call(
&fibonacci,
vec![Value::Integer(int_32.from_unsigned(8))],
"fib_call",
)
.unwrap();
main_b.ret(&call).unwrap();
// Earlier experiment (conditional branch joined by a phi node), kept for reference:
// let secondary = module.add_function(int_32.function_type(&[]), "secondary");
// let s_entry = secondary.block("entry");
// s_entry.ret(&int_32.from_signed(54)).unwrap();
// let function = module.add_function(int_32.function_type(&[]), "main");
// let entry = function.block("entry");
// let call = entry.call(&secondary, vec![], "call").unwrap();
// let add = entry.add(&int_32.from_signed(100), &call, "add").unwrap();
// let rhs_cmp = int_32.from_signed(200);
// let cond_res = entry
// .integer_compare(&add, &rhs_cmp, &IntPredicate::SLT, "cmp")
// .unwrap();
// let (lhs, rhs) = entry.conditional_br(&cond_res, "lhs", "rhs").unwrap();
// let left = lhs.add(&call, &int_32.from_signed(20), "add").unwrap();
// let right = rhs.add(&call, &int_32.from_signed(30), "add").unwrap();
// let final_block = function.block("final");
// let phi = final_block
// .phi::<IntegerValue>(&int_32, "phi")
// .unwrap()
// .add_incoming(&left, &lhs)
// .add_incoming(&right, &rhs)
// .build();
// lhs.br(&final_block).unwrap();
// rhs.br(&final_block).unwrap();
// let val = final_block
// .add(&phi, &int_32.from_signed(11), "add")
// .unwrap();
// final_block.ret(&val).unwrap();
// Print the value returned by `print_to_string`, or the error it reports.
match module.print_to_string() {
Ok(v) => println!("{}", v),
Err(e) => println!("Err: {:?}", e),
}
}

454
reid-llvm-lib/src/lib.rs Normal file
View File

@ -0,0 +1,454 @@
use std::ffi::CString;
use std::marker::PhantomData;
use std::net::Incoming;
use std::ptr::null_mut;
use llvm_sys::analysis::LLVMVerifyModule;
use llvm_sys::target::{
LLVM_InitializeAllAsmParsers, LLVM_InitializeAllAsmPrinters, LLVM_InitializeAllTargetInfos,
LLVM_InitializeAllTargetMCs, LLVM_InitializeAllTargets, LLVMSetModuleDataLayout,
};
use llvm_sys::target_machine::{
LLVMCodeGenFileType, LLVMCreateTargetDataLayout, LLVMCreateTargetMachine,
LLVMGetDefaultTargetTriple, LLVMGetTargetFromTriple, LLVMTargetMachineEmitToFile,
};
use llvm_sys::{LLVMBuilder, LLVMContext, LLVMIntPredicate, core::*, prelude::*};
use types::{BasicType, BasicValue, FunctionType, IntegerType, Value};
use util::{ErrorMessageHolder, from_cstring, into_cstring};
pub mod types;
mod util;
/// Integer comparison predicates that can be passed to
/// `BasicBlock::integer_compare`.
pub enum IntPredicate {
    /// Maps to `LLVMIntULT`.
    ULT,
    /// Maps to `LLVMIntSLT`.
    SLT,
}

impl IntPredicate {
    /// Returns the `llvm-sys` predicate constant corresponding to `self`.
    pub fn as_llvm(&self) -> LLVMIntPredicate {
        match self {
            IntPredicate::ULT => LLVMIntPredicate::LLVMIntULT,
            IntPredicate::SLT => LLVMIntPredicate::LLVMIntSLT,
        }
    }
}
/// Owns an LLVM context and the one instruction builder created in it.
///
/// Both handles are released when the `Context` is dropped.
pub struct Context {
    pub(crate) context_ref: *mut LLVMContext,
    pub(crate) builder_ref: *mut LLVMBuilder,
}

impl Context {
    /// Creates a fresh LLVM context and a builder bound to it.
    pub fn new() -> Context {
        unsafe {
            let context_ref = LLVMContextCreate();
            let builder_ref = LLVMCreateBuilderInContext(context_ref);
            Context {
                context_ref,
                builder_ref,
            }
        }
    }

    /// Returns the 1-bit integer type of this context.
    pub fn type_i1<'a>(&'a self) -> IntegerType<'a> {
        IntegerType::in_context(self, 1)
    }

    /// Returns the 8-bit integer type of this context.
    pub fn type_i8<'a>(&'a self) -> IntegerType<'a> {
        IntegerType::in_context(self, 8)
    }

    /// Returns the 16-bit integer type of this context.
    pub fn type_i16<'a>(&'a self) -> IntegerType<'a> {
        IntegerType::in_context(self, 16)
    }

    /// Returns the 32-bit integer type of this context.
    pub fn type_i32<'a>(&'a self) -> IntegerType<'a> {
        IntegerType::in_context(self, 32)
    }

    /// Creates a new module called `name` inside this context.
    pub fn module<T: Into<String>>(&self, name: T) -> Module {
        let module_name: String = name.into();
        Module::with_name(self, module_name)
    }
}

impl Drop for Context {
    /// Disposes the builder first, then the context it was created in.
    fn drop(&mut self) {
        unsafe {
            LLVMDisposeBuilder(self.builder_ref);
            LLVMContextDispose(self.context_ref);
        }
    }
}
pub struct Module<'ctx> {
context: &'ctx Context,
module_ref: LLVMModuleRef,
name: CString,
}
impl<'ctx> Module<'ctx> {
fn with_name(context: &'ctx Context, name: String) -> Module<'ctx> {
unsafe {
let cstring_name = into_cstring(name);
let module_ref =
LLVMModuleCreateWithNameInContext(cstring_name.as_ptr(), context.context_ref);
Module {
context,
module_ref,
name: cstring_name,
}
}
}
pub fn add_function<ReturnValue: BasicValue<'ctx>>(
&'ctx self,
fn_type: FunctionType<'ctx, ReturnValue::BaseType>,
name: &str,
) -> Function<'ctx, ReturnValue> {
unsafe {
let name_cstring = into_cstring(name);
let function_ref =
LLVMAddFunction(self.module_ref, name_cstring.as_ptr(), fn_type.llvm_type());
Function {
module: self,
fn_type,
name: name_cstring,
fn_ref: function_ref,
}
}
}
pub fn print_to_string(&self) -> Result<String, String> {
unsafe {
LLVM_InitializeAllTargets();
LLVM_InitializeAllTargetInfos();
LLVM_InitializeAllTargetMCs();
LLVM_InitializeAllAsmParsers();
LLVM_InitializeAllAsmPrinters();
let triple = LLVMGetDefaultTargetTriple();
let mut target: _ = null_mut();
let mut err = ErrorMessageHolder::null();
LLVMGetTargetFromTriple(triple, &mut target, err.borrow_mut());
println!("{:?}, {:?}", from_cstring(triple), target);
err.into_result().unwrap();
let target_machine = LLVMCreateTargetMachine(
target,
triple,
c"generic".as_ptr(),
c"".as_ptr(),
llvm_sys::target_machine::LLVMCodeGenOptLevel::LLVMCodeGenLevelNone,
llvm_sys::target_machine::LLVMRelocMode::LLVMRelocDefault,
llvm_sys::target_machine::LLVMCodeModel::LLVMCodeModelDefault,
);
let data_layout = LLVMCreateTargetDataLayout(target_machine);
LLVMSetTarget(self.module_ref, triple);
LLVMSetModuleDataLayout(self.module_ref, data_layout);
let mut err = ErrorMessageHolder::null();
LLVMVerifyModule(
self.module_ref,
llvm_sys::analysis::LLVMVerifierFailureAction::LLVMPrintMessageAction,
err.borrow_mut(),
);
err.into_result().unwrap();
let mut err = ErrorMessageHolder::null();
LLVMTargetMachineEmitToFile(
target_machine,
self.module_ref,
CString::new("hello.asm").unwrap().into_raw(),
LLVMCodeGenFileType::LLVMAssemblyFile,
err.borrow_mut(),
);
err.into_result().unwrap();
let mut err = ErrorMessageHolder::null();
LLVMTargetMachineEmitToFile(
target_machine,
self.module_ref,
CString::new("hello.o").unwrap().into_raw(),
LLVMCodeGenFileType::LLVMObjectFile,
err.borrow_mut(),
);
err.into_result().unwrap();
from_cstring(LLVMPrintModuleToString(self.module_ref)).ok_or("UTF-8 error".to_owned())
}
}
}
impl<'a> Drop for Module<'a> {
fn drop(&mut self) {
// Clean up. Values created in the context mostly get cleaned up there.
unsafe {
LLVMDisposeModule(self.module_ref);
}
}
}
/// A function declared in a [`Module`], typed by the value it returns.
pub struct Function<'ctx, ReturnValue: BasicValue<'ctx>> {
    module: &'ctx Module<'ctx>,
    name: CString,
    fn_type: FunctionType<'ctx, ReturnValue::BaseType>,
    fn_ref: LLVMValueRef,
}

impl<'ctx, ReturnValue: BasicValue<'ctx>> Function<'ctx, ReturnValue> {
    /// Creates a new, not yet inserted basic block called `name` for this function.
    pub fn block<T: Into<String>>(&'ctx self, name: T) -> BasicBlock<'ctx, ReturnValue> {
        BasicBlock::in_function(self, name.into())
    }

    /// Returns the `nth` (zero-based) parameter of this function wrapped as `T`.
    ///
    /// # Errors
    ///
    /// Returns `"nth too large"` when the function has no parameter at `nth`.
    ///
    /// The previous implementation compared the parameter's type against the
    /// function's *return* type, which wrongly rejected any function whose
    /// parameter and return types differ.
    // NOTE(review): `BasicValue` currently offers no way to ask whether an LLVM
    // type is valid for `T`, so no type-kind check is done here.
    pub fn get_param<T: BasicValue<'ctx>>(&'ctx self, nth: usize) -> Result<T, String> {
        if nth >= self.fn_type.param_types.len() {
            return Err(String::from("nth too large"));
        }
        let index = u32::try_from(nth).map_err(|_| String::from("nth too large"))?;
        unsafe { Ok(T::from_llvm(LLVMGetParam(self.fn_ref, index))) }
    }
}
/// A basic block under construction for a [`Function`].
///
/// The block is created detached. It is appended to its function only when a
/// terminator (`br`, `conditional_br` or `ret`) is built, which also consumes
/// the `BasicBlock`. A block dropped without a terminator is deleted.
pub struct BasicBlock<'ctx, ReturnValue: BasicValue<'ctx>> {
    function: &'ctx Function<'ctx, ReturnValue>,
    builder_ref: LLVMBuilderRef,
    name: CString,
    blockref: LLVMBasicBlockRef,
    inserted: bool,
}

impl<'ctx, ReturnValue: BasicValue<'ctx>> BasicBlock<'ctx, ReturnValue> {
    /// Creates a detached block called `name` that uses the context's builder.
    fn in_function(
        function: &'ctx Function<'ctx, ReturnValue>,
        name: String,
    ) -> BasicBlock<'ctx, ReturnValue> {
        unsafe {
            let block_name = into_cstring(name);
            let block_ref = LLVMCreateBasicBlockInContext(
                function.module.context.context_ref,
                block_name.as_ptr(),
            );
            BasicBlock {
                function,
                builder_ref: function.module.context.builder_ref,
                name: block_name,
                blockref: block_ref,
                inserted: false,
            }
        }
    }

    /// Appends an integer comparison `lhs <comparison> rhs` to this block.
    ///
    /// # Errors
    ///
    /// Returns `Err(())` when the operands have different LLVM types, matching
    /// the check done by [`Self::add`] and [`Self::sub`].
    pub fn integer_compare<T: BasicValue<'ctx>>(
        &self,
        lhs: &T,
        rhs: &T,
        comparison: &IntPredicate,
        name: &str,
    ) -> Result<T, ()> {
        if lhs.llvm_type() != rhs.llvm_type() {
            return Err(()); // TODO error
        }
        unsafe {
            LLVMPositionBuilderAtEnd(self.builder_ref, self.blockref);
            let value = LLVMBuildICmp(
                self.builder_ref,
                comparison.as_llvm(),
                lhs.llvm_value(),
                rhs.llvm_value(),
                into_cstring(name).as_ptr(),
            );
            Ok(T::from_llvm(value))
        }
    }

    /// Appends a call to `callee` with arguments `params` to this block.
    ///
    /// # Errors
    ///
    /// Returns `Err(())` when the number of arguments or any argument type
    /// differs from the callee's parameter list.
    pub fn call(
        &self,
        callee: &'ctx Function<'ctx, ReturnValue>,
        params: Vec<Value<'ctx>>,
        name: &str,
    ) -> Result<ReturnValue, ()> {
        if params.len() != callee.fn_type.param_types.len() {
            return Err(()); // TODO invalid amount of parameters
        }
        for (t1, t2) in callee.fn_type.param_types.iter().zip(&params) {
            if t1 != &t2.llvm_type() {
                return Err(()); // TODO wrong types in parameters
            }
        }
        unsafe {
            let mut param_list: Vec<LLVMValueRef> = params.iter().map(|p| p.llvm_value()).collect();
            LLVMPositionBuilderAtEnd(self.builder_ref, self.blockref);
            let ret_val = LLVMBuildCall2(
                self.builder_ref,
                callee.fn_type.llvm_type(),
                callee.fn_ref,
                param_list.as_mut_ptr(),
                param_list.len() as u32,
                into_cstring(name).as_ptr(),
            );
            Ok(ReturnValue::from_llvm(ret_val))
        }
    }

    /// Appends `lhs + rhs` to this block.
    ///
    /// # Errors
    ///
    /// Returns `Err(())` when the operands have different LLVM types.
    pub fn add<T: BasicValue<'ctx>>(&self, lhs: &T, rhs: &T, name: &str) -> Result<T, ()> {
        if lhs.llvm_type() != rhs.llvm_type() {
            return Err(()); // TODO error
        }
        unsafe {
            LLVMPositionBuilderAtEnd(self.builder_ref, self.blockref);
            let add_value_ref = LLVMBuildAdd(
                self.builder_ref,
                lhs.llvm_value(),
                rhs.llvm_value(),
                into_cstring(name).as_ptr(),
            );
            Ok(T::from_llvm(add_value_ref))
        }
    }

    /// Appends `lhs - rhs` to this block.
    ///
    /// # Errors
    ///
    /// Returns `Err(())` when the operands have different LLVM types.
    pub fn sub<T: BasicValue<'ctx>>(&self, lhs: &T, rhs: &T, name: &str) -> Result<T, ()> {
        if lhs.llvm_type() != rhs.llvm_type() {
            return Err(()); // TODO error
        }
        unsafe {
            LLVMPositionBuilderAtEnd(self.builder_ref, self.blockref);
            let sub_value_ref = LLVMBuildSub(
                self.builder_ref,
                lhs.llvm_value(),
                rhs.llvm_value(),
                into_cstring(name).as_ptr(),
            );
            Ok(T::from_llvm(sub_value_ref))
        }
    }

    /// Appends an empty phi node of type `phi_type` and returns a builder used
    /// to register its incoming values.
    pub fn phi<PhiValue: BasicValue<'ctx>>(
        &self,
        phi_type: &'ctx PhiValue::BaseType,
        name: &str,
    ) -> Result<PhiBuilder<'ctx, ReturnValue, PhiValue>, ()> {
        unsafe {
            LLVMPositionBuilderAtEnd(self.builder_ref, self.blockref);
            let phi_node = LLVMBuildPhi(
                self.builder_ref,
                phi_type.llvm_type(),
                into_cstring(name).as_ptr(),
            );
            Ok(PhiBuilder::new(phi_node))
        }
    }

    /// Terminates this block with an unconditional branch to `into` and
    /// appends the block to its function.
    pub fn br(self, into: &BasicBlock<'ctx, ReturnValue>) -> Result<(), ()> {
        unsafe {
            LLVMPositionBuilderAtEnd(self.builder_ref, self.blockref);
            LLVMBuildBr(self.builder_ref, into.blockref);
            self.terminate();
            Ok(())
        }
    }

    /// Terminates this block with a conditional branch on `condition`.
    ///
    /// Two new detached blocks named `lhs_name` and `rhs_name` are created and
    /// returned. They are passed to `LLVMBuildCondBr` as the "then" and "else"
    /// targets respectively.
    pub fn conditional_br<T: BasicValue<'ctx>>(
        self,
        condition: &T,
        lhs_name: &str,
        rhs_name: &str,
    ) -> Result<(BasicBlock<'ctx, ReturnValue>, BasicBlock<'ctx, ReturnValue>), ()> {
        unsafe {
            let lhs = BasicBlock::in_function(self.function, lhs_name.into());
            let rhs = BasicBlock::in_function(self.function, rhs_name.into());
            LLVMPositionBuilderAtEnd(self.builder_ref, self.blockref);
            LLVMBuildCondBr(
                self.builder_ref,
                condition.llvm_value(),
                lhs.blockref,
                rhs.blockref,
            );
            self.terminate();
            Ok((lhs, rhs))
        }
    }

    /// Terminates this block by returning `return_value` from the function.
    ///
    /// # Errors
    ///
    /// Returns `Err(())` when the value's type differs from the function's
    /// declared return type.
    pub fn ret(self, return_value: &ReturnValue) -> Result<(), ()> {
        let expected = self
            .function
            .fn_type
            .return_type(self.function.module.context)
            .llvm_type();
        if expected != return_value.llvm_type() {
            return Err(());
        }
        unsafe {
            LLVMPositionBuilderAtEnd(self.builder_ref, self.blockref);
            LLVMBuildRet(self.builder_ref, return_value.llvm_value());
            self.terminate();
            Ok(())
        }
    }

    /// Appends the block to its function and marks it inserted so that `Drop`
    /// does not delete it.
    ///
    /// # Safety
    ///
    /// Must be called at most once per block, after its terminator was built.
    unsafe fn terminate(mut self) {
        unsafe {
            LLVMAppendExistingBasicBlock(self.function.fn_ref, self.blockref);
        }
        self.inserted = true;
    }
}

impl<'ctx, ReturnValue: BasicValue<'ctx>> Drop for BasicBlock<'ctx, ReturnValue> {
    /// Deletes the LLVM block unless it was appended to its function.
    // NOTE(review): confirm `LLVMDeleteBasicBlock` is valid for a block that
    // was never attached to a function.
    fn drop(&mut self) {
        if !self.inserted {
            unsafe {
                LLVMDeleteBasicBlock(self.blockref);
            }
        }
    }
}
/// Incrementally registers the incoming (value, block) pairs of a phi node.
pub struct PhiBuilder<'ctx, ReturnValue: BasicValue<'ctx>, PhiValue: BasicValue<'ctx>> {
    phi_node: LLVMValueRef,
    phantom: PhantomData<&'ctx (PhiValue, ReturnValue)>,
}

impl<'ctx, ReturnValue: BasicValue<'ctx>, PhiValue: BasicValue<'ctx>>
    PhiBuilder<'ctx, ReturnValue, PhiValue>
{
    /// Wraps an already built phi instruction.
    fn new(phi_node: LLVMValueRef) -> PhiBuilder<'ctx, ReturnValue, PhiValue> {
        let phantom = PhantomData;
        PhiBuilder { phi_node, phantom }
    }

    /// Registers `value` as the phi's value when control arrives from `block`.
    /// Returns `self` so calls can be chained.
    pub fn add_incoming(&self, value: &PhiValue, block: &BasicBlock<'ctx, ReturnValue>) -> &Self {
        // One incoming pair per call; LLVM takes parallel arrays plus a count.
        let mut incoming_values = [value.llvm_value()];
        let mut incoming_blocks = [block.blockref];
        let count = incoming_values.len() as u32;
        unsafe {
            LLVMAddIncoming(
                self.phi_node,
                incoming_values.as_mut_ptr(),
                incoming_blocks.as_mut_ptr(),
                count,
            );
        }
        self
    }

    /// Returns the phi node wrapped as a `PhiValue`.
    pub fn build(&self) -> PhiValue {
        let node = self.phi_node;
        unsafe { PhiValue::from_llvm(node) }
    }
}

245
reid-llvm-lib/src/types.rs Normal file
View File

@ -0,0 +1,245 @@
use std::{any::Any, marker::PhantomData, ptr::null_mut};
use llvm_sys::{
LLVMTypeKind,
core::*,
prelude::{LLVMTypeRef, LLVMValueRef},
};
use crate::Context;
/// Common interface of the typed wrappers around an `LLVMTypeRef`.
pub trait BasicType<'ctx> {
    /// Returns the raw LLVM type handle.
    fn llvm_type(&self) -> LLVMTypeRef;

    /// Wraps a raw LLVM type handle belonging to `context`.
    fn from_llvm(context: &'ctx Context, llvm_type: LLVMTypeRef) -> Self
    where
        Self: Sized;

    /// Builds a non-variadic function type returning `self` and taking `params`.
    fn function_type(&'ctx self, params: Vec<&'ctx dyn BasicType>) -> FunctionType<'ctx, Self>
    where
        Self: Sized,
    {
        let mut param_types: Vec<LLVMTypeRef> = Vec::with_capacity(params.len());
        for param in &params {
            param_types.push(param.llvm_type());
        }
        let type_ref = unsafe {
            LLVMFunctionType(
                self.llvm_type(),
                param_types.as_mut_ptr(),
                param_types.len() as u32,
                0,
            )
        };
        FunctionType {
            phantom: PhantomData,
            param_types,
            type_ref,
        }
    }

    /// Builds the array type holding `length` elements of `self`.
    fn array_type(&'ctx self, length: u32) -> ArrayType<'ctx, Self>
    where
        Self: Sized,
    {
        let type_ref = unsafe { LLVMArrayType(self.llvm_type(), length) };
        ArrayType {
            element_type: self,
            length,
            type_ref,
        }
    }
}
/// Two type wrappers are equal when they refer to the same LLVM type handle.
impl<'ctx> PartialEq for &dyn BasicType<'ctx> {
    fn eq(&self, other: &Self) -> bool {
        let (left, right) = (self.llvm_type(), other.llvm_type());
        left == right
    }
}

/// A type wrapper equals a raw handle when it wraps that same handle.
impl<'ctx> PartialEq<LLVMTypeRef> for &dyn BasicType<'ctx> {
    fn eq(&self, other: &LLVMTypeRef) -> bool {
        let own = self.llvm_type();
        own == *other
    }
}
/// An LLVM integer type of some bit width, tied to the context it lives in.
pub struct IntegerType<'ctx> {
    context: &'ctx Context,
    type_ref: LLVMTypeRef,
}

impl<'ctx> BasicType<'ctx> for IntegerType<'ctx> {
    fn llvm_type(&self) -> LLVMTypeRef {
        self.type_ref
    }

    fn from_llvm(context: &'ctx Context, llvm_type: LLVMTypeRef) -> Self
    where
        Self: Sized,
    {
        let type_ref = llvm_type;
        IntegerType { context, type_ref }
    }
}

impl<'ctx> IntegerType<'ctx> {
    /// Returns the integer type with `width` bits in `context`, using the
    /// dedicated LLVM constructor for the common widths.
    pub(crate) fn in_context(context: &Context, width: u32) -> IntegerType {
        let raw_context = context.context_ref;
        let type_ref = unsafe {
            match width {
                1 => LLVMInt1TypeInContext(raw_context),
                8 => LLVMInt8TypeInContext(raw_context),
                16 => LLVMInt16TypeInContext(raw_context),
                32 => LLVMInt32TypeInContext(raw_context),
                64 => LLVMInt64TypeInContext(raw_context),
                128 => LLVMInt128TypeInContext(raw_context),
                other => LLVMIntTypeInContext(raw_context, other),
            }
        };
        IntegerType { context, type_ref }
    }

    /// Creates a constant of this type from `value`, with sign extension requested.
    pub fn from_signed(&self, value: i64) -> IntegerValue<'_> {
        let bits = value as u64;
        self.from_const(bits, true)
    }

    /// Creates a constant of this type from `value`, without sign extension.
    pub fn from_unsigned(&self, value: i64) -> IntegerValue<'_> {
        let bits = value as u64;
        self.from_const(bits, false)
    }

    /// Builds the constant; `sign` is forwarded as LLVM's `SignExtend` flag.
    fn from_const(&self, value: u64, sign: bool) -> IntegerValue<'_> {
        let sign_extend = if sign { 1 } else { 0 };
        unsafe { IntegerValue::from_llvm(LLVMConstInt(self.type_ref, value, sign_extend)) }
    }
}
/// An LLVM function type together with a cached copy of its parameter types.
pub struct FunctionType<'ctx, ReturnType: BasicType<'ctx>> {
    phantom: PhantomData<&'ctx ReturnType>,
    pub(crate) param_types: Vec<LLVMTypeRef>,
    type_ref: LLVMTypeRef,
}

impl<'ctx, ReturnType: BasicType<'ctx>> BasicType<'ctx> for FunctionType<'ctx, ReturnType> {
    fn llvm_type(&self) -> LLVMTypeRef {
        self.type_ref
    }

    /// Wraps an existing LLVM function type, reading its parameter types back
    /// from LLVM.
    fn from_llvm(_context: &'ctx Context, fn_type: LLVMTypeRef) -> Self
    where
        Self: Sized,
    {
        unsafe {
            let param_count = LLVMCountParamTypes(fn_type) as usize;
            // `LLVMGetParamTypes` writes `param_count` entries into the
            // destination, so it must be a real buffer of that size. The
            // previous code passed a null pointer.
            let mut param_types: Vec<LLVMTypeRef> = vec![null_mut(); param_count];
            LLVMGetParamTypes(fn_type, param_types.as_mut_ptr());
            FunctionType {
                phantom: PhantomData,
                param_types,
                type_ref: fn_type,
            }
        }
    }
}

impl<'ctx, ReturnType: BasicType<'ctx>> FunctionType<'ctx, ReturnType> {
    /// Returns this function type's return type wrapped as `ReturnType`.
    pub fn return_type(&self, context: &'ctx Context) -> ReturnType {
        unsafe {
            let return_type = LLVMGetReturnType(self.type_ref);
            ReturnType::from_llvm(context, return_type)
        }
    }
}
/// An LLVM array type of `length` elements of `element_type`.
pub struct ArrayType<'ctx, T: BasicType<'ctx>> {
element_type: &'ctx T,
length: u32,
type_ref: LLVMTypeRef,
}
impl<'ctx, T: BasicType<'ctx>> BasicType<'ctx> for ArrayType<'ctx, T> {
fn llvm_type(&self) -> LLVMTypeRef {
self.type_ref
}
/// Not implemented yet: reads the array length and then hits `todo!()`,
/// so calling this always panics.
fn from_llvm(context: &'ctx Context, llvm_type: LLVMTypeRef) -> Self
where
Self: Sized,
{
unsafe {
let length = LLVMGetArrayLength(llvm_type);
todo!()
}
}
}
/// Common interface of the typed wrappers around an `LLVMValueRef`.
pub trait BasicValue<'ctx> {
/// The type wrapper that describes values of this kind.
type BaseType: BasicType<'ctx>;
/// Wraps a raw LLVM value handle. No check is required by the trait itself,
/// which is why the function is `unsafe`.
unsafe fn from_llvm(value: LLVMValueRef) -> Self
where
Self: Sized;
/// Returns the raw LLVM value handle.
fn llvm_value(&self) -> LLVMValueRef;
/// Returns the raw LLVM type handle of the value.
fn llvm_type(&self) -> LLVMTypeRef;
}
/// A raw LLVM value handle treated as an integer value.
pub struct IntegerValue<'ctx> {
    phantom: PhantomData<&'ctx ()>,
    pub(crate) value_ref: LLVMValueRef,
}

impl<'ctx> BasicValue<'ctx> for IntegerValue<'ctx> {
    type BaseType = IntegerType<'ctx>;

    /// Wraps `value` without inspecting its type.
    unsafe fn from_llvm(value: LLVMValueRef) -> Self {
        let value_ref = value;
        Self {
            phantom: PhantomData,
            value_ref,
        }
    }

    fn llvm_value(&self) -> LLVMValueRef {
        self.value_ref
    }

    /// Asks LLVM for the type of the wrapped value.
    fn llvm_type(&self) -> LLVMTypeRef {
        let value_ref = self.value_ref;
        unsafe { LLVMTypeOf(value_ref) }
    }
}
pub enum Value<'ctx> {
Integer(IntegerValue<'ctx>),
}
impl<'ctx> Value<'ctx> {
unsafe fn from_llvm(value: LLVMValueRef) -> Self
where
Self: Sized,
{
unsafe {
use LLVMTypeKind::*;
let llvm_type = LLVMTypeOf(value);
let type_kind = LLVMGetTypeKind(llvm_type);
match type_kind {
LLVMIntegerTypeKind => Value::Integer(IntegerValue::from_llvm(value)),
_ => panic!("asd"),
}
}
}
pub fn llvm_value(&self) -> LLVMValueRef {
match self {
Self::Integer(i) => i.llvm_value(),
}
}
pub fn llvm_type(&self) -> LLVMTypeRef {
match self {
Self::Integer(i) => i.llvm_type(),
}
}
}

51
reid-llvm-lib/src/util.rs Normal file
View File

@ -0,0 +1,51 @@
use std::{
ffi::{CStr, CString, c_char},
ptr::null_mut,
};
use llvm_sys::error::LLVMDisposeErrorMessage;
/// Converts `value` into a NUL-terminated [`CString`].
///
/// If `value` contains an interior NUL byte, the result is truncated at the
/// first NUL. C code would stop reading there anyway, and building the
/// `CString` unchecked with an interior NUL would violate its invariant.
pub fn into_cstring<T: Into<String>>(value: T) -> CString {
    let mut bytes = value.into().into_bytes();
    if let Some(first_nul) = bytes.iter().position(|&byte| byte == 0) {
        bytes.truncate(first_nul);
    }
    CString::new(bytes).expect("interior NUL bytes were truncated above")
}
/// Copies the NUL-terminated string at `pointer` into an owned `String`.
///
/// Returns `None` when `pointer` is null or the bytes are not valid UTF-8.
/// The pointed-to buffer is only read, never freed.
pub fn from_cstring(pointer: *mut c_char) -> Option<String> {
    if pointer.is_null() {
        return None;
    }
    let borrowed = unsafe { CStr::from_ptr(pointer) };
    match borrowed.to_str() {
        Ok(text) => Some(text.to_owned()),
        Err(_) => None,
    }
}
/// Interprets `value` as an optional error message: a non-empty string
/// becomes `Err`, anything else (null, invalid UTF-8, empty) becomes `Ok`.
fn cstring_to_err(value: *mut c_char) -> Result<(), String> {
    match from_cstring(value) {
        Some(message) if !message.is_empty() => Err(message),
        _ => Ok(()),
    }
}
pub struct ErrorMessageHolder(*mut c_char);
impl ErrorMessageHolder {
pub fn null() -> Self {
ErrorMessageHolder(null_mut())
}
pub fn borrow_mut(&mut self) -> *mut *mut c_char {
&mut self.0
}
pub fn into_result(&self) -> Result<(), String> {
cstring_to_err(self.0)
}
}
impl Drop for ErrorMessageHolder {
fn drop(&mut self) {
unsafe {
if !self.0.is_null() {
LLVMDisposeErrorMessage(self.0);
}
}
}
}

12
reid/Cargo.toml Normal file
View File

@ -0,0 +1,12 @@
[package]
name = "reid"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
## LLVM Bindings
llvm-sys = "160"
## Make it easier to generate errors
thiserror = "1.0.44"

View File

@ -4,8 +4,12 @@ use std::mem;
use std::ptr::null_mut;
use llvm_sys::analysis::LLVMVerifyModule;
use llvm_sys::transforms::pass_manager_builder::{
self, LLVMOpaquePassManagerBuilder, LLVMPassManagerBuilderCreate,
LLVMPassManagerBuilderSetOptLevel,
};
use llvm_sys::{
core::*, prelude::*, LLVMBasicBlock, LLVMBuilder, LLVMContext, LLVMModule, LLVMType, LLVMValue,
LLVMBasicBlock, LLVMBuilder, LLVMContext, LLVMModule, LLVMType, LLVMValue, core::*, prelude::*,
};
use crate::ast;

View File

@ -0,0 +1,222 @@
use std::collections::{hash_map, HashMap};
use crate::{
ast::{
BinaryOperator, Block, BlockLevelStatement, Expression, FunctionCallExpression,
FunctionDefinition, FunctionSignature, ReturnType, TopLevelStatement,
},
llvm_ir::{self, IRBlock, IRFunction, IRModule, IRValue, IRValueType},
};
#[derive(Clone)]
pub struct ScopeData {
named_vars: HashMap<String, IRValue>,
defined_functions: HashMap<String, (FunctionSignature, Option<IRFunction>)>,
}
impl ScopeData {
pub fn inner<'a, 'b>(&self, block: &'b mut IRBlock<'a>) -> Scope<'a, 'b> {
Scope {
block,
data: self.clone(),
}
}
pub fn var(&self, name: &String) -> Option<&IRValue> {
self.named_vars.get(name)
}
pub fn set_var(&mut self, name: &str, val: IRValue) -> Result<(), Error> {
if let hash_map::Entry::Vacant(e) = self.named_vars.entry(name.to_owned()) {
e.insert(val);
Ok(())
} else {
Err(Error::VariableAlreadyDefined(name.to_owned()))
}
}
pub fn function(
&mut self,
name: &String,
) -> Option<&mut (FunctionSignature, Option<IRFunction>)> {
self.defined_functions.get_mut(name)
}
pub fn set_function_signature(
&mut self,
name: &str,
sig: FunctionSignature,
ir: IRFunction,
) -> Result<(), Error> {
if let hash_map::Entry::Vacant(e) = self.defined_functions.entry(name.to_owned()) {
e.insert((sig, Some(ir)));
Ok(())
} else {
Err(Error::VariableAlreadyDefined(name.to_owned()))
}
}
}
/// The block currently being generated plus the names visible inside it.
pub struct Scope<'a, 'b> {
    pub block: &'b mut IRBlock<'a>,
    pub data: ScopeData,
}

impl<'a, 'b> Scope<'a, 'b> {
    /// Creates a nested scope that writes to the same block but works on its
    /// own copy of the name tables.
    pub fn inner<'c>(&'c mut self) -> Scope<'a, 'c> {
        let data = self.data.clone();
        Scope {
            block: &mut *self.block,
            data,
        }
    }
}
/// Generates an [`IRModule`] named `testmod` from top-level statements.
///
/// Runs in two passes: every function is declared first (all with an `I32`
/// return type), then each statement is generated.
pub fn codegen_from_statements(statements: Vec<TopLevelStatement>) -> Result<IRModule, Error> {
    let mut module = IRModule::new("testmod");
    let mut scope = ScopeData {
        named_vars: HashMap::new(),
        defined_functions: HashMap::new(),
    };

    // Pass 1: declare every function.
    for statement in statements.iter() {
        match statement {
            TopLevelStatement::Import(_) => {}
            TopLevelStatement::FunctionDefinition(FunctionDefinition(sig, _)) => {
                let declared = module.create_func(&sig.name, IRValueType::I32);
                scope.set_function_signature(&sig.name, sig.clone(), declared)?;
            }
        }
    }

    // Pass 2: generate each statement.
    for statement in statements.iter() {
        statement.codegen(&mut module, &mut scope)?;
    }

    Ok(module)
}
impl TopLevelStatement {
pub fn codegen(&self, module: &mut IRModule, root_data: &mut ScopeData) -> Result<(), Error> {
match self {
TopLevelStatement::FunctionDefinition(FunctionDefinition(sig, block)) => {
if let Some((_, ir)) = root_data.function(&sig.name) {
if let Some(ir_function) = ir.take() {
let mut ir_block = module.create_block();
let mut scope = root_data.inner(&mut ir_block);
let (_, value) = match block.codegen(&mut scope)? {
Some(v) => v,
None => panic!("Void-return type function not yet implemented!"),
};
ir_function.add_definition(value, ir_block);
} else {
Err(Error::FunctionAlreadyDefined(sig.name.clone()))?
}
} else {
panic!("Function was not declared before it's definition")
}
}
TopLevelStatement::Import(_) => {}
}
Ok(())
}
}
impl Block {
    /// Generates every statement of the block in order, then its trailing
    /// return expression if there is one.
    ///
    /// Returns the return kind and value of that expression, or `None` when
    /// the block has no return expression.
    pub fn codegen(&self, scope: &mut Scope) -> Result<Option<(ReturnType, IRValue)>, Error> {
        for statement in &self.0 {
            statement.codegen(scope)?;
        }

        match &self.1 {
            Some((rt, exp)) => {
                let value = exp.codegen(scope)?;
                Ok(Some((*rt, value)))
            }
            None => Ok(None),
        }
    }
}
impl BlockLevelStatement {
    /// Generates code for one statement inside a block.
    ///
    /// `let` binds the generated value to the variable name, expression
    /// statements discard their value, and imports are ignored for now.
    ///
    /// # Panics
    ///
    /// Panics on a `Return` statement.
    pub fn codegen(&self, scope: &mut Scope) -> Result<(), Error> {
        match self {
            BlockLevelStatement::Return(_) => panic!("Should never happen"),
            BlockLevelStatement::Import(_) => {} // TODO: To implement
            BlockLevelStatement::Let(let_statement) => {
                let bound = let_statement.1.codegen(scope)?;
                scope.data.set_var(&let_statement.0, bound)?;
            }
            BlockLevelStatement::Expression(e) => {
                let _value = e.codegen(scope)?;
            }
        }
        Ok(())
    }
}
impl Expression {
    /// Generates code for the expression and returns the resulting value.
    ///
    /// # Errors
    ///
    /// Returns [`Error::UndefinedVariable`] / [`Error::UndefinedFunction`] for
    /// unknown names, and forwards errors from the IR layer.
    ///
    /// # Panics
    ///
    /// Panics on binary operators other than add and multiply, on `if`
    /// expressions, and on inner blocks that hard-return or yield no value.
    pub fn codegen(&self, scope: &mut Scope) -> Result<IRValue, Error> {
        use Expression::*;
        match self {
            Binop(op, lhs, rhs) => match op {
                BinaryOperator::Add => {
                    let left = lhs.codegen(scope)?;
                    let right = rhs.codegen(scope)?;
                    scope.block.add(left, right).map_err(Error::from)
                }
                BinaryOperator::Mult => {
                    let left = lhs.codegen(scope)?;
                    let right = rhs.codegen(scope)?;
                    scope.block.mul(left, right).map_err(Error::from)
                }
                _ => panic!("Other binary operators not supported yet!"),
            },
            BlockExpr(block) => {
                let mut inner = scope.inner();
                match block.codegen(&mut inner)? {
                    Some((ReturnType::Soft, value)) => Ok(value),
                    Some((ReturnType::Hard, _)) => {
                        panic!("Hard returns in inner blocks not supported yet")
                    }
                    None => panic!("Void-return type block not yet implemented!"),
                }
            }
            FunctionCall(fc) => {
                let FunctionCallExpression(name, _) = &**fc;
                match scope.data.function(name) {
                    Some((sig, _)) => scope.block.function_call(sig).map_err(Error::from),
                    None => Err(Error::UndefinedFunction(name.clone())),
                }
            }
            VariableName(name) => match scope.data.var(name) {
                Some(value) => Ok(value.clone()),
                None => Err(Error::UndefinedVariable(name.clone())),
            },
            Literal(lit) => Ok(scope.block.get_const(lit)),
            IfExpr(_) => panic!("if expressions not yet supported"),
        }
    }
}
/// Errors raised while generating code from the AST.
#[derive(thiserror::Error, Debug)]
pub enum Error {
/// A variable or function name was registered twice.
#[error("Variable '{0}' already defined")]
VariableAlreadyDefined(String),
/// A variable was referenced that is not in scope.
#[error("Variable '{0}' not yet defined")]
UndefinedVariable(String),
/// A call referenced a function that was never declared.
#[error("Function '{0}' not defined")]
UndefinedFunction(String),
/// A function body was generated more than once.
#[error("Function '{0}' already defined")]
FunctionAlreadyDefined(String),
/// An error forwarded unchanged from the `llvm_ir` layer.
#[error(transparent)]
Deeper(#[from] llvm_ir::Error),
}

View File

@ -0,0 +1,203 @@
use std::ffi::{CStr, CString};
use std::mem;
use llvm_sys::{core::*, prelude::*, LLVMBuilder, LLVMContext, LLVMModule};
use crate::ast::{FunctionSignature, Literal};
/// Expands a string literal into a `*const c_char` pointing at a static,
/// NUL-terminated copy of it.
///
/// The expansion calls the unsafe `from_bytes_with_nul_unchecked`, so the macro
/// must be used inside an `unsafe` context, and the literal must not contain
/// NUL bytes itself.
macro_rules! cstr {
($string:expr) => {
core::ffi::CStr::from_bytes_with_nul_unchecked(concat!($string, "\0").as_bytes()).as_ptr()
};
}
/// A raw LLVM value paired with the language-level type it represents.
#[derive(Clone, Debug)]
#[must_use = "value contains raw pointer and must be inserted somewhere"]
pub struct IRValue(IRValueType, LLVMValueRef);

/// The value types the code generator knows about.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum IRValueType {
    I32,
}

impl IRValueType {
    /// Returns the LLVM type for this value type in `module`'s context.
    unsafe fn get_llvm_type(&self, module: &mut IRModule) -> LLVMTypeRef {
        let context = module.context;
        match self {
            IRValueType::I32 => LLVMInt32TypeInContext(context),
        }
    }
}
/// Converts `value` into a NUL-terminated [`CString`].
///
/// If `value` contains an interior NUL byte, the result is truncated at the
/// first NUL. C code would stop reading there anyway, and building the
/// `CString` unchecked with an interior NUL would violate its invariant.
fn into_cstring<T: Into<String>>(value: T) -> CString {
    let mut bytes = value.into().into_bytes();
    if let Some(first_nul) = bytes.iter().position(|&byte| byte == 0) {
        bytes.truncate(first_nul);
    }
    CString::new(bytes).expect("interior NUL bytes were truncated above")
}
/// Owns an LLVM context plus one module and one builder created inside it.
pub struct IRModule {
    context: *mut LLVMContext,
    module: *mut LLVMModule,
    builder: *mut LLVMBuilder,
}

impl IRModule {
    /// Creates a context, a module called `name` in it, and a builder.
    pub fn new<T: Into<String>>(name: T) -> IRModule {
        let module_name = into_cstring(name);
        unsafe {
            let context = LLVMContextCreate();
            let module = LLVMModuleCreateWithNameInContext(module_name.as_ptr(), context);
            let builder = LLVMCreateBuilderInContext(context);
            IRModule {
                context,
                module,
                builder,
            }
        }
    }

    /// Creates a new block named `entry` and positions the builder in it.
    pub fn create_block(&mut self) -> IRBlock {
        IRBlock::create("entry", self)
    }

    /// Declares a parameterless, non-variadic function called `name` that
    /// returns `return_type`.
    pub fn create_func<T: Into<String>>(
        &mut self,
        name: T,
        return_type: IRValueType,
    ) -> IRFunction {
        unsafe {
            let mut param_types = [];
            let llvm_return_type = return_type.get_llvm_type(self);
            let func_type = LLVMFunctionType(
                llvm_return_type,
                param_types.as_mut_ptr(),
                param_types.len() as u32,
                0,
            );
            let func_name = into_cstring(name);
            let func_ref = LLVMAddFunction(self.module, func_name.as_ptr(), func_type);
            IRFunction {
                value: IRValue(return_type, func_ref),
            }
        }
    }

    /// Returns the module's textual IR.
    // NOTE(review): the buffer returned by `LLVMPrintModuleToString` is never
    // disposed here; fixing that needs an owned return type.
    pub fn print_to_string(&mut self) -> Result<&str, std::str::Utf8Error> {
        unsafe {
            let printed = LLVMPrintModuleToString(self.module);
            CStr::from_ptr(printed).to_str()
        }
    }
}

impl Drop for IRModule {
    /// Disposes the builder, then the module, then the context.
    fn drop(&mut self) {
        unsafe {
            LLVMDisposeBuilder(self.builder);
            LLVMDisposeModule(self.module);
            LLVMContextDispose(self.context);
        }
    }
}
#[derive(Clone, Debug)]
pub struct IRFunction {
value: IRValue,
}
impl IRFunction {
pub fn add_definition(self, ret: IRValue, block: IRBlock) {
unsafe {
LLVMAppendExistingBasicBlock(self.value.1, block.blockref);
LLVMBuildRet(block.module.builder, ret.1);
}
}
}
pub struct IRBlock<'a> {
module: &'a mut IRModule,
blockref: LLVMBasicBlockRef,
}
impl<'a> IRBlock<'a> {
fn create<T: Into<String>>(name: T, codegen: &'a mut IRModule) -> IRBlock<'a> {
unsafe {
let blockref =
LLVMCreateBasicBlockInContext(codegen.context, into_cstring(name).as_ptr());
LLVMPositionBuilderAtEnd(codegen.builder, blockref);
IRBlock {
module: codegen,
blockref,
}
}
}
pub fn get_const(&mut self, literal_type: &Literal) -> IRValue {
unsafe {
match *literal_type {
Literal::I32(v) => IRValue(
IRValueType::I32,
LLVMConstInt(
LLVMInt32TypeInContext(self.module.context),
mem::transmute(v as i64),
1,
),
),
}
}
}
pub fn add(&mut self, lhs: IRValue, rhs: IRValue) -> Result<IRValue, Error> {
unsafe {
if lhs.0 == rhs.0 {
Ok(IRValue(
lhs.0,
LLVMBuildAdd(self.module.builder, lhs.1, rhs.1, cstr!("tmpadd")),
))
} else {
Err(Error::TypeMismatch(lhs.0, rhs.0))
}
}
}
pub fn mul(&mut self, lhs: IRValue, rhs: IRValue) -> Result<IRValue, Error> {
unsafe {
if lhs.0 == rhs.0 {
Ok(IRValue(
lhs.0,
LLVMBuildMul(self.module.builder, lhs.1, rhs.1, cstr!("tmpadd")),
))
} else {
Err(Error::TypeMismatch(lhs.0, rhs.0))
}
}
}
pub fn function_call(&mut self, callee: &FunctionSignature) -> Result<IRValue, Error> {
unsafe {
let function = LLVMGetNamedFunction(
self.module.module,
into_cstring(callee.name.clone()).as_ptr(),
);
let ret_t = LLVMInt32TypeInContext(self.module.context);
let mut argts = [];
let mut args = [];
let fun_t = LLVMFunctionType(ret_t, argts.as_mut_ptr(), argts.len() as u32, 0);
let call = LLVMBuildCall2(
self.module.builder,
fun_t,
function,
args.as_mut_ptr(),
args.len() as u32,
into_cstring(&callee.name).as_ptr(),
);
Ok(IRValue(IRValueType::I32, call))
}
}
}
/// Errors raised by the IR-building layer.
#[derive(Debug, thiserror::Error)]
pub enum Error {
/// The two operands of a binary operation have different value types.
#[error("Type Mismatch: {0:?} {1:?}")]
TypeMismatch(IRValueType, IRValueType),
}